pub extern crate pulldown_cmark;
pub extern crate serde_yaml;
#[macro_use]
extern crate lazy_static;
mod context;
mod frontmatter;
mod references;
mod walker;
pub use context::Context;
pub use frontmatter::{Frontmatter, FrontmatterStrategy};
pub use walker::{vault_contents, WalkOptions};
use frontmatter::{frontmatter_from_str, frontmatter_to_str};
use pathdiff::diff_paths;
use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag};
use pulldown_cmark_to_cmark::cmark_with_options;
use rayon::prelude::*;
use references::*;
use slug::slugify;
use snafu::{ResultExt, Snafu};
use std::ffi::OsString;
use std::fmt;
use std::fs::{self, File};
use std::io::prelude::*;
use std::io::ErrorKind;
use std::path::{Path, PathBuf};
use std::str;
/// A sequence of `pulldown_cmark` events representing (part of) a Markdown document.
pub type MarkdownEvents<'a> = Vec<Event<'a>>;
/// Signature of a postprocessor callback.
///
/// A postprocessor receives the [`Context`] and the [`MarkdownEvents`] of the note being
/// exported and returns (possibly modified) versions of both, together with a
/// [`PostprocessorResult`] that decides how processing of the note continues.
/// Registered postprocessors are invoked in the order in which they were added.
pub type Postprocessor =
dyn Fn(Context, MarkdownEvents) -> (Context, MarkdownEvents, PostprocessorResult) + Send + Sync;
/// Crate-local result alias whose error type defaults to [`ExportError`].
type Result<T, E = ExportError> = std::result::Result<T, E>;
/// Characters that are percent-encoded when building link targets: the ASCII control
/// characters plus space, `(`, `)`, `%` and `?`.
const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?');
/// Maximum note depth accepted while parsing notes; exceeding it yields
/// [`ExportError::RecursionLimitExceeded`].
const NOTE_RECURSION_LIMIT: usize = 10;
/// Errors that can be returned while exporting a vault or a single note.
#[non_exhaustive]
#[derive(Debug, Snafu)]
pub enum ExportError {
/// Reading the file at `path` failed.
#[snafu(display("failed to read from '{}'", path.display()))]
ReadError {
path: PathBuf,
source: std::io::Error,
},
/// Creating, copying to, or writing the file at `path` failed.
#[snafu(display("failed to write to '{}'", path.display()))]
WriteError {
path: PathBuf,
source: std::io::Error,
},
/// Walking the directory tree at `path` failed.
// NOTE(review): not constructed in this file; presumably raised by the `walker` module.
#[snafu(display("Encountered an error while trying to walk '{}'", path.display()))]
WalkDirError {
path: PathBuf,
source: ignore::Error,
},
/// A required source or destination path does not exist.
#[snafu(display("No such file or directory: {}", path.display()))]
PathDoesNotExist { path: PathBuf },
/// Input could not be decoded as UTF-8.
#[snafu(display("Invalid character encoding encountered"))]
CharacterEncodingError { source: str::Utf8Error },
/// Note nesting went deeper than the recursion limit. `file_tree` holds the
/// file tree reported by the context at the moment the limit was hit.
#[snafu(display("Recursion limit exceeded"))]
RecursionLimitExceeded { file_tree: Vec<PathBuf> },
/// Exporting the file at `path` failed; `source` carries the underlying error.
#[snafu(display("Failed to export '{}'", path.display()))]
FileExportError {
path: PathBuf,
#[snafu(source(from(ExportError, Box::new)))]
source: Box<ExportError>,
},
/// The YAML frontmatter of the note at `path` could not be decoded.
#[snafu(display("Failed to decode YAML frontmatter in '{}'", path.display()))]
FrontMatterDecodeError {
path: PathBuf,
#[snafu(source(from(serde_yaml::Error, Box::new)))]
source: Box<serde_yaml::Error>,
},
/// The frontmatter for the note at `path` could not be encoded as YAML.
#[snafu(display("Failed to encode YAML frontmatter for '{}'", path.display()))]
FrontMatterEncodeError {
path: PathBuf,
#[snafu(source(from(serde_yaml::Error, Box::new)))]
source: Box<serde_yaml::Error>,
},
}
/// Tells the exporter how to proceed after a postprocessor has run on a note.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PostprocessorResult {
/// Carry on with the next postprocessor (if any) and then write the note.
Continue,
/// Skip the remaining postprocessors, but still write the note.
StopHere,
/// Skip the remaining postprocessors and do not write the note at all.
StopAndSkipNote,
}
/// Exports an Obsidian vault (or a single note) from `root` to `destination`.
///
/// Configure it through the builder-style setters and start the export with `run`.
#[derive(Clone)]
pub struct Exporter<'a> {
// Source path: either a vault directory or a single file.
root: PathBuf,
// Path the export is written to.
destination: PathBuf,
// Controls whether frontmatter is written to exported notes.
frontmatter_strategy: FrontmatterStrategy,
// Files that references are resolved against; `None` until `run` populates it.
vault_contents: Option<Vec<PathBuf>>,
// Options passed to `vault_contents` when listing the files under `root`.
walk_options: WalkOptions<'a>,
// When false, an embed of a file already present in the current file tree is
// replaced by a link instead of being embedded again.
process_embeds_recursively: bool,
// Callbacks applied, in order, to every parsed Markdown note before it is written.
postprocessors: Vec<&'a Postprocessor>,
}
/// Manual `Debug` implementation: postprocessors are closures without a `Debug`
/// representation, so only the number of registered postprocessors is shown.
impl<'a> fmt::Debug for Exporter<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // The struct must be labelled with its own name; it was previously
        // mislabelled as "WalkOptions".
        f.debug_struct("Exporter")
            .field("root", &self.root)
            .field("destination", &self.destination)
            .field("frontmatter_strategy", &self.frontmatter_strategy)
            .field("vault_contents", &self.vault_contents)
            .field("walk_options", &self.walk_options)
            .field(
                "process_embeds_recursively",
                &self.process_embeds_recursively,
            )
            .field(
                "postprocessors",
                &format!("<{} postprocessors active>", self.postprocessors.len()),
            )
            .finish()
    }
}
impl<'a> Exporter<'a> {
/// Create a new exporter which reads notes from `source` and exports them to `destination`.
///
/// Defaults: `FrontmatterStrategy::Auto`, default `WalkOptions`, recursive embed
/// processing enabled, and no postprocessors.
pub fn new(source: PathBuf, destination: PathBuf) -> Exporter<'a> {
Exporter {
root: source,
destination,
frontmatter_strategy: FrontmatterStrategy::Auto,
walk_options: WalkOptions::default(),
process_embeds_recursively: true,
// Filled in by `run`.
vault_contents: None,
postprocessors: vec![],
}
}
/// Set the [`WalkOptions`] used when listing the files of the vault.
///
/// Returns `self` to allow chaining.
pub fn walk_options(&mut self, options: WalkOptions<'a>) -> &mut Exporter<'a> {
self.walk_options = options;
self
}
/// Set the [`FrontmatterStrategy`] that decides whether frontmatter is written to exported notes.
///
/// Returns `self` to allow chaining.
pub fn frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Exporter<'a> {
self.frontmatter_strategy = strategy;
self
}
/// Set whether embeds are processed recursively (the default is `true`).
///
/// When set to `false`, an embed whose target is already part of the current
/// file tree is replaced by "→ " followed by a link to that file instead of
/// being embedded again.
///
/// Returns `self` to allow chaining.
pub fn process_embeds_recursively(&mut self, recursive: bool) -> &mut Exporter<'a> {
self.process_embeds_recursively = recursive;
self
}
/// Append a postprocessor; postprocessors run in the order in which they were added.
///
/// Returns `self` to allow chaining.
pub fn add_postprocessor(&mut self, processor: &'a Postprocessor) -> &mut Exporter<'a> {
self.postprocessors.push(processor);
self
}
/// Export the file or vault at `root` to `destination`.
///
/// * If `root` is a single file, only that file is exported. `destination` may be
///   an existing directory (the source file name is kept) or a file path whose
///   parent directory already exists.
/// * Otherwise `root` is treated as a vault: every file returned by
///   `vault_contents` is exported, in parallel, to the same relative location
///   below `destination`, which must already exist.
///
/// # Errors
///
/// Returns [`ExportError::PathDoesNotExist`] when `root`, `destination` (vault
/// export) or the parent of `destination` (single-file export) is missing, and
/// otherwise propagates any error raised while walking the vault or exporting a file.
pub fn run(&mut self) -> Result<()> {
    if !self.root.exists() {
        return Err(ExportError::PathDoesNotExist {
            path: self.root.clone(),
        });
    }

    // Single-file export: the "vault" consists of just that one file.
    if self.root.is_file() {
        self.vault_contents = Some(vec![self.root.clone()]);
        let source_filename = self
            .root
            .file_name()
            .expect("File without a filename? How is that possible?")
            .to_string_lossy();

        let destination = if self.destination.is_dir() {
            self.destination.join(String::from(source_filename))
        } else {
            let parent = self.destination.parent().unwrap_or(&self.destination);
            // Refuse to implicitly create missing directories for a single-file
            // export. A bare relative file name such as `out.md` has an *empty*
            // parent, which denotes the current directory and must not be
            // reported as missing.
            if !parent.as_os_str().is_empty() && !parent.exists() {
                return Err(ExportError::PathDoesNotExist {
                    path: parent.to_path_buf(),
                });
            }
            self.destination.clone()
        };
        return self.export_note(&self.root, &destination);
    }

    if !self.destination.exists() {
        return Err(ExportError::PathDoesNotExist {
            path: self.destination.clone(),
        });
    }

    self.vault_contents = Some(vault_contents(
        self.root.as_path(),
        self.walk_options.clone(),
    )?);

    // Only shared access to `self` is needed from here on, so the file list can
    // be iterated by reference instead of being cloned first.
    self.vault_contents
        .as_ref()
        .unwrap()
        .par_iter()
        .try_for_each(|file| {
            let relative_path = file
                .strip_prefix(&self.root)
                .expect("file should always be nested under root");
            let destination = self.destination.join(relative_path);
            self.export_note(file, &destination)
        })?;
    Ok(())
}
/// Export a single file from `src` to `dest`.
///
/// Markdown files are parsed and converted; every other file is copied verbatim.
/// Any failure is wrapped in a `FileExportError` carrying `src`.
fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
    let outcome = if is_markdown_file(src) {
        self.parse_and_export_obsidian_note(src, dest)
    } else {
        copy_file(src, dest)
    };
    outcome.context(FileExportError { path: src })
}
/// Parse the Obsidian note at `src`, run all postprocessors on it and write the
/// resulting Markdown to the destination recorded in the note's context.
///
/// The destination starts out as `dest`, but the context passes through the
/// postprocessors and the value it holds afterwards is the one used. If a
/// postprocessor returns `StopAndSkipNote`, nothing is written.
fn parse_and_export_obsidian_note(&self, src: &Path, dest: &Path) -> Result<()> {
    let mut context = Context::new(src.to_path_buf(), dest.to_path_buf());
    let (frontmatter, mut markdown_events) = self.parse_obsidian_note(src, &context)?;
    context.frontmatter = frontmatter;

    for postprocess in &self.postprocessors {
        let (next_context, next_events, verdict) = postprocess(context, markdown_events);
        context = next_context;
        markdown_events = next_events;
        match verdict {
            PostprocessorResult::Continue => {}
            PostprocessorResult::StopHere => break,
            PostprocessorResult::StopAndSkipNote => return Ok(()),
        }
    }

    let dest = context.destination;
    let mut outfile = create_file(&dest)?;

    let should_write_frontmatter = match self.frontmatter_strategy {
        FrontmatterStrategy::Always => true,
        FrontmatterStrategy::Never => false,
        // In automatic mode frontmatter is only emitted when there is any.
        FrontmatterStrategy::Auto => !context.frontmatter.is_empty(),
    };
    if should_write_frontmatter {
        let mut serialized = frontmatter_to_str(context.frontmatter)
            .context(FrontMatterEncodeError { path: src })?;
        serialized.push('\n');
        outfile
            .write_all(serialized.as_bytes())
            .context(WriteError { path: &dest })?;
    }

    let rendered = render_mdevents_to_mdtext(markdown_events);
    outfile
        .write_all(rendered.as_bytes())
        .context(WriteError { path: &dest })?;
    Ok(())
}
/// Read the note at `path` and return its frontmatter together with its Markdown
/// events, with Obsidian references (`[[...]]`) and embeds (`![[...]]`) replaced by
/// the events produced by `make_link_to_file` and `embed_file` respectively.
///
/// Fails with `RecursionLimitExceeded` when the note depth of `context` is above
/// `NOTE_RECURSION_LIMIT`, with `ReadError` when the file cannot be read, with
/// `FrontMatterDecodeError` when the frontmatter cannot be decoded, and propagates
/// any error from embedding another file.
fn parse_obsidian_note<'b>(
&self,
path: &Path,
context: &Context,
) -> Result<(Frontmatter, MarkdownEvents<'b>)> {
if context.note_depth() > NOTE_RECURSION_LIMIT {
return Err(ExportError::RecursionLimitExceeded {
file_tree: context.file_tree(),
});
}
let content = fs::read_to_string(&path).context(ReadError { path })?;
// Split off the frontmatter; when `matter` returns nothing the whole file is
// treated as content with empty frontmatter.
let (frontmatter, content) =
matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
let frontmatter =
frontmatter_from_str(&frontmatter).context(FrontMatterDecodeError { path })?;
let mut parser_options = Options::empty();
parser_options.insert(Options::ENABLE_TABLES);
parser_options.insert(Options::ENABLE_FOOTNOTES);
parser_options.insert(Options::ENABLE_STRIKETHROUGH);
parser_options.insert(Options::ENABLE_TASKLISTS);
let mut ref_parser = RefParser::new();
let mut events = vec![];
// Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
let mut buffer = Vec::with_capacity(5);
// Events that might be part of a reference are held back in `buffer` until the
// state machine below either recognizes a complete reference (the buffered
// events are then dropped and replaced) or gives up (the buffered events are
// then passed through unchanged).
for event in Parser::new_ext(&content, parser_options) {
// The previous event ended an attempt to match a reference: flush whatever is
// still buffered and start from a clean state.
if ref_parser.state == RefParserState::Resetting {
events.append(&mut buffer);
buffer.clear();
ref_parser.reset();
}
buffer.push(event.clone());
match ref_parser.state {
RefParserState::NoState => {
match event {
// "![" may start an embed, "[" may start a link.
Event::Text(CowStr::Borrowed("![")) => {
ref_parser.ref_type = Some(RefType::Embed);
ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
}
Event::Text(CowStr::Borrowed("[")) => {
ref_parser.ref_type = Some(RefType::Link);
ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
}
// Anything else cannot start a reference and is emitted right away.
_ => {
events.push(event);
buffer.clear();
},
};
}
RefParserState::ExpectSecondOpenBracket => match event {
Event::Text(CowStr::Borrowed("[")) => {
ref_parser.transition(RefParserState::ExpectRefText);
}
_ => {
ref_parser.transition(RefParserState::Resetting);
}
},
RefParserState::ExpectRefText => match event {
// A closing bracket right after "[[" means there is no reference text.
Event::Text(CowStr::Borrowed("]")) => {
ref_parser.transition(RefParserState::Resetting);
}
Event::Text(text) => {
ref_parser.ref_text.push_str(&text);
ref_parser.transition(RefParserState::ExpectRefTextOrCloseBracket);
}
_ => {
ref_parser.transition(RefParserState::Resetting);
}
},
RefParserState::ExpectRefTextOrCloseBracket => match event {
Event::Text(CowStr::Borrowed("]")) => {
ref_parser.transition(RefParserState::ExpectFinalCloseBracket);
}
// The reference text may be spread over several text events.
Event::Text(text) => {
ref_parser.ref_text.push_str(&text);
}
_ => {
ref_parser.transition(RefParserState::Resetting);
}
},
RefParserState::ExpectFinalCloseBracket => match event {
// Complete reference found: replace the buffered events with the generated
// link or embed.
Event::Text(CowStr::Borrowed("]")) => match ref_parser.ref_type {
Some(RefType::Link) => {
let mut elements = self.make_link_to_file(
ObsidianNoteReference::from_str(
ref_parser.ref_text.clone().as_ref()
),
context,
);
events.append(&mut elements);
buffer.clear();
ref_parser.transition(RefParserState::Resetting);
}
Some(RefType::Embed) => {
let mut elements = self.embed_file(
ref_parser.ref_text.clone().as_ref(),
context
)?;
events.append(&mut elements);
buffer.clear();
ref_parser.transition(RefParserState::Resetting);
}
None => panic!("In state ExpectFinalCloseBracket but ref_type is None"),
},
_ => {
ref_parser.transition(RefParserState::Resetting);
}
},
RefParserState::Resetting => panic!("Reached Resetting state, but it should have been handled prior to this match block"),
}
}
// Emit events of a reference that was still incomplete when the input ended.
if !buffer.is_empty() {
events.append(&mut buffer);
}
// Convert to owned events so that they do not borrow from the local `content`.
Ok((
frontmatter,
events.into_iter().map(event_to_owned).collect(),
))
}
fn embed_file<'b>(
&self,
link_text: &'a str,
context: &'a Context,
) -> Result<MarkdownEvents<'b>> {
let note_ref = ObsidianNoteReference::from_str(link_text);
let path = match note_ref.file {
Some(file) => lookup_filename_in_vault(file, &self.vault_contents.as_ref().unwrap()),
None => return Ok(self.make_link_to_file(note_ref, &context)),
};
if path.is_none() {
eprintln!(
"Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n",
note_ref
.file
.unwrap_or_else(|| context.current_file().to_str().unwrap()),
context.current_file().display(),
);
return Ok(vec![]);
}
let path = path.unwrap();
let child_context = Context::from_parent(context, path);
let no_ext = OsString::new();
if !self.process_embeds_recursively && context.file_tree().contains(path) {
return Ok([
vec![Event::Text(CowStr::Borrowed("→ "))],
self.make_link_to_file(note_ref, &child_context),
]
.concat());
}
let events = match path.extension().unwrap_or(&no_ext).to_str() {
Some("md") => {
let (_frontmatter, mut events) = self.parse_obsidian_note(&path, &child_context)?;
if let Some(section) = note_ref.section {
events = reduce_to_section(events, section);
}
events
}
Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") => {
self.make_link_to_file(note_ref, &child_context)
.into_iter()
.map(|event| match event {
Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => {
Event::Start(Tag::Image(
linktype,
CowStr::from(cowstr1.into_string()),
CowStr::from(cowstr2.into_string()),
))
}
Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => {
Event::End(Tag::Image(
linktype,
CowStr::from(cowstr1.into_string()),
CowStr::from(cowstr2.into_string()),
))
}
_ => event,
})
.collect()
}
_ => self.make_link_to_file(note_ref, &child_context),
};
Ok(events)
}
/// Build the events for a Markdown link pointing at the file named by `reference`.
///
/// A reference without a file part points at the current file of `context`.
/// The link target is the path of the referenced file relative to the directory
/// of the context's root file, percent-encoded, with the slugified section
/// appended as a fragment when the reference names one.
///
/// When the referenced file cannot be found in the vault, a warning is printed
/// and the reference text is returned as emphasized text instead of a link.
fn make_link_to_file<'b, 'c>(
    &self,
    reference: ObsidianNoteReference<'b>,
    context: &Context,
) -> MarkdownEvents<'c> {
    let lookup = match reference.file {
        Some(file) => lookup_filename_in_vault(file, &self.vault_contents.as_ref().unwrap()),
        None => Some(context.current_file()),
    };
    let target_file = match lookup {
        Some(found) => found,
        None => {
            eprintln!(
                "Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
                reference
                    .file
                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
                context.current_file().display(),
            );
            return vec![
                Event::Start(Tag::Emphasis),
                Event::Text(CowStr::from(reference.display())),
                Event::End(Tag::Emphasis),
            ];
        }
    };

    // Links are made relative to the directory containing the root file.
    let link_base = context
        .root_file()
        .parent()
        .expect("obsidian content files should always have a parent");
    let relative_target = diff_paths(target_file, link_base)
        .expect("should be able to build relative path when target file is found in vault");
    let relative_target = relative_target.to_string_lossy();

    let mut link = utf8_percent_encode(&relative_target, PERCENTENCODE_CHARS).to_string();
    if let Some(section) = reference.section {
        link.push('#');
        link.push_str(&slugify(section));
    }

    let link_tag = Tag::Link(
        pulldown_cmark::LinkType::Inline,
        CowStr::from(link),
        CowStr::from(""),
    );
    vec![
        Event::Start(link_tag.clone()),
        Event::Text(CowStr::from(reference.display())),
        Event::End(link_tag),
    ]
}
}
/// Find the first path in `vault_contents` that `filename` refers to.
///
/// A path matches when its trailing components equal `filename` or
/// `filename` + `.md`, either exactly or case-insensitively. Matching is done per
/// path component, so `"note"` matches `dir/note.md` but not `dir/keynote.md`.
///
/// Returns `None` when no path matches.
fn lookup_filename_in_vault<'a>(
    filename: &str,
    vault_contents: &'a [PathBuf],
) -> Option<&'a PathBuf> {
    // The candidate spellings only depend on `filename`, so build them once
    // instead of once per vault entry.
    let filename_md = format!("{}.md", filename);
    let filename_lowered = filename.to_lowercase();
    let filename_md_lowered = format!("{}.md", filename_lowered);

    vault_contents.iter().find(|path| {
        if path.ends_with(filename) || path.ends_with(&filename_md) {
            return true;
        }
        // Only pay for lowercasing the path when the exact comparisons failed.
        let path_lowered = PathBuf::from(path.to_string_lossy().to_lowercase());
        path_lowered.ends_with(&filename_lowered) || path_lowered.ends_with(&filename_md_lowered)
    })
}
/// Render Markdown events back into Markdown text, terminated by a newline.
fn render_mdevents_to_mdtext(markdown: MarkdownEvents) -> String {
    let mut rendered = String::new();
    let render_options = pulldown_cmark_to_cmark::Options::default();
    cmark_with_options(markdown.iter(), &mut rendered, None, render_options)
        .expect("formatting to string not expected to fail");
    rendered.push('\n');
    rendered
}
/// Create (or truncate) the file at `dest`, creating missing parent directories on demand.
///
/// # Errors
///
/// Returns a `WriteError` for `dest` when the file or its parent directories
/// cannot be created.
fn create_file(dest: &Path) -> Result<File> {
    let file = File::create(dest)
        .or_else(|err| {
            // Only a missing parent directory can be fixed here; every other
            // failure is reported as-is instead of being retried.
            if err.kind() != ErrorKind::NotFound {
                return Err(err);
            }
            match dest.parent() {
                Some(parent) => std::fs::create_dir_all(parent)?,
                // Nothing to create for a path without a parent: surface the
                // original error rather than panicking.
                None => return Err(err),
            }
            File::create(dest)
        })
        .context(WriteError { path: dest })?;
    Ok(file)
}
/// Copy `src` to `dest`, creating missing parent directories of `dest` on demand.
///
/// # Errors
///
/// Returns a `WriteError` for `dest` when the copy fails or the parent
/// directories cannot be created.
fn copy_file(src: &Path, dest: &Path) -> Result<()> {
    std::fs::copy(src, dest)
        .or_else(|err| {
            // Only a missing parent directory can be fixed here; every other
            // failure is reported as-is instead of being retried.
            if err.kind() != ErrorKind::NotFound {
                return Err(err);
            }
            match dest.parent() {
                Some(parent) => std::fs::create_dir_all(parent)?,
                // Nothing to create for a path without a parent: surface the
                // original error rather than panicking.
                None => return Err(err),
            }
            std::fs::copy(src, dest)
        })
        .context(WriteError { path: dest })?;
    Ok(())
}
/// Report whether `file` has the (case-sensitive) extension `md`.
fn is_markdown_file(file: &Path) -> bool {
    // A missing or non-UTF-8 extension can never equal "md".
    file.extension().and_then(|ext| ext.to_str()) == Some("md")
}
/// Reduce `events` to the section whose heading text equals `section`, compared
/// case-insensitively.
///
/// The result starts at the heading of the matching section and ends right before
/// the next heading of the same or a higher level (i.e. with a level number less
/// than or equal to that of the matched heading). When no heading matches, all
/// events are returned unchanged.
///
/// Only the first text event following the start of a heading is compared
/// against `section`.
fn reduce_to_section<'a, 'b>(events: MarkdownEvents<'a>, section: &'b str) -> MarkdownEvents<'a> {
let mut filtered_events = Vec::with_capacity(events.len());
let mut target_section_encountered = false;
let mut currently_in_target_section = false;
let mut section_level = 0;
let mut last_level = 0;
let mut last_tag_was_heading = false;
for event in events.into_iter() {
// Every event is kept provisionally; the branches below remove it again where needed.
filtered_events.push(event.clone());
match event {
Event::Start(Tag::Heading(level)) => {
last_tag_was_heading = true;
last_level = level;
// A heading of the same or a higher level ends the target section; the
// heading itself is not part of the result.
if currently_in_target_section && level <= section_level {
currently_in_target_section = false;
filtered_events.pop();
}
}
Event::Text(cowstr) => {
// Only text directly following the start of a heading can name a section.
if !last_tag_was_heading {
last_tag_was_heading = false;
continue;
}
last_tag_was_heading = false;
if cowstr.to_string().to_lowercase() == section.to_lowercase() {
target_section_encountered = true;
currently_in_target_section = true;
section_level = last_level;
// Discard everything collected before the target section, keeping only
// the heading start event and the heading text.
let current_event = filtered_events.pop().unwrap();
let heading_start_event = filtered_events.pop().unwrap();
filtered_events.clear();
filtered_events.push(heading_start_event);
filtered_events.push(current_event);
}
}
_ => {}
}
// The target section has been seen and has ended: nothing more to collect.
if target_section_encountered && !currently_in_target_section {
return filtered_events;
}
}
filtered_events
}
fn event_to_owned<'a>(event: Event) -> Event<'a> {
match event {
Event::Start(tag) => Event::Start(tag_to_owned(tag)),
Event::End(tag) => Event::End(tag_to_owned(tag)),
Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())),
Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())),
Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())),
Event::FootnoteReference(cowstr) => {
Event::FootnoteReference(CowStr::from(cowstr.into_string()))
}
Event::SoftBreak => Event::SoftBreak,
Event::HardBreak => Event::HardBreak,
Event::Rule => Event::Rule,
Event::TaskListMarker(checked) => Event::TaskListMarker(checked),
}
}
fn tag_to_owned<'a>(tag: Tag) -> Tag<'a> {
match tag {
Tag::Paragraph => Tag::Paragraph,
Tag::Heading(level) => Tag::Heading(level),
Tag::BlockQuote => Tag::BlockQuote,
Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)),
Tag::List(optional) => Tag::List(optional),
Tag::Item => Tag::Item,
Tag::FootnoteDefinition(cowstr) => {
Tag::FootnoteDefinition(CowStr::from(cowstr.into_string()))
}
Tag::Table(alignment_vector) => Tag::Table(alignment_vector),
Tag::TableHead => Tag::TableHead,
Tag::TableRow => Tag::TableRow,
Tag::TableCell => Tag::TableCell,
Tag::Emphasis => Tag::Emphasis,
Tag::Strong => Tag::Strong,
Tag::Strikethrough => Tag::Strikethrough,
Tag::Link(linktype, cowstr1, cowstr2) => Tag::Link(
linktype,
CowStr::from(cowstr1.into_string()),
CowStr::from(cowstr2.into_string()),
),
Tag::Image(linktype, cowstr1, cowstr2) => Tag::Image(
linktype,
CowStr::from(cowstr1.into_string()),
CowStr::from(cowstr2.into_string()),
),
}
}
/// Convert `codeblock_kind` into an equivalent value that owns its string data.
fn codeblock_kind_to_owned<'a>(codeblock_kind: CodeBlockKind) -> CodeBlockKind<'a> {
    if let CodeBlockKind::Fenced(info) = codeblock_kind {
        // Re-allocate the fence info string so it no longer borrows from the input.
        return CodeBlockKind::Fenced(CowStr::from(info.into_string()));
    }
    CodeBlockKind::Indented
}