c6o_obsidian_export/
lib.rs

1pub extern crate pulldown_cmark;
2pub extern crate serde_yaml;
3
4#[macro_use]
5extern crate lazy_static;
6
7mod context;
8mod frontmatter;
9mod references;
10mod walker;
11
12pub use context::Context;
13pub use frontmatter::{Frontmatter, FrontmatterStrategy};
14pub use walker::{vault_contents, WalkOptions};
15
16use frontmatter::{frontmatter_from_str, frontmatter_to_str};
17use pathdiff::diff_paths;
18use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
19use pulldown_cmark::{CodeBlockKind, CowStr, Event, Options, Parser, Tag};
20use pulldown_cmark_to_cmark::cmark_with_options;
21use rayon::prelude::*;
22use references::*;
23use slug::slugify;
24use snafu::{ResultExt, Snafu};
25use std::ffi::OsString;
26use std::fmt;
27use std::fs::{self, File};
28use std::io::prelude::*;
29use std::io::ErrorKind;
30use std::path::{Path, PathBuf};
31use std::str;
32
33/// A series of markdown [Event]s that are generated while traversing an Obsidian markdown note.
34pub type MarkdownEvents<'a> = Vec<Event<'a>>;
35
36/// A post-processing function that is to be called after an Obsidian note has been fully parsed and
37/// converted to regular markdown syntax.
38///
39/// Postprocessors are called in the order they've been added through [Exporter::add_postprocessor]
40/// just before notes are written out to their final destination.
41/// They may be used to achieve the following:
42///
43/// 1. Modify a note's [Context], for example to change the destination filename or update its [Frontmatter] (see [Context::frontmatter]).
44/// 2. Change a note's contents by altering [MarkdownEvents].
45/// 3. Prevent later postprocessors from running ([PostprocessorResult::StopHere]) or cause a note
46///    to be skipped entirely ([PostprocessorResult::StopAndSkipNote]).
47///
48/// # Postprocessors and embeds
49///
50/// Postprocessors normally run at the end of the export phase, once notes have been fully parsed.
51/// This means that any embedded notes have been resolved and merged into the final note already.
52///
53/// In some cases it may be desirable to change the contents of these embedded notes *before* they
54/// are inserted into the final document. This is possible through the use of
55/// [Exporter::add_embed_postprocessor].
56/// These "embed postprocessors" run much the same way as regular postprocessors, but they're run on
57/// the note that is about to be embedded in another note. In addition:
58///
59/// - Changes to context carry over to later embed postprocessors, but are then discarded. This
60///   means that changes to frontmatter do not propagate to the root note for example.
61/// - [PostprocessorResult::StopAndSkipNote] prevents the embedded note from being included (it's
62///   replaced with a blank document) but doesn't affect the root note.
63///
64/// It's possible to pass the same functions to [Exporter::add_postprocessor] and
65/// [Exporter::add_embed_postprocessor]. The [Context::note_depth] method may be used to determine
66/// whether a note is a root note or an embedded note in this situation.
67///
68/// # Examples
69///
70/// ## Update frontmatter
71///
/// This example shows how to make changes to a note's frontmatter. In this case, the postprocessor is
73/// defined inline as a closure.
74///
75/// ```
76/// use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
77/// use obsidian_export::pulldown_cmark::{CowStr, Event};
78/// use obsidian_export::serde_yaml::Value;
79/// # use std::path::PathBuf;
80/// # use tempfile::TempDir;
81///
82/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
83/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
84/// # let destination = tmp_dir.path().to_path_buf();
85/// let mut exporter = Exporter::new(source, destination);
86///
87/// // add_postprocessor registers a new postprocessor. In this example we use a closure.
88/// exporter.add_postprocessor(&|mut context, events| {
89///     // This is the key we'll insert into the frontmatter. In this case, the string "foo".
90///     let key = Value::String("foo".to_string());
///     // This is the value we'll insert into the frontmatter. In this case, the string "baz".
92///     let value = Value::String("baz".to_string());
93///
94///     // Frontmatter can be updated in-place, so we can call insert on it directly.
95///     context.frontmatter.insert(key, value);
96///
97///     // Postprocessors must return their (modified) context, the markdown events that make
98///     // up the note and a next action to take.
99///     (context, events, PostprocessorResult::Continue)
100/// });
101///
102/// exporter.run().unwrap();
103/// ```
104///
105/// ## Change note contents
106///
107/// In this example a note's markdown content is changed by iterating over the [MarkdownEvents] and
108/// changing the text when we encounter a [text element][Event::Text].
109///
110/// Instead of using a closure like above, this example shows how to use a separate function
111/// definition.
112/// ```
113/// # use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
114/// # use pulldown_cmark::{CowStr, Event};
115/// # use std::path::PathBuf;
116/// # use tempfile::TempDir;
117/// #
118/// /// This postprocessor replaces any instance of "foo" with "bar" in the note body.
119/// fn foo_to_bar(
120///     context: Context,
121///     events: MarkdownEvents,
122/// ) -> (Context, MarkdownEvents, PostprocessorResult) {
123///     let events = events
124///         .into_iter()
125///         .map(|event| match event {
126///             Event::Text(text) => Event::Text(CowStr::from(text.replace("foo", "bar"))),
127///             event => event,
128///         })
129///         .collect();
130///     (context, events, PostprocessorResult::Continue)
131/// }
132///
133/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
134/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
135/// # let destination = tmp_dir.path().to_path_buf();
136/// # let mut exporter = Exporter::new(source, destination);
137/// exporter.add_postprocessor(&foo_to_bar);
138/// # exporter.run().unwrap();
139/// ```
140
pub type Postprocessor =
    dyn Fn(Context, MarkdownEvents) -> (Context, MarkdownEvents, PostprocessorResult) + Send + Sync;
/// Crate-internal result alias whose error type defaults to [ExportError].
type Result<T, E = ExportError> = std::result::Result<T, E>;
144
/// Characters that get percent-encoded when a relative link to another file is generated: the
/// ASCII control characters plus space, `(`, `)`, `%` and `?`.
const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?');
/// Maximum [Context::note_depth] allowed while parsing (embedded) notes. Once the depth exceeds
/// this value, parsing fails with [ExportError::RecursionLimitExceeded].
const NOTE_RECURSION_LIMIT: usize = 10;
147
#[non_exhaustive]
#[derive(Debug, Snafu)]
/// ExportError represents all errors which may be returned when using this crate.
pub enum ExportError {
    #[snafu(display("failed to read from '{}'", path.display()))]
    /// This occurs when a read IO operation fails.
    ReadError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("failed to write to '{}'", path.display()))]
    /// This occurs when a write IO operation fails.
    WriteError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("Encountered an error while trying to walk '{}'", path.display()))]
    /// This occurs when an error is encountered while trying to walk a directory.
    WalkDirError {
        path: PathBuf,
        source: ignore::Error,
    },

    #[snafu(display("No such file or directory: {}", path.display()))]
    /// This occurs when an operation is requested on a file or directory which does not exist.
    PathDoesNotExist { path: PathBuf },

    #[snafu(display("Invalid character encoding encountered"))]
    /// This error may occur when invalid UTF8 is encountered.
    ///
    /// Currently, operations which assume UTF8 perform lossy encoding however.
    CharacterEncodingError { source: str::Utf8Error },

    #[snafu(display("Recursion limit exceeded"))]
    /// This error occurs when embedded notes are too deeply nested or cause an infinite loop.
    ///
    /// When this happens, `file_tree` contains a list of all the files which were processed
    /// leading up to this error.
    RecursionLimitExceeded { file_tree: Vec<PathBuf> },

    #[snafu(display("Failed to export '{}'", path.display()))]
    /// This occurs when a file fails to export successfully.
    ///
    /// The underlying error is boxed and available as the error's source.
    FileExportError {
        path: PathBuf,
        #[snafu(source(from(ExportError, Box::new)))]
        source: Box<ExportError>,
    },

    #[snafu(display("Failed to decode YAML frontmatter in '{}'", path.display()))]
    /// This occurs when the YAML frontmatter of the note at `path` cannot be decoded.
    FrontMatterDecodeError {
        path: PathBuf,
        #[snafu(source(from(serde_yaml::Error, Box::new)))]
        source: Box<serde_yaml::Error>,
    },

    #[snafu(display("Failed to encode YAML frontmatter for '{}'", path.display()))]
    /// This occurs when the frontmatter for the note at `path` cannot be encoded back to YAML
    /// while writing the exported note.
    FrontMatterEncodeError {
        path: PathBuf,
        #[snafu(source(from(serde_yaml::Error, Box::new)))]
        source: Box<serde_yaml::Error>,
    },
}
212
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// Emitted by [Postprocessor]s to signal the next action to take.
///
/// The enum is a plain, field-less set of actions, so it implements `Eq` in addition to
/// `PartialEq` and can be compared and copied freely.
pub enum PostprocessorResult {
    /// Continue with the next post-processor (if any).
    Continue,
    /// Use this note, but don't run any more post-processors after this one.
    StopHere,
    /// Skip this note (don't export it) and don't run any more post-processors.
    StopAndSkipNote,
}
223
#[derive(Clone)]
/// Exporter provides the main interface to this library.
///
/// Users are expected to create an Exporter using [`Exporter::new`], optionally followed by
/// customization using [`Exporter::frontmatter_strategy`] and [`Exporter::walk_options`].
///
/// After that, calling [`Exporter::run`] will start the export process.
pub struct Exporter<'a> {
    /// Location notes are read from.
    root: PathBuf,
    /// Location exported notes are written to.
    destination: PathBuf,
    /// Starting point of the export; defaults to `root` (see [`Exporter::start_at`]).
    start_at: PathBuf,
    /// Controls whether frontmatter is written to exported notes.
    frontmatter_strategy: FrontmatterStrategy,
    /// All files discovered under `root`; `None` until [`Exporter::run`] has populated it.
    vault_contents: Option<Vec<PathBuf>>,
    /// Options used when walking `root` to discover files.
    walk_options: WalkOptions<'a>,
    /// Whether a note that was already seen while processing embeds is embedded again (`true`)
    /// or replaced by a link (`false`).
    process_embeds_recursively: bool,
    /// Postprocessors applied to root notes, in registration order.
    postprocessors: Vec<&'a Postprocessor>,
    /// Postprocessors applied to embedded notes, in registration order.
    embed_postprocessors: Vec<&'a Postprocessor>,
}
242
243impl<'a> fmt::Debug for Exporter<'a> {
244    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
245        f.debug_struct("WalkOptions")
246            .field("root", &self.root)
247            .field("destination", &self.destination)
248            .field("frontmatter_strategy", &self.frontmatter_strategy)
249            .field("vault_contents", &self.vault_contents)
250            .field("walk_options", &self.walk_options)
251            .field(
252                "process_embeds_recursively",
253                &self.process_embeds_recursively,
254            )
255            .field(
256                "postprocessors",
257                &format!("<{} postprocessors active>", self.postprocessors.len()),
258            )
259            .field(
260                "embed_postprocessors",
261                &format!(
262                    "<{} postprocessors active>",
263                    self.embed_postprocessors.len()
264                ),
265            )
266            .finish()
267    }
268}
269
270impl<'a> Exporter<'a> {
271    /// Create a new exporter which reads notes from `root` and exports these to
272    /// `destination`.
273    pub fn new(root: PathBuf, destination: PathBuf) -> Exporter<'a> {
274        Exporter {
275            start_at: root.clone(),
276            root,
277            destination,
278            frontmatter_strategy: FrontmatterStrategy::Auto,
279            walk_options: WalkOptions::default(),
280            process_embeds_recursively: true,
281            vault_contents: None,
282            postprocessors: vec![],
283            embed_postprocessors: vec![],
284        }
285    }
286
287    /// Set a custom starting point for the export.
288    ///
289    /// Normally all notes under `root` (except for notes excluded by ignore rules) will be exported.
290    /// When `start_at` is set, only notes under this path will be exported to the target destination.
291    pub fn start_at(&mut self, start_at: PathBuf) -> &mut Exporter<'a> {
292        self.start_at = start_at;
293        self
294    }
295
296    /// Set the [`WalkOptions`] to be used for this exporter.
297    pub fn walk_options(&mut self, options: WalkOptions<'a>) -> &mut Exporter<'a> {
298        self.walk_options = options;
299        self
300    }
301
302    /// Set the [`FrontmatterStrategy`] to be used for this exporter.
303    pub fn frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Exporter<'a> {
304        self.frontmatter_strategy = strategy;
305        self
306    }
307
308    /// Set the behavior when recursive embeds are encountered.
309    ///
310    /// When `recursive` is true (the default), emdeds are always processed recursively. This may
311    /// lead to infinite recursion when note A embeds B, but B also embeds A.
312    /// (When this happens, [ExportError::RecursionLimitExceeded] will be returned by [Exporter::run]).
313    ///
314    /// When `recursive` is false, if a note is encountered for a second time while processing the
315    /// original note, instead of embedding it again a link to the note is inserted instead.
316    pub fn process_embeds_recursively(&mut self, recursive: bool) -> &mut Exporter<'a> {
317        self.process_embeds_recursively = recursive;
318        self
319    }
320
321    /// Append a function to the chain of [postprocessors][Postprocessor] to run on exported Obsidian Markdown notes.
322    pub fn add_postprocessor(&mut self, processor: &'a Postprocessor) -> &mut Exporter<'a> {
323        self.postprocessors.push(processor);
324        self
325    }
326
327    /// Append a function to the chain of [postprocessors][Postprocessor] for embeds.
328    pub fn add_embed_postprocessor(&mut self, processor: &'a Postprocessor) -> &mut Exporter<'a> {
329        self.embed_postprocessors.push(processor);
330        self
331    }
332
333    /// Export notes using the settings configured on this exporter.
334    pub fn run(&mut self) -> Result<()> {
335        if !self.root.exists() {
336            return Err(ExportError::PathDoesNotExist {
337                path: self.root.clone(),
338            });
339        }
340
341        self.vault_contents = Some(vault_contents(
342            self.root.as_path(),
343            self.walk_options.clone(),
344        )?);
345
346        // When a single file is specified, just need to export that specific file instead of
347        // iterating over all discovered files. This also allows us to accept destination as either
348        // a file or a directory name.
349        if self.root.is_file() || self.start_at.is_file() {
350            let source_filename = self
351                .start_at
352                .file_name()
353                .expect("File without a filename? How is that possible?")
354                .to_string_lossy();
355
356            let destination = match self.destination.is_dir() {
357                true => self.destination.join(String::from(source_filename)),
358                false => {
359                    let parent = self.destination.parent().unwrap_or(&self.destination);
360                    // Avoid recursively creating self.destination through the call to
361                    // export_note when the parent directory doesn't exist.
362                    if !parent.exists() {
363                        return Err(ExportError::PathDoesNotExist {
364                            path: parent.to_path_buf(),
365                        });
366                    }
367                    self.destination.clone()
368                }
369            };
370            return self.export_note(&self.start_at, &destination);
371        }
372
373        if !self.destination.exists() {
374            return Err(ExportError::PathDoesNotExist {
375                path: self.destination.clone(),
376            });
377        }
378        self.vault_contents
379            .as_ref()
380            .unwrap()
381            .clone()
382            .into_par_iter()
383            .filter(|file| file.starts_with(&self.start_at))
384            .try_for_each(|file| {
385                let relative_path = file
386                    .strip_prefix(&self.start_at.clone())
387                    .expect("file should always be nested under root")
388                    .to_path_buf();
389                let destination = &self.destination.join(&relative_path);
390                self.export_note(&file, destination)
391            })?;
392        Ok(())
393    }
394
395    fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
396        match is_markdown_file(src) {
397            true => self.parse_and_export_obsidian_note(src, dest),
398            false => copy_file(src, dest),
399        }
400        .context(FileExportError { path: src })
401    }
402
403    fn parse_and_export_obsidian_note(&self, src: &Path, dest: &Path) -> Result<()> {
404        let mut context = Context::new(src.to_path_buf(), dest.to_path_buf());
405
406        let (frontmatter, mut markdown_events) = self.parse_obsidian_note(src, &context)?;
407        context.frontmatter = frontmatter;
408        for func in &self.postprocessors {
409            let res = func(context, markdown_events);
410            context = res.0;
411            markdown_events = res.1;
412            match res.2 {
413                PostprocessorResult::StopHere => break,
414                PostprocessorResult::StopAndSkipNote => return Ok(()),
415                PostprocessorResult::Continue => (),
416            }
417        }
418
419        let dest = context.destination;
420        let mut outfile = create_file(&dest)?;
421        let write_frontmatter = match self.frontmatter_strategy {
422            FrontmatterStrategy::Always => true,
423            FrontmatterStrategy::Never => false,
424            FrontmatterStrategy::Auto => !context.frontmatter.is_empty(),
425        };
426        if write_frontmatter {
427            let mut frontmatter_str = frontmatter_to_str(context.frontmatter)
428                .context(FrontMatterEncodeError { path: src })?;
429            frontmatter_str.push('\n');
430            outfile
431                .write_all(frontmatter_str.as_bytes())
432                .context(WriteError { path: &dest })?;
433        }
434        outfile
435            .write_all(render_mdevents_to_mdtext(markdown_events).as_bytes())
436            .context(WriteError { path: &dest })?;
437        Ok(())
438    }
439
440    fn parse_obsidian_note<'b>(
441        &self,
442        path: &Path,
443        context: &Context,
444    ) -> Result<(Frontmatter, MarkdownEvents<'b>)> {
445        if context.note_depth() > NOTE_RECURSION_LIMIT {
446            return Err(ExportError::RecursionLimitExceeded {
447                file_tree: context.file_tree(),
448            });
449        }
450        let content = fs::read_to_string(&path).context(ReadError { path })?;
451        let (frontmatter, content) =
452            matter::matter(&content).unwrap_or(("".to_string(), content.to_string()));
453        let frontmatter =
454            frontmatter_from_str(&frontmatter).context(FrontMatterDecodeError { path })?;
455
456        let mut parser_options = Options::empty();
457        parser_options.insert(Options::ENABLE_TABLES);
458        parser_options.insert(Options::ENABLE_FOOTNOTES);
459        parser_options.insert(Options::ENABLE_STRIKETHROUGH);
460        parser_options.insert(Options::ENABLE_TASKLISTS);
461
462        let mut ref_parser = RefParser::new();
463        let mut events = vec![];
464        // Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
465        let mut buffer = Vec::with_capacity(5);
466
467        for event in Parser::new_ext(&content, parser_options) {
468            if ref_parser.state == RefParserState::Resetting {
469                events.append(&mut buffer);
470                buffer.clear();
471                ref_parser.reset();
472            }
473            buffer.push(event.clone());
474            match ref_parser.state {
475                RefParserState::NoState => {
476                    match event {
477                        Event::Text(CowStr::Borrowed("![")) => {
478                            ref_parser.ref_type = Some(RefType::Embed);
479                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
480                        }
481                        Event::Text(CowStr::Borrowed("[")) => {
482                            ref_parser.ref_type = Some(RefType::Link);
483                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
484                        }
485                        _ => {
486                            events.push(event);
487                            buffer.clear();
488                        },
489                    };
490                }
491                RefParserState::ExpectSecondOpenBracket => match event {
492                    Event::Text(CowStr::Borrowed("[")) => {
493                        ref_parser.transition(RefParserState::ExpectRefText);
494                    }
495                    _ => {
496                        ref_parser.transition(RefParserState::Resetting);
497                    }
498                },
499                RefParserState::ExpectRefText => match event {
500                    Event::Text(CowStr::Borrowed("]")) => {
501                        ref_parser.transition(RefParserState::Resetting);
502                    }
503                    Event::Text(text) => {
504                        ref_parser.ref_text.push_str(&text);
505                        ref_parser.transition(RefParserState::ExpectRefTextOrCloseBracket);
506                    }
507                    _ => {
508                        ref_parser.transition(RefParserState::Resetting);
509                    }
510                },
511                RefParserState::ExpectRefTextOrCloseBracket => match event {
512                    Event::Text(CowStr::Borrowed("]")) => {
513                        ref_parser.transition(RefParserState::ExpectFinalCloseBracket);
514                    }
515                    Event::Text(text) => {
516                        ref_parser.ref_text.push_str(&text);
517                    }
518                    _ => {
519                        ref_parser.transition(RefParserState::Resetting);
520                    }
521                },
522                RefParserState::ExpectFinalCloseBracket => match event {
523                    Event::Text(CowStr::Borrowed("]")) => match ref_parser.ref_type {
524                        Some(RefType::Link) => {
525                            let mut elements = self.make_link_to_file(
526                                ObsidianNoteReference::from_str(
527                                    ref_parser.ref_text.clone().as_ref()
528                                ),
529                                context,
530                            );
531                            events.append(&mut elements);
532                            buffer.clear();
533                            ref_parser.transition(RefParserState::Resetting);
534                        }
535                        Some(RefType::Embed) => {
536                            let mut elements = self.embed_file(
537                                ref_parser.ref_text.clone().as_ref(),
538                                context
539                            )?;
540                            events.append(&mut elements);
541                            buffer.clear();
542                            ref_parser.transition(RefParserState::Resetting);
543                        }
544                        None => panic!("In state ExpectFinalCloseBracket but ref_type is None"),
545                    },
546                    _ => {
547                        ref_parser.transition(RefParserState::Resetting);
548                    }
549                },
550                RefParserState::Resetting => panic!("Reached Resetting state, but it should have been handled prior to this match block"),
551            }
552        }
553        if !buffer.is_empty() {
554            events.append(&mut buffer);
555        }
556        Ok((
557            frontmatter,
558            events.into_iter().map(event_to_owned).collect(),
559        ))
560    }
561
562    // Generate markdown elements for a file that is embedded within another note.
563    //
564    // - If the file being embedded is a note, it's content is included at the point of embed.
565    // - If the file is an image, an image tag is generated.
566    // - For other types of file, a regular link is created instead.
567    fn embed_file<'b>(
568        &self,
569        link_text: &'a str,
570        context: &'a Context,
571    ) -> Result<MarkdownEvents<'b>> {
572        let note_ref = ObsidianNoteReference::from_str(link_text);
573
574        let path = match note_ref.file {
575            Some(file) => lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()),
576
577            // If we have None file it is either to a section or id within the same file and thus
578            // the current embed logic will fail, recurssing until it reaches it's limit.
579            // For now we just bail early.
580            None => return Ok(self.make_link_to_file(note_ref, context)),
581        };
582
583        if path.is_none() {
584            // TODO: Extract into configurable function.
585            eprintln!(
586                "Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n",
587                note_ref
588                    .file
589                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
590                context.current_file().display(),
591            );
592            return Ok(vec![]);
593        }
594
595        let path = path.unwrap();
596        let mut child_context = Context::from_parent(context, path);
597        let no_ext = OsString::new();
598
599        if !self.process_embeds_recursively && context.file_tree().contains(path) {
600            return Ok([
601                vec![Event::Text(CowStr::Borrowed("→ "))],
602                self.make_link_to_file(note_ref, &child_context),
603            ]
604            .concat());
605        }
606
607        let events = match path.extension().unwrap_or(&no_ext).to_str() {
608            Some("md") => {
609                let (frontmatter, mut events) = self.parse_obsidian_note(path, &child_context)?;
610                child_context.frontmatter = frontmatter;
611                if let Some(section) = note_ref.section {
612                    events = reduce_to_section(events, section);
613                }
614                for func in &self.embed_postprocessors {
615                    // Postprocessors running on embeds shouldn't be able to change frontmatter (or
616                    // any other metadata), so we give them a clone of the context.
617                    let res = func(child_context, events);
618                    child_context = res.0;
619                    events = res.1;
620                    match res.2 {
621                        PostprocessorResult::StopHere => break,
622                        PostprocessorResult::StopAndSkipNote => {
623                            events = vec![];
624                        }
625                        PostprocessorResult::Continue => (),
626                    }
627                }
628                events
629            }
630            Some("png") | Some("jpg") | Some("jpeg") | Some("gif") | Some("webp") | Some("svg") => {
631                self.make_link_to_file(note_ref, &child_context)
632                    .into_iter()
633                    .map(|event| match event {
634                        // make_link_to_file returns a link to a file. With this we turn the link
635                        // into an image reference instead. Slightly hacky, but avoids needing
636                        // to keep another utility function around for this, or introducing an
637                        // extra parameter on make_link_to_file.
638                        Event::Start(Tag::Link(linktype, cowstr1, cowstr2)) => {
639                            Event::Start(Tag::Image(
640                                linktype,
641                                CowStr::from(cowstr1.into_string()),
642                                CowStr::from(cowstr2.into_string()),
643                            ))
644                        }
645                        Event::End(Tag::Link(linktype, cowstr1, cowstr2)) => {
646                            Event::End(Tag::Image(
647                                linktype,
648                                CowStr::from(cowstr1.into_string()),
649                                CowStr::from(cowstr2.into_string()),
650                            ))
651                        }
652                        _ => event,
653                    })
654                    .collect()
655            }
656            _ => self.make_link_to_file(note_ref, &child_context),
657        };
658        Ok(events)
659    }
660
661    fn make_link_to_file<'b, 'c>(
662        &self,
663        reference: ObsidianNoteReference<'b>,
664        context: &Context,
665    ) -> MarkdownEvents<'c> {
666        let target_file = reference
667            .file
668            .map(|file| lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()))
669            .unwrap_or_else(|| Some(context.current_file()));
670
671        if target_file.is_none() {
672            // TODO: Extract into configurable function.
673            eprintln!(
674                "Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
675                reference
676                    .file
677                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
678                context.current_file().display(),
679            );
680            return vec![
681                Event::Start(Tag::Emphasis),
682                Event::Text(CowStr::from(reference.display())),
683                Event::End(Tag::Emphasis),
684            ];
685        }
686        let target_file = target_file.unwrap();
687        // We use root_file() rather than current_file() here to make sure links are always
688        // relative to the outer-most note, which is the note which this content is inserted into
689        // in case of embedded notes.
690        let rel_link = diff_paths(
691            target_file,
692            &context
693                .root_file()
694                .parent()
695                .expect("obsidian content files should always have a parent"),
696        )
697        .expect("should be able to build relative path when target file is found in vault");
698
699        let rel_link = rel_link.to_string_lossy();
700        let mut link = utf8_percent_encode(&rel_link, PERCENTENCODE_CHARS).to_string();
701
702        if let Some(section) = reference.section {
703            link.push('#');
704            link.push_str(&slugify(section));
705        }
706
707        let link_tag = pulldown_cmark::Tag::Link(
708            pulldown_cmark::LinkType::Inline,
709            CowStr::from(link),
710            CowStr::from(""),
711        );
712
713        vec![
714            Event::Start(link_tag.clone()),
715            Event::Text(CowStr::from(reference.display())),
716            Event::End(link_tag.clone()),
717        ]
718    }
719}
720
/// Find the first path in `vault_contents` that `filename` refers to.
///
/// Markdown files don't have their .md extension added by Obsidian, but other files (images,
/// PDFs, etc) do so we match on both possibilities.
///
/// References can also refer to notes in a different case (to lowercase text in a
/// sentence even if the note is capitalized for example) so we also try a case-insensitive
/// lookup.
///
/// Matching is done on whole trailing path components. Returns `None` when nothing matches.
fn lookup_filename_in_vault<'a>(
    filename: &str,
    vault_contents: &'a [PathBuf],
) -> Option<&'a PathBuf> {
    // These only depend on `filename`, so build them once instead of once per vault entry.
    let filename_lowered = filename.to_lowercase();
    let filename_md = format!("{}.md", filename);
    let filename_md_lowered = format!("{}.md", filename_lowered);

    vault_contents.iter().find(|path| {
        if path.ends_with(filename) || path.ends_with(&filename_md) {
            return true;
        }
        // Only pay for the lowercased copy of the path when the exact comparisons failed.
        let path_lowered = PathBuf::from(path.to_string_lossy().to_lowercase());
        path_lowered.ends_with(&filename_lowered) || path_lowered.ends_with(&filename_md_lowered)
    })
}
739
740fn render_mdevents_to_mdtext(markdown: MarkdownEvents) -> String {
741    let mut buffer = String::new();
742    cmark_with_options(
743        markdown.iter(),
744        &mut buffer,
745        None,
746        pulldown_cmark_to_cmark::Options::default(),
747    )
748    .expect("formatting to string not expected to fail");
749    buffer.push('\n');
750    buffer
751}
752
753fn create_file(dest: &Path) -> Result<File> {
754    let file = File::create(&dest)
755        .or_else(|err| {
756            if err.kind() == ErrorKind::NotFound {
757                let parent = dest.parent().expect("file should have a parent directory");
758                if let Err(err) = std::fs::create_dir_all(&parent) {
759                    return Err(err);
760                }
761            }
762            File::create(&dest)
763        })
764        .context(WriteError { path: dest })?;
765    Ok(file)
766}
767
768fn copy_file(src: &Path, dest: &Path) -> Result<()> {
769    std::fs::copy(&src, &dest)
770        .or_else(|err| {
771            if err.kind() == ErrorKind::NotFound {
772                let parent = dest.parent().expect("file should have a parent directory");
773                if let Err(err) = std::fs::create_dir_all(&parent) {
774                    return Err(err);
775                }
776            }
777            std::fs::copy(&src, &dest)
778        })
779        .context(WriteError { path: dest })?;
780    Ok(())
781}
782
/// Report whether `file` has the extension `md` (compared case-sensitively).
///
/// Paths without an extension are never considered markdown files.
fn is_markdown_file(file: &Path) -> bool {
    file.extension().map_or(false, |ext| ext == "md")
}
788
789/// Reduce a given `MarkdownEvents` to just those elements which are children of the given section
790/// (heading name).
791fn reduce_to_section<'a, 'b>(events: MarkdownEvents<'a>, section: &'b str) -> MarkdownEvents<'a> {
792    let mut filtered_events = Vec::with_capacity(events.len());
793    let mut target_section_encountered = false;
794    let mut currently_in_target_section = false;
795    let mut section_level = 0;
796    let mut last_level = 0;
797    let mut last_tag_was_heading = false;
798
799    for event in events.into_iter() {
800        filtered_events.push(event.clone());
801        match event {
802            Event::Start(Tag::Heading(level)) => {
803                last_tag_was_heading = true;
804                last_level = level;
805                if currently_in_target_section && level <= section_level {
806                    currently_in_target_section = false;
807                    filtered_events.pop();
808                }
809            }
810            Event::Text(cowstr) => {
811                if !last_tag_was_heading {
812                    last_tag_was_heading = false;
813                    continue;
814                }
815                last_tag_was_heading = false;
816
817                if cowstr.to_string().to_lowercase() == section.to_lowercase() {
818                    target_section_encountered = true;
819                    currently_in_target_section = true;
820                    section_level = last_level;
821
822                    let current_event = filtered_events.pop().unwrap();
823                    let heading_start_event = filtered_events.pop().unwrap();
824                    filtered_events.clear();
825                    filtered_events.push(heading_start_event);
826                    filtered_events.push(current_event);
827                }
828            }
829            _ => {}
830        }
831        if target_section_encountered && !currently_in_target_section {
832            return filtered_events;
833        }
834    }
835    filtered_events
836}
837
838fn event_to_owned<'a>(event: Event) -> Event<'a> {
839    match event {
840        Event::Start(tag) => Event::Start(tag_to_owned(tag)),
841        Event::End(tag) => Event::End(tag_to_owned(tag)),
842        Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())),
843        Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())),
844        Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())),
845        Event::FootnoteReference(cowstr) => {
846            Event::FootnoteReference(CowStr::from(cowstr.into_string()))
847        }
848        Event::SoftBreak => Event::SoftBreak,
849        Event::HardBreak => Event::HardBreak,
850        Event::Rule => Event::Rule,
851        Event::TaskListMarker(checked) => Event::TaskListMarker(checked),
852    }
853}
854
855fn tag_to_owned<'a>(tag: Tag) -> Tag<'a> {
856    match tag {
857        Tag::Paragraph => Tag::Paragraph,
858        Tag::Heading(level) => Tag::Heading(level),
859        Tag::BlockQuote => Tag::BlockQuote,
860        Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)),
861        Tag::List(optional) => Tag::List(optional),
862        Tag::Item => Tag::Item,
863        Tag::FootnoteDefinition(cowstr) => {
864            Tag::FootnoteDefinition(CowStr::from(cowstr.into_string()))
865        }
866        Tag::Table(alignment_vector) => Tag::Table(alignment_vector),
867        Tag::TableHead => Tag::TableHead,
868        Tag::TableRow => Tag::TableRow,
869        Tag::TableCell => Tag::TableCell,
870        Tag::Emphasis => Tag::Emphasis,
871        Tag::Strong => Tag::Strong,
872        Tag::Strikethrough => Tag::Strikethrough,
873        Tag::Link(linktype, cowstr1, cowstr2) => Tag::Link(
874            linktype,
875            CowStr::from(cowstr1.into_string()),
876            CowStr::from(cowstr2.into_string()),
877        ),
878        Tag::Image(linktype, cowstr1, cowstr2) => Tag::Image(
879            linktype,
880            CowStr::from(cowstr1.into_string()),
881            CowStr::from(cowstr2.into_string()),
882        ),
883    }
884}
885
886fn codeblock_kind_to_owned<'a>(codeblock_kind: CodeBlockKind) -> CodeBlockKind<'a> {
887    match codeblock_kind {
888        CodeBlockKind::Indented => CodeBlockKind::Indented,
889        CodeBlockKind::Fenced(cowstr) => CodeBlockKind::Fenced(CowStr::from(cowstr.into_string())),
890    }
891}