obsidian_export/
lib.rs

1pub use {pulldown_cmark, serde_yaml};
2
3mod context;
4mod frontmatter;
5pub mod postprocessors;
6mod references;
7mod walker;
8
9use std::ffi::OsString;
10use std::fs::{self, File};
11use std::io::prelude::*;
12use std::io::ErrorKind;
13use std::path::{Path, PathBuf};
14use std::{fmt, str};
15
16pub use context::Context;
17use filetime::set_file_mtime;
18use frontmatter::{frontmatter_from_str, frontmatter_to_str};
19pub use frontmatter::{Frontmatter, FrontmatterStrategy};
20use pathdiff::diff_paths;
21use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
22use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
23use pulldown_cmark_to_cmark::cmark_with_options;
24use rayon::prelude::*;
25use references::{ObsidianNoteReference, RefParser, RefParserState, RefType};
26use slug::slugify;
27use snafu::{ResultExt, Snafu};
28use unicode_normalization::UnicodeNormalization;
29pub use walker::{vault_contents, WalkOptions};
30
31/// A series of markdown [Event]s that are generated while traversing an Obsidian markdown note.
///
/// [`Postprocessor`]s receive a mutable reference to this vector and may alter it in place.
32pub type MarkdownEvents<'a> = Vec<Event<'a>>;
33
34/// A post-processing function that is to be called after an Obsidian note has been fully parsed and
35/// converted to regular markdown syntax.
36///
37/// Postprocessors are called in the order they've been added through
38/// [`Exporter::add_postprocessor`] just before notes are written out to their final destination.
39/// They may be used to achieve the following:
40///
41/// 1. Modify a note's [Context], for example to change the destination filename or update its
42///    [Frontmatter] (see [`Context::frontmatter`]).
43/// 2. Change a note's contents by altering [`MarkdownEvents`].
44/// 3. Prevent later postprocessors from running ([`PostprocessorResult::StopHere`]) or cause a note
45///    to be skipped entirely ([`PostprocessorResult::StopAndSkipNote`]).
46///
47/// # Postprocessors and embeds
48///
49/// Postprocessors normally run at the end of the export phase, once notes have been fully parsed.
50/// This means that any embedded notes have been resolved and merged into the final note already.
51///
52/// In some cases it may be desirable to change the contents of these embedded notes *before* they
53/// are inserted into the final document. This is possible through the use of
54/// [`Exporter::add_embed_postprocessor`].
55/// These "embed postprocessors" run much the same way as regular postprocessors, but they're run on
56/// the note that is about to be embedded in another note. In addition:
57///
58/// - Changes to context carry over to later embed postprocessors, but are then discarded. This
59///   means that changes to frontmatter do not propagate to the root note for example.
60/// - [`PostprocessorResult::StopAndSkipNote`] prevents the embedded note from being included (it's
61///   replaced with a blank document) but doesn't affect the root note.
62///
63/// It's possible to pass the same functions to [`Exporter::add_postprocessor`] and
64/// [`Exporter::add_embed_postprocessor`]. The [`Context::note_depth`] method may be used to
65/// determine whether a note is a root note or an embedded note in this situation.
66///
67/// # Examples
68///
69/// ## Update frontmatter
70///
71/// This example shows how to make changes to a note's frontmatter. In this case, the postprocessor is
72/// defined inline as a closure.
73///
74/// ```
75/// use obsidian_export::serde_yaml::Value;
76/// use obsidian_export::{Exporter, PostprocessorResult};
77/// # use std::path::PathBuf;
78/// # use tempfile::TempDir;
79///
80/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
81/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
82/// # let destination = tmp_dir.path().to_path_buf();
83/// let mut exporter = Exporter::new(source, destination);
84///
85/// // add_postprocessor registers a new postprocessor. In this example we use a closure.
86/// exporter.add_postprocessor(&|context, _events| {
87///     // This is the key we'll insert into the frontmatter. In this case, the string "foo".
88///     let key = Value::String("foo".to_string());
89///     // This is the value we'll insert into the frontmatter. In this case, the string "baz".
90///     let value = Value::String("baz".to_string());
91///
92///     // Frontmatter can be updated in-place, so we can call insert on it directly.
93///     context.frontmatter.insert(key, value);
94///
95///     // This return value indicates processing should continue.
96///     PostprocessorResult::Continue
97/// });
98///
99/// exporter.run().unwrap();
100/// ```
101///
102/// ## Change note contents
103///
104/// In this example a note's markdown content is changed by iterating over the [`MarkdownEvents`]
105/// and changing the text when we encounter a [text element][Event::Text].
106///
107/// Instead of using a closure like above, this example shows how to use a separate function
108/// definition.
109/// ```
110/// # use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
111/// # use pulldown_cmark::{CowStr, Event};
112/// # use std::path::PathBuf;
113/// # use tempfile::TempDir;
114/// #
115/// /// This postprocessor replaces any instance of "foo" with "bar" in the note body.
116/// fn foo_to_bar(context: &mut Context, events: &mut MarkdownEvents) -> PostprocessorResult {
117///     for event in events.iter_mut() {
118///         if let Event::Text(text) = event {
119///             *event = Event::Text(CowStr::from(text.replace("foo", "bar")))
120///         }
121///     }
122///     PostprocessorResult::Continue
123/// }
124///
125/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
126/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
127/// # let destination = tmp_dir.path().to_path_buf();
128/// # let mut exporter = Exporter::new(source, destination);
129/// exporter.add_postprocessor(&foo_to_bar);
130/// # exporter.run().unwrap();
131/// ```
132pub type Postprocessor<'f> =
133    dyn Fn(&mut Context, &mut MarkdownEvents<'_>) -> PostprocessorResult + Send + Sync + 'f;
/// Crate-local `Result` alias whose error type defaults to [`ExportError`].
134type Result<T, E = ExportError> = std::result::Result<T, E>;
135
/// Characters that get percent-encoded in generated link destinations: everything in the
/// `CONTROLS` set plus space, `(`, `)`, `%` and `?`.
136const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?');
/// Maximum note depth (as reported by [`Context::note_depth`]) that may be reached while resolving
/// embeds before parsing is aborted with [`ExportError::RecursionLimitExceeded`].
137const NOTE_RECURSION_LIMIT: usize = 10;
138
139#[non_exhaustive]
140#[derive(Debug, Snafu)]
141/// `ExportError` represents all errors which may be returned when using this crate.
142pub enum ExportError {
143    #[snafu(display("failed to read from '{}'", path.display()))]
144    /// This occurs when a read IO operation fails.
145    ReadError {
146        path: PathBuf,
147        source: std::io::Error,
148    },
149
150    #[snafu(display("failed to write to '{}'", path.display()))]
151    /// This occurs when a write IO operation fails.
152    WriteError {
153        path: PathBuf,
154        source: std::io::Error,
155    },
156
157    #[snafu(display("Encountered an error while trying to walk '{}'", path.display()))]
158    /// This occurs when an error is encountered while trying to walk a directory.
159    WalkDirError {
160        path: PathBuf,
161        source: ignore::Error,
162    },
163
164    #[snafu(display("Failed to read the mtime of '{}'", path.display()))]
165    /// This occurs when a file's modified time cannot be read
166    ModTimeReadError {
167        path: PathBuf,
168        source: std::io::Error,
169    },
170
171    #[snafu(display("Failed to set the mtime of '{}'", path.display()))]
172    /// This occurs when a file's modified time cannot be set
173    ModTimeSetError {
174        path: PathBuf,
175        source: std::io::Error,
176    },
177
178    #[snafu(display("No such file or directory: {}", path.display()))]
179    /// This occurs when an operation is requested on a file or directory which does not exist.
180    PathDoesNotExist { path: PathBuf },
181
182    #[snafu(display("Invalid character encoding encountered"))]
183    /// This error may occur when invalid UTF8 is encountered.
184    ///
185    /// Currently, operations which assume UTF8 perform lossy encoding however.
186    CharacterEncodingError { source: str::Utf8Error },
187
188    #[snafu(display("Recursion limit exceeded"))]
189    /// This error occurs when embedded notes are too deeply nested or cause an infinite loop.
190    ///
191    /// When this happens, `file_tree` contains a list of all the files which were processed
192    /// leading up to this error.
193    RecursionLimitExceeded { file_tree: Vec<PathBuf> },
194
195    #[snafu(display("Failed to export '{}'", path.display()))]
196    /// This occurs when a file fails to export successfully.
    ///
    /// The underlying error is boxed and available as this error's source.
197    FileExportError {
198        path: PathBuf,
199        #[snafu(source(from(ExportError, Box::new)))]
200        source: Box<ExportError>,
201    },
202
203    #[snafu(display("Failed to decode YAML frontmatter in '{}'", path.display()))]
    /// This occurs when the YAML frontmatter of the note at `path` cannot be decoded.
204    FrontMatterDecodeError {
205        path: PathBuf,
206        #[snafu(source(from(serde_yaml::Error, Box::new)))]
207        source: Box<serde_yaml::Error>,
208    },
209
210    #[snafu(display("Failed to encode YAML frontmatter for '{}'", path.display()))]
    /// This occurs when the frontmatter of the note at `path` cannot be encoded back to YAML
    /// while writing out the exported note.
211    FrontMatterEncodeError {
212        path: PathBuf,
213        #[snafu(source(from(serde_yaml::Error, Box::new)))]
214        source: Box<serde_yaml::Error>,
215    },
216}
217
218/// Emitted by [Postprocessor]s to signal the next action to take.
///
/// The same values are used by regular postprocessors and by embed postprocessors; for the
/// latter, "note" refers to the note that is being embedded.
219#[derive(Debug, Clone, Copy, PartialEq, Eq)]
220#[non_exhaustive]
221pub enum PostprocessorResult {
222    /// Continue with the next post-processor (if any).
223    Continue,
224    /// Use this note, but don't run any more post-processors after this one.
225    StopHere,
226    /// Skip this note (don't export it) and don't run any more post-processors.
227    StopAndSkipNote,
228}
229
230#[derive(Clone)]
231/// Exporter provides the main interface to this library.
232///
233/// Users are expected to create an Exporter using [`Exporter::new`], optionally followed by
234/// customization using [`Exporter::frontmatter_strategy`] and [`Exporter::walk_options`].
235///
236/// After that, calling [`Exporter::run`] will start the export process.
237pub struct Exporter<'a> {
    /// Path that notes are read from; also the path that is walked to build `vault_contents`.
238    root: PathBuf,
    /// Path that exported notes are written to.
239    destination: PathBuf,
    /// Only files under this path are exported. Defaults to `root`.
240    start_at: PathBuf,
    /// Decides whether frontmatter is written to exported notes.
241    frontmatter_strategy: FrontmatterStrategy,
    /// Files discovered under `root`. `None` until [`Exporter::run`] has populated it.
242    vault_contents: Option<Vec<PathBuf>>,
    /// Options passed on to the vault walker when discovering files.
243    walk_options: WalkOptions<'a>,
    /// When false, a note that is already part of the current file tree is linked to instead of
    /// being embedded again.
244    process_embeds_recursively: bool,
    /// When true, the modified time of the source file is copied onto the exported file.
245    preserve_mtime: bool,
    /// Postprocessors run, in insertion order, on each note before it is written out.
246    postprocessors: Vec<&'a Postprocessor<'a>>,
    /// Postprocessors run, in insertion order, on each embedded note before it is inserted.
247    embed_postprocessors: Vec<&'a Postprocessor<'a>>,
248}
249
250impl<'a> fmt::Debug for Exporter<'a> {
251    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252        f.debug_struct("WalkOptions")
253            .field("root", &self.root)
254            .field("destination", &self.destination)
255            .field("frontmatter_strategy", &self.frontmatter_strategy)
256            .field("vault_contents", &self.vault_contents)
257            .field("walk_options", &self.walk_options)
258            .field(
259                "process_embeds_recursively",
260                &self.process_embeds_recursively,
261            )
262            .field("preserve_mtime", &self.preserve_mtime)
263            .field(
264                "postprocessors",
265                &format!("<{} postprocessors active>", self.postprocessors.len()),
266            )
267            .field(
268                "embed_postprocessors",
269                &format!(
270                    "<{} postprocessors active>",
271                    self.embed_postprocessors.len()
272                ),
273            )
274            .finish()
275    }
276}
277
278impl<'a> Exporter<'a> {
279    /// Create a new exporter which reads notes from `root` and exports these to
280    /// `destination`.
281    #[must_use]
282    pub fn new(root: PathBuf, destination: PathBuf) -> Self {
283        Self {
284            start_at: root.clone(),
285            root,
286            destination,
287            frontmatter_strategy: FrontmatterStrategy::Auto,
288            walk_options: WalkOptions::default(),
289            process_embeds_recursively: true,
290            preserve_mtime: false,
291            vault_contents: None,
292            postprocessors: vec![],
293            embed_postprocessors: vec![],
294        }
295    }
296
297    /// Set a custom starting point for the export.
298    ///
299    /// Normally all notes under `root` (except for notes excluded by ignore rules) will be
300    /// exported. When `start_at` is set, only notes under this path will be exported to the
301    /// target destination.
302    pub fn start_at(&mut self, start_at: PathBuf) -> &mut Self {
303        self.start_at = start_at;
304        self
305    }
306
307    /// Set the [`WalkOptions`] to be used for this exporter.
308    pub fn walk_options(&mut self, options: WalkOptions<'a>) -> &mut Self {
309        self.walk_options = options;
310        self
311    }
312
313    /// Set the [`FrontmatterStrategy`] to be used for this exporter.
314    pub fn frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Self {
315        self.frontmatter_strategy = strategy;
316        self
317    }
318
319    /// Set the behavior when recursive embeds are encountered.
320    ///
321    /// When `recursive` is true (the default), emdeds are always processed recursively. This may
322    /// lead to infinite recursion when note A embeds B, but B also embeds A.
323    /// (When this happens, [`ExportError::RecursionLimitExceeded`] will be returned by
324    /// [`Exporter::run`]).
325    ///
326    /// When `recursive` is false, if a note is encountered for a second time while processing the
327    /// original note, instead of embedding it again a link to the note is inserted instead.
328    pub fn process_embeds_recursively(&mut self, recursive: bool) -> &mut Self {
329        self.process_embeds_recursively = recursive;
330        self
331    }
332
333    /// Set whether the modified time of exported files should be preserved.
334    ///
335    /// When `preserve` is true, the modified time of exported files will be set to the modified
336    /// time of the source file.
337    pub fn preserve_mtime(&mut self, preserve: bool) -> &mut Self {
338        self.preserve_mtime = preserve;
339        self
340    }
341
342    /// Append a function to the chain of [postprocessors][Postprocessor] to run on exported
343    /// Obsidian Markdown notes.
344    pub fn add_postprocessor(&mut self, processor: &'a Postprocessor<'_>) -> &mut Self {
345        self.postprocessors.push(processor);
346        self
347    }
348
349    /// Append a function to the chain of [postprocessors][Postprocessor] for embeds.
350    pub fn add_embed_postprocessor(&mut self, processor: &'a Postprocessor<'_>) -> &mut Self {
351        self.embed_postprocessors.push(processor);
352        self
353    }
354
355    /// Export notes using the settings configured on this exporter.
356    pub fn run(&mut self) -> Result<()> {
357        if !self.root.exists() {
358            return Err(ExportError::PathDoesNotExist {
359                path: self.root.clone(),
360            });
361        }
362
363        self.vault_contents = Some(vault_contents(
364            self.root.as_path(),
365            self.walk_options.clone(),
366        )?);
367
368        // When a single file is specified, just need to export that specific file instead of
369        // iterating over all discovered files. This also allows us to accept destination as either
370        // a file or a directory name.
371        if self.root.is_file() || self.start_at.is_file() {
372            let source_filename = self
373                .start_at
374                .file_name()
375                .expect("File without a filename? How is that possible?")
376                .to_string_lossy();
377
378            let destination = match self.destination.is_dir() {
379                true => self.destination.join(String::from(source_filename)),
380                false => {
381                    let parent = self.destination.parent().unwrap_or(&self.destination);
382                    // Avoid recursively creating self.destination through the call to
383                    // export_note when the parent directory doesn't exist.
384                    if !parent.exists() {
385                        return Err(ExportError::PathDoesNotExist {
386                            path: parent.to_path_buf(),
387                        });
388                    }
389                    self.destination.clone()
390                }
391            };
392            return self.export_note(&self.start_at, &destination);
393        }
394
395        if !self.destination.exists() {
396            return Err(ExportError::PathDoesNotExist {
397                path: self.destination.clone(),
398            });
399        }
400        self.vault_contents
401            .as_ref()
402            .unwrap()
403            .clone()
404            .into_par_iter()
405            .filter(|file| file.starts_with(&self.start_at))
406            .try_for_each(|file| {
407                let relative_path = file
408                    .strip_prefix(self.start_at.clone())
409                    .expect("file should always be nested under root")
410                    .to_path_buf();
411                let destination = &self.destination.join(relative_path);
412                self.export_note(&file, destination)
413            })?;
414        Ok(())
415    }
416
417    fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
418        match is_markdown_file(src) {
419            true => self.parse_and_export_obsidian_note(src, dest),
420            false => copy_file(src, dest),
421        }
422        .context(FileExportSnafu { path: src })?;
423
424        if self.preserve_mtime {
425            copy_mtime(src, dest).context(FileExportSnafu { path: src })?;
426        }
427
428        Ok(())
429    }
430
431    fn parse_and_export_obsidian_note(&self, src: &Path, dest: &Path) -> Result<()> {
432        let mut context = Context::new(src.to_path_buf(), dest.to_path_buf());
433
434        let (frontmatter, mut markdown_events) = self.parse_obsidian_note(src, &context)?;
435        context.frontmatter = frontmatter;
436        for func in &self.postprocessors {
437            match func(&mut context, &mut markdown_events) {
438                PostprocessorResult::StopHere => break,
439                PostprocessorResult::StopAndSkipNote => return Ok(()),
440                PostprocessorResult::Continue => (),
441            }
442        }
443
444        let mut outfile = create_file(&context.destination)?;
445        let write_frontmatter = match self.frontmatter_strategy {
446            FrontmatterStrategy::Always => true,
447            FrontmatterStrategy::Never => false,
448            FrontmatterStrategy::Auto => !context.frontmatter.is_empty(),
449        };
450        if write_frontmatter {
451            let mut frontmatter_str = frontmatter_to_str(&context.frontmatter)
452                .context(FrontMatterEncodeSnafu { path: src })?;
453            frontmatter_str.push('\n');
454            outfile
455                .write_all(frontmatter_str.as_bytes())
456                .context(WriteSnafu {
457                    path: &context.destination,
458                })?;
459        }
460        outfile
461            .write_all(render_mdevents_to_mdtext(&markdown_events).as_bytes())
462            .context(WriteSnafu {
463                path: &context.destination,
464            })?;
465        Ok(())
466    }
467
    /// Parse the note at `path` into its frontmatter and a list of markdown events.
    ///
    /// The note is read from disk and parsed as markdown. Text found inside a metadata block is
    /// collected as frontmatter rather than being emitted as events. Event sequences that spell
    /// out `[[...]]` or `![[...]]` are replaced by the events produced by `make_link_to_file`
    /// and `embed_file` respectively; all other events are passed through unchanged.
    ///
    /// # Errors
    ///
    /// - [`ExportError::RecursionLimitExceeded`] when the note depth of `context` exceeds
    ///   `NOTE_RECURSION_LIMIT`.
    /// - [`ExportError::ReadError`] when the note cannot be read.
    /// - [`ExportError::FrontMatterDecodeError`] when the collected frontmatter cannot be decoded.
    /// - Any error returned while embedding another file.
    ///
    /// # Panics
    ///
    /// Panics when an event other than text or the end of the block is encountered inside a
    /// metadata block, or when the reference parser ends up in an inconsistent state.
468    #[allow(clippy::too_many_lines)]
469    #[allow(clippy::panic_in_result_fn)]
470    #[allow(clippy::shadow_unrelated)]
471    fn parse_obsidian_note<'b>(
472        &self,
473        path: &Path,
474        context: &Context,
475    ) -> Result<(Frontmatter, MarkdownEvents<'b>)> {
476        if context.note_depth() > NOTE_RECURSION_LIMIT {
477            return Err(ExportError::RecursionLimitExceeded {
478                file_tree: context.file_tree(),
479            });
480        }
481        let content = fs::read_to_string(path).context(ReadSnafu { path })?;
        // Raw text of the metadata block, decoded into `Frontmatter` at the very end.
482        let mut frontmatter = String::new();
483
484        let parser_options = Options::ENABLE_TABLES
485            | Options::ENABLE_FOOTNOTES
486            | Options::ENABLE_STRIKETHROUGH
487            | Options::ENABLE_TASKLISTS
488            | Options::ENABLE_MATH
489            | Options::ENABLE_YAML_STYLE_METADATA_BLOCKS
490            | Options::ENABLE_GFM;
491
492        let mut ref_parser = RefParser::new();
        // Events that make up the final output.
493        let mut events = vec![];
        // Events held back while they might still turn out to be part of a reference.
494        // Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
495        let mut buffer = Vec::with_capacity(5);
496
497        let mut parser = Parser::new_ext(&content, parser_options);
498        'outer: while let Some(event) = parser.next() {
499            // When encountering a metadata block (frontmatter), collect all events until getting
500            // to the end of the block, at which point the nested loop will break out to the outer
501            // loop again.
502            if matches!(event, Event::Start(Tag::MetadataBlock(_kind))) {
503                for event in parser.by_ref() {
504                    match event {
505                        Event::Text(cowstr) => frontmatter.push_str(&cowstr),
506                        Event::End(TagEnd::MetadataBlock(_kind)) => {
507                            continue 'outer;
508                        },
509                        _ => panic!(
510                            "Encountered an unexpected event while processing frontmatter in {}. Please report this as a bug with a copy of the note contents and this text: \n\nEvent: {:?}\n",
511                            path.display(),
512                            event
513                        ),
514                    }
515                }
516            }
            // The previous event ended an attempt at parsing a reference. Whatever is still
            // buffered was not consumed as a reference, so it is emitted unchanged.
517            if ref_parser.state == RefParserState::Resetting {
518                events.append(&mut buffer);
519                buffer.clear();
520                ref_parser.reset();
521            }
522            buffer.push(event.clone());
523            match ref_parser.state {
                // Not inside a reference: look for the opening `![` (embed) or `[` (link).
524                RefParserState::NoState => {
525                    match event {
526                        Event::Text(CowStr::Borrowed("![")) => {
527                            ref_parser.ref_type = Some(RefType::Embed);
528                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
529                        }
530                        Event::Text(CowStr::Borrowed("[")) => {
531                            ref_parser.ref_type = Some(RefType::Link);
532                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
533                        }
534                        _ => {
                            // Ordinary event: emit it directly, nothing needs to be held back.
535                            events.push(event);
536                            buffer.clear();
537                        },
538                    };
539                }
                // Seen the first opening bracket: only a second `[` continues the reference.
540                RefParserState::ExpectSecondOpenBracket => match event {
541                    Event::Text(CowStr::Borrowed("[")) => {
542                        ref_parser.transition(RefParserState::ExpectRefText);
543                    }
544                    _ => {
545                        ref_parser.transition(RefParserState::Resetting);
546                    }
547                },
                // Seen `[[`: the reference text must follow; an immediate `]` aborts.
548                RefParserState::ExpectRefText => match event {
549                    Event::Text(CowStr::Borrowed("]")) => {
550                        ref_parser.transition(RefParserState::Resetting);
551                    }
552                    Event::Text(text) => {
553                        ref_parser.ref_text.push_str(&text);
554                        ref_parser.transition(RefParserState::ExpectRefTextOrCloseBracket);
555                    }
556                    _ => {
557                        ref_parser.transition(RefParserState::Resetting);
558                    }
559                },
                // Collecting reference text, which may arrive spread over several text events.
560                RefParserState::ExpectRefTextOrCloseBracket => match event {
561                    Event::Text(CowStr::Borrowed("]")) => {
562                        ref_parser.transition(RefParserState::ExpectFinalCloseBracket);
563                    }
564                    Event::Text(text) => {
565                        ref_parser.ref_text.push_str(&text);
566                    }
567                    _ => {
568                        ref_parser.transition(RefParserState::Resetting);
569                    }
570                },
                // Seen the first `]`: a second `]` completes the reference, at which point the
                // buffered events are dropped in favour of the generated link or embed events.
571                RefParserState::ExpectFinalCloseBracket => match event {
572                    Event::Text(CowStr::Borrowed("]")) => match ref_parser.ref_type {
573                        Some(RefType::Link) => {
574                            let mut elements = self.make_link_to_file(
575                                ObsidianNoteReference::from_str(
576                                    ref_parser.ref_text.clone().as_ref()
577                                ),
578                                context,
579                            );
580                            events.append(&mut elements);
581                            buffer.clear();
582                            ref_parser.transition(RefParserState::Resetting);
583                        }
584                        Some(RefType::Embed) => {
585                            let mut elements = self.embed_file(
586                                ref_parser.ref_text.clone().as_ref(),
587                                context
588                            )?;
589                            events.append(&mut elements);
590                            buffer.clear();
591                            ref_parser.transition(RefParserState::Resetting);
592                        }
593                        None => panic!("In state ExpectFinalCloseBracket but ref_type is None"),
594                    },
595                    _ => {
596                        ref_parser.transition(RefParserState::Resetting);
597                    }
598                },
599                RefParserState::Resetting => panic!("Reached Resetting state, but it should have been handled prior to this match block"),
600            }
601        }
        // Emit events that were still held back when the input ended.
602        if !buffer.is_empty() {
603            events.append(&mut buffer);
604        }
605
606        Ok((
607            frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?,
608            events.into_iter().map(event_to_owned).collect(),
609        ))
610    }
611
612    // Generate markdown elements for a file that is embedded within another note.
613    //
614    // - If the file being embedded is a note, it's content is included at the point of embed.
615    // - If the file is an image, an image tag is generated.
616    // - For other types of file, a regular link is created instead.
617    fn embed_file<'b>(
618        &self,
619        link_text: &'a str,
620        context: &'a Context,
621    ) -> Result<MarkdownEvents<'b>> {
622        let note_ref = ObsidianNoteReference::from_str(link_text);
623
624        let path = match note_ref.file {
625            Some(file) => lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()),
626
627            // If we have None file it is either to a section or id within the same file and thus
628            // the current embed logic will fail, recurssing until it reaches it's limit.
629            // For now we just bail early.
630            None => return Ok(self.make_link_to_file(note_ref, context)),
631        };
632
633        if path.is_none() {
634            // TODO: Extract into configurable function.
635            eprintln!(
636                "Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n",
637                note_ref
638                    .file
639                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
640                context.current_file().display(),
641            );
642            return Ok(vec![]);
643        }
644
645        let path = path.unwrap();
646        let mut child_context = Context::from_parent(context, path);
647        let no_ext = OsString::new();
648
649        if !self.process_embeds_recursively && context.file_tree().contains(path) {
650            return Ok([
651                vec![Event::Text(CowStr::Borrowed("→ "))],
652                self.make_link_to_file(note_ref, &child_context),
653            ]
654            .concat());
655        }
656
657        let events = match path.extension().unwrap_or(&no_ext).to_str() {
658            Some("md") => {
659                let (frontmatter, mut events) = self.parse_obsidian_note(path, &child_context)?;
660                child_context.frontmatter = frontmatter;
661                if let Some(section) = note_ref.section {
662                    events = reduce_to_section(events, section);
663                }
664                for func in &self.embed_postprocessors {
665                    // Postprocessors running on embeds shouldn't be able to change frontmatter (or
666                    // any other metadata), so we give them a clone of the context.
667                    match func(&mut child_context, &mut events) {
668                        PostprocessorResult::StopHere => break,
669                        PostprocessorResult::StopAndSkipNote => {
670                            events = vec![];
671                        }
672                        PostprocessorResult::Continue => (),
673                    }
674                }
675                events
676            }
677            Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg") => {
678                self.make_link_to_file(note_ref, &child_context)
679                    .into_iter()
680                    .map(|event| match event {
681                        // make_link_to_file returns a link to a file. With this we turn the link
682                        // into an image reference instead. Slightly hacky, but avoids needing
683                        // to keep another utility function around for this, or introducing an
684                        // extra parameter on make_link_to_file.
685                        Event::Start(Tag::Link {
686                            link_type,
687                            dest_url,
688                            title,
689                            id,
690                        }) => Event::Start(Tag::Image {
691                            link_type,
692                            dest_url: CowStr::from(dest_url.into_string()),
693                            title: CowStr::from(title.into_string()),
694                            id: CowStr::from(id.into_string()),
695                        }),
696                        Event::End(TagEnd::Link) => Event::End(TagEnd::Image),
697                        _ => event,
698                    })
699                    .collect()
700            }
701            _ => self.make_link_to_file(note_ref, &child_context),
702        };
703        Ok(events)
704    }
705
706    fn make_link_to_file<'c>(
707        &self,
708        reference: ObsidianNoteReference<'_>,
709        context: &Context,
710    ) -> MarkdownEvents<'c> {
711        let target_file = reference.file.map_or_else(
712            || Some(context.current_file()),
713            |file| lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()),
714        );
715
716        if target_file.is_none() {
717            // TODO: Extract into configurable function.
718            eprintln!(
719                "Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
720                reference
721                    .file
722                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
723                context.current_file().display(),
724            );
725            return vec![
726                Event::Start(Tag::Emphasis),
727                Event::Text(CowStr::from(reference.display())),
728                Event::End(TagEnd::Emphasis),
729            ];
730        }
731        let target_file = target_file.unwrap();
732        // We use root_file() rather than current_file() here to make sure links are always
733        // relative to the outer-most note, which is the note which this content is inserted into
734        // in case of embedded notes.
735        let rel_link = diff_paths(
736            target_file,
737            context
738                .root_file()
739                .parent()
740                .expect("obsidian content files should always have a parent"),
741        )
742        .expect("should be able to build relative path when target file is found in vault");
743
744        let rel_link = rel_link.to_string_lossy();
745        let mut link = utf8_percent_encode(&rel_link, PERCENTENCODE_CHARS).to_string();
746
747        if let Some(section) = reference.section {
748            link.push('#');
749            link.push_str(&slugify(section));
750        }
751
752        let link_tag = Tag::Link {
753            link_type: pulldown_cmark::LinkType::Inline,
754            dest_url: CowStr::from(link),
755            title: CowStr::from(""),
756            id: CowStr::from(""),
757        };
758
759        vec![
760            Event::Start(link_tag),
761            Event::Text(CowStr::from(reference.display())),
762            Event::End(TagEnd::Link),
763        ]
764    }
765}
766
767/// Get the full path for the given filename when it's contained in `vault_contents`, taking into
768/// account:
769///
770/// 1. Standard Obsidian note references not including a .md extension.
771/// 2. Case-insensitive matching
772/// 3. Unicode normalization rules using normalization form C (<https://www.w3.org/TR/charmod-norm/#unicodeNormalization>)
773fn lookup_filename_in_vault<'a>(
774    filename: &str,
775    vault_contents: &'a [PathBuf],
776) -> Option<&'a PathBuf> {
777    let filename = PathBuf::from(filename);
778    let filename_normalized = filename.to_string_lossy().nfc().collect::<String>();
779
780    vault_contents.iter().find(|path| {
781        let path_normalized_str = path.to_string_lossy().nfc().collect::<String>();
782        let path_normalized = PathBuf::from(&path_normalized_str);
783        let path_normalized_lowered = PathBuf::from(&path_normalized_str.to_lowercase());
784
785        // It would be convenient if we could just do `filename.set_extension("md")` at the start
786        // of this funtion so we don't need multiple separate + ".md" match cases here, however
787        // that would break with a reference of `[[Note.1]]` linking to `[[Note.1.md]]`.
788
789        path_normalized.ends_with(&filename_normalized)
790            || path_normalized.ends_with(filename_normalized.clone() + ".md")
791            || path_normalized_lowered.ends_with(filename_normalized.to_lowercase())
792            || path_normalized_lowered.ends_with(filename_normalized.to_lowercase() + ".md")
793    })
794}
795
796fn render_mdevents_to_mdtext(markdown: &MarkdownEvents<'_>) -> String {
797    let mut buffer = String::new();
798    cmark_with_options(
799        markdown.iter(),
800        &mut buffer,
801        pulldown_cmark_to_cmark::Options::default(),
802    )
803    .expect("formatting to string not expected to fail");
804    buffer.push('\n');
805    buffer
806}
807
808fn create_file(dest: &Path) -> Result<File> {
809    let file = File::create(dest)
810        .or_else(|err| {
811            if err.kind() == ErrorKind::NotFound {
812                let parent = dest.parent().expect("file should have a parent directory");
813                fs::create_dir_all(parent)?;
814            }
815            File::create(dest)
816        })
817        .context(WriteSnafu { path: dest })?;
818    Ok(file)
819}
820
821fn copy_mtime(src: &Path, dest: &Path) -> Result<()> {
822    let metadata = fs::metadata(src).context(ModTimeReadSnafu { path: src })?;
823    let modified_time = metadata
824        .modified()
825        .context(ModTimeReadSnafu { path: src })?;
826
827    set_file_mtime(dest, modified_time.into()).context(ModTimeSetSnafu { path: dest })?;
828    Ok(())
829}
830
831fn copy_file(src: &Path, dest: &Path) -> Result<()> {
832    fs::copy(src, dest)
833        .or_else(|err| {
834            if err.kind() == ErrorKind::NotFound {
835                let parent = dest.parent().expect("file should have a parent directory");
836                fs::create_dir_all(parent)?;
837            }
838            fs::copy(src, dest)
839        })
840        .context(WriteSnafu { path: dest })?;
841    Ok(())
842}
843
/// Report whether `file` has the extension `md` (compared case-sensitively).
fn is_markdown_file(file: &Path) -> bool {
    file.extension().is_some_and(|extension| extension == "md")
}
849
850/// Reduce a given `MarkdownEvents` to just those elements which are children of the given section
851/// (heading name).
852fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownEvents<'a> {
853    let mut filtered_events = Vec::with_capacity(events.len());
854    let mut target_section_encountered = false;
855    let mut currently_in_target_section = false;
856    let mut section_level = HeadingLevel::H1;
857    let mut last_level = HeadingLevel::H1;
858    let mut last_tag_was_heading = false;
859
860    for event in events {
861        filtered_events.push(event.clone());
862        match event {
863            Event::Start(Tag::Heading { level, .. }) => {
864                last_tag_was_heading = true;
865                last_level = level;
866                if currently_in_target_section && level <= section_level {
867                    currently_in_target_section = false;
868                    filtered_events.pop();
869                }
870            }
871            Event::Text(cowstr) => {
872                if !last_tag_was_heading {
873                    last_tag_was_heading = false;
874                    continue;
875                }
876                last_tag_was_heading = false;
877
878                if cowstr.to_string().to_lowercase() == section.to_lowercase() {
879                    target_section_encountered = true;
880                    currently_in_target_section = true;
881                    section_level = last_level;
882
883                    let current_event = filtered_events.pop().unwrap();
884                    let heading_start_event = filtered_events.pop().unwrap();
885                    filtered_events.clear();
886                    filtered_events.push(heading_start_event);
887                    filtered_events.push(current_event);
888                }
889            }
890            _ => {}
891        }
892        if target_section_encountered && !currently_in_target_section {
893            return filtered_events;
894        }
895    }
896    filtered_events
897}
898
899fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> {
900    match event {
901        Event::Start(tag) => Event::Start(tag_to_owned(tag)),
902        Event::End(tag) => Event::End(tag),
903        Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())),
904        Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())),
905        Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())),
906        Event::InlineHtml(cowstr) => Event::InlineHtml(CowStr::from(cowstr.into_string())),
907        Event::FootnoteReference(cowstr) => {
908            Event::FootnoteReference(CowStr::from(cowstr.into_string()))
909        }
910        Event::SoftBreak => Event::SoftBreak,
911        Event::HardBreak => Event::HardBreak,
912        Event::Rule => Event::Rule,
913        Event::TaskListMarker(checked) => Event::TaskListMarker(checked),
914        Event::InlineMath(cowstr) => Event::InlineMath(CowStr::from(cowstr.into_string())),
915        Event::DisplayMath(cowstr) => Event::DisplayMath(CowStr::from(cowstr.into_string())),
916    }
917}
918
919fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> {
920    match tag {
921        Tag::Paragraph => Tag::Paragraph,
922        Tag::Heading {
923            level: heading_level,
924            id,
925            classes,
926            attrs,
927        } => Tag::Heading {
928            level: heading_level,
929            id: id.map(|cowstr| CowStr::from(cowstr.into_string())),
930            classes: classes
931                .into_iter()
932                .map(|cowstr| CowStr::from(cowstr.into_string()))
933                .collect(),
934            attrs: attrs
935                .into_iter()
936                .map(|(attr, value)| {
937                    (
938                        CowStr::from(attr.into_string()),
939                        value.map(|cowstr| CowStr::from(cowstr.into_string())),
940                    )
941                })
942                .collect(),
943        },
944        Tag::BlockQuote(blockquote_kind) => Tag::BlockQuote(blockquote_kind),
945        Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)),
946        Tag::List(optional) => Tag::List(optional),
947        Tag::Item => Tag::Item,
948        Tag::FootnoteDefinition(cowstr) => {
949            Tag::FootnoteDefinition(CowStr::from(cowstr.into_string()))
950        }
951        Tag::Table(alignment_vector) => Tag::Table(alignment_vector),
952        Tag::TableHead => Tag::TableHead,
953        Tag::TableRow => Tag::TableRow,
954        Tag::TableCell => Tag::TableCell,
955        Tag::Emphasis => Tag::Emphasis,
956        Tag::Strong => Tag::Strong,
957        Tag::Strikethrough => Tag::Strikethrough,
958        Tag::Link {
959            link_type,
960            dest_url,
961            title,
962            id,
963        } => Tag::Link {
964            link_type,
965            dest_url: CowStr::from(dest_url.into_string()),
966            title: CowStr::from(title.into_string()),
967            id: CowStr::from(id.into_string()),
968        },
969        Tag::Image {
970            link_type,
971            dest_url,
972            title,
973            id,
974        } => Tag::Image {
975            link_type,
976            dest_url: CowStr::from(dest_url.into_string()),
977            title: CowStr::from(title.into_string()),
978            id: CowStr::from(id.into_string()),
979        },
980        Tag::HtmlBlock => Tag::HtmlBlock,
981        Tag::MetadataBlock(metadata_block_kind) => Tag::MetadataBlock(metadata_block_kind),
982        Tag::DefinitionList => Tag::DefinitionList,
983        Tag::DefinitionListTitle => Tag::DefinitionListTitle,
984        Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
985    }
986}
987
988fn codeblock_kind_to_owned<'a>(codeblock_kind: CodeBlockKind<'_>) -> CodeBlockKind<'a> {
989    match codeblock_kind {
990        CodeBlockKind::Indented => CodeBlockKind::Indented,
991        CodeBlockKind::Fenced(cowstr) => CodeBlockKind::Fenced(CowStr::from(cowstr.into_string())),
992    }
993}
994
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use pretty_assertions::assert_eq;
    use rstest::rstest;

    use super::*;

    /// Vault listing shared by the filename lookup tests.
    static VAULT: LazyLock<Vec<PathBuf>> = LazyLock::new(|| {
        vec![
            PathBuf::from("NoteA.md"),
            PathBuf::from("Document.pdf"),
            PathBuf::from("Note.1.md"),
            PathBuf::from("nested/NoteA.md"),
            // "Note" followed by the precomposed U+00E4, see also encodings() below
            PathBuf::from("Note\u{E4}.md"),
        ]
    });

    /// Documents the two encodings of "a with diaeresis" that the lookup tests rely on.
    ///
    /// Only escapes are used here: the two forms render identically, so raw glyphs in the source
    /// would be indistinguishable and could be silently changed by normalizing tools.
    #[test]
    fn encodings() {
        // Standard "Latin Small Letter A with Diaeresis" (U+00E4)
        // Encoded in UTF-8 as two bytes: 0xC3 0xA4
        let precomposed = String::from_utf8(vec![0xC3, 0xA4]).unwrap();
        assert_eq!(precomposed, "\u{E4}");

        // Basic (ASCII) lowercase a followed by "Combining Diaeresis" (U+0308)
        // Renders the same visual appearance but is encoded in UTF-8 as three bytes:
        // 0x61 0xCC 0x88
        let decomposed = String::from_utf8(vec![0x61, 0xCC, 0x88]).unwrap();
        assert_eq!(decomposed, "a\u{308}");
        assert_eq!(decomposed, "\u{61}\u{308}");

        // Despite looking the same, the two forms are different strings.
        assert_ne!(precomposed, decomposed);

        // For more examples and a better explanation of this concept, see
        // https://www.w3.org/TR/charmod-norm/#aringExample
    }

    #[rstest]
    // Exact match
    #[case("NoteA.md", "NoteA.md")]
    #[case("NoteA", "NoteA.md")]
    // Same note in subdir, exact match should find it
    #[case("nested/NoteA.md", "nested/NoteA.md")]
    #[case("nested/NoteA", "nested/NoteA.md")]
    // Different extensions
    #[case("Document.pdf", "Document.pdf")]
    #[case("Note.1", "Note.1.md")]
    #[case("Note.1.md", "Note.1.md")]
    // Case-insensitive matches
    #[case("notea.md", "NoteA.md")]
    #[case("notea", "NoteA.md")]
    #[case("NESTED/notea.md", "nested/NoteA.md")]
    #[case("NESTED/notea", "nested/NoteA.md")]
    // "Latin Small Letter A with Diaeresis" (U+00E4)
    #[case("Note\u{E4}.md", "Note\u{E4}.md")]
    #[case("Note\u{E4}", "Note\u{E4}.md")]
    // Basic (ASCII) lowercase a followed by "Combining Diaeresis" (U+0308)
    // The UTF-8 encoding is different but it renders the same visual appearance as the case above,
    // so we expect it to find the same file.
    #[case("Note\u{61}\u{308}.md", "Note\u{E4}.md")]
    #[case("Note\u{61}\u{308}", "Note\u{E4}.md")]
    // We should expect this to work with lowercasing as well, so the uppercase letter should find
    // the lowercase one.
    // Uppercase as a single code point, "Latin Capital Letter A with Diaeresis" (U+00C4)
    #[case("Note\u{C4}.md", "Note\u{E4}.md")]
    #[case("Note\u{C4}", "Note\u{E4}.md")]
    // Uppercase decomposed to A (U+0041) + "Combining Diaeresis" (U+0308)
    #[case("Note\u{41}\u{308}.md", "Note\u{E4}.md")]
    #[case("Note\u{41}\u{308}", "Note\u{E4}.md")]
    fn test_lookup_filename_in_vault(#[case] input: &str, #[case] expected: &str) {
        let result = lookup_filename_in_vault(input, &VAULT);
        println!("Test input: {input:?}");
        println!("Expecting: {expected:?}");
        println!("Got: {:?}", result.unwrap_or(&PathBuf::from("")));
        assert_eq!(result, Some(&PathBuf::from(expected)));
    }

    #[test]
    fn test_lookup_filename_in_vault_without_match() {
        // No file with this name exists in the vault.
        assert_eq!(lookup_filename_in_vault("DoesNotExist", &VAULT), None);
        // Matching works on whole path components, not on string suffixes.
        assert_eq!(lookup_filename_in_vault("A", &VAULT), None);
    }
}