obsidian_export/
lib.rs

1pub use {pulldown_cmark, serde_yaml};
2
3mod context;
4mod frontmatter;
5pub mod postprocessors;
6mod references;
7mod walker;
8
9use std::ffi::OsString;
10use std::fs::{self, File};
11use std::io::prelude::*;
12use std::io::ErrorKind;
13use std::path::{Path, PathBuf};
14use std::{fmt, str};
15
16pub use context::Context;
17use filetime::set_file_mtime;
18use frontmatter::{frontmatter_from_str, frontmatter_to_str};
19pub use frontmatter::{Frontmatter, FrontmatterStrategy};
20use pathdiff::diff_paths;
21use percent_encoding::{utf8_percent_encode, AsciiSet, CONTROLS};
22use pulldown_cmark::{CodeBlockKind, CowStr, Event, HeadingLevel, Options, Parser, Tag, TagEnd};
23use pulldown_cmark_to_cmark::cmark_with_options;
24use rayon::prelude::*;
25use references::{ObsidianNoteReference, RefParser, RefParserState, RefType};
26use slug::slugify;
27use snafu::{ResultExt, Snafu};
28use unicode_normalization::UnicodeNormalization;
29pub use walker::{vault_contents, WalkOptions};
30
/// A series of markdown [Event]s that are generated while traversing an Obsidian markdown note.
///
/// This is the representation that is handed to every [`Postprocessor`], which may modify it in
/// place before the note is rendered back to markdown text.
pub type MarkdownEvents<'a> = Vec<Event<'a>>;
33
/// A post-processing function that is to be called after an Obsidian note has been fully parsed and
/// converted to regular markdown syntax.
///
/// Postprocessors are called in the order they've been added through
/// [`Exporter::add_postprocessor`] just before notes are written out to their final destination.
/// They may be used to achieve the following:
///
/// 1. Modify a note's [Context], for example to change the destination filename or update its
///    [Frontmatter] (see [`Context::frontmatter`]).
/// 2. Change a note's contents by altering [`MarkdownEvents`].
/// 3. Prevent later postprocessors from running ([`PostprocessorResult::StopHere`]) or cause a note
///    to be skipped entirely ([`PostprocessorResult::StopAndSkipNote`]).
///
/// Postprocessors must be [`Send`] and [`Sync`]: [`Exporter::run`] exports notes in parallel, so
/// the same postprocessor may be invoked from several threads.
///
/// # Postprocessors and embeds
///
/// Postprocessors normally run at the end of the export phase, once notes have been fully parsed.
/// This means that any embedded notes have been resolved and merged into the final note already.
///
/// In some cases it may be desirable to change the contents of these embedded notes *before* they
/// are inserted into the final document. This is possible through the use of
/// [`Exporter::add_embed_postprocessor`].
/// These "embed postprocessors" run much the same way as regular postprocessors, but they're run on
/// the note that is about to be embedded in another note. In addition:
///
/// - Changes to context carry over to later embed postprocessors, but are then discarded. This
///   means that changes to frontmatter do not propagate to the root note for example.
/// - [`PostprocessorResult::StopAndSkipNote`] prevents the embedded note from being included (it's
///   replaced with a blank document) but doesn't affect the root note.
///
/// It's possible to pass the same functions to [`Exporter::add_postprocessor`] and
/// [`Exporter::add_embed_postprocessor`]. The [`Context::note_depth`] method may be used to
/// determine whether a note is a root note or an embedded note in this situation.
///
/// # Examples
///
/// ## Update frontmatter
///
/// This example shows how to make changes to a note's frontmatter. In this case, the
/// postprocessor is defined inline as a closure.
///
/// ```
/// use obsidian_export::serde_yaml::Value;
/// use obsidian_export::{Exporter, PostprocessorResult};
/// # use std::path::PathBuf;
/// # use tempfile::TempDir;
///
/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
/// # let destination = tmp_dir.path().to_path_buf();
/// let mut exporter = Exporter::new(source, destination);
///
/// // add_postprocessor registers a new postprocessor. In this example we use a closure.
/// exporter.add_postprocessor(&|context, _events| {
///     // This is the key we'll insert into the frontmatter. In this case, the string "foo".
///     let key = Value::String("foo".to_string());
///     // This is the value we'll insert into the frontmatter. In this case, the string "baz".
///     let value = Value::String("baz".to_string());
///
///     // Frontmatter can be updated in-place, so we can call insert on it directly.
///     context.frontmatter.insert(key, value);
///
///     // This return value indicates processing should continue.
///     PostprocessorResult::Continue
/// });
///
/// exporter.run().unwrap();
/// ```
///
/// ## Change note contents
///
/// In this example a note's markdown content is changed by iterating over the [`MarkdownEvents`]
/// and changing the text when we encounter a [text element][Event::Text].
///
/// Instead of using a closure like above, this example shows how to use a separate function
/// definition.
/// ```
/// # use obsidian_export::{Context, Exporter, MarkdownEvents, PostprocessorResult};
/// # use pulldown_cmark::{CowStr, Event};
/// # use std::path::PathBuf;
/// # use tempfile::TempDir;
/// #
/// /// This postprocessor replaces any instance of "foo" with "bar" in the note body.
/// fn foo_to_bar(context: &mut Context, events: &mut MarkdownEvents) -> PostprocessorResult {
///     for event in events.iter_mut() {
///         if let Event::Text(text) = event {
///             *event = Event::Text(CowStr::from(text.replace("foo", "bar")))
///         }
///     }
///     PostprocessorResult::Continue
/// }
///
/// # let tmp_dir = TempDir::new().expect("failed to make tempdir");
/// # let source = PathBuf::from("tests/testdata/input/postprocessors");
/// # let destination = tmp_dir.path().to_path_buf();
/// # let mut exporter = Exporter::new(source, destination);
/// exporter.add_postprocessor(&foo_to_bar);
/// # exporter.run().unwrap();
/// ```
pub type Postprocessor<'f> =
    dyn Fn(&mut Context, &mut MarkdownEvents<'_>) -> PostprocessorResult + Send + Sync + 'f;
/// Crate-internal `Result` alias whose error type defaults to [`ExportError`].
type Result<T, E = ExportError> = std::result::Result<T, E>;

/// Characters that are percent-encoded when building link destinations: everything in
/// [`CONTROLS`] plus space, `(`, `)`, `%` and `?`.
const PERCENTENCODE_CHARS: &AsciiSet = &CONTROLS.add(b' ').add(b'(').add(b')').add(b'%').add(b'?');
/// Parsing a note whose [`Context::note_depth`] exceeds this value is aborted with
/// [`ExportError::RecursionLimitExceeded`].
const NOTE_RECURSION_LIMIT: usize = 10;
138
#[non_exhaustive]
#[derive(Debug, Snafu)]
/// `ExportError` represents all errors which may be returned when using this crate.
pub enum ExportError {
    #[snafu(display("failed to read from '{}'", path.display()))]
    /// This occurs when a read IO operation fails.
    ReadError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("failed to write to '{}'", path.display()))]
    /// This occurs when a write IO operation fails.
    WriteError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("Encountered an error while trying to walk '{}'", path.display()))]
    /// This occurs when an error is encountered while trying to walk a directory.
    WalkDirError {
        path: PathBuf,
        source: ignore::Error,
    },

    #[snafu(display("Failed to read the mtime of '{}'", path.display()))]
    /// This occurs when a file's modified time cannot be read
    ModTimeReadError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("Failed to set the mtime of '{}'", path.display()))]
    /// This occurs when a file's modified time cannot be set
    ModTimeSetError {
        path: PathBuf,
        source: std::io::Error,
    },

    #[snafu(display("No such file or directory: {}", path.display()))]
    /// This occurs when an operation is requested on a file or directory which does not exist.
    PathDoesNotExist { path: PathBuf },

    #[snafu(display("Invalid character encoding encountered"))]
    /// This error may occur when invalid UTF8 is encountered.
    ///
    /// Currently, operations which assume UTF8 perform lossy encoding however.
    CharacterEncodingError { source: str::Utf8Error },

    #[snafu(display("Recursion limit exceeded"))]
    /// This error occurs when embedded notes are too deeply nested or cause an infinite loop.
    ///
    /// When this happens, `file_tree` contains a list of all the files which were processed
    /// leading up to this error.
    RecursionLimitExceeded { file_tree: Vec<PathBuf> },

    #[snafu(display("Failed to export '{}'", path.display()))]
    /// This occurs when a file fails to export successfully.
    ///
    /// The underlying error is boxed and available as the error's source.
    FileExportError {
        path: PathBuf,
        #[snafu(source(from(ExportError, Box::new)))]
        source: Box<ExportError>,
    },

    #[snafu(display("Failed to decode YAML frontmatter in '{}'", path.display()))]
    /// This occurs when the frontmatter (metadata block) of the note at `path` cannot be decoded
    /// as YAML.
    FrontMatterDecodeError {
        path: PathBuf,
        #[snafu(source(from(serde_yaml::Error, Box::new)))]
        source: Box<serde_yaml::Error>,
    },

    #[snafu(display("Failed to encode YAML frontmatter for '{}'", path.display()))]
    /// This occurs when the frontmatter of the note at `path` cannot be encoded as YAML while
    /// writing out the exported note.
    FrontMatterEncodeError {
        path: PathBuf,
        #[snafu(source(from(serde_yaml::Error, Box::new)))]
        source: Box<serde_yaml::Error>,
    },
}
217
/// Emitted by [Postprocessor]s to signal the next action to take.
///
/// The same values are used by regular postprocessors and by embed postprocessors; for the
/// latter, "note" refers to the note that is being embedded rather than the root note.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum PostprocessorResult {
    /// Continue with the next post-processor (if any).
    Continue,
    /// Use this note, but don't run any more post-processors after this one.
    StopHere,
    /// Skip this note (don't export it) and don't run any more post-processors.
    StopAndSkipNote,
}
229
#[derive(Clone)]
/// Exporter provides the main interface to this library.
///
/// Users are expected to create an Exporter using [`Exporter::new`], optionally followed by
/// customization using [`Exporter::frontmatter_strategy`] and [`Exporter::walk_options`].
///
/// After that, calling [`Exporter::run`] will start the export process.
pub struct Exporter<'a> {
    // Location notes are read from; may be a directory or a single file.
    root: PathBuf,
    // Location exported notes are written to.
    destination: PathBuf,
    // Only files under this path are exported. Defaults to `root`.
    start_at: PathBuf,
    // Decides whether frontmatter is written to exported notes.
    frontmatter_strategy: FrontmatterStrategy,
    // All files discovered under `root`. `None` until `run` has populated it.
    vault_contents: Option<Vec<PathBuf>>,
    // Options passed to `vault_contents` when discovering files under `root`.
    walk_options: WalkOptions<'a>,
    // When false, a note that already occurs in the current file tree is linked to instead of
    // being embedded again.
    process_embeds_recursively: bool,
    // When true, the source file's modified time is copied to the exported file.
    preserve_mtime: bool,
    // Postprocessors run on root notes, in insertion order.
    postprocessors: Vec<&'a Postprocessor<'a>>,
    // Postprocessors run on embedded notes, in insertion order.
    embed_postprocessors: Vec<&'a Postprocessor<'a>>,
}
249
250impl<'a> fmt::Debug for Exporter<'a> {
251    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
252        f.debug_struct("WalkOptions")
253            .field("root", &self.root)
254            .field("destination", &self.destination)
255            .field("frontmatter_strategy", &self.frontmatter_strategy)
256            .field("vault_contents", &self.vault_contents)
257            .field("walk_options", &self.walk_options)
258            .field(
259                "process_embeds_recursively",
260                &self.process_embeds_recursively,
261            )
262            .field("preserve_mtime", &self.preserve_mtime)
263            .field(
264                "postprocessors",
265                &format!("<{} postprocessors active>", self.postprocessors.len()),
266            )
267            .field(
268                "embed_postprocessors",
269                &format!(
270                    "<{} postprocessors active>",
271                    self.embed_postprocessors.len()
272                ),
273            )
274            .finish()
275    }
276}
277
278impl<'a> Exporter<'a> {
279    /// Create a new exporter which reads notes from `root` and exports these to
280    /// `destination`.
281    #[must_use]
282    pub fn new(root: PathBuf, destination: PathBuf) -> Self {
283        Self {
284            start_at: root.clone(),
285            root,
286            destination,
287            frontmatter_strategy: FrontmatterStrategy::Auto,
288            walk_options: WalkOptions::default(),
289            process_embeds_recursively: true,
290            preserve_mtime: false,
291            vault_contents: None,
292            postprocessors: vec![],
293            embed_postprocessors: vec![],
294        }
295    }
296
    /// Set a custom starting point for the export.
    ///
    /// Normally all notes under `root` (except for notes excluded by ignore rules) will be
    /// exported. When `start_at` is set, only notes under this path will be exported to the
    /// target destination.
    ///
    /// Files are still discovered by walking `root`; `start_at` only filters which of the
    /// discovered files are exported. Destination paths are computed relative to `start_at`.
    pub fn start_at(&mut self, start_at: PathBuf) -> &mut Self {
        self.start_at = start_at;
        self
    }
306
    /// Set the [`WalkOptions`] to be used for this exporter.
    ///
    /// These options are passed to [`vault_contents`] when [`Exporter::run`] discovers the files
    /// under `root`. Returns `self` to allow chaining of setters.
    pub fn walk_options(&mut self, options: WalkOptions<'a>) -> &mut Self {
        self.walk_options = options;
        self
    }
312
    /// Set the [`FrontmatterStrategy`] to be used for this exporter.
    ///
    /// The strategy decides whether frontmatter is written to exported notes: always, never, or
    /// (with [`FrontmatterStrategy::Auto`], the default) only when the frontmatter is not empty.
    /// Returns `self` to allow chaining of setters.
    pub fn frontmatter_strategy(&mut self, strategy: FrontmatterStrategy) -> &mut Self {
        self.frontmatter_strategy = strategy;
        self
    }
318
    /// Set the behavior when recursive embeds are encountered.
    ///
    /// When `recursive` is true (the default), embeds are always processed recursively. This may
    /// lead to infinite recursion when note A embeds B, but B also embeds A.
    /// (When this happens, [`ExportError::RecursionLimitExceeded`] will be returned by
    /// [`Exporter::run`]).
    ///
    /// When `recursive` is false, if a note is encountered for a second time while processing the
    /// original note, a link to the note is inserted instead of embedding it again.
    pub fn process_embeds_recursively(&mut self, recursive: bool) -> &mut Self {
        self.process_embeds_recursively = recursive;
        self
    }
332
    /// Set whether the modified time of exported files should be preserved.
    ///
    /// When `preserve` is true, the modified time of exported files will be set to the modified
    /// time of the source file. The default is false.
    pub fn preserve_mtime(&mut self, preserve: bool) -> &mut Self {
        self.preserve_mtime = preserve;
        self
    }
341
    /// Append a function to the chain of [postprocessors][Postprocessor] to run on exported
    /// Obsidian Markdown notes.
    ///
    /// Postprocessors run in the order in which they were added. The exporter only stores the
    /// reference, so `processor` must outlive the exporter.
    pub fn add_postprocessor(&mut self, processor: &'a Postprocessor<'_>) -> &mut Self {
        self.postprocessors.push(processor);
        self
    }
348
    /// Append a function to the chain of [postprocessors][Postprocessor] for embeds.
    ///
    /// These run on a note that is about to be embedded into another note, in the order in which
    /// they were added. The exporter only stores the reference, so `processor` must outlive the
    /// exporter.
    pub fn add_embed_postprocessor(&mut self, processor: &'a Postprocessor<'_>) -> &mut Self {
        self.embed_postprocessors.push(processor);
        self
    }
354
355    /// Export notes using the settings configured on this exporter.
356    pub fn run(&mut self) -> Result<()> {
357        if !self.root.exists() {
358            return Err(ExportError::PathDoesNotExist {
359                path: self.root.clone(),
360            });
361        }
362
363        self.vault_contents = Some(vault_contents(
364            self.root.as_path(),
365            self.walk_options.clone(),
366        )?);
367
368        // When a single file is specified, just need to export that specific file instead of
369        // iterating over all discovered files. This also allows us to accept destination as either
370        // a file or a directory name.
371        if self.root.is_file() || self.start_at.is_file() {
372            let source_filename = self
373                .start_at
374                .file_name()
375                .expect("File without a filename? How is that possible?")
376                .to_string_lossy();
377
378            let destination = match self.destination.is_dir() {
379                true => self.destination.join(String::from(source_filename)),
380                false => {
381                    let parent = self.destination.parent().unwrap_or(&self.destination);
382                    // Avoid recursively creating self.destination through the call to
383                    // export_note when the parent directory doesn't exist.
384                    if !parent.exists() {
385                        return Err(ExportError::PathDoesNotExist {
386                            path: parent.to_path_buf(),
387                        });
388                    }
389                    self.destination.clone()
390                }
391            };
392            return self.export_note(&self.start_at, &destination);
393        }
394
395        if !self.destination.exists() {
396            return Err(ExportError::PathDoesNotExist {
397                path: self.destination.clone(),
398            });
399        }
400        self.vault_contents
401            .as_ref()
402            .unwrap()
403            .clone()
404            .into_par_iter()
405            .filter(|file| file.starts_with(&self.start_at))
406            .try_for_each(|file| {
407                let relative_path = file
408                    .strip_prefix(self.start_at.clone())
409                    .expect("file should always be nested under root")
410                    .to_path_buf();
411                let destination = &self.destination.join(relative_path);
412                self.export_note(&file, destination)
413            })?;
414        Ok(())
415    }
416
417    fn export_note(&self, src: &Path, dest: &Path) -> Result<()> {
418        match is_markdown_file(src) {
419            true => self.parse_and_export_obsidian_note(src, dest),
420            false => copy_file(src, dest),
421        }
422        .context(FileExportSnafu { path: src })?;
423
424        if self.preserve_mtime {
425            copy_mtime(src, dest).context(FileExportSnafu { path: src })?;
426        }
427
428        Ok(())
429    }
430
431    fn parse_and_export_obsidian_note(&self, src: &Path, dest: &Path) -> Result<()> {
432        let mut context = Context::new(src.to_path_buf(), dest.to_path_buf());
433
434        let (frontmatter, mut markdown_events) = self.parse_obsidian_note(src, &context)?;
435        context.frontmatter = frontmatter;
436        for func in &self.postprocessors {
437            match func(&mut context, &mut markdown_events) {
438                PostprocessorResult::StopHere => break,
439                PostprocessorResult::StopAndSkipNote => return Ok(()),
440                PostprocessorResult::Continue => (),
441            }
442        }
443
444        let mut outfile = create_file(&context.destination)?;
445        let write_frontmatter = match self.frontmatter_strategy {
446            FrontmatterStrategy::Always => true,
447            FrontmatterStrategy::Never => false,
448            FrontmatterStrategy::Auto => !context.frontmatter.is_empty(),
449        };
450        if write_frontmatter {
451            let mut frontmatter_str = frontmatter_to_str(&context.frontmatter)
452                .context(FrontMatterEncodeSnafu { path: src })?;
453            frontmatter_str.push('\n');
454            outfile
455                .write_all(frontmatter_str.as_bytes())
456                .context(WriteSnafu {
457                    path: &context.destination,
458                })?;
459        }
460        outfile
461            .write_all(render_mdevents_to_mdtext(&markdown_events).as_bytes())
462            .context(WriteSnafu {
463                path: &context.destination,
464            })?;
465        Ok(())
466    }
467
    /// Parse the note at `path` into its frontmatter and a list of regular markdown events.
    ///
    /// The file is read in full and fed through the markdown parser. Text inside a metadata
    /// block is collected and decoded as frontmatter, while all other events are passed through
    /// a small state machine ([`RefParser`]) which recognizes Obsidian references:
    ///
    /// - `[[reference]]` is replaced with the events produced by `make_link_to_file`.
    /// - `![[reference]]` is replaced with the events produced by `embed_file`.
    ///
    /// Events that turn out not to be part of a reference are emitted unchanged.
    ///
    /// # Errors
    ///
    /// - [`ExportError::RecursionLimitExceeded`] when the note depth of `context` exceeds
    ///   `NOTE_RECURSION_LIMIT`.
    /// - [`ExportError::ReadError`] when the file cannot be read.
    /// - [`ExportError::FrontMatterDecodeError`] when the frontmatter cannot be decoded.
    /// - Any error returned while resolving an embedded file.
    ///
    /// # Panics
    ///
    /// Panics when an event other than text is encountered inside a metadata block, or when the
    /// reference parser ends up in an inconsistent state.
    #[allow(clippy::too_many_lines)]
    #[allow(clippy::panic_in_result_fn)]
    #[allow(clippy::shadow_unrelated)]
    fn parse_obsidian_note<'b>(
        &self,
        path: &Path,
        context: &Context,
    ) -> Result<(Frontmatter, MarkdownEvents<'b>)> {
        if context.note_depth() > NOTE_RECURSION_LIMIT {
            return Err(ExportError::RecursionLimitExceeded {
                file_tree: context.file_tree(),
            });
        }
        let content = fs::read_to_string(path).context(ReadSnafu { path })?;
        // Raw text of the metadata block, decoded into a Frontmatter at the very end.
        let mut frontmatter = String::new();

        let parser_options = Options::ENABLE_TABLES
            | Options::ENABLE_FOOTNOTES
            | Options::ENABLE_STRIKETHROUGH
            | Options::ENABLE_TASKLISTS
            | Options::ENABLE_MATH
            | Options::ENABLE_YAML_STYLE_METADATA_BLOCKS;

        let mut ref_parser = RefParser::new();
        // Events that are final and will be returned to the caller.
        let mut events = vec![];
        // Events that may be part of a reference. These are held back until it's clear whether
        // they form a complete reference (in which case they're replaced) or not (in which case
        // they're moved to `events` unchanged).
        // Most of the time, a reference triggers 5 events: [ or ![, [, <text>, ], ]
        let mut buffer = Vec::with_capacity(5);

        let mut parser = Parser::new_ext(&content, parser_options);
        'outer: while let Some(event) = parser.next() {
            // When encountering a metadata block (frontmatter), collect all events until getting
            // to the end of the block, at which point the nested loop will break out to the outer
            // loop again.
            if matches!(event, Event::Start(Tag::MetadataBlock(_kind))) {
                for event in parser.by_ref() {
                    match event {
                        Event::Text(cowstr) => frontmatter.push_str(&cowstr),
                        Event::End(TagEnd::MetadataBlock(_kind)) => {
                            continue 'outer;
                        },
                        _ => panic!(
                            "Encountered an unexpected event while processing frontmatter in {}. Please report this as a bug with a copy of the note contents and this text: \n\nEvent: {:?}\n",
                            path.display(),
                            event
                        ),
                    }
                }
            }
            // The previous event ended an attempt at parsing a reference. Whatever is still in
            // the buffer wasn't replaced, so it's emitted as-is before starting over.
            if ref_parser.state == RefParserState::Resetting {
                events.append(&mut buffer);
                buffer.clear();
                ref_parser.reset();
            }
            buffer.push(event.clone());
            match ref_parser.state {
                RefParserState::NoState => {
                    match event {
                        Event::Text(CowStr::Borrowed("![")) => {
                            ref_parser.ref_type = Some(RefType::Embed);
                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
                        }
                        Event::Text(CowStr::Borrowed("[")) => {
                            ref_parser.ref_type = Some(RefType::Link);
                            ref_parser.transition(RefParserState::ExpectSecondOpenBracket);
                        }
                        _ => {
                            // Not the start of a reference: emit the event right away.
                            events.push(event);
                            buffer.clear();
                        },
                    };
                }
                RefParserState::ExpectSecondOpenBracket => match event {
                    Event::Text(CowStr::Borrowed("[")) => {
                        ref_parser.transition(RefParserState::ExpectRefText);
                    }
                    _ => {
                        ref_parser.transition(RefParserState::Resetting);
                    }
                },
                RefParserState::ExpectRefText => match event {
                    // A closing bracket straight after the opening brackets means there's no
                    // reference text, so this isn't treated as a reference.
                    Event::Text(CowStr::Borrowed("]")) => {
                        ref_parser.transition(RefParserState::Resetting);
                    }
                    Event::Text(text) => {
                        ref_parser.ref_text.push_str(&text);
                        ref_parser.transition(RefParserState::ExpectRefTextOrCloseBracket);
                    }
                    _ => {
                        ref_parser.transition(RefParserState::Resetting);
                    }
                },
                RefParserState::ExpectRefTextOrCloseBracket => match event {
                    Event::Text(CowStr::Borrowed("]")) => {
                        ref_parser.transition(RefParserState::ExpectFinalCloseBracket);
                    }
                    // The reference text may be spread out over multiple text events.
                    Event::Text(text) => {
                        ref_parser.ref_text.push_str(&text);
                    }
                    _ => {
                        ref_parser.transition(RefParserState::Resetting);
                    }
                },
                RefParserState::ExpectFinalCloseBracket => match event {
                    // A complete reference was found: the buffered events are discarded and
                    // replaced with the generated link or embed.
                    Event::Text(CowStr::Borrowed("]")) => match ref_parser.ref_type {
                        Some(RefType::Link) => {
                            let mut elements = self.make_link_to_file(
                                ObsidianNoteReference::from_str(
                                    ref_parser.ref_text.clone().as_ref()
                                ),
                                context,
                            );
                            events.append(&mut elements);
                            buffer.clear();
                            ref_parser.transition(RefParserState::Resetting);
                        }
                        Some(RefType::Embed) => {
                            let mut elements = self.embed_file(
                                ref_parser.ref_text.clone().as_ref(),
                                context
                            )?;
                            events.append(&mut elements);
                            buffer.clear();
                            ref_parser.transition(RefParserState::Resetting);
                        }
                        None => panic!("In state ExpectFinalCloseBracket but ref_type is None"),
                    },
                    _ => {
                        ref_parser.transition(RefParserState::Resetting);
                    }
                },
                RefParserState::Resetting => panic!("Reached Resetting state, but it should have been handled prior to this match block"),
            }
        }
        // The note may end in the middle of what looked like a reference; make sure those events
        // aren't lost.
        if !buffer.is_empty() {
            events.append(&mut buffer);
        }

        Ok((
            frontmatter_from_str(&frontmatter).context(FrontMatterDecodeSnafu { path })?,
            events.into_iter().map(event_to_owned).collect(),
        ))
    }
610
611    // Generate markdown elements for a file that is embedded within another note.
612    //
613    // - If the file being embedded is a note, it's content is included at the point of embed.
614    // - If the file is an image, an image tag is generated.
615    // - For other types of file, a regular link is created instead.
616    fn embed_file<'b>(
617        &self,
618        link_text: &'a str,
619        context: &'a Context,
620    ) -> Result<MarkdownEvents<'b>> {
621        let note_ref = ObsidianNoteReference::from_str(link_text);
622
623        let path = match note_ref.file {
624            Some(file) => lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()),
625
626            // If we have None file it is either to a section or id within the same file and thus
627            // the current embed logic will fail, recurssing until it reaches it's limit.
628            // For now we just bail early.
629            None => return Ok(self.make_link_to_file(note_ref, context)),
630        };
631
632        if path.is_none() {
633            // TODO: Extract into configurable function.
634            eprintln!(
635                "Warning: Unable to find embedded note\n\tReference: '{}'\n\tSource: '{}'\n",
636                note_ref
637                    .file
638                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
639                context.current_file().display(),
640            );
641            return Ok(vec![]);
642        }
643
644        let path = path.unwrap();
645        let mut child_context = Context::from_parent(context, path);
646        let no_ext = OsString::new();
647
648        if !self.process_embeds_recursively && context.file_tree().contains(path) {
649            return Ok([
650                vec![Event::Text(CowStr::Borrowed("→ "))],
651                self.make_link_to_file(note_ref, &child_context),
652            ]
653            .concat());
654        }
655
656        let events = match path.extension().unwrap_or(&no_ext).to_str() {
657            Some("md") => {
658                let (frontmatter, mut events) = self.parse_obsidian_note(path, &child_context)?;
659                child_context.frontmatter = frontmatter;
660                if let Some(section) = note_ref.section {
661                    events = reduce_to_section(events, section);
662                }
663                for func in &self.embed_postprocessors {
664                    // Postprocessors running on embeds shouldn't be able to change frontmatter (or
665                    // any other metadata), so we give them a clone of the context.
666                    match func(&mut child_context, &mut events) {
667                        PostprocessorResult::StopHere => break,
668                        PostprocessorResult::StopAndSkipNote => {
669                            events = vec![];
670                        }
671                        PostprocessorResult::Continue => (),
672                    }
673                }
674                events
675            }
676            Some("png" | "jpg" | "jpeg" | "gif" | "webp" | "svg") => {
677                self.make_link_to_file(note_ref, &child_context)
678                    .into_iter()
679                    .map(|event| match event {
680                        // make_link_to_file returns a link to a file. With this we turn the link
681                        // into an image reference instead. Slightly hacky, but avoids needing
682                        // to keep another utility function around for this, or introducing an
683                        // extra parameter on make_link_to_file.
684                        Event::Start(Tag::Link {
685                            link_type,
686                            dest_url,
687                            title,
688                            id,
689                        }) => Event::Start(Tag::Image {
690                            link_type,
691                            dest_url: CowStr::from(dest_url.into_string()),
692                            title: CowStr::from(title.into_string()),
693                            id: CowStr::from(id.into_string()),
694                        }),
695                        Event::End(TagEnd::Link) => Event::End(TagEnd::Image),
696                        _ => event,
697                    })
698                    .collect()
699            }
700            _ => self.make_link_to_file(note_ref, &child_context),
701        };
702        Ok(events)
703    }
704
705    fn make_link_to_file<'c>(
706        &self,
707        reference: ObsidianNoteReference<'_>,
708        context: &Context,
709    ) -> MarkdownEvents<'c> {
710        let target_file = reference.file.map_or_else(
711            || Some(context.current_file()),
712            |file| lookup_filename_in_vault(file, self.vault_contents.as_ref().unwrap()),
713        );
714
715        if target_file.is_none() {
716            // TODO: Extract into configurable function.
717            eprintln!(
718                "Warning: Unable to find referenced note\n\tReference: '{}'\n\tSource: '{}'\n",
719                reference
720                    .file
721                    .unwrap_or_else(|| context.current_file().to_str().unwrap()),
722                context.current_file().display(),
723            );
724            return vec![
725                Event::Start(Tag::Emphasis),
726                Event::Text(CowStr::from(reference.display())),
727                Event::End(TagEnd::Emphasis),
728            ];
729        }
730        let target_file = target_file.unwrap();
731        // We use root_file() rather than current_file() here to make sure links are always
732        // relative to the outer-most note, which is the note which this content is inserted into
733        // in case of embedded notes.
734        let rel_link = diff_paths(
735            target_file,
736            context
737                .root_file()
738                .parent()
739                .expect("obsidian content files should always have a parent"),
740        )
741        .expect("should be able to build relative path when target file is found in vault");
742
743        let rel_link = rel_link.to_string_lossy();
744        let mut link = utf8_percent_encode(&rel_link, PERCENTENCODE_CHARS).to_string();
745
746        if let Some(section) = reference.section {
747            link.push('#');
748            link.push_str(&slugify(section));
749        }
750
751        let link_tag = Tag::Link {
752            link_type: pulldown_cmark::LinkType::Inline,
753            dest_url: CowStr::from(link),
754            title: CowStr::from(""),
755            id: CowStr::from(""),
756        };
757
758        vec![
759            Event::Start(link_tag),
760            Event::Text(CowStr::from(reference.display())),
761            Event::End(TagEnd::Link),
762        ]
763    }
764}
765
766/// Get the full path for the given filename when it's contained in `vault_contents`, taking into
767/// account:
768///
769/// 1. Standard Obsidian note references not including a .md extension.
770/// 2. Case-insensitive matching
771/// 3. Unicode normalization rules using normalization form C (<https://www.w3.org/TR/charmod-norm/#unicodeNormalization>)
772fn lookup_filename_in_vault<'a>(
773    filename: &str,
774    vault_contents: &'a [PathBuf],
775) -> Option<&'a PathBuf> {
776    let filename = PathBuf::from(filename);
777    let filename_normalized = filename.to_string_lossy().nfc().collect::<String>();
778
779    vault_contents.iter().find(|path| {
780        let path_normalized_str = path.to_string_lossy().nfc().collect::<String>();
781        let path_normalized = PathBuf::from(&path_normalized_str);
782        let path_normalized_lowered = PathBuf::from(&path_normalized_str.to_lowercase());
783
784        // It would be convenient if we could just do `filename.set_extension("md")` at the start
785        // of this funtion so we don't need multiple separate + ".md" match cases here, however
786        // that would break with a reference of `[[Note.1]]` linking to `[[Note.1.md]]`.
787
788        path_normalized.ends_with(&filename_normalized)
789            || path_normalized.ends_with(filename_normalized.clone() + ".md")
790            || path_normalized_lowered.ends_with(filename_normalized.to_lowercase())
791            || path_normalized_lowered.ends_with(filename_normalized.to_lowercase() + ".md")
792    })
793}
794
795fn render_mdevents_to_mdtext(markdown: &MarkdownEvents<'_>) -> String {
796    let mut buffer = String::new();
797    cmark_with_options(
798        markdown.iter(),
799        &mut buffer,
800        pulldown_cmark_to_cmark::Options::default(),
801    )
802    .expect("formatting to string not expected to fail");
803    buffer.push('\n');
804    buffer
805}
806
807fn create_file(dest: &Path) -> Result<File> {
808    let file = File::create(dest)
809        .or_else(|err| {
810            if err.kind() == ErrorKind::NotFound {
811                let parent = dest.parent().expect("file should have a parent directory");
812                fs::create_dir_all(parent)?;
813            }
814            File::create(dest)
815        })
816        .context(WriteSnafu { path: dest })?;
817    Ok(file)
818}
819
820fn copy_mtime(src: &Path, dest: &Path) -> Result<()> {
821    let metadata = fs::metadata(src).context(ModTimeReadSnafu { path: src })?;
822    let modified_time = metadata
823        .modified()
824        .context(ModTimeReadSnafu { path: src })?;
825
826    set_file_mtime(dest, modified_time.into()).context(ModTimeSetSnafu { path: dest })?;
827    Ok(())
828}
829
830fn copy_file(src: &Path, dest: &Path) -> Result<()> {
831    fs::copy(src, dest)
832        .or_else(|err| {
833            if err.kind() == ErrorKind::NotFound {
834                let parent = dest.parent().expect("file should have a parent directory");
835                fs::create_dir_all(parent)?;
836            }
837            fs::copy(src, dest)
838        })
839        .context(WriteSnafu { path: dest })?;
840    Ok(())
841}
842
/// Report whether `file` has the extension `md` (exact, case-sensitive comparison).
///
/// Paths without an extension are not considered markdown files.
fn is_markdown_file(file: &Path) -> bool {
    file.extension().is_some_and(|ext| ext == "md")
}
848
849/// Reduce a given `MarkdownEvents` to just those elements which are children of the given section
850/// (heading name).
851fn reduce_to_section<'a>(events: MarkdownEvents<'a>, section: &str) -> MarkdownEvents<'a> {
852    let mut filtered_events = Vec::with_capacity(events.len());
853    let mut target_section_encountered = false;
854    let mut currently_in_target_section = false;
855    let mut section_level = HeadingLevel::H1;
856    let mut last_level = HeadingLevel::H1;
857    let mut last_tag_was_heading = false;
858
859    for event in events {
860        filtered_events.push(event.clone());
861        match event {
862            Event::Start(Tag::Heading { level, .. }) => {
863                last_tag_was_heading = true;
864                last_level = level;
865                if currently_in_target_section && level <= section_level {
866                    currently_in_target_section = false;
867                    filtered_events.pop();
868                }
869            }
870            Event::Text(cowstr) => {
871                if !last_tag_was_heading {
872                    last_tag_was_heading = false;
873                    continue;
874                }
875                last_tag_was_heading = false;
876
877                if cowstr.to_string().to_lowercase() == section.to_lowercase() {
878                    target_section_encountered = true;
879                    currently_in_target_section = true;
880                    section_level = last_level;
881
882                    let current_event = filtered_events.pop().unwrap();
883                    let heading_start_event = filtered_events.pop().unwrap();
884                    filtered_events.clear();
885                    filtered_events.push(heading_start_event);
886                    filtered_events.push(current_event);
887                }
888            }
889            _ => {}
890        }
891        if target_section_encountered && !currently_in_target_section {
892            return filtered_events;
893        }
894    }
895    filtered_events
896}
897
898fn event_to_owned<'a>(event: Event<'_>) -> Event<'a> {
899    match event {
900        Event::Start(tag) => Event::Start(tag_to_owned(tag)),
901        Event::End(tag) => Event::End(tag),
902        Event::Text(cowstr) => Event::Text(CowStr::from(cowstr.into_string())),
903        Event::Code(cowstr) => Event::Code(CowStr::from(cowstr.into_string())),
904        Event::Html(cowstr) => Event::Html(CowStr::from(cowstr.into_string())),
905        Event::InlineHtml(cowstr) => Event::InlineHtml(CowStr::from(cowstr.into_string())),
906        Event::FootnoteReference(cowstr) => {
907            Event::FootnoteReference(CowStr::from(cowstr.into_string()))
908        }
909        Event::SoftBreak => Event::SoftBreak,
910        Event::HardBreak => Event::HardBreak,
911        Event::Rule => Event::Rule,
912        Event::TaskListMarker(checked) => Event::TaskListMarker(checked),
913        Event::InlineMath(cowstr) => Event::InlineMath(CowStr::from(cowstr.into_string())),
914        Event::DisplayMath(cowstr) => Event::DisplayMath(CowStr::from(cowstr.into_string())),
915    }
916}
917
918fn tag_to_owned<'a>(tag: Tag<'_>) -> Tag<'a> {
919    match tag {
920        Tag::Paragraph => Tag::Paragraph,
921        Tag::Heading {
922            level: heading_level,
923            id,
924            classes,
925            attrs,
926        } => Tag::Heading {
927            level: heading_level,
928            id: id.map(|cowstr| CowStr::from(cowstr.into_string())),
929            classes: classes
930                .into_iter()
931                .map(|cowstr| CowStr::from(cowstr.into_string()))
932                .collect(),
933            attrs: attrs
934                .into_iter()
935                .map(|(attr, value)| {
936                    (
937                        CowStr::from(attr.into_string()),
938                        value.map(|cowstr| CowStr::from(cowstr.into_string())),
939                    )
940                })
941                .collect(),
942        },
943        Tag::BlockQuote(blockquote_kind) => Tag::BlockQuote(blockquote_kind),
944        Tag::CodeBlock(codeblock_kind) => Tag::CodeBlock(codeblock_kind_to_owned(codeblock_kind)),
945        Tag::List(optional) => Tag::List(optional),
946        Tag::Item => Tag::Item,
947        Tag::FootnoteDefinition(cowstr) => {
948            Tag::FootnoteDefinition(CowStr::from(cowstr.into_string()))
949        }
950        Tag::Table(alignment_vector) => Tag::Table(alignment_vector),
951        Tag::TableHead => Tag::TableHead,
952        Tag::TableRow => Tag::TableRow,
953        Tag::TableCell => Tag::TableCell,
954        Tag::Emphasis => Tag::Emphasis,
955        Tag::Strong => Tag::Strong,
956        Tag::Strikethrough => Tag::Strikethrough,
957        Tag::Link {
958            link_type,
959            dest_url,
960            title,
961            id,
962        } => Tag::Link {
963            link_type,
964            dest_url: CowStr::from(dest_url.into_string()),
965            title: CowStr::from(title.into_string()),
966            id: CowStr::from(id.into_string()),
967        },
968        Tag::Image {
969            link_type,
970            dest_url,
971            title,
972            id,
973        } => Tag::Image {
974            link_type,
975            dest_url: CowStr::from(dest_url.into_string()),
976            title: CowStr::from(title.into_string()),
977            id: CowStr::from(id.into_string()),
978        },
979        Tag::HtmlBlock => Tag::HtmlBlock,
980        Tag::MetadataBlock(metadata_block_kind) => Tag::MetadataBlock(metadata_block_kind),
981        Tag::DefinitionList => Tag::DefinitionList,
982        Tag::DefinitionListTitle => Tag::DefinitionListTitle,
983        Tag::DefinitionListDefinition => Tag::DefinitionListDefinition,
984    }
985}
986
987fn codeblock_kind_to_owned<'a>(codeblock_kind: CodeBlockKind<'_>) -> CodeBlockKind<'a> {
988    match codeblock_kind {
989        CodeBlockKind::Indented => CodeBlockKind::Indented,
990        CodeBlockKind::Fenced(cowstr) => CodeBlockKind::Fenced(CowStr::from(cowstr.into_string())),
991    }
992}
993
#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use pretty_assertions::assert_eq;
    use rstest::rstest;

    use super::*;

    /// Vault contents shared by the lookup test cases below.
    static VAULT: LazyLock<Vec<PathBuf>> = LazyLock::new(|| {
        [
            "NoteA.md",
            "Document.pdf",
            "Note.1.md",
            "nested/NoteA.md",
            "Note\u{E4}.md", // Noteä.md, see also encodings() below
        ]
        .into_iter()
        .map(PathBuf::from)
        .collect()
    });

    /// Sanity check of the two Unicode spellings of "ä" that the lookup cases rely on.
    #[test]
    #[allow(clippy::unicode_not_nfc)]
    fn encodings() {
        // Precomposed form: "Latin Small Letter A with Diaeresis" (U+00E4),
        // which is two bytes in UTF-8: 0xC3 0xA4
        let precomposed = String::from_utf8(vec![0xC3, 0xA4]).unwrap();
        assert_eq!(precomposed, "ä");
        assert_eq!("\u{E4}", "ä");

        // Decomposed form: plain ASCII "a" followed by the combining diaeresis (U+0308).
        // It looks the same when rendered, but is three bytes in UTF-8: 0x61 0xCC 0x88
        let decomposed = String::from_utf8(vec![0x61, 0xCC, 0x88]).unwrap();
        assert_eq!(decomposed, "ä");
        assert_eq!("a\u{308}", "ä");
        assert_eq!("\u{61}\u{308}", "ä");

        // More examples and a fuller explanation of this concept can be found at
        // https://www.w3.org/TR/charmod-norm/#aringExample
    }

    #[rstest]
    // Exact match
    #[case("NoteA.md", "NoteA.md")]
    #[case("NoteA", "NoteA.md")]
    // Same note in subdir, exact match should find it
    #[case("nested/NoteA.md", "nested/NoteA.md")]
    #[case("nested/NoteA", "nested/NoteA.md")]
    // Different extensions
    #[case("Document.pdf", "Document.pdf")]
    #[case("Note.1", "Note.1.md")]
    #[case("Note.1.md", "Note.1.md")]
    // Case-insensitive matches
    #[case("notea.md", "NoteA.md")]
    #[case("notea", "NoteA.md")]
    #[case("NESTED/notea.md", "nested/NoteA.md")]
    #[case("NESTED/notea", "nested/NoteA.md")]
    // Precomposed "Latin Small Letter A with Diaeresis" (U+00E4)
    #[case("Note\u{E4}.md", "Note\u{E4}.md")]
    #[case("Note\u{E4}", "Note\u{E4}.md")]
    // Decomposed: ASCII "a" followed by the combining diaeresis (U+0308).
    // The bytes differ from the case above but the rendered text is the same, so the same file
    // is expected to be found.
    #[case("Note\u{61}\u{308}.md", "Note\u{E4}.md")]
    #[case("Note\u{61}\u{308}", "Note\u{E4}.md")]
    // Lowercasing has to work here as well, so NoteÄ should find Noteä.
    // Precomposed uppercase Ä (U+00C4)
    #[case("Note\u{C4}.md", "Note\u{E4}.md")]
    #[case("Note\u{C4}", "Note\u{E4}.md")]
    // Decomposed uppercase Ä: A (U+0041) followed by the combining diaeresis (U+0308)
    #[case("Note\u{41}\u{308}.md", "Note\u{E4}.md")]
    #[case("Note\u{41}\u{308}", "Note\u{E4}.md")]
    fn test_lookup_filename_in_vault(#[case] input: &str, #[case] expected: &str) {
        let found = lookup_filename_in_vault(input, &VAULT);
        let placeholder = PathBuf::from("");
        println!("Test input: {input:?}");
        println!("Expecting: {expected:?}");
        println!("Got: {:?}", found.unwrap_or(&placeholder));
        assert_eq!(found, Some(&PathBuf::from(expected)));
    }
}