obsidian_parser/obfile/
mod.rs

1//! Represents an Obsidian note file with frontmatter properties and content
2
3pub mod obfile_in_memory;
4pub mod obfile_on_disk;
5
6use crate::error::Error;
7use serde::de::DeserializeOwned;
8use std::{borrow::Cow, collections::HashMap, path::Path};
9
/// Frontmatter properties type used when [`ObFile`] is not given an explicit one:
/// a map from property name to its YAML value.
pub(crate) type DefaultProperties = HashMap<String, serde_yml::Value>;
11
12/// Represents an Obsidian note file with frontmatter properties and content
13///
14/// This trait provides a standardized interface for working with Obsidian markdown files,
15/// handling frontmatter parsing, content extraction, and file operations.
16///
17/// # Type Parameters
18/// - `T`: Frontmatter properties type
19///
20/// # Example
21/// ```no_run
22/// use obsidian_parser::prelude::*;
23/// use serde::Deserialize;
24///
25/// #[derive(Deserialize, Clone)]
26/// struct NoteProperties {
27///     topic: String,
28///     created: String,
29/// }
30///
31/// let note: ObFileInMemory<NoteProperties> = ObFile::from_file("note.md").unwrap();
32/// let properties = note.properties().unwrap().unwrap();
33/// println!("Note topic: {}", properties.topic);
34/// ```
35pub trait ObFile<T = DefaultProperties>: Sized
36where
37    T: DeserializeOwned + Clone,
38{
39    /// Returns the main content body of the note (excluding frontmatter)
40    ///
41    /// # Implementation Notes
42    /// - Strips YAML frontmatter if present
43    /// - Preserves original formatting and whitespace
44    ///
45    /// # Errors
46    /// Usually errors are related to [`Error::Io`]
47    fn content(&self) -> Result<Cow<'_, str>, Error>;
48
49    /// Returns the source file path if available
50    ///
51    /// Returns [`None`] for in-memory notes without physical storage
52    fn path(&self) -> Option<Cow<'_, Path>>;
53
54    /// Returns the parsed properties of frontmatter
55    ///
56    /// Returns [`None`] if the note has no properties
57    ///
58    /// # Errors
59    /// Usually errors are related to [`Error::Io`]
60    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error>;
61
62    /// Get note name
63    fn note_name(&self) -> Option<String> {
64        self.path().as_ref().map(|path| {
65            path.file_stem()
66                .expect("Path is not file")
67                .to_string_lossy()
68                .to_string()
69        })
70    }
71
72    /// Parses an Obsidian note from a string
73    ///
74    /// # Arguments
75    /// - `raw_text`: Raw markdown content with optional YAML frontmatter
76    /// - `path`: Optional source path for reference
77    ///
78    /// # Errors
79    /// - [`Error::InvalidFormat`] for malformed frontmatter
80    /// - [`Error::Yaml`] for invalid YAML syntax
81    fn from_string<P: AsRef<Path>>(raw_text: &str, path: Option<P>) -> Result<Self, Error>;
82
83    /// Parses an Obsidian note from a file
84    ///
85    /// # Arguments
86    /// - `path`: Filesystem path to markdown file
87    ///
88    /// # Errors
89    /// - [`Error::Io`] for filesystem errors
90    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
91        let path_buf = path.as_ref().to_path_buf();
92
93        #[cfg(feature = "logging")]
94        log::trace!("Parse obsidian file from file: {}", path_buf.display());
95
96        let data = std::fs::read(path)?;
97
98        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
99        let text = unsafe { String::from_utf8_unchecked(data) };
100
101        Self::from_string(&text, Some(path_buf))
102    }
103}
104
105/// Default implementation using [`HashMap`] for properties
106///
107/// Automatically implemented for all `ObFile<HashMap<..>>` types.
108/// Provides identical interface with explicitly named methods.
109pub trait ObFileDefault: ObFile<DefaultProperties> {
110    /// Same as [`ObFile::from_string`] with default properties type
111    ///
112    /// # Errors
113    /// - [`Error::InvalidFormat`] for malformed frontmatter
114    /// - [`Error::Yaml`] for invalid YAML syntax
115    fn from_string_default<P: AsRef<Path>>(text: &str, path: Option<P>) -> Result<Self, Error>;
116
117    /// Same as [`ObFile::from_file`] with default properties type
118    ///
119    /// # Errors
120    /// - [`Error::Io`] for filesystem errors
121    fn from_file_default<P: AsRef<Path>>(path: P) -> Result<Self, Error>;
122}
123
/// Extracts the target note name of every Obsidian-style link in `text`
///
/// Recognised link formats:
/// - `[[Note]]`
/// - `[[Note|Alias]]`
/// - `[[Note^block]]`
/// - `[[Note#heading]]`
/// - `[[Note#heading|Alias]]`
///
/// For each `[[`, the link body runs up to the next `]]`; the name is the part of that
/// body before the first `#`, `^` or `|`, with surrounding whitespace trimmed. A `[[`
/// without a later `]]` yields nothing, and a body with no name part (such as `[[]]`)
/// yields an empty string.
///
/// # Example
/// ```
/// # use obsidian_parser::obfile::parse_links;
/// let content = "[[Physics]] and [[Math|Mathematics]]";
/// let links: Vec<_> = parse_links(content).collect();
/// assert_eq!(links, vec!["Physics", "Math"]);
/// ```
pub fn parse_links(text: &str) -> impl Iterator<Item = &str> {
    const OPEN: &str = "[[";
    const CLOSE: &str = "]]";

    text.match_indices(OPEN).filter_map(move |(open_at, _)| {
        // Everything between this opener and the nearest closer is the link body.
        let after_open = &text[open_at + OPEN.len()..];
        let (link_body, _) = after_open.split_once(CLOSE)?;

        // The note name stops at the first heading, block or alias separator.
        let name_len = link_body
            .find(|c: char| matches!(c, '#' | '^' | '|'))
            .unwrap_or(link_body.len());

        Some(link_body[..name_len].trim())
    })
}
157
158impl<T> ObFileDefault for T
159where
160    T: ObFile<DefaultProperties>,
161{
162    fn from_string_default<P: AsRef<Path>>(text: &str, path: Option<P>) -> Result<Self, Error> {
163        Self::from_string(text, path)
164    }
165
166    fn from_file_default<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
167        Self::from_file(path)
168    }
169}
170
171#[derive(Debug, PartialEq)]
172enum ResultParse<'a> {
173    WithProperties {
174        content: &'a str,
175        properties: &'a str,
176    },
177    WithoutProperties,
178}
179
180fn parse_obfile(raw_text: &str) -> Result<ResultParse<'_>, Error> {
181    let have_start_properties = raw_text
182        .lines()
183        .next()
184        .is_some_and(|line| line.trim_end() == "---");
185
186    if have_start_properties {
187        let closed = raw_text["---".len()..]
188            .find("---")
189            .ok_or(Error::InvalidFormat)?;
190
191        return Ok(ResultParse::WithProperties {
192            content: raw_text[(closed + 2 * "...".len())..].trim(),
193            properties: raw_text["...".len()..(closed + "...".len())].trim(),
194        });
195    }
196
197    Ok(ResultParse::WithoutProperties)
198}
199
#[cfg(test)]
mod tests {
    //! Unit tests for the frontmatter splitter and the link parser.

    use super::{ResultParse, parse_obfile};
    use crate::error::Error;
    use crate::test_utils::init_test_logger;

    /// Plain text has no frontmatter.
    #[test]
    fn parse_obfile_without_properties() {
        init_test_logger();
        let test_data = "test_data";
        let result = parse_obfile(test_data).unwrap();

        assert_eq!(result, ResultParse::WithoutProperties);
    }

    /// A `---` delimited block at the top is split from the body.
    #[test]
    fn parse_obfile_with_properties() {
        init_test_logger();
        let test_data = "---\nproperties data\n---\ntest data";
        let result = parse_obfile(test_data).unwrap();

        assert_eq!(
            result,
            ResultParse::WithProperties {
                content: "test data",
                properties: "properties data"
            }
        );
    }

    /// A `---` that is not on the first line never opens frontmatter.
    #[test]
    fn parse_obfile_without_properties_but_with_closed() {
        init_test_logger();
        let test_data1 = "test_data---";
        let test_data2 = "test_data\n---\n";

        let result1 = parse_obfile(test_data1).unwrap();
        let result2 = parse_obfile(test_data2).unwrap();

        assert_eq!(result1, ResultParse::WithoutProperties);
        assert_eq!(result2, ResultParse::WithoutProperties);
    }

    /// Unclosed frontmatter must be reported as `InvalidFormat`, not as any other failure.
    #[test]
    fn parse_obfile_with_properties_but_without_closed() {
        init_test_logger();
        let test_data = "---\nproperties data\ntest data";

        assert!(matches!(
            parse_obfile(test_data),
            Err(Error::InvalidFormat)
        ));
    }

    /// Text following the dashes on the first line means it is not a delimiter.
    #[test]
    fn parse_obfile_with_text_after_opening_dashes() {
        init_test_logger();
        let test_data = "---properties data";

        let result = parse_obfile(test_data).unwrap();
        assert_eq!(result, ResultParse::WithoutProperties);
    }

    /// Leading whitespace before the dashes means it is not a delimiter.
    #[test]
    fn parse_obfile_without_properties_but_with_spaces() {
        init_test_logger();
        let test_data = "   ---\ndata";

        let result = parse_obfile(test_data).unwrap();
        assert_eq!(result, ResultParse::WithoutProperties);
    }

    /// Carriage returns and trailing spaces after the delimiters are ignored.
    #[test]
    fn parse_obfile_with_properties_but_check_trim_end() {
        init_test_logger();
        let test_data = "---\r\nproperties data\r\n---\r   \ntest data";
        let result = parse_obfile(test_data).unwrap();

        assert_eq!(
            result,
            ResultParse::WithProperties {
                content: "test data",
                properties: "properties data"
            }
        );
    }

    /// Every supported link form resolves to the bare note name.
    #[test]
    fn test_parse_links() {
        init_test_logger();
        let test_data =
            "[[Note]] [[Note|Alias]] [[Note^block]] [[Note#Heading|Alias]] [[Note^block|Alias]]";

        let ds: Vec<_> = super::parse_links(test_data).collect();

        // Comparing the whole list also checks the count, so an empty result fails.
        assert_eq!(ds, vec!["Note"; 5]);
    }
}
294
#[cfg(test)]
pub(crate) mod impl_tests {
    //! Reusable test bodies, generic over the [`ObFile`] implementation under test.
    //!
    //! Each function checks one behaviour; the `impl_all_tests_from_string!` and
    //! `impl_all_tests_from_file!` macros turn them into `#[test]` functions for a
    //! concrete implementation.

    use super::*;
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Note with two properties and a body that itself contains a `---` line.
    pub(crate) static TEST_DATA: &str = "---\n\
topic: life\n\
created: 2025-03-16\n\
---\n\
Test data\n\
---\n\
Two test data";

    /// Well-delimited frontmatter whose body is not a valid properties mapping.
    const BROKEN_YAML: &str = "---\nasdfv:--fs\nsfsf\n---\nTestData";

    /// Frontmatter that is opened but never closed.
    const UNCLOSED_FRONTMATTER: &str = "---\n";

    /// Note with a non-ASCII property value and body.
    const UNICODE_NOTE: &str = "---\ndata: 💩\n---\nSuper data 💩💩💩";

    /// The leading spaces stop the first line from being a frontmatter delimiter.
    const SPACE_BEFORE_DELIMITER: &str = "  ---\ntest: test-data\n---\n";

    /// Creates a named temporary file containing `contents`.
    fn temp_note(contents: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(contents.as_bytes()).unwrap();
        file
    }

    /// Properties and content are both extracted from a string.
    pub(crate) fn from_string<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let file = T::from_string(TEST_DATA, None::<&str>)?;
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(properties["topic"], "life");
        assert_eq!(properties["created"], "2025-03-16");
        assert_eq!(file.content().unwrap(), "Test data\n---\nTwo test data");
        Ok(())
    }

    /// The note name is the file stem of the supplied path, or `None` without a path.
    pub(crate) fn from_string_note_name<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let file1 = T::from_string(TEST_DATA, None::<&str>)?;
        let file2 = T::from_string(TEST_DATA, Some("Super node.md"))?;

        assert_eq!(file1.note_name(), None);
        assert_eq!(file2.note_name(), Some("Super node".to_string()));
        Ok(())
    }

    /// Text without frontmatter has no properties and is returned whole as content.
    pub(crate) fn from_string_without_properties<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let test_data = "TEST_DATA";
        let file = T::from_string(test_data, None::<&str>)?;

        assert_eq!(file.properties().unwrap(), None);
        assert_eq!(file.content().unwrap(), test_data);
        Ok(())
    }

    /// Unparseable frontmatter is reported as `Error::Yaml`.
    pub(crate) fn from_string_with_invalid_yaml<T: ObFile>() -> Result<(), Error> {
        init_test_logger();

        assert!(matches!(
            T::from_string(BROKEN_YAML, None::<&str>),
            Err(Error::Yaml(_))
        ));
        Ok(())
    }

    /// Unclosed frontmatter is reported as `Error::InvalidFormat`.
    pub(crate) fn from_string_invalid_format<T: ObFile>() -> Result<(), Error> {
        init_test_logger();

        assert!(matches!(
            T::from_string(UNCLOSED_FRONTMATTER, None::<&str>),
            Err(Error::InvalidFormat)
        ));
        Ok(())
    }

    /// Multi-byte characters survive in both properties and content.
    pub(crate) fn from_string_with_unicode<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let file = T::from_string(UNICODE_NOTE, None::<&str>)?;
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(properties["data"], "💩");
        assert_eq!(file.content().unwrap(), "Super data 💩💩💩");
        Ok(())
    }

    /// An indented `---` first line is content, not a frontmatter delimiter.
    pub(crate) fn from_string_space_with_properties<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let data = SPACE_BEFORE_DELIMITER;
        let file = T::from_string(data, None::<&str>)?;
        let properties = file.properties().unwrap();

        assert_eq!(file.content().unwrap(), data);
        assert_eq!(properties, None);
        Ok(())
    }

    /// A file without frontmatter keeps its content and remembers its path.
    pub(crate) fn from_file<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let temp_file = temp_note("TEST_DATA");

        let file = T::from_file(temp_file.path()).unwrap();
        assert_eq!(file.content().unwrap(), "TEST_DATA");
        assert_eq!(file.path().unwrap(), temp_file.path());
        assert_eq!(file.properties().unwrap(), None);
        Ok(())
    }

    /// The note name of a file-backed note is the file stem of its path.
    pub(crate) fn from_file_note_name<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let temp_file = temp_note("TEST_DATA");

        let name_temp_file = temp_file
            .path()
            .file_stem()
            .unwrap()
            .to_string_lossy()
            .to_string();

        let file = T::from_file(temp_file.path()).unwrap();

        assert_eq!(file.note_name(), Some(name_temp_file));
        Ok(())
    }

    /// A file without frontmatter has no properties and is returned whole as content.
    pub(crate) fn from_file_without_properties<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let test_data = "TEST_DATA";
        let test_file = temp_note(test_data);

        let file = T::from_file(test_file.path())?;

        assert_eq!(file.properties().unwrap(), None);
        assert_eq!(file.content().unwrap(), test_data);
        Ok(())
    }

    /// Unparseable frontmatter in a file is reported as `Error::Yaml`.
    pub(crate) fn from_file_with_invalid_yaml<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let test_file = temp_note(BROKEN_YAML);

        assert!(matches!(
            T::from_file(test_file.path()),
            Err(Error::Yaml(_))
        ));
        Ok(())
    }

    /// Unclosed frontmatter in a file is reported as `Error::InvalidFormat`.
    pub(crate) fn from_file_invalid_format<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let test_file = temp_note(UNCLOSED_FRONTMATTER);

        assert!(matches!(
            T::from_file(test_file.path()),
            Err(Error::InvalidFormat)
        ));
        Ok(())
    }

    /// Multi-byte characters survive a round trip through a file.
    pub(crate) fn from_file_with_unicode<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let test_file = temp_note(UNICODE_NOTE);

        let file = T::from_file(test_file.path())?;
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(properties["data"], "💩");
        assert_eq!(file.content().unwrap(), "Super data 💩💩💩");
        Ok(())
    }

    /// An indented `---` first line in a file is content, not a frontmatter delimiter.
    pub(crate) fn from_file_space_with_properties<T: ObFile>() -> Result<(), Error> {
        init_test_logger();
        let data = SPACE_BEFORE_DELIMITER;
        let test_file = temp_note(data);

        // Read through the file so this checks the `from_file` path rather than
        // repeating the `from_string` variant.
        let file = T::from_file(test_file.path())?;

        assert_eq!(file.content().unwrap(), data);
        assert_eq!(file.properties().unwrap(), None);
        Ok(())
    }

    /// Generates one `#[test]` that runs a generic test body against an implementation.
    macro_rules! impl_test_for_obfile {
        ($name_test:ident, $fn_test:ident, $impl_obfile:path) => {
            #[test]
            fn $name_test() {
                $fn_test::<$impl_obfile>().unwrap();
            }
        };
    }

    pub(crate) use impl_test_for_obfile;

    /// Generates every string-based test for the given implementation.
    macro_rules! impl_all_tests_from_string {
        ($impl_obfile:path) => {
            #[allow(unused_imports)]
            use crate::obfile::impl_tests::*;

            impl_test_for_obfile!(impl_from_string, from_string, $impl_obfile);

            impl_test_for_obfile!(
                impl_from_string_note_name,
                from_string_note_name,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_string_without_properties,
                from_string_without_properties,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_string_with_invalid_yaml,
                from_string_with_invalid_yaml,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_string_invalid_format,
                from_string_invalid_format,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_string_with_unicode,
                from_string_with_unicode,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_string_space_with_properties,
                from_string_space_with_properties,
                $impl_obfile
            );
        };
    }

    /// Generates every file-based test for the given implementation.
    macro_rules! impl_all_tests_from_file {
        ($impl_obfile:path) => {
            #[allow(unused_imports)]
            use crate::obfile::impl_tests::*;

            impl_test_for_obfile!(impl_from_file, from_file, $impl_obfile);
            impl_test_for_obfile!(impl_from_file_note_name, from_file_note_name, $impl_obfile);

            impl_test_for_obfile!(
                impl_from_file_without_properties,
                from_file_without_properties,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_file_with_invalid_yaml,
                from_file_with_invalid_yaml,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_file_invalid_format,
                from_file_invalid_format,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_file_with_unicode,
                from_file_with_unicode,
                $impl_obfile
            );
            impl_test_for_obfile!(
                impl_from_file_space_with_properties,
                from_file_space_with_properties,
                $impl_obfile
            );
        };
    }

    pub(crate) use impl_all_tests_from_file;
    pub(crate) use impl_all_tests_from_string;
}