obsidian_parser/note/
note_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::note::parser::{self, ResultParse, parse_note};
4use crate::note::{DefaultProperties, Note};
5use serde::de::DeserializeOwned;
6use std::borrow::Cow;
7use std::marker::PhantomData;
8use std::path::Path;
9use std::path::PathBuf;
10use thiserror::Error;
11
12/// On-disk representation of an Obsidian note file
13///
14/// Optimized for vault operations where:
15/// 1. Memory efficiency is critical (large vaults)
16/// 2. Storage is fast (SSD/NVMe)
17/// 3. Content is accessed infrequently
18///
19/// # Tradeoffs vs `NoteInMemory`
20/// | Characteristic       | [`NoteOnDisk`]        | [`NoteInMemory`]          |
21/// |----------------------|-------------------------|-----------------------------|
22/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
23/// | File access          | On-demand               | Preloaded                   |
24/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
25/// | Content access cost  | Disk read               | Zero cost                   |
26///
27/// # Recommendation
28/// Prefer `NoteOnDisk` for vault operations on modern hardware. The combination of
29/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
30/// - Large vaults (1000+ files)
31/// - Graph processing
32///
33/// # Warning
34/// Requires **persistent file access** throughout the object's lifetime
35///
36/// [`NoteInMemory`]: crate::note::note_in_memory::NoteInMemory
37#[derive(Debug, Default, PartialEq, Eq, Clone)]
38pub struct NoteOnDisk<T = DefaultProperties>
39where
40    T: Clone + DeserializeOwned,
41{
42    /// Absolute path to the source Markdown file
43    path: PathBuf,
44
45    /// For ignore `T`
46    phantom: PhantomData<T>,
47}
48
49/// Errors for [`NoteOnDisk`]
50#[derive(Debug, Error)]
51pub enum Error {
52    /// I/O operation failed (file reading, directory traversal, etc.)
53    #[error("IO error: {0}")]
54    IO(#[from] std::io::Error),
55
56    /// Invalid frontmatter format detected
57    ///
58    /// Occurs when:
59    /// - Frontmatter delimiters are incomplete (`---` missing)
60    /// - Content between delimiters is empty
61    ///
62    /// # Example
63    /// Parsing a file with malformed frontmatter:
64    /// ```text
65    /// ---
66    /// incomplete yaml
67    /// // Missing closing ---
68    /// ```
69    #[error("Invalid frontmatter format")]
70    InvalidFormat(#[from] parser::Error),
71
72    /// YAML parsing error in frontmatter properties
73    ///
74    /// # Example
75    /// Parsing invalid YAML syntax:
76    /// ```text
77    /// ---
78    /// key: @invalid_value
79    /// ---
80    /// ```
81    #[error("YAML parsing error: {0}")]
82    Yaml(#[from] serde_yml::Error),
83
84    /// Expected a file path
85    ///
86    /// # Example
87    /// ```no_run
88    /// use obsidian_parser::prelude::*;
89    ///
90    /// // Will fail if passed a directory path
91    /// NoteOnDisk::from_file_default("/home/test");
92    /// ```
93    #[error("Path: `{0}` is not a directory")]
94    IsNotFile(PathBuf),
95}
96
97impl<T> Note for NoteOnDisk<T>
98where
99    T: DeserializeOwned + Clone,
100{
101    type Properties = T;
102    type Error = self::Error;
103
104    /// Parses YAML frontmatter directly from disk
105    ///
106    /// # Errors
107    /// - [`Error::Yaml`] if properties can't be deserialized
108    /// - [`Error::IsNotFile`] If file doesn't exist
109    /// - [`Error::IO`] on filesystem error
110    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display())))]
111    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error> {
112        #[cfg(feature = "tracing")]
113        tracing::trace!("Get properties from file");
114
115        let data = std::fs::read(&self.path)?;
116
117        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
118        let raw_text = unsafe { String::from_utf8_unchecked(data) };
119
120        let result = match parse_note(&raw_text)? {
121            ResultParse::WithProperties {
122                content: _,
123                properties,
124            } => {
125                #[cfg(feature = "tracing")]
126                tracing::trace!("Frontmatter detected, parsing properties");
127
128                Some(Cow::Owned(serde_yml::from_str(properties)?))
129            }
130            ResultParse::WithoutProperties => {
131                #[cfg(feature = "tracing")]
132                tracing::trace!("No frontmatter found, storing raw content");
133
134                None
135            }
136        };
137
138        Ok(result)
139    }
140
141    /// Returns the note's content body (without frontmatter)
142    ///
143    /// # Errors
144    /// - [`Error::IO`] on filesystem error
145    ///
146    /// # Performance
147    /// Performs disk read on every call. Suitable for:
148    /// - Single-pass processing (link extraction, analysis)
149    /// - Large files where in-memory storage is prohibitive
150    ///
151    /// For repeated access, consider caching or [`NoteInMemory`](crate::note::note_in_memory::NoteInMemory).
152    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display())))]
153    fn content(&self) -> Result<Cow<'_, str>, Error> {
154        #[cfg(feature = "tracing")]
155        tracing::trace!("Get content from file");
156
157        let data = std::fs::read(&self.path)?;
158
159        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
160        let raw_text = unsafe { String::from_utf8_unchecked(data) };
161
162        let result = match parse_note(&raw_text)? {
163            ResultParse::WithProperties {
164                content,
165                properties: _,
166            } => {
167                #[cfg(feature = "tracing")]
168                tracing::trace!("Frontmatter detected, parsing properties");
169
170                content.to_string()
171            }
172            ResultParse::WithoutProperties => {
173                #[cfg(feature = "tracing")]
174                tracing::trace!("No frontmatter found, storing raw content");
175
176                raw_text
177            }
178        };
179
180        Ok(Cow::Owned(result))
181    }
182
183    /// Get path to note
184    #[inline]
185    fn path(&self) -> Option<Cow<'_, Path>> {
186        Some(Cow::Borrowed(&self.path))
187    }
188}
189
190impl<T> NoteOnDisk<T>
191where
192    T: DeserializeOwned + Clone,
193{
194    /// Set path to note
195    #[inline]
196    pub fn set_path(&mut self, path: PathBuf) {
197        self.path = path;
198    }
199}
200
201#[cfg(not(target_family = "wasm"))]
202impl<T> crate::prelude::NoteFromFile for NoteOnDisk<T>
203where
204    T: DeserializeOwned + Clone,
205{
206    /// Creates instance from path
207    fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
208        let path = path.as_ref().to_path_buf();
209
210        if !path.is_file() {
211            return Err(Error::IsNotFile(path));
212        }
213
214        Ok(Self {
215            path,
216            phantom: PhantomData,
217        })
218    }
219}
220
#[cfg(test)]
mod tests {
    use super::*;
    use crate::note::NoteDefault;
    use crate::note::impl_tests::impl_test_for_note;
    use crate::note::note_aliases::tests::{from_file_have_aliases, from_file_have_not_aliases};
    use crate::note::note_is_todo::tests::{from_file_is_not_todo, from_file_is_todo};
    use crate::note::note_read::tests::{from_file, from_file_with_unicode};
    use crate::note::note_write::tests::impl_all_tests_flush;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared test suites instantiated for `NoteOnDisk`.
    impl_all_tests_flush!(NoteOnDisk);
    impl_test_for_note!(impl_from_file, from_file, NoteOnDisk);

    impl_test_for_note!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        NoteOnDisk
    );

    impl_test_for_note!(impl_from_file_is_todo, from_file_is_todo, NoteOnDisk);
    impl_test_for_note!(
        impl_from_file_is_not_todo,
        from_file_is_not_todo,
        NoteOnDisk
    );

    impl_test_for_note!(
        impl_from_file_have_aliases,
        from_file_have_aliases,
        NoteOnDisk
    );
    impl_test_for_note!(
        impl_from_file_have_not_aliases,
        from_file_have_not_aliases,
        NoteOnDisk
    );

    /// A directory path must be rejected with `Error::IsNotFile`.
    ///
    /// Asserting on the variant (instead of `#[should_panic]` + `unwrap`)
    /// keeps the test from passing on an unrelated error or panic.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn use_from_file_with_path_not_file() {
        let temp_dir = tempfile::tempdir().unwrap();

        let result = NoteOnDisk::from_file_default(temp_dir.path());

        assert!(
            matches!(result, Err(Error::IsNotFile(_))),
            "a directory path must be rejected with Error::IsNotFile"
        );
    }

    /// The path given at construction is returned unchanged.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_path() {
        let test_file = NamedTempFile::new().unwrap();
        let file = NoteOnDisk::from_file_default(test_file.path()).unwrap();

        assert_eq!(file.path().unwrap(), test_file.path());
        assert_eq!(file.path, test_file.path());
    }

    /// A file without frontmatter yields its whole text as content.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_content() {
        let test_data = "DATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = NoteOnDisk::from_file_default(test_file.path()).unwrap();
        assert_eq!(file.content().unwrap(), test_data);
    }

    /// Frontmatter is exposed as properties and excluded from the content.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_properties() {
        let test_data = "---\ntime: now\n---\nDATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = NoteOnDisk::from_file_default(test_file.path()).unwrap();
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(file.content().unwrap(), "DATA");
        assert_eq!(properties["time"], "now");
    }
}