Skip to main content

obsidian_parser/note/
note_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::note::parser::{self, ResultParse, parse_note};
4use crate::note::{DefaultProperties, Note};
5use serde::de::DeserializeOwned;
6use std::borrow::Cow;
7use std::marker::PhantomData;
8use std::path::Path;
9use std::path::PathBuf;
10use thiserror::Error;
11
12/// On-disk representation of an Obsidian note file
13///
14/// Optimized for vault operations where:
15/// 1. Memory efficiency is critical (large vaults)
16/// 2. Storage is fast (SSD/NVMe)
17/// 3. Content is accessed infrequently
18///
19/// # Tradeoffs vs `NoteInMemory`
20/// | Characteristic       | [`NoteOnDisk`]        | [`NoteInMemory`]          |
21/// |----------------------|-------------------------|-----------------------------|
22/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
23/// | File access          | On-demand               | Preloaded                   |
24/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
25/// | Content access cost  | Disk read               | Zero cost                   |
26///
27/// # Recommendation
28/// Prefer `NoteOnDisk` for vault operations on modern hardware. The combination of
29/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
30/// - Large vaults (1000+ files)
31/// - Graph processing
32///
33/// # Warning
34/// Requires **persistent file access** throughout the object's lifetime
35///
36/// [`NoteInMemory`]: crate::note::note_in_memory::NoteInMemory
37#[derive(Debug, Default, PartialEq, Eq, Clone)]
38pub struct NoteOnDisk<T = DefaultProperties>
39where
40    T: Clone + DeserializeOwned,
41{
42    /// Absolute path to the source Markdown file
43    path: PathBuf,
44
45    /// For ignore `T`
46    phantom: PhantomData<T>,
47}
48
49/// Errors for [`NoteOnDisk`]
50#[derive(Debug, Error)]
51pub enum Error {
52    /// I/O operation failed (file reading, directory traversal, etc.)
53    #[error("IO error: {0}")]
54    IO(#[from] std::io::Error),
55
56    /// Invalid frontmatter format detected
57    ///
58    /// Occurs when:
59    /// - Frontmatter delimiters are incomplete (`---` missing)
60    /// - Content between delimiters is empty
61    ///
62    /// # Example
63    /// Parsing a file with malformed frontmatter:
64    /// ```text
65    /// ---
66    /// incomplete yaml
67    /// // Missing closing ---
68    /// ```
69    #[error("Invalid frontmatter format")]
70    InvalidFormat(#[from] parser::Error),
71
72    /// YAML parsing error in frontmatter properties
73    ///
74    /// # Example
75    /// Parsing invalid YAML syntax:
76    /// ```text
77    /// ---
78    /// key: @invalid_value
79    /// ---
80    /// ```
81    #[error("YAML parsing error: {0}")]
82    Yaml(#[from] serde_yml::Error),
83
84    /// Expected a file path
85    ///
86    /// # Example
87    /// ```no_run
88    /// use obsidian_parser::prelude::*;
89    ///
90    /// // Will fail if passed a directory path
91    /// NoteOnDisk::from_file_default("/home/test");
92    /// ```
93    #[error("Path: `{0}` is not a directory")]
94    IsNotFile(PathBuf),
95}
96
97impl<T> Note for NoteOnDisk<T>
98where
99    T: DeserializeOwned + Clone,
100{
101    type Properties = T;
102    type Error = self::Error;
103
104    /// Parses YAML frontmatter directly from disk
105    ///
106    /// # Errors
107    /// - [`Error::Yaml`] if properties can't be deserialized
108    /// - [`Error::IsNotFile`] If file doesn't exist
109    /// - [`Error::IO`] on filesystem error
110    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display())))]
111    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error> {
112        #[cfg(feature = "tracing")]
113        tracing::trace!("Get properties from file");
114
115        let raw_text = std::fs::read_to_string(&self.path)?;
116
117        let result = match parse_note(&raw_text)? {
118            ResultParse::WithProperties {
119                content: _,
120                properties,
121            } => {
122                #[cfg(feature = "tracing")]
123                tracing::trace!("Frontmatter detected, parsing properties");
124
125                Some(Cow::Owned(serde_yml::from_str(properties)?))
126            }
127            ResultParse::WithoutProperties => {
128                #[cfg(feature = "tracing")]
129                tracing::trace!("No frontmatter found, storing raw content");
130
131                None
132            }
133        };
134
135        Ok(result)
136    }
137
138    /// Returns the note's content body (without frontmatter)
139    ///
140    /// # Errors
141    /// - [`Error::IO`] on filesystem error
142    ///
143    /// # Performance
144    /// Performs disk read on every call. Suitable for:
145    /// - Single-pass processing (link extraction, analysis)
146    /// - Large files where in-memory storage is prohibitive
147    ///
148    /// For repeated access, consider caching or [`NoteInMemory`](crate::note::note_in_memory::NoteInMemory).
149    #[cfg_attr(feature = "tracing", tracing::instrument(skip(self), fields(path = %self.path.display())))]
150    fn content(&self) -> Result<Cow<'_, str>, Error> {
151        #[cfg(feature = "tracing")]
152        tracing::trace!("Get content from file");
153
154        let raw_text = std::fs::read_to_string(&self.path)?;
155
156        let result = match parse_note(&raw_text)? {
157            ResultParse::WithProperties {
158                content,
159                properties: _,
160            } => {
161                #[cfg(feature = "tracing")]
162                tracing::trace!("Frontmatter detected, parsing properties");
163
164                content.to_string()
165            }
166            ResultParse::WithoutProperties => {
167                #[cfg(feature = "tracing")]
168                tracing::trace!("No frontmatter found, storing raw content");
169
170                raw_text
171            }
172        };
173
174        Ok(Cow::Owned(result))
175    }
176
177    /// Get path to note
178    #[inline]
179    fn path(&self) -> Option<Cow<'_, Path>> {
180        Some(Cow::Borrowed(&self.path))
181    }
182}
183
184impl<T> NoteOnDisk<T>
185where
186    T: DeserializeOwned + Clone,
187{
188    /// Set path to note
189    #[inline]
190    pub fn set_path(&mut self, path: PathBuf) {
191        self.path = path;
192    }
193}
194
195#[cfg(not(target_family = "wasm"))]
196impl<T> crate::prelude::NoteFromFile for NoteOnDisk<T>
197where
198    T: DeserializeOwned + Clone,
199{
200    /// Creates instance from path
201    fn from_file(path: impl AsRef<Path>) -> Result<Self, Error> {
202        let path = path.as_ref().to_path_buf();
203
204        if !path.is_file() {
205            return Err(Error::IsNotFile(path));
206        }
207
208        Ok(Self {
209            path,
210            phantom: PhantomData,
211        })
212    }
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use crate::note::NoteDefault;
    use crate::note::impl_tests::impl_test_for_note;
    use crate::note::note_aliases::tests::{from_file_have_aliases, from_file_have_not_aliases};
    use crate::note::note_is_todo::tests::{from_file_is_not_todo, from_file_is_todo};
    use crate::note::note_read::tests::{from_file, from_file_with_unicode};
    use crate::note::note_tags::tests::from_file_tags;
    use crate::note::note_write::tests::impl_all_tests_flush;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared test suites instantiated for `NoteOnDisk`.
    impl_all_tests_flush!(NoteOnDisk);
    impl_test_for_note!(impl_from_file, from_file, NoteOnDisk);
    impl_test_for_note!(impl_from_file_tags, from_file_tags, NoteOnDisk);

    impl_test_for_note!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        NoteOnDisk
    );

    impl_test_for_note!(impl_from_file_is_todo, from_file_is_todo, NoteOnDisk);
    impl_test_for_note!(
        impl_from_file_is_not_todo,
        from_file_is_not_todo,
        NoteOnDisk
    );

    impl_test_for_note!(
        impl_from_file_have_aliases,
        from_file_have_aliases,
        NoteOnDisk
    );
    impl_test_for_note!(
        impl_from_file_have_not_aliases,
        from_file_have_not_aliases,
        NoteOnDisk
    );

    /// Creates a temporary file holding `text`; the file lives as long as
    /// the returned handle.
    fn note_on_disk_fixture(text: &str) -> NamedTempFile {
        let mut backing = NamedTempFile::new().unwrap();
        backing.write_all(text.as_bytes()).unwrap();
        backing
    }

    /// Building a note from a directory path must fail.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    #[should_panic]
    fn use_from_file_with_path_not_file() {
        let directory = tempfile::tempdir().unwrap();

        let outcome = NoteOnDisk::from_file_default(directory.path());
        outcome.unwrap();
    }

    /// The note reports the path it was created from.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_path() {
        let backing = NamedTempFile::new().unwrap();
        let note = NoteOnDisk::from_file_default(backing.path()).unwrap();

        assert_eq!(note.path().unwrap(), backing.path());
        assert_eq!(note.path, backing.path());
    }

    /// A file without frontmatter is returned whole as content.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_content() {
        let expected = "DATA";
        let backing = note_on_disk_fixture(expected);

        let note = NoteOnDisk::from_file_default(backing.path()).unwrap();
        assert_eq!(note.content().unwrap(), expected);
    }

    /// Frontmatter is exposed as properties and stripped from the content.
    #[cfg_attr(feature = "tracing", tracing_test::traced_test)]
    #[test]
    fn get_properties() {
        let backing = note_on_disk_fixture("---\ntime: now\n---\nDATA");

        let note = NoteOnDisk::from_file_default(backing.path()).unwrap();
        let properties = note.properties().unwrap().unwrap();

        assert_eq!(note.content().unwrap(), "DATA");
        assert_eq!(properties["time"], "now");
    }
}