obsidian_parser/obfile/
obfile_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::error::Error;
4use crate::obfile::{DefaultProperties, ObFile, ObFileFlush, ResultParse, parse_obfile};
5use serde::Serialize;
6use serde::de::DeserializeOwned;
7use std::borrow::Cow;
8use std::marker::PhantomData;
9use std::path::Path;
10use std::path::PathBuf;
11
12/// On-disk representation of an Obsidian note file
13///
14/// Optimized for vault operations where:
15/// 1. Memory efficiency is critical (large vaults)
16/// 2. Storage is fast (SSD/NVMe)
17/// 3. Content is accessed infrequently
18///
19/// # Tradeoffs vs `ObFileInMemory`
20/// | Characteristic       | [`ObFileOnDisk`]        | [`ObFileInMemory`]          |
21/// |----------------------|-------------------------|-----------------------------|
22/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
23/// | File access          | On-demand               | Preloaded                   |
24/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
25/// | Content access cost  | Disk read               | Zero cost                   |
26///
27/// # Recommendation
28/// Prefer `ObFileOnDisk` for vault operations on modern hardware. The combination of
29/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
30/// - Large vaults (1000+ files)
31/// - Graph processing
32///
33/// # Warning
34/// Requires **persistent file access** throughout the object's lifetime
35///
36/// [`ObFileInMemory`]: crate::obfile::obfile_in_memory::ObFileInMemory
37#[derive(Debug, Default, PartialEq, Eq, Clone)]
38pub struct ObFileOnDisk<T = DefaultProperties>
39where
40    T: DeserializeOwned + Clone,
41{
42    /// Absolute path to the source Markdown file
43    path: PathBuf,
44
45    phantom: PhantomData<T>,
46}
47
48impl<T: DeserializeOwned + Clone> ObFile<T> for ObFileOnDisk<T> {
49    /// Returns the note's content body (without frontmatter)
50    ///
51    /// # Errors
52    /// - If file doesn't exist
53    /// - On filesystem errors
54    ///
55    /// # Performance
56    /// Performs disk read on every call. Suitable for:
57    /// - Single-pass processing (link extraction, analysis)
58    /// - Large files where in-memory storage is prohibitive
59    ///
60    /// For repeated access, consider caching or [`ObFileInMemory`](crate::obfile::obfile_in_memory::ObFileInMemory).
61    fn content(&self) -> Result<Cow<'_, str>, Error> {
62        let data = std::fs::read(&self.path)?;
63
64        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
65        let raw_text = unsafe { String::from_utf8_unchecked(data) };
66
67        let result = match parse_obfile(&raw_text)? {
68            ResultParse::WithProperties {
69                content,
70                properties: _,
71            } => {
72                #[cfg(feature = "logging")]
73                log::trace!("Frontmatter detected, parsing properties");
74
75                content.to_string()
76            }
77            ResultParse::WithoutProperties => {
78                #[cfg(feature = "logging")]
79                log::trace!("No frontmatter found, storing raw content");
80
81                raw_text
82            }
83        };
84
85        Ok(Cow::Owned(result))
86    }
87
88    /// Parses YAML frontmatter directly from disk
89    ///
90    /// # Errors
91    /// - If properties can't be deserialized
92    /// - If file doesn't exist
93    /// - On filesystem errors
94    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error> {
95        let data = std::fs::read(&self.path)?;
96
97        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
98        let raw_text = unsafe { String::from_utf8_unchecked(data) };
99
100        let result = match parse_obfile(&raw_text)? {
101            ResultParse::WithProperties {
102                content: _,
103                properties,
104            } => {
105                #[cfg(feature = "logging")]
106                log::trace!("Frontmatter detected, parsing properties");
107
108                Some(Cow::Owned(serde_yml::from_str(properties)?))
109            }
110            ResultParse::WithoutProperties => {
111                #[cfg(feature = "logging")]
112                log::trace!("No frontmatter found, storing raw content");
113
114                None
115            }
116        };
117
118        Ok(result)
119    }
120
121    #[inline]
122    fn path(&self) -> Option<Cow<'_, Path>> {
123        Some(Cow::Borrowed(&self.path))
124    }
125
126    /// Creates instance from text (requires path!)
127    ///
128    /// Dont use this function. Use `from_file`
129    fn from_string<P: AsRef<std::path::Path>>(
130        _raw_text: &str,
131        path: Option<P>,
132    ) -> Result<Self, Error> {
133        let path_buf = path.expect("Path is required").as_ref().to_path_buf();
134
135        Self::from_file(path_buf)
136    }
137
138    /// Creates instance from path
139    fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
140        let path_buf = path.as_ref().to_path_buf();
141
142        if !path_buf.is_file() {
143            return Err(Error::IsNotFile(path_buf));
144        }
145
146        Ok(Self {
147            path: path_buf,
148            phantom: PhantomData,
149        })
150    }
151}
152
153impl<T: DeserializeOwned + Serialize + Clone> ObFileFlush<T> for ObFileOnDisk<T> {}
154
#[cfg(test)]
mod tests {
    use super::*;
    use crate::obfile::ObFileDefault;
    use crate::obfile::impl_tests::{
        from_file, from_file_with_unicode, impl_all_tests_flush, impl_test_for_obfile,
    };
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared test suites generated by the crate's test macros.
    impl_all_tests_flush!(ObFileOnDisk);
    impl_test_for_obfile!(impl_from_file, from_file, ObFileOnDisk);

    impl_test_for_obfile!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        ObFileOnDisk
    );

    /// A missing path must panic with the dedicated message, not with some
    /// unrelated failure that a bare `#[should_panic]` would also accept.
    #[test]
    #[should_panic(expected = "Path is required")]
    fn use_from_string_without_path() {
        init_test_logger();
        ObFileOnDisk::from_string_default("", None::<&str>).unwrap();
    }

    /// Pointing at a directory must yield `Error::IsNotFile`. Asserting on the
    /// variant keeps the test from passing on an unrelated panic (for example a
    /// failure to create the temporary directory).
    #[test]
    fn use_from_file_with_path_not_file() {
        init_test_logger();
        let temp_dir = tempfile::tempdir().unwrap();

        let result = ObFileOnDisk::from_file_default(temp_dir.path());
        assert!(matches!(result, Err(Error::IsNotFile(_))));
    }

    /// Both the trait accessor and the private field report the path that was passed in.
    #[test]
    fn get_path() {
        init_test_logger();
        let test_file = NamedTempFile::new().unwrap();
        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();

        assert_eq!(file.path().unwrap(), test_file.path());
        assert_eq!(file.path, test_file.path());
    }

    /// A file without frontmatter is returned verbatim as content.
    #[test]
    fn get_content() {
        init_test_logger();
        let test_data = "DATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        assert_eq!(file.content().unwrap(), test_data);
    }

    /// Frontmatter is split from the body and exposed through `properties`.
    #[test]
    fn get_properties() {
        init_test_logger();
        let test_data = "---\ntime: now\n---\nDATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(file.content().unwrap(), "DATA");
        assert_eq!(properties["time"], "now");
    }
}