obsidian_parser/obfile/
obfile_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::error::Error;
4use crate::obfile::{DefaultProperties, ObFile, ResultParse, parse_obfile};
5use serde::de::DeserializeOwned;
6use std::borrow::Cow;
7use std::marker::PhantomData;
8use std::path::Path;
9use std::path::PathBuf;
10
11/// On-disk representation of an Obsidian note file
12///
13/// Optimized for vault operations where:
14/// 1. Memory efficiency is critical (large vaults)
15/// 2. Storage is fast (SSD/NVMe)
16/// 3. Content is accessed infrequently
17///
18/// # Tradeoffs vs `ObFileInMemory`
19/// | Characteristic       | [`ObFileOnDisk`]        | [`ObFileInMemory`]          |
20/// |----------------------|-------------------------|-----------------------------|
21/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
22/// | File access          | On-demand               | Preloaded                   |
23/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
24/// | Content access cost  | Disk read               | Zero cost                   |
25///
26/// # Recommendation
27/// Prefer `ObFileOnDisk` for vault operations on modern hardware. The combination of
28/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
29/// - Large vaults (1000+ files)
30/// - Graph processing
31///
32/// # Warning
33/// Requires **persistent file access** throughout the object's lifetime
34///
35/// [`ObFileInMemory`]: crate::obfile::obfile_in_memory::ObFileInMemory
36#[derive(Debug, Default, PartialEq, Eq, Clone)]
37pub struct ObFileOnDisk<T = DefaultProperties>
38where
39    T: DeserializeOwned + Clone,
40{
41    /// Absolute path to the source Markdown file
42    path: PathBuf,
43
44    phantom: PhantomData<T>,
45}
46
47impl<T: DeserializeOwned + Clone> ObFile<T> for ObFileOnDisk<T> {
48    /// Returns the note's content body (without frontmatter)
49    ///
50    /// # Errors
51    /// - If file doesn't exist
52    /// - On filesystem errors
53    ///
54    /// # Performance
55    /// Performs disk read on every call. Suitable for:
56    /// - Single-pass processing (link extraction, analysis)
57    /// - Large files where in-memory storage is prohibitive
58    ///
59    /// For repeated access, consider caching or [`ObFileInMemory`](crate::obfile::obfile_in_memory::ObFileInMemory).
60    fn content(&self) -> Result<Cow<'_, str>, Error> {
61        let data = std::fs::read(&self.path)?;
62
63        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
64        let raw_text = unsafe { String::from_utf8_unchecked(data) };
65
66        let result = match parse_obfile(&raw_text)? {
67            ResultParse::WithProperties {
68                content,
69                properties: _,
70            } => {
71                #[cfg(feature = "logging")]
72                log::trace!("Frontmatter detected, parsing properties");
73
74                content.to_string()
75            }
76            ResultParse::WithoutProperties => {
77                #[cfg(feature = "logging")]
78                log::trace!("No frontmatter found, storing raw content");
79
80                raw_text
81            }
82        };
83
84        Ok(Cow::Owned(result))
85    }
86
87    /// Parses YAML frontmatter directly from disk
88    ///
89    /// # Errors
90    /// - If properties can't be deserialized
91    /// - If file doesn't exist
92    /// - On filesystem errors
93    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error> {
94        let data = std::fs::read(&self.path)?;
95
96        // SAFETY: Notes files in Obsidian (`*.md`) ensure that the file is encoded in UTF-8
97        let raw_text = unsafe { String::from_utf8_unchecked(data) };
98
99        let result = match parse_obfile(&raw_text)? {
100            ResultParse::WithProperties {
101                content: _,
102                properties,
103            } => {
104                #[cfg(feature = "logging")]
105                log::trace!("Frontmatter detected, parsing properties");
106
107                Some(Cow::Owned(serde_yml::from_str(properties)?))
108            }
109            ResultParse::WithoutProperties => {
110                #[cfg(feature = "logging")]
111                log::trace!("No frontmatter found, storing raw content");
112
113                None
114            }
115        };
116
117        Ok(result)
118    }
119
120    #[inline]
121    fn path(&self) -> Option<Cow<'_, Path>> {
122        Some(Cow::Borrowed(&self.path))
123    }
124
125    /// Creates instance from text (requires path!)
126    ///
127    /// Dont use this function. Use `from_file`
128    fn from_string<P: AsRef<std::path::Path>>(
129        _raw_text: &str,
130        path: Option<P>,
131    ) -> Result<Self, Error> {
132        let path_buf = path.expect("Path is required").as_ref().to_path_buf();
133
134        Self::from_file(path_buf)
135    }
136
137    /// Creates instance from path
138    fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
139        let path_buf = path.as_ref().to_path_buf();
140
141        if !path_buf.is_file() {
142            return Err(Error::IsNotFile(path_buf));
143        }
144
145        Ok(Self {
146            path: path_buf,
147            phantom: PhantomData,
148        })
149    }
150}
151
#[cfg(test)]
mod tests {
    use super::*;
    use crate::obfile::ObFileDefault;
    use crate::obfile::impl_tests::{from_file, from_file_with_unicode, impl_test_for_obfile};
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared trait-level checks, instantiated for the on-disk implementation.
    impl_test_for_obfile!(impl_from_file, from_file, ObFileOnDisk);

    impl_test_for_obfile!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        ObFileOnDisk
    );

    /// Building from a string without a path must panic.
    #[test]
    #[should_panic]
    fn use_from_string_without_path() {
        init_test_logger();
        ObFileOnDisk::from_string_default("", None::<&str>).unwrap();
    }

    /// A directory is not accepted where a file is expected.
    #[test]
    #[should_panic]
    fn use_from_file_with_path_not_file() {
        init_test_logger();
        let directory = tempfile::tempdir().unwrap();

        ObFileOnDisk::from_file_default(directory.path()).unwrap();
    }

    /// Both the accessor and the private field report the path given at construction.
    #[test]
    fn get_path() {
        init_test_logger();
        let note_file = NamedTempFile::new().unwrap();
        let note = ObFileOnDisk::from_file_default(note_file.path()).unwrap();

        assert_eq!(note.path().unwrap(), note_file.path());
        assert_eq!(note.path, note_file.path());
    }

    /// A note without frontmatter yields the whole file as content.
    #[test]
    fn get_content() {
        init_test_logger();
        let expected = "DATA";
        let mut note_file = NamedTempFile::new().unwrap();
        note_file.write_all(expected.as_bytes()).unwrap();

        let note = ObFileOnDisk::from_file_default(note_file.path()).unwrap();
        assert_eq!(note.content().unwrap(), expected);
    }

    /// Frontmatter is split from the body and exposed as properties.
    #[test]
    fn get_properties() {
        init_test_logger();
        let raw_note = "---\ntime: now\n---\nDATA";
        let mut note_file = NamedTempFile::new().unwrap();
        note_file.write_all(raw_note.as_bytes()).unwrap();

        let note = ObFileOnDisk::from_file_default(note_file.path()).unwrap();
        let frontmatter = note.properties().unwrap().unwrap();

        assert_eq!(note.content().unwrap(), "DATA");
        assert_eq!(frontmatter["time"], "now");
    }
}