obsidian_parser/obfile/
obfile_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::error::Error;
4use crate::obfile::{DefaultProperties, ObFile, ResultParse, parse_obfile};
5use serde::de::DeserializeOwned;
6use std::borrow::Cow;
7use std::marker::PhantomData;
8use std::path::Path;
9use std::path::PathBuf;
10
11/// On-disk representation of an Obsidian note file
12///
13/// Optimized for vault operations where:
14/// 1. Memory efficiency is critical (large vaults)
15/// 2. Storage is fast (SSD/NVMe)
16/// 3. Content is accessed infrequently
17///
18/// # Tradeoffs vs `ObFileInMemory`
19/// | Characteristic       | [`ObFileOnDisk`]        | [`ObFileInMemory`]          |
20/// |----------------------|-------------------------|-----------------------------|
21/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
22/// | File access          | On-demand               | Preloaded                   |
23/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
24/// | Content access cost  | Disk read               | Zero cost                   |
25///
26/// # Recommendation
27/// Prefer `ObFileOnDisk` for vault operations on modern hardware. The combination of
28/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
29/// - Large vaults (1000+ files)
30/// - Graph processing
31///
32/// # Warning
33/// Requires **persistent file access** throughout the object's lifetime
34///
35/// [`ObFileInMemory`]: crate::obfile::obfile_in_memory::ObFileInMemory
36#[derive(Debug, Default, PartialEq, Eq, Clone)]
37pub struct ObFileOnDisk<T = DefaultProperties>
38where
39    T: DeserializeOwned + Clone,
40{
41    /// Absolute path to the source Markdown file
42    path: PathBuf,
43
44    phantom: PhantomData<T>,
45}
46
47impl<T: DeserializeOwned + Clone> ObFile<T> for ObFileOnDisk<T> {
48    /// Returns the note's content body (without frontmatter)
49    ///
50    /// # Errors
51    /// - If file doesn't exist
52    /// - On filesystem errors
53    /// - If file contains invalid UTF-8
54    ///
55    /// # Performance
56    /// Performs disk read on every call. Suitable for:
57    /// - Single-pass processing (link extraction, analysis)
58    /// - Large files where in-memory storage is prohibitive
59    ///
60    /// For repeated access, consider caching or `ObFileInMemory`.
61    fn content(&self) -> Result<Cow<'_, str>, Error> {
62        let data = std::fs::read(&self.path)?;
63        let raw_text = unsafe { String::from_utf8_unchecked(data) };
64
65        let result = match parse_obfile(&raw_text)? {
66            ResultParse::WithProperties {
67                content,
68                properties: _,
69            } => {
70                #[cfg(feature = "logging")]
71                log::trace!("Frontmatter detected, parsing properties");
72
73                content.to_string()
74            }
75            ResultParse::WithoutProperties => {
76                #[cfg(feature = "logging")]
77                log::trace!("No frontmatter found, storing raw content");
78
79                raw_text
80            }
81        };
82
83        Ok(Cow::Owned(result))
84    }
85
86    /// Parses YAML frontmatter directly from disk
87    ///
88    /// # Errors
89    /// - If properties can't be deserialized
90    fn properties(&self) -> Result<Option<Cow<'_, T>>, Error> {
91        let data = std::fs::read(&self.path)?;
92        let raw_text = unsafe { String::from_utf8_unchecked(data) };
93
94        let result = match parse_obfile(&raw_text)? {
95            ResultParse::WithProperties {
96                content: _,
97                properties,
98            } => {
99                #[cfg(feature = "logging")]
100                log::trace!("Frontmatter detected, parsing properties");
101
102                Some(serde_yml::from_str(properties)?)
103            }
104            ResultParse::WithoutProperties => {
105                #[cfg(feature = "logging")]
106                log::trace!("No frontmatter found, storing raw content");
107
108                None
109            }
110        };
111
112        Ok(result)
113    }
114
115    #[inline]
116    fn path(&self) -> Option<Cow<'_, Path>> {
117        Some(Cow::Borrowed(&self.path))
118    }
119
120    /// Creates instance from text (requires path!)
121    ///
122    /// Dont use this function. Use `from_file`
123    fn from_string<P: AsRef<std::path::Path>>(
124        _raw_text: &str,
125        path: Option<P>,
126    ) -> Result<Self, Error> {
127        let path_buf = path.expect("Path is required").as_ref().to_path_buf();
128
129        Self::from_file(path_buf)
130    }
131
132    /// Creates instance from path
133    fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
134        let path_buf = path.as_ref().to_path_buf();
135
136        if !path_buf.is_file() {
137            return Err(Error::IsNotFile(path_buf));
138        }
139
140        Ok(Self {
141            path: path_buf,
142            phantom: PhantomData,
143        })
144    }
145}
146
#[cfg(test)]
mod tests {
    use super::*;
    use crate::obfile::ObFileDefault;
    use crate::obfile::impl_tests::{from_file, from_file_with_unicode, impl_test_for_obfile};
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared test bodies from `impl_tests`, instantiated for `ObFileOnDisk`
    // (presumably generic over the `ObFile` implementation under test).
    impl_test_for_obfile!(impl_from_file, from_file, ObFileOnDisk);

    impl_test_for_obfile!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        ObFileOnDisk
    );

    /// Building from a string without a path must panic with the
    /// "Path is required" message, not with some unrelated failure.
    #[test]
    #[should_panic(expected = "Path is required")]
    fn use_from_string_without_path() {
        init_test_logger();
        ObFileOnDisk::from_string_default("", None::<&str>).unwrap();
    }

    /// A directory path must be rejected with `Error::IsNotFile`.
    ///
    /// The specific variant is asserted (rather than relying on
    /// `#[should_panic]`) so that an unrelated panic, e.g. a failure to
    /// create the temporary directory, cannot make this test pass.
    #[test]
    fn use_from_file_with_path_not_file() {
        init_test_logger();
        let temp_dir = tempfile::tempdir().unwrap();

        let result = ObFileOnDisk::from_file_default(temp_dir.path());
        assert!(matches!(result, Err(Error::IsNotFile(_))));
    }

    /// The stored path and the trait accessor both report the input path.
    #[test]
    fn get_path() {
        init_test_logger();
        let test_file = NamedTempFile::new().unwrap();
        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();

        assert_eq!(file.path().unwrap(), test_file.path());
        assert_eq!(file.path, test_file.path());
    }

    /// A file without frontmatter yields its full text as content.
    #[test]
    fn get_content() {
        init_test_logger();
        let test_data = "DATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        assert_eq!(file.content().unwrap(), test_data);
    }

    /// Frontmatter is split from the body and deserialized as properties.
    #[test]
    fn get_properties() {
        init_test_logger();
        let test_data = "---\ntime: now\n---\nDATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        let properties = file.properties().unwrap().unwrap();

        assert_eq!(file.content().unwrap(), "DATA");
        assert_eq!(properties["time"], "now");
    }
}