obsidian_parser/obfile/
obfile_on_disk.rs

1//! On-disk representation of an Obsidian note file
2
3use crate::error::Error;
4use crate::obfile::{ObFile, ResultParse, parse_obfile};
5use serde::de::DeserializeOwned;
6use std::marker::PhantomData;
7use std::{collections::HashMap, path::PathBuf};
8
9/// On-disk representation of an Obsidian note file
10///
11/// Optimized for vault operations where:
12/// 1. Memory efficiency is critical (large vaults)
13/// 2. Storage is fast (SSD/NVMe)
14/// 3. Content is accessed infrequently
15///
16/// # Tradeoffs vs `ObFileInMemory`
17/// | Characteristic       | [`ObFileOnDisk`]        | [`ObFileInMemory`]          |
18/// |----------------------|-------------------------|-----------------------------|
19/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
20/// | File access          | On-demand               | Preloaded                   |
21/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
22/// | Content access cost  | Disk read               | Zero cost                   |
23///
24/// # Recommendation
25/// Prefer `ObFileOnDisk` for vault operations on modern hardware. The combination of
26/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
27/// - Large vaults (1000+ files)
28/// - Graph processing
29///
30/// # Warning
31/// Requires **persistent file access** throughout the object's lifetime. If files are moved/deleted,
32/// calling `content()` or `properties()` will **panic**
33///
34/// [`ObFileInMemory`]: crate::obfile::obfile_in_memory::ObFileInMemory
35#[derive(Debug, Default, PartialEq, Eq, Clone)]
36pub struct ObFileOnDisk<T = HashMap<String, serde_yml::Value>>
37where
38    T: DeserializeOwned + Clone,
39{
40    /// Absolute path to the source Markdown file
41    path: PathBuf,
42
43    phantom: PhantomData<T>,
44}
45
46impl<T: DeserializeOwned + Clone> ObFile<T> for ObFileOnDisk<T> {
47    /// Returns the note's content body (without frontmatter)
48    ///
49    /// # Panics
50    /// - If file doesn't exist
51    /// - On filesystem errors
52    /// - If file contains invalid UTF-8
53    ///
54    /// # Performance
55    /// Performs disk read on every call. Suitable for:
56    /// - Single-pass processing (link extraction, analysis)
57    /// - Large files where in-memory storage is prohibitive
58    ///
59    /// For repeated access, consider caching or `ObFileInMemory`.
60    #[allow(
61        clippy::unwrap_used,
62        reason = "The documentation states that panics are possible"
63    )]
64    fn content(&self) -> String {
65        let data = std::fs::read(&self.path).unwrap();
66        let raw_text = String::from_utf8(data).unwrap();
67
68        match parse_obfile(&raw_text).unwrap() {
69            ResultParse::WithProperties {
70                content,
71                properties: _,
72            } => {
73                #[cfg(feature = "logging")]
74                log::trace!("Frontmatter detected, parsing properties");
75
76                content.to_string()
77            }
78            ResultParse::WithoutProperties => {
79                #[cfg(feature = "logging")]
80                log::trace!("No frontmatter found, storing raw content");
81
82                raw_text
83            }
84        }
85    }
86
87    /// Parses YAML frontmatter directly from disk
88    ///
89    /// # Panics
90    /// - If properties can't be deserialized
91    #[allow(
92        clippy::unwrap_used,
93        reason = "The documentation states that panics are possible"
94    )]
95    fn properties(&self) -> Option<T> {
96        let data = std::fs::read(&self.path).unwrap();
97        let raw_text = String::from_utf8(data).unwrap();
98
99        match parse_obfile(&raw_text).unwrap() {
100            ResultParse::WithProperties {
101                content: _,
102                properties,
103            } => {
104                #[cfg(feature = "logging")]
105                log::trace!("Frontmatter detected, parsing properties");
106
107                Some(serde_yml::from_str(properties).unwrap())
108            }
109            ResultParse::WithoutProperties => {
110                #[cfg(feature = "logging")]
111                log::trace!("No frontmatter found, storing raw content");
112
113                None
114            }
115        }
116    }
117
118    #[inline]
119    fn path(&self) -> Option<PathBuf> {
120        Some(self.path.clone())
121    }
122
123    /// Creates instance from text (requires path!)
124    ///
125    /// Dont use this function. Use `from_file`
126    fn from_string<P: AsRef<std::path::Path>>(
127        _raw_text: &str,
128        path: Option<P>,
129    ) -> Result<Self, Error> {
130        let path_buf = path.expect("Path is required").as_ref().to_path_buf();
131
132        Self::from_file(path_buf)
133    }
134
135    /// Creates instance from path
136    fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
137        let path_buf = path.as_ref().to_path_buf();
138
139        if !path_buf.is_file() {
140            return Err(Error::IsNotFile(path_buf));
141        }
142
143        Ok(Self {
144            path: path_buf,
145            phantom: PhantomData,
146        })
147    }
148}
149
#[cfg(test)]
mod tests {
    use super::*;
    use crate::obfile::ObFileDefault;
    use crate::obfile::impl_tests::{from_file, from_file_with_unicode, impl_test_for_obfile};
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    impl_test_for_obfile!(impl_from_file, from_file, ObFileOnDisk);

    impl_test_for_obfile!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        ObFileOnDisk
    );

    /// `from_string` without a path must panic with the documented message.
    ///
    /// The expected text is pinned so that an unrelated panic cannot make the test pass.
    #[test]
    #[should_panic(expected = "Path is required")]
    fn use_from_string_without_path() {
        init_test_logger();
        ObFileOnDisk::from_string_default("", None::<&str>).unwrap();
    }

    /// A directory is rejected with `Error::IsNotFile` instead of being accepted.
    ///
    /// The error is asserted directly rather than through `#[should_panic]`, so a
    /// failure while creating the temporary directory is not mistaken for success.
    #[test]
    fn use_from_file_with_path_not_file() {
        init_test_logger();
        let temp_dir = tempfile::tempdir().unwrap();

        let result = ObFileOnDisk::from_file_default(temp_dir.path());
        assert!(matches!(result, Err(Error::IsNotFile(_))));
    }

    /// The stored path is exposed unchanged through both the getter and the field.
    #[test]
    fn get_path() {
        init_test_logger();
        let test_file = NamedTempFile::new().unwrap();
        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();

        assert_eq!(file.path().unwrap(), test_file.path());
        assert_eq!(file.path, test_file.path());
    }

    /// A file without frontmatter is returned verbatim by `content()`.
    #[test]
    fn get_content() {
        init_test_logger();
        let test_data = "DATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        assert_eq!(file.content(), test_data);
    }

    /// Frontmatter is split off from the body and deserialized into properties.
    #[test]
    fn get_properties() {
        init_test_logger();
        let test_data = "---\ntime: now\n---\nDATA";
        let mut test_file = NamedTempFile::new().unwrap();
        test_file.write_all(test_data.as_bytes()).unwrap();

        let file = ObFileOnDisk::from_file_default(test_file.path()).unwrap();
        assert_eq!(file.content(), "DATA");
        assert_eq!(file.properties().unwrap()["time"], "now");
    }
}
215}