obsidian_parser/obfile/
obfile_on_disk.rs

1use crate::error::Error;
2use crate::obfile::{ObFile, ResultParse, parse_obfile};
3use serde::de::DeserializeOwned;
4use std::marker::PhantomData;
5use std::{collections::HashMap, path::PathBuf};
6
7/// On-disk representation of an Obsidian note file
8///
9/// Optimized for vault operations where:
10/// 1. Memory efficiency is critical (large vaults)
11/// 2. Storage is fast (SSD/NVMe)
12/// 3. Content is accessed infrequently
13///
14/// # Tradeoffs vs `ObFileInMemory`
15/// | Characteristic       | `ObFileOnDisk`          | `ObFileInMemory`            |
16/// |----------------------|-------------------------|-----------------------------|
17/// | Memory usage         | **Minimal** (~24 bytes) | High (content + properties) |
18/// | File access          | On-demand               | Preloaded                   |
19/// | Best for             | SSD-based vaults        | RAM-heavy workflows         |
20/// | Content access cost  | Disk read               | Zero cost                   |
21///
22/// # Recommendation
23/// Prefer `ObFileOnDisk` for vault operations on modern hardware. The combination of
24/// SSD speeds and Rust's efficient I/O makes this implementation ideal for:
25/// - Large vaults (1000+ files)
26/// - Graph processing
27///
28/// # Warning
29/// Requires **persistent file access** throughout the object's lifetime. If files are moved/deleted,
30/// calling `content()` or `properties()` will **panic**
31#[derive(Debug, Default, PartialEq, Clone)]
32pub struct ObFileOnDisk<T = HashMap<String, serde_yml::Value>>
33where
34    T: DeserializeOwned + Default + Clone + Send,
35{
36    /// Absolute path to the source Markdown file
37    pub path: PathBuf,
38
39    phantom: PhantomData<T>,
40}
41
42impl<T: DeserializeOwned + Default + Clone + Send> ObFile<T> for ObFileOnDisk<T> {
43    /// Returns the note's content body (without frontmatter)
44    ///
45    /// # Panics
46    /// - If file doesn't exist
47    /// - On filesystem errors
48    /// - If file contains invalid UTF-8
49    ///
50    /// # Performance
51    /// Performs disk read on every call. Suitable for:
52    /// - Single-pass processing (link extraction, analysis)
53    /// - Large files where in-memory storage is prohibitive
54    ///
55    /// For repeated access, consider caching or `ObFileInMemory`.
56    fn content(&self) -> String {
57        let raw_text = std::fs::read_to_string(&self.path).unwrap();
58
59        match parse_obfile(&raw_text).unwrap() {
60            ResultParse::WithProperties {
61                content,
62                properties: _,
63            } => {
64                #[cfg(feature = "logging")]
65                log::trace!("Frontmatter detected, parsing properties");
66
67                content.to_string()
68            }
69            ResultParse::WithoutProperties => {
70                #[cfg(feature = "logging")]
71                log::trace!("No frontmatter found, storing raw content");
72
73                raw_text
74            }
75        }
76    }
77
78    /// Parses YAML frontmatter directly from disk
79    ///
80    /// # Panics
81    /// - If properties can't be deserialized
82    fn properties(&self) -> T {
83        let raw_text = std::fs::read_to_string(&self.path).unwrap();
84
85        match parse_obfile(&raw_text).unwrap() {
86            ResultParse::WithProperties {
87                content: _,
88                properties,
89            } => {
90                #[cfg(feature = "logging")]
91                log::trace!("Frontmatter detected, parsing properties");
92
93                serde_yml::from_str(properties).unwrap()
94            }
95            ResultParse::WithoutProperties => {
96                #[cfg(feature = "logging")]
97                log::trace!("No frontmatter found, storing raw content");
98
99                T::default()
100            }
101        }
102    }
103
104    fn path(&self) -> Option<PathBuf> {
105        Some(self.path.clone())
106    }
107
108    /// Creates instance from text (requires path!)
109    ///
110    /// Dont use this function. Use `from_file`
111    fn from_string<P: AsRef<std::path::Path>>(
112        _raw_text: &str,
113        path: Option<P>,
114    ) -> Result<Self, Error> {
115        let path_buf = path.expect("Path is required").as_ref().to_path_buf();
116
117        Self::from_file(path_buf)
118    }
119
120    /// Creates instance from path
121    fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Error> {
122        let path_buf = path.as_ref().to_path_buf();
123
124        if !path_buf.is_file() {
125            return Err(Error::IsNotFile(path_buf));
126        }
127
128        Ok(Self {
129            path: path_buf,
130            phantom: PhantomData,
131        })
132    }
133}
134
#[cfg(test)]
mod tests {
    use super::*;
    use crate::obfile::ObFileDefault;
    use crate::obfile::impl_tests::{from_file, from_file_with_unicode, impl_test_for_obfile};
    use crate::test_utils::init_test_logger;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // Shared `ObFile` test suites, instantiated for the on-disk implementation.
    impl_test_for_obfile!(impl_from_file, from_file, ObFileOnDisk);

    impl_test_for_obfile!(
        impl_from_file_with_unicode,
        from_file_with_unicode,
        ObFileOnDisk
    );

    /// Creates a temporary file whose contents are `contents`.
    fn temp_note(contents: &str) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(contents.as_bytes()).unwrap();
        tmp
    }

    /// `from_string` without a path must panic.
    #[test]
    #[should_panic]
    fn use_from_string_without_path() {
        init_test_logger();
        ObFileOnDisk::from_string_default("", None::<&str>).unwrap();
    }

    /// A directory is not a file, so unwrapping the result must panic.
    #[test]
    #[should_panic]
    fn use_from_file_with_path_not_file() {
        init_test_logger();
        let dir = tempfile::tempdir().unwrap();

        ObFileOnDisk::from_file_default(dir.path()).unwrap();
    }

    /// Both the `path()` accessor and the public field expose the original path.
    #[test]
    fn get_path() {
        init_test_logger();
        let tmp = NamedTempFile::new().unwrap();
        let note = ObFileOnDisk::from_file_default(tmp.path()).unwrap();

        assert_eq!(note.path().unwrap(), tmp.path());
        assert_eq!(note.path, tmp.path());
    }

    /// A file without frontmatter yields its raw text as content.
    #[test]
    fn get_content() {
        init_test_logger();
        let expected = "DATA";
        let tmp = temp_note(expected);

        let note = ObFileOnDisk::from_file_default(tmp.path()).unwrap();
        assert_eq!(note.content(), expected);
    }

    /// Frontmatter is split from the body and exposed through `properties()`.
    #[test]
    fn get_properties() {
        init_test_logger();
        let tmp = temp_note("---\ntime: now\n---\nDATA");

        let note = ObFileOnDisk::from_file_default(tmp.path()).unwrap();
        assert_eq!(note.content(), "DATA");
        assert_eq!(note.properties()["time"], "now");
    }
}