obsidian_parser/obfile/
mod.rs

1pub mod obfile_in_memory;
2pub mod obfile_on_disk;
3
4use crate::error::Error;
5use regex::{Regex, RegexBuilder};
6use serde::de::DeserializeOwned;
7use std::{
8    collections::HashMap,
9    path::{Path, PathBuf},
10    sync::LazyLock,
11};
12
13/// Represents an Obsidian note file with frontmatter properties and content
14///
15/// This trait provides a standardized interface for working with Obsidian markdown files,
16/// handling frontmatter parsing, content extraction, and file operations.
17///
18/// # Type Parameters
19/// - `T`: Frontmatter properties type (must implement `DeserializeOwned + Default + Clone + Send`)
20///
21/// # Example
22/// ```no_run
23/// use obsidian_parser::prelude::*;
24/// use serde::Deserialize;
25///
26/// #[derive(Deserialize, Default, Clone)]
27/// struct NoteProperties {
28///     topic: String,
29///     created: String,
30/// }
31///
32/// let note: ObFileInMemory<NoteProperties> = ObFile::from_file("note.md").unwrap();
33/// println!("Note topic: {}", note.properties().topic);
34/// ```
35pub trait ObFile<T = HashMap<String, serde_yaml::Value>>: Sized
36where
37    T: DeserializeOwned + Default + Clone + Send,
38{
39    /// Returns the main content body of the note (excluding frontmatter)
40    ///
41    /// # Implementation Notes
42    /// - Strips YAML frontmatter if present
43    /// - Preserves original formatting and whitespace
44    fn content(&self) -> String;
45
46    /// Returns the source file path if available
47    ///
48    /// Returns `None` for in-memory notes without physical storage
49    fn path(&self) -> Option<PathBuf>;
50
51    /// Returns parsed frontmatter properties
52    ///
53    /// # Behavior
54    /// - Returns default-initialized properties if frontmatter is missing/invalid
55    /// - Automatically handles YAML deserialization
56    fn properties(&self) -> T;
57
58    /// Parses an Obsidian note from a string
59    ///
60    /// # Arguments
61    /// - `raw_text`: Raw markdown content with optional YAML frontmatter
62    /// - `path`: Optional source path for reference
63    ///
64    /// # Errors
65    /// - `Error::InvalidFormat` for malformed frontmatter
66    /// - `Error::Yaml` for invalid YAML syntax
67    fn from_string<P: AsRef<Path>>(raw_text: &str, path: Option<P>) -> Result<Self, Error>;
68
69    /// Parses an Obsidian note from a file
70    ///
71    /// # Arguments
72    /// - `path`: Filesystem path to markdown file
73    ///
74    /// # Errors
75    /// - `Error::Io` for filesystem errors
76    /// - `Error::FromUtf8` for non-UTF8 content
77    fn from_file<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
78        let path_buf = path.as_ref().to_path_buf();
79
80        #[cfg(feature = "logging")]
81        log::trace!("Parse obsidian file from file: {}", path_buf.display());
82        let data = std::fs::read(path)?;
83        let text = String::from_utf8(data)?;
84
85        Self::from_string(&text, Some(path_buf))
86    }
87}
88
89/// Default implementation using `HashMap` for properties
90///
91/// Automatically implemented for all `ObFile<HashMap<..>>` types.
92/// Provides identical interface with explicitly named methods.
93pub trait ObFileDefault: ObFile<HashMap<String, serde_yaml::Value>> {
94    /// Same as `ObFile::from_string` with default properties type
95    ///
96    /// # Errors
97    /// - `Error::InvalidFormat` for malformed frontmatter
98    /// - `Error::Yaml` for invalid YAML syntax
99    fn from_string_default<P: AsRef<Path>>(text: &str, path: Option<P>) -> Result<Self, Error>;
100
101    /// Same as `ObFile::from_file` with default properties type
102    ///
103    /// # Errors
104    /// - `Error::Io` for filesystem errors
105    /// - `Error::FromUtf8` for non-UTF8 content
106    fn from_file_default<P: AsRef<Path>>(path: P) -> Result<Self, Error>;
107}
108
109impl<T> ObFileDefault for T
110where
111    T: ObFile<HashMap<String, serde_yaml::Value>>,
112{
113    fn from_string_default<P: AsRef<Path>>(text: &str, path: Option<P>) -> Result<Self, Error> {
114        Self::from_string(text, path)
115    }
116
117    fn from_file_default<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
118        Self::from_file(path)
119    }
120}
121
122/// Helper function with enhanced logging
123fn parse_obfile(raw_text: &str) -> (bool, Vec<&str>) {
124    static PROPERTIES_REGEX: LazyLock<Regex> = LazyLock::new(|| {
125        RegexBuilder::new(r"^---\s*$")
126            .multi_line(true)
127            .unicode(false)
128            .build()
129            .unwrap()
130    });
131
132    #[cfg(feature = "logging")]
133    log::trace!("Parse obsidian file from string");
134
135    let parts: Vec<_> = PROPERTIES_REGEX.splitn(raw_text, 3).collect();
136    let valid_properties = raw_text.starts_with("---");
137
138    (valid_properties, parts)
139}
140
141#[cfg(test)]
142pub(crate) mod tests {
143    use super::*;
144    use crate::test_utils::init_test_logger;
145    use serde::Deserialize;
146    use std::io::Write;
147    use tempfile::NamedTempFile;
148
149    pub(crate) static TEST_DATA: &str = "---\n\
150topic: life\n\
151created: 2025-03-16\n\
152---\n\
153Test data\n\
154---\n\
155Two test data";
156
157    #[derive(Debug, Deserialize, Default, PartialEq, Clone)]
158    pub(crate) struct TestProperties {
159        pub(crate) topic: String,
160        pub(crate) created: String,
161    }
162
163    pub(crate) fn from_string<T: ObFile>() -> Result<(), Error> {
164        init_test_logger();
165        let file = T::from_string(TEST_DATA, None::<&str>)?;
166        let properties = file.properties();
167
168        assert_eq!(properties["topic"], "life");
169        assert_eq!(properties["created"], "2025-03-16");
170        assert_eq!(file.content(), "Test data\n---\nTwo test data");
171        Ok(())
172    }
173
174    pub(crate) fn from_string_without_properties<T: ObFile>() -> Result<(), Error> {
175        init_test_logger();
176        let test_data = "TEST_DATA";
177        let file = T::from_string(test_data, None::<&str>)?;
178        let properties = file.properties();
179
180        assert_eq!(properties.len(), 0);
181        assert_eq!(file.content(), test_data);
182        Ok(())
183    }
184
185    pub(crate) fn from_string_with_invalid_yaml<T: ObFile>() -> Result<(), Error> {
186        init_test_logger();
187        let broken_data = "---\n\
188    asdfv:--fs\n\
189    sfsf\n\
190    ---\n\
191    TestData";
192
193        assert!(matches!(
194            T::from_string(broken_data, None::<&str>),
195            Err(Error::Yaml(_))
196        ));
197        Ok(())
198    }
199
200    pub(crate) fn from_string_invalid_format<T: ObFile>() -> Result<(), Error> {
201        init_test_logger();
202        let broken_data = "---\n";
203
204        assert!(matches!(
205            T::from_string(broken_data, None::<&str>),
206            Err(Error::InvalidFormat)
207        ));
208        Ok(())
209    }
210
211    pub(crate) fn from_string_with_unicode<T: ObFile>() -> Result<(), Error> {
212        init_test_logger();
213        let data = "---\ndata: 💩\n---\nSuper data 💩💩💩";
214        let file = T::from_string(data, None::<&str>)?;
215        let properties = file.properties();
216
217        assert_eq!(properties["data"], "💩");
218        assert_eq!(file.content(), "Super data 💩💩💩");
219        Ok(())
220    }
221
222    pub(crate) fn from_string_space_with_properties<T: ObFile>() -> Result<(), Error> {
223        init_test_logger();
224        let data = "  ---\ntest: test-data\n---\n";
225        let file = T::from_string(data, None::<&str>)?;
226        let properties = file.properties();
227
228        assert_eq!(file.content(), data);
229        assert_eq!(properties.len(), 0);
230        Ok(())
231    }
232
233    pub(crate) fn from_file<T: ObFile>() -> Result<(), Error> {
234        init_test_logger();
235        let mut temp_file = NamedTempFile::new().unwrap();
236        temp_file.write_all(b"TEST_DATA").unwrap();
237
238        let file = T::from_file(temp_file.path()).unwrap();
239        assert_eq!(file.content(), "TEST_DATA");
240        assert_eq!(file.path().unwrap(), temp_file.path());
241        assert_eq!(file.properties().len(), 0);
242        Ok(())
243    }
244
245    pub(crate) fn from_file_without_properties<T: ObFile>() -> Result<(), Error> {
246        init_test_logger();
247        let test_data = "TEST_DATA";
248        let mut test_file = NamedTempFile::new().unwrap();
249        test_file.write_all(test_data.as_bytes()).unwrap();
250
251        let file = T::from_file(test_file.path())?;
252        let properties = file.properties();
253
254        assert_eq!(properties.len(), 0);
255        assert_eq!(file.content(), test_data);
256        Ok(())
257    }
258
259    pub(crate) fn from_file_with_invalid_yaml<T: ObFile>() -> Result<(), Error> {
260        init_test_logger();
261        let broken_data = "---\n\
262    asdfv:--fs\n\
263    sfsf\n\
264    ---\n\
265    TestData";
266
267        let mut test_file = NamedTempFile::new().unwrap();
268        test_file.write_all(broken_data.as_bytes()).unwrap();
269
270        assert!(matches!(
271            T::from_file(test_file.path()),
272            Err(Error::Yaml(_))
273        ));
274        Ok(())
275    }
276
277    pub(crate) fn from_file_invalid_format<T: ObFile>() -> Result<(), Error> {
278        init_test_logger();
279        let broken_data = "---\n";
280        let mut test_file = NamedTempFile::new().unwrap();
281        test_file.write_all(broken_data.as_bytes()).unwrap();
282
283        assert!(matches!(
284            T::from_file(test_file.path()),
285            Err(Error::InvalidFormat)
286        ));
287        Ok(())
288    }
289
290    pub(crate) fn from_file_with_unicode<T: ObFile>() -> Result<(), Error> {
291        init_test_logger();
292        let data = "---\ndata: 💩\n---\nSuper data 💩💩💩";
293        let mut test_file = NamedTempFile::new().unwrap();
294        test_file.write_all(data.as_bytes()).unwrap();
295
296        let file = T::from_file(test_file.path())?;
297        let properties = file.properties();
298
299        assert_eq!(properties["data"], "💩");
300        assert_eq!(file.content(), "Super data 💩💩💩");
301        Ok(())
302    }
303
304    pub(crate) fn from_file_space_with_properties<T: ObFile>() -> Result<(), Error> {
305        init_test_logger();
306        let data = "  ---\ntest: test-data\n---\n";
307        let mut test_file = NamedTempFile::new().unwrap();
308        test_file.write_all(data.as_bytes()).unwrap();
309
310        let file = T::from_string(data, None::<&str>)?;
311        let properties = file.properties();
312
313        assert_eq!(file.content(), data);
314        assert_eq!(properties.len(), 0);
315        Ok(())
316    }
317
318    macro_rules! impl_test_for_obfile {
319        ($name_test:ident, $fn_test:ident, $impl_obfile:path) => {
320            #[test]
321            fn $name_test() {
322                $fn_test::<$impl_obfile>().unwrap();
323            }
324        };
325    }
326
327    pub(crate) use impl_test_for_obfile;
328
329    macro_rules! impl_all_tests_from_string {
330        ($impl_obfile:path) => {
331            #[allow(unused_imports)]
332            use crate::obfile::tests::*;
333
334            impl_test_for_obfile!(impl_from_string, from_string, $impl_obfile);
335
336            impl_test_for_obfile!(
337                impl_from_string_without_properties,
338                from_string_without_properties,
339                $impl_obfile
340            );
341            impl_test_for_obfile!(
342                impl_from_string_with_invalid_yaml,
343                from_string_with_invalid_yaml,
344                $impl_obfile
345            );
346            impl_test_for_obfile!(
347                impl_from_string_invalid_format,
348                from_string_invalid_format,
349                $impl_obfile
350            );
351            impl_test_for_obfile!(
352                impl_from_string_with_unicode,
353                from_string_with_unicode,
354                $impl_obfile
355            );
356            impl_test_for_obfile!(
357                impl_from_string_space_with_properties,
358                from_string_space_with_properties,
359                $impl_obfile
360            );
361        };
362    }
363
364    macro_rules! impl_all_tests_from_file {
365        ($impl_obfile:path) => {
366            #[allow(unused_imports)]
367            use crate::obfile::tests::*;
368
369            impl_test_for_obfile!(impl_from_file, from_file, $impl_obfile);
370
371            impl_test_for_obfile!(
372                impl_from_file_without_properties,
373                from_file_without_properties,
374                $impl_obfile
375            );
376            impl_test_for_obfile!(
377                impl_from_file_with_invalid_yaml,
378                from_file_with_invalid_yaml,
379                $impl_obfile
380            );
381            impl_test_for_obfile!(
382                impl_from_file_invalid_format,
383                from_file_invalid_format,
384                $impl_obfile
385            );
386            impl_test_for_obfile!(
387                impl_from_file_with_unicode,
388                from_file_with_unicode,
389                $impl_obfile
390            );
391            impl_test_for_obfile!(
392                impl_from_file_space_with_properties,
393                from_file_space_with_properties,
394                $impl_obfile
395            );
396        };
397    }
398
399    pub(crate) use impl_all_tests_from_file;
400    pub(crate) use impl_all_tests_from_string;
401}