turbovault_parser/
parsers.rs

1//! OFM parser implementation using unified ParseEngine.
2
3use std::path::{Path, PathBuf};
4use turbovault_core::{FileMetadata, Frontmatter, Result, SourcePosition, VaultFile};
5
6use crate::ParseOptions;
7use crate::engine::ParseEngine;
8
9// Individual parser modules are still available for backwards compatibility
10// and granular use cases, but the main Parser uses the unified engine.
11pub mod callouts;
12pub mod embeds;
13pub mod frontmatter_parser;
14pub mod headings;
15pub mod link_utils;
16pub mod markdown_links;
17pub mod tags;
18pub mod tasks;
19pub mod wikilinks;
20
21#[allow(deprecated)]
22pub use self::frontmatter_parser::extract_frontmatter;
23
/// Main parser for OFM files.
///
/// Uses the unified ParseEngine internally for efficient, single-source parsing.
///
/// The parser is a thin, cheaply clonable handle: its only state is the vault
/// root path it was constructed with.
#[derive(Debug, Clone)]
pub struct Parser {
    /// Root directory of the vault, exactly as supplied by the caller.
    vault_root: PathBuf,
}
30
31impl Parser {
32    /// Create a new parser for the given vault root.
33    pub fn new(vault_root: PathBuf) -> Self {
34        Self { vault_root }
35    }
36
37    /// Get the vault root path.
38    pub fn vault_root(&self) -> &Path {
39        &self.vault_root
40    }
41
42    /// Parse a file from path and content.
43    pub fn parse_file(&self, path: &Path, content: &str) -> Result<VaultFile> {
44        let metadata = self.extract_metadata(path, content)?;
45        let mut vault_file = VaultFile::new(path.to_path_buf(), content.to_string(), metadata);
46
47        // Parse content if markdown
48        if path.extension().is_some_and(|ext| ext == "md") {
49            self.parse_content(&mut vault_file)?;
50            vault_file.is_parsed = true;
51            vault_file.last_parsed = Some(
52                std::time::SystemTime::now()
53                    .duration_since(std::time::UNIX_EPOCH)
54                    .unwrap_or_default()
55                    .as_secs_f64(),
56            );
57        }
58
59        Ok(vault_file)
60    }
61
62    fn extract_metadata(&self, path: &Path, content: &str) -> Result<FileMetadata> {
63        use std::collections::hash_map::DefaultHasher;
64        use std::hash::{Hash, Hasher};
65
66        let size = content.len() as u64;
67        let mut hasher = DefaultHasher::new();
68        content.hash(&mut hasher);
69        let checksum = format!("{:x}", hasher.finish());
70
71        Ok(FileMetadata {
72            path: path.to_path_buf(),
73            size,
74            created_at: 0.0,
75            modified_at: 0.0,
76            checksum,
77            is_attachment: !matches!(
78                path.extension().map(|e| e.to_str()),
79                Some(Some("md" | "txt"))
80            ),
81        })
82    }
83
84    /// Parse all content elements from file using unified engine.
85    fn parse_content(&self, vault_file: &mut VaultFile) -> Result<()> {
86        let content = &vault_file.content;
87
88        // Use ParseEngine with source file for vault-aware parsing
89        let engine = ParseEngine::with_source_file(content, &vault_file.path);
90        let result = engine.parse(&ParseOptions::all());
91
92        // Transfer results to VaultFile
93        vault_file.frontmatter = result.frontmatter;
94
95        // Strip frontmatter using pulldown-cmark's byte offset (avoids redundant regex parse)
96        if result.frontmatter_end_offset > 0 {
97            vault_file.content = content[result.frontmatter_end_offset..].to_string();
98        }
99
100        // Links (wikilinks, embeds, markdown links)
101        vault_file.links.extend(result.wikilinks);
102        vault_file.links.extend(result.embeds);
103        vault_file.links.extend(result.markdown_links);
104
105        // Other elements
106        vault_file.tags.extend(result.tags);
107        vault_file.tasks.extend(result.tasks);
108        vault_file.callouts.extend(result.callouts);
109        vault_file.headings.extend(result.headings);
110
111        Ok(())
112    }
113
114    /// Parse frontmatter from YAML string.
115    #[allow(dead_code)]
116    fn parse_frontmatter(&self, fm_str: &str) -> Result<Option<Frontmatter>> {
117        match serde_yaml::from_str::<serde_json::Value>(fm_str) {
118            Ok(serde_json::Value::Object(map)) => {
119                let data = map.into_iter().collect();
120                Ok(Some(Frontmatter {
121                    data,
122                    position: SourcePosition::start(),
123                }))
124            }
125            Ok(_) => Ok(None),
126            Err(_) => Ok(None),
127        }
128    }
129}
130
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the parser used by every test, rooted at `/vault`.
    fn vault_parser() -> Parser {
        Parser::new(PathBuf::from("/vault"))
    }

    /// The constructor stores the supplied root unchanged.
    #[test]
    fn test_parser_creation() {
        let expected_root = PathBuf::from("/vault");
        assert_eq!(vault_parser().vault_root, expected_root);
    }

    /// A markdown note containing one of each element kind is fully extracted.
    #[test]
    fn test_parse_file_complete() {
        let source = r#"---
title: Test
---

# Heading

[[Link]] and [md](url) with #tag

- [ ] Task

> [!NOTE] Callout
"#;
        let note_path = PathBuf::from("test.md");
        let parsed = vault_parser().parse_file(&note_path, source).unwrap();

        assert!(parsed.frontmatter.is_some());
        assert_eq!(parsed.headings.len(), 1);
        // At least the wikilink and the markdown link must be present.
        assert!(parsed.links.len() >= 2);
        assert_eq!(parsed.tags.len(), 1);
        assert_eq!(parsed.tasks.len(), 1);
        assert_eq!(parsed.callouts.len(), 1);
    }

    /// Files that are not `.md` are returned without OFM parsing.
    #[test]
    fn test_parse_file_non_markdown() {
        let text_path = PathBuf::from("test.txt");
        let parsed = vault_parser()
            .parse_file(&text_path, "[[Link]] #tag")
            .unwrap();

        // .txt files are not parsed for OFM elements
        assert!(!parsed.is_parsed);
    }
}
179}