turbovault_parser/
parsers.rs

1//! OFM parser implementation using unified ParseEngine.
2
3use std::path::{Path, PathBuf};
4use turbovault_core::{FileMetadata, Frontmatter, Result, SourcePosition, VaultFile};
5
6use crate::ParseOptions;
7use crate::engine::ParseEngine;
8
9// Individual parser modules are still available for backwards compatibility
10// and granular use cases, but the main Parser uses the unified engine.
11pub mod callouts;
12pub mod embeds;
13pub mod frontmatter_parser;
14pub mod headings;
15pub mod link_utils;
16pub mod markdown_links;
17pub mod tags;
18pub mod tasks;
19pub mod wikilinks;
20
21pub use self::frontmatter_parser::extract_frontmatter;
22
/// Entry point for parsing Obsidian Flavored Markdown (OFM) files.
///
/// A `Parser` only stores the vault root it was created with; the actual
/// element extraction is delegated to the unified `ParseEngine`.
///
/// `Debug` and `Clone` are derived so the parser can be logged and cheaply
/// duplicated (its only state is an owned `PathBuf`).
#[derive(Debug, Clone)]
pub struct Parser {
    /// Root directory of the vault this parser was created for.
    vault_root: PathBuf,
}
29
30impl Parser {
31    /// Create a new parser for the given vault root.
32    pub fn new(vault_root: PathBuf) -> Self {
33        Self { vault_root }
34    }
35
36    /// Get the vault root path.
37    pub fn vault_root(&self) -> &Path {
38        &self.vault_root
39    }
40
41    /// Parse a file from path and content.
42    pub fn parse_file(&self, path: &Path, content: &str) -> Result<VaultFile> {
43        let metadata = self.extract_metadata(path, content)?;
44        let mut vault_file = VaultFile::new(path.to_path_buf(), content.to_string(), metadata);
45
46        // Parse content if markdown
47        if path.extension().is_some_and(|ext| ext == "md") {
48            self.parse_content(&mut vault_file)?;
49            vault_file.is_parsed = true;
50            vault_file.last_parsed = Some(
51                std::time::SystemTime::now()
52                    .duration_since(std::time::UNIX_EPOCH)
53                    .unwrap_or_default()
54                    .as_secs_f64(),
55            );
56        }
57
58        Ok(vault_file)
59    }
60
61    fn extract_metadata(&self, path: &Path, content: &str) -> Result<FileMetadata> {
62        use std::collections::hash_map::DefaultHasher;
63        use std::hash::{Hash, Hasher};
64
65        let size = content.len() as u64;
66        let mut hasher = DefaultHasher::new();
67        content.hash(&mut hasher);
68        let checksum = format!("{:x}", hasher.finish());
69
70        Ok(FileMetadata {
71            path: path.to_path_buf(),
72            size,
73            created_at: 0.0,
74            modified_at: 0.0,
75            checksum,
76            is_attachment: !matches!(
77                path.extension().map(|e| e.to_str()),
78                Some(Some("md" | "txt"))
79            ),
80        })
81    }
82
83    /// Parse all content elements from file using unified engine.
84    fn parse_content(&self, vault_file: &mut VaultFile) -> Result<()> {
85        let content = &vault_file.content;
86
87        // Use ParseEngine with source file for vault-aware parsing
88        let engine = ParseEngine::with_source_file(content, &vault_file.path);
89        let result = engine.parse(&ParseOptions::all());
90
91        // Transfer results to VaultFile
92        vault_file.frontmatter = result.frontmatter;
93
94        // Update content without frontmatter (if present)
95        // Note: The engine doesn't return stripped content, but for VaultFile
96        // we may want to keep original content. For now, preserve behavior.
97        if vault_file.frontmatter.is_some()
98            && let Ok((_, stripped)) = extract_frontmatter(content)
99        {
100            vault_file.content = stripped;
101        }
102
103        // Links (wikilinks, embeds, markdown links)
104        vault_file.links.extend(result.wikilinks);
105        vault_file.links.extend(result.embeds);
106        vault_file.links.extend(result.markdown_links);
107
108        // Other elements
109        vault_file.tags.extend(result.tags);
110        vault_file.tasks.extend(result.tasks);
111        vault_file.callouts.extend(result.callouts);
112        vault_file.headings.extend(result.headings);
113
114        Ok(())
115    }
116
117    /// Parse frontmatter from YAML string.
118    #[allow(dead_code)]
119    fn parse_frontmatter(&self, fm_str: &str) -> Result<Option<Frontmatter>> {
120        match serde_yaml::from_str::<serde_json::Value>(fm_str) {
121            Ok(serde_json::Value::Object(map)) => {
122                let data = map.into_iter().collect();
123                Ok(Some(Frontmatter {
124                    data,
125                    position: SourcePosition::start(),
126                }))
127            }
128            Ok(_) => Ok(None),
129            Err(_) => Ok(None),
130        }
131    }
132}
133
#[cfg(test)]
mod tests {
    use super::*;

    /// Parser rooted at the dummy vault path shared by all tests.
    fn vault_parser() -> Parser {
        Parser::new(PathBuf::from("/vault"))
    }

    /// The constructor stores the given root unchanged.
    #[test]
    fn test_parser_creation() {
        let expected_root = PathBuf::from("/vault");
        let parser = vault_parser();
        assert_eq!(parser.vault_root, expected_root);
    }

    /// A markdown file containing one of each element kind is fully parsed.
    #[test]
    fn test_parse_file_complete() {
        let markdown = r#"---
title: Test
---

# Heading

[[Link]] and [md](url) with #tag

- [ ] Task

> [!NOTE] Callout
"#;
        let parsed = vault_parser()
            .parse_file(Path::new("test.md"), markdown)
            .unwrap();

        assert!(parsed.frontmatter.is_some());
        assert_eq!(parsed.headings.len(), 1);
        // At least the wikilink and the markdown link must be present.
        assert!(parsed.links.len() >= 2);
        assert_eq!(parsed.tags.len(), 1);
        assert_eq!(parsed.tasks.len(), 1);
        assert_eq!(parsed.callouts.len(), 1);
    }

    /// Only `.md` files go through OFM parsing; `.txt` stays unparsed.
    #[test]
    fn test_parse_file_non_markdown() {
        let plain_text = "[[Link]] #tag";
        let parsed = vault_parser()
            .parse_file(Path::new("test.txt"), plain_text)
            .unwrap();

        assert!(!parsed.is_parsed);
    }
}