turbovault_parser/
standalone.rs

1//! Standalone markdown parsing without vault context.
2//!
3//! Use this when you just need to parse markdown content without
4//! the full vault file management. Perfect for integration with
5//! tools like treemd that need OFM parsing capabilities.
6//!
7//! # Example
8//!
9//! ```
10//! use turbovault_parser::ParsedContent;
11//!
12//! let content = r#"---
13//! title: My Note
14//! ---
15//!
16//! # Heading
17//!
18//! [[WikiLink]] and [markdown](link) with #tag
19//!
20//! > [!NOTE] A callout
21//! > With content
22//! "#;
23//!
24//! let parsed = ParsedContent::parse(content);
25//! assert!(parsed.frontmatter.is_some());
26//! assert_eq!(parsed.wikilinks.len(), 1);
27//! assert_eq!(parsed.markdown_links.len(), 1);
28//! assert_eq!(parsed.tags.len(), 1);
29//! ```
30
31use turbovault_core::{Callout, Frontmatter, Heading, Link, Tag, TaskItem};
32
33use crate::engine::ParseEngine;
34
/// Switches selecting which markdown elements get extracted.
///
/// Disable the elements you do not need to improve performance on large
/// documents. The [`Default`] value is the same as [`ParseOptions::all`].
///
/// Options compare by value, so a configuration can be checked against a
/// preset with `==` or `assert_eq!`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseOptions {
    /// Extract the YAML frontmatter block.
    pub parse_frontmatter: bool,
    /// Extract wikilinks and embeds.
    pub parse_wikilinks: bool,
    /// Extract standard markdown links: `[text](url)`.
    pub parse_markdown_links: bool,
    /// Extract headings (H1-H6).
    pub parse_headings: bool,
    /// Extract task items: `- [ ]` / `- [x]`.
    pub parse_tasks: bool,
    /// Extract callout blocks: `> [!NOTE]`.
    pub parse_callouts: bool,
    /// Extract inline tags: `#tag`.
    pub parse_tags: bool,
    /// Use full callout parsing (extracts multi-line content).
    pub full_callouts: bool,
}
58
59impl Default for ParseOptions {
60    fn default() -> Self {
61        Self::all()
62    }
63}
64
65impl ParseOptions {
66    /// Parse all OFM elements.
67    pub fn all() -> Self {
68        Self {
69            parse_frontmatter: true,
70            parse_wikilinks: true,
71            parse_markdown_links: true,
72            parse_headings: true,
73            parse_tasks: true,
74            parse_callouts: true,
75            parse_tags: true,
76            full_callouts: false,
77        }
78    }
79
80    /// Parse nothing - useful as a starting point for selective parsing.
81    pub fn none() -> Self {
82        Self {
83            parse_frontmatter: false,
84            parse_wikilinks: false,
85            parse_markdown_links: false,
86            parse_headings: false,
87            parse_tasks: false,
88            parse_callouts: false,
89            parse_tags: false,
90            full_callouts: false,
91        }
92    }
93
94    /// Preset for treemd: links + headings + callouts.
95    pub fn treemd() -> Self {
96        Self {
97            parse_frontmatter: false,
98            parse_wikilinks: true,
99            parse_markdown_links: true,
100            parse_headings: true,
101            parse_tasks: false,
102            parse_callouts: true,
103            parse_tags: false,
104            full_callouts: true, // treemd needs full callout content
105        }
106    }
107
108    /// Preset for link analysis: wikilinks + markdown links + embeds.
109    pub fn links_only() -> Self {
110        Self {
111            parse_frontmatter: false,
112            parse_wikilinks: true,
113            parse_markdown_links: true,
114            parse_headings: false,
115            parse_tasks: false,
116            parse_callouts: false,
117            parse_tags: false,
118            full_callouts: false,
119        }
120    }
121
122    /// Builder method to enable frontmatter parsing.
123    pub fn with_frontmatter(mut self) -> Self {
124        self.parse_frontmatter = true;
125        self
126    }
127
128    /// Builder method to enable full callout parsing.
129    pub fn with_full_callouts(mut self) -> Self {
130        self.full_callouts = true;
131        self
132    }
133}
134
135/// Parsed markdown content without vault context.
136///
137/// This is a lightweight alternative to `VaultFile` when you don't need
138/// file metadata, backlinks, or other vault-specific features.
139#[derive(Debug, Clone, Default)]
140pub struct ParsedContent {
141    /// YAML frontmatter if present
142    pub frontmatter: Option<Frontmatter>,
143    /// Document headings (H1-H6)
144    pub headings: Vec<Heading>,
145    /// Wikilinks: [[Note]], [[Note|alias]], [[Note#heading]]
146    pub wikilinks: Vec<Link>,
147    /// Embeds: ![[image.png]], ![[Note]]
148    pub embeds: Vec<Link>,
149    /// Standard markdown links: [text](url)
150    pub markdown_links: Vec<Link>,
151    /// Inline tags: #tag, #nested/tag
152    pub tags: Vec<Tag>,
153    /// Task items: - [ ], - [x]
154    pub tasks: Vec<TaskItem>,
155    /// Callout blocks: > [!NOTE]
156    pub callouts: Vec<Callout>,
157}
158
159impl ParsedContent {
160    /// Parse markdown content with default options (all elements).
161    ///
162    /// # Example
163    /// ```
164    /// use turbovault_parser::ParsedContent;
165    ///
166    /// let content = "# Title\n\n[[Link]] and #tag";
167    /// let parsed = ParsedContent::parse(content);
168    /// assert_eq!(parsed.headings.len(), 1);
169    /// assert_eq!(parsed.wikilinks.len(), 1);
170    /// assert_eq!(parsed.tags.len(), 1);
171    /// ```
172    pub fn parse(content: &str) -> Self {
173        Self::parse_with_options(content, ParseOptions::all())
174    }
175
176    /// Parse markdown content with custom options.
177    ///
178    /// Use this for better performance when you only need specific elements.
179    ///
180    /// # Example
181    /// ```
182    /// use turbovault_parser::{ParsedContent, ParseOptions};
183    ///
184    /// let content = "# Title\n\n[[Link]] and #tag";
185    /// let opts = ParseOptions::none().with_frontmatter();
186    /// let parsed = ParsedContent::parse_with_options(content, opts);
187    /// // Only frontmatter was parsed
188    /// assert!(parsed.headings.is_empty());
189    /// ```
190    pub fn parse_with_options(content: &str, opts: ParseOptions) -> Self {
191        let engine = ParseEngine::new(content);
192        let result = engine.parse(&opts);
193
194        Self {
195            frontmatter: result.frontmatter,
196            headings: result.headings,
197            wikilinks: result.wikilinks,
198            embeds: result.embeds,
199            markdown_links: result.markdown_links,
200            tags: result.tags,
201            tasks: result.tasks,
202            callouts: result.callouts,
203        }
204    }
205
206    /// Get all links combined (wikilinks + embeds + markdown links).
207    pub fn all_links(&self) -> impl Iterator<Item = &Link> {
208        self.wikilinks
209            .iter()
210            .chain(self.embeds.iter())
211            .chain(self.markdown_links.iter())
212    }
213
214    /// Check if content has any links.
215    pub fn has_links(&self) -> bool {
216        !self.wikilinks.is_empty() || !self.embeds.is_empty() || !self.markdown_links.is_empty()
217    }
218
219    /// Get total link count.
220    pub fn link_count(&self) -> usize {
221        self.wikilinks.len() + self.embeds.len() + self.markdown_links.len()
222    }
223
224    /// Check if content has any OFM elements.
225    pub fn is_empty(&self) -> bool {
226        self.frontmatter.is_none()
227            && self.headings.is_empty()
228            && self.wikilinks.is_empty()
229            && self.embeds.is_empty()
230            && self.markdown_links.is_empty()
231            && self.tags.is_empty()
232            && self.tasks.is_empty()
233            && self.callouts.is_empty()
234    }
235}
236
#[cfg(test)]
mod tests {
    use super::*;

    /// A document containing one of everything is fully picked up by `parse`.
    #[test]
    fn test_parse_complete() {
        let markdown = r#"---
title: Test Note
tags: [test]
---

# Heading 1

This has [[WikiLink]] and [markdown](url).

## Heading 2

- [ ] Task 1
- [x] Task 2 #tag

> [!NOTE]
> Callout content

![[image.png]]
"#;

        let doc = ParsedContent::parse(markdown);

        assert!(doc.frontmatter.is_some());
        assert_eq!(doc.headings.len(), 2);
        assert_eq!(doc.wikilinks.len(), 1);
        assert_eq!(doc.markdown_links.len(), 1);
        assert_eq!(doc.tasks.len(), 2);
        assert_eq!(doc.tags.len(), 1);
        assert_eq!(doc.callouts.len(), 1);
        assert_eq!(doc.embeds.len(), 1);
    }

    /// The link helpers count wikilinks, markdown links and embeds together.
    #[test]
    fn test_all_links() {
        let doc = ParsedContent::parse("[[wiki]] and [md](url) and ![[embed]]");

        assert_eq!(doc.all_links().count(), 3);
        assert_eq!(doc.link_count(), 3);
        assert!(doc.has_links());
    }

    /// Empty input yields an empty result.
    #[test]
    fn test_empty_content() {
        let doc = ParsedContent::parse("");

        assert!(doc.frontmatter.is_none());
        assert!(doc.headings.is_empty());
        assert!(!doc.has_links());
        assert!(doc.is_empty());
    }

    /// With every option off, nothing is extracted.
    #[test]
    fn test_parse_options_none() {
        let opts = ParseOptions::none();
        let doc = ParsedContent::parse_with_options("# Title\n\n[[Link]] #tag", opts);

        assert!(doc.frontmatter.is_none());
        assert!(doc.headings.is_empty());
        assert!(doc.wikilinks.is_empty());
        assert!(doc.tags.is_empty());
    }

    /// The links-only preset extracts links and nothing else.
    #[test]
    fn test_parse_options_links_only() {
        let opts = ParseOptions::links_only();
        let doc = ParsedContent::parse_with_options("# Title\n\n[[Link]] #tag", opts);

        // Headings are disabled in this preset.
        assert!(doc.headings.is_empty());
        // Wikilinks are enabled.
        assert_eq!(doc.wikilinks.len(), 1);
        // Tags are disabled.
        assert!(doc.tags.is_empty());
    }

    /// The treemd preset extracts headings, links and full callout content.
    #[test]
    fn test_parse_options_treemd() {
        let markdown = r#"# Title

[[Link]] #tag

> [!NOTE] Title
> Content here
"#;
        let doc = ParsedContent::parse_with_options(markdown, ParseOptions::treemd());

        // Enabled by the preset.
        assert_eq!(doc.headings.len(), 1);
        assert_eq!(doc.wikilinks.len(), 1);
        // Tags are not part of the treemd preset.
        assert!(doc.tags.is_empty());
        // Callouts are enabled, with their full content.
        assert_eq!(doc.callouts.len(), 1);
        assert_eq!(doc.callouts[0].content, "Content here");
    }

    /// `with_full_callouts` switches from simple to multi-line callout content.
    #[test]
    fn test_full_callouts() {
        let markdown = r#"> [!WARNING] Important
> Line 1
> Line 2"#;

        let simple_opts = ParseOptions::all();
        let full_opts = ParseOptions::all().with_full_callouts();

        let simple = ParsedContent::parse_with_options(markdown, simple_opts);
        let full = ParsedContent::parse_with_options(markdown, full_opts);

        // Simple parsing leaves the content empty.
        assert!(simple.callouts[0].content.is_empty());
        // Full parsing joins the body lines.
        assert_eq!(full.callouts[0].content, "Line 1\nLine 2");
    }

    /// Frontmatter keys are exposed as string values.
    #[test]
    fn test_frontmatter_parsing() {
        let markdown = r#"---
title: Test
author: Alice
---

Content here"#;

        let fm = ParsedContent::parse(markdown).frontmatter.unwrap();

        for (key, expected) in [("title", "Test"), ("author", "Alice")] {
            assert_eq!(fm.data.get(key).and_then(|v| v.as_str()), Some(expected));
        }
    }

    /// Link positions carry the line and column asserted below.
    #[test]
    fn test_position_tracking() {
        let doc = ParsedContent::parse("Line 1\n[[Link]] on line 2");

        let position = &doc.wikilinks[0].position;
        assert_eq!(position.line, 2);
        assert_eq!(position.column, 1);
    }

    /// Patterns inside code blocks should NOT be parsed.
    ///
    /// This is powered by pulldown-cmark integration.
    #[test]
    fn test_code_block_awareness() {
        let markdown = r#"
Normal [[Valid Link]] here.

```rust
// This is a code block
let link = "[[Fake Link Inside Code]]";
```

Also valid: [[Another Valid Link]]
"#;

        let doc = ParsedContent::parse(markdown);

        // Only the two links outside the fence count, NOT the one inside it.
        assert_eq!(doc.wikilinks.len(), 2);
        let expected_targets = ["Valid Link", "Another Valid Link"];
        for (link, expected) in doc.wikilinks.iter().zip(expected_targets) {
            assert_eq!(link.target, expected);
        }
    }
}