Skip to main content

obsidian_cli_inspector/
parser.rs

use std::collections::{BTreeSet, HashMap};

mod markdown;
mod wikilink;

pub use markdown::{build_markdown_link, extract_markdown_links};
pub use wikilink::{extract_wikilinks, parse_wikilink};
8
9#[derive(Debug, Clone)]
10pub struct ParsedNote {
11    pub title: String,
12    pub frontmatter: HashMap<String, String>,
13    pub tags: Vec<String>,
14    pub links: Vec<Link>,
15    pub text: String,
16}
17
18#[derive(Debug, Clone)]
19pub struct Link {
20    pub text: String,
21    pub alias: Option<String>,
22    pub heading_ref: Option<String>,
23    pub block_ref: Option<String>,
24    pub is_embed: bool,
25    pub link_type: LinkType,
26}
27
/// The syntax a [`Link`] was written in.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    /// A `[[target]]` style link.
    Wiki,
    /// A `[text](target)` style link.
    Markdown,
}

impl LinkType {
    /// Returns the fixed lowercase label for this link type:
    /// `"wikilink"` or `"markdown"`.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Wiki => "wikilink",
            Self::Markdown => "markdown",
        }
    }
}
42
43pub struct MarkdownParser;
44
45impl MarkdownParser {
46    pub fn parse(content: &str) -> ParsedNote {
47        let (frontmatter, rest) = Self::extract_frontmatter(content);
48        let tags = Self::extract_tags(&frontmatter, rest);
49        let links = Self::extract_links(rest);
50        let title = Self::extract_title(&frontmatter, rest);
51
52        ParsedNote {
53            title,
54            frontmatter,
55            tags,
56            links,
57            text: rest.to_string(),
58        }
59    }
60
61    fn extract_frontmatter(content: &str) -> (HashMap<String, String>, &str) {
62        let mut map = HashMap::new();
63
64        if !content.starts_with("---") {
65            return (map, content);
66        }
67
68        let rest = &content[3..];
69        if let Some(end_pos) = rest.find("---") {
70            let frontmatter_text = &rest[..end_pos];
71            let content_after = &rest[end_pos + 3..].trim_start();
72
73            for line in frontmatter_text.lines() {
74                let line = line.trim();
75                if line.is_empty() {
76                    continue;
77                }
78
79                if let Some(colon_pos) = line.find(':') {
80                    let key = line[..colon_pos].trim().to_lowercase();
81                    let value = line[colon_pos + 1..].trim().to_string();
82
83                    // Special handling for tags which might be arrays
84                    if key == "tags" {
85                        let tags_str = value.trim_start_matches('[').trim_end_matches(']');
86                        for tag in tags_str.split(',') {
87                            let clean_tag = tag.trim().trim_matches('"').trim_matches('\'');
88                            if !clean_tag.is_empty() {
89                                map.insert(format!("tag_{clean_tag}"), clean_tag.to_string());
90                            }
91                        }
92                    } else {
93                        map.insert(key, value);
94                    }
95                }
96            }
97
98            return (map, content_after);
99        }
100
101        (map, content)
102    }
103
104    fn extract_title(frontmatter: &HashMap<String, String>, content: &str) -> String {
105        // Try to get from frontmatter
106        if let Some(title) = frontmatter.get("title") {
107            return title.clone();
108        }
109
110        // Try to extract from first heading
111        for line in content.lines() {
112            let trimmed = line.trim();
113            if let Some(stripped) = trimmed.strip_prefix("# ") {
114                return stripped.trim().to_string();
115            }
116        }
117
118        // Fallback to empty string
119        String::new()
120    }
121
122    fn extract_tags(frontmatter: &HashMap<String, String>, content: &str) -> Vec<String> {
123        let mut tags = Vec::new();
124
125        // From frontmatter
126        for (key, value) in frontmatter {
127            if key.starts_with("tag_") {
128                tags.push(value.clone());
129            }
130        }
131
132        // From inline tags in content
133        for word in content.split_whitespace() {
134            if word.starts_with('#') && word.len() > 1 {
135                let tag = word
136                    .trim_matches(|c: char| !c.is_alphanumeric() && c != '/' && c != '_')
137                    .trim_start_matches('#');
138                if !tag.is_empty() && !tags.contains(&tag.to_string()) {
139                    tags.push(tag.to_string());
140                }
141            }
142        }
143
144        tags.sort();
145        tags.dedup();
146        tags
147    }
148
149    fn extract_links(content: &str) -> Vec<Link> {
150        let mut links = extract_wikilinks(content);
151        links.extend(extract_markdown_links(content));
152        links
153    }
154}
155
/// Normalizes a note reference so that different spellings of the same note
/// compare equal.
///
/// In order: surrounding whitespace is trimmed, backslashes become forward
/// slashes, any leading `./` segments are removed, and a trailing `.md`
/// extension (in any letter case) is dropped. The result is trimmed once more.
///
/// Backslashes are converted before the prefix is stripped so that a
/// Windows-style `.\Note.md` is handled the same way as `./Note.md`.
pub fn normalize_note_identifier(raw: &str) -> String {
    let unified = raw.trim().replace('\\', "/");
    let without_prefix = unified.trim_start_matches("./");

    // `get` returns `None` when the last three bytes do not start on a
    // character boundary, so multi-byte names cannot cause a panic here.
    let extension = without_prefix
        .len()
        .checked_sub(3)
        .and_then(|cut| without_prefix.get(cut..));

    let stem = match extension {
        Some(ext) if ext.eq_ignore_ascii_case(".md") => {
            &without_prefix[..without_prefix.len() - 3]
        }
        _ => without_prefix,
    };

    stem.trim().to_string()
}
168
/// Unit tests for the parser entry point, the identifier normalizer and the
/// plain data types declared in this file.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_parse_wikilink_simple() {
        let parsed = MarkdownParser::parse("This is [[note]] link");
        assert_eq!(parsed.links.len(), 1);
        assert_eq!(parsed.links[0].text, "note");
    }

    #[test]
    fn test_parse_wikilink_with_alias() {
        let parsed = MarkdownParser::parse("This is [[note|alias]] link");
        assert_eq!(parsed.links.len(), 1);
        assert_eq!(parsed.links[0].text, "note");
        assert_eq!(parsed.links[0].alias, Some("alias".to_string()));
    }

    #[test]
    fn test_parse_markdown_link_basic() {
        let parsed = MarkdownParser::parse("See [Doc](docs/Note.md)");
        assert_eq!(parsed.links.len(), 1);
        assert_eq!(parsed.links[0].text, "docs/Note");
        assert_eq!(parsed.links[0].alias, Some("Doc".to_string()));
        assert_eq!(parsed.links[0].link_type, LinkType::Markdown);
    }

    #[test]
    fn test_parse_frontmatter_title_and_tags() {
        let parsed = MarkdownParser::parse(
            "---\ntitle: My Note\ntags: [alpha, beta]\n---\n# Heading\nBody #gamma text",
        );
        assert_eq!(parsed.title, "My Note");
        assert_eq!(parsed.tags, vec!["alpha", "beta", "gamma"]);
        // The frontmatter block is not part of the stored body.
        assert_eq!(parsed.text, "# Heading\nBody #gamma text");
    }

    #[test]
    fn test_parse_title_falls_back_to_first_heading() {
        let parsed = MarkdownParser::parse("Intro line\n# Real Title\nMore text");
        assert_eq!(parsed.title, "Real Title");
        assert!(parsed.frontmatter.is_empty());
    }

    #[test]
    fn test_parse_inline_tags_sorted_and_deduplicated() {
        let parsed = MarkdownParser::parse("#beta then #alpha and #beta again");
        assert_eq!(parsed.tags, vec!["alpha", "beta"]);
    }

    #[test]
    fn test_normalize_note_identifier() {
        assert_eq!(normalize_note_identifier("./Note.md"), "Note");
        assert_eq!(normalize_note_identifier("Folder\\Note.md"), "Folder/Note");
        assert_eq!(normalize_note_identifier("  Note.md  "), "Note");
    }

    #[test]
    fn test_link_type_as_str() {
        assert_eq!(LinkType::Wiki.as_str(), "wikilink");
        assert_eq!(LinkType::Markdown.as_str(), "markdown");
    }

    #[test]
    fn test_link_creation() {
        let link = Link {
            text: "test".to_string(),
            alias: Some("alias".to_string()),
            heading_ref: Some("heading".to_string()),
            block_ref: Some("block".to_string()),
            is_embed: true,
            link_type: LinkType::Wiki,
        };
        assert_eq!(link.text, "test");
        assert!(link.alias.is_some());
        assert!(link.is_embed);
    }

    #[test]
    fn test_parsed_note_creation() {
        let note = ParsedNote {
            title: "Test".to_string(),
            frontmatter: HashMap::new(),
            tags: vec!["tag1".to_string()],
            links: vec![],
            text: "Content".to_string(),
        };
        assert_eq!(note.title, "Test");
        assert_eq!(note.tags.len(), 1);
    }
}