Skip to main content

robinpath_modules/modules/
markdown_mod.rs

1use robinpath::{RobinPath, Value};
2
3pub fn register(rp: &mut RobinPath) {
4    rp.register_builtin("markdown.toHtml", |args, _| {
5        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
6        Ok(Value::String(markdown_to_html(&md)))
7    });
8
9    rp.register_builtin("markdown.extractHeadings", |args, _| {
10        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
11        let re = regex::Regex::new(r"^(#{1,6})\s+(.+)$").unwrap();
12        let headings: Vec<Value> = md
13            .lines()
14            .filter_map(|line| {
15                re.captures(line).map(|caps| {
16                    let mut obj = indexmap::IndexMap::new();
17                    obj.insert("level".to_string(), Value::Number(caps[1].len() as f64));
18                    obj.insert("text".to_string(), Value::String(caps[2].trim().to_string()));
19                    Value::Object(obj)
20                })
21            })
22            .collect();
23        Ok(Value::Array(headings))
24    });
25
26    rp.register_builtin("markdown.extractLinks", |args, _| {
27        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
28        let re = regex::Regex::new(r"\[([^\]]*)\]\(([^)]+)\)").unwrap();
29        let links: Vec<Value> = re
30            .captures_iter(&md)
31            .map(|caps| {
32                let mut obj = indexmap::IndexMap::new();
33                obj.insert("text".to_string(), Value::String(caps[1].to_string()));
34                obj.insert("url".to_string(), Value::String(caps[2].to_string()));
35                Value::Object(obj)
36            })
37            .collect();
38        Ok(Value::Array(links))
39    });
40
41    rp.register_builtin("markdown.extractImages", |args, _| {
42        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
43        let re = regex::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
44        let images: Vec<Value> = re
45            .captures_iter(&md)
46            .map(|caps| {
47                let mut obj = indexmap::IndexMap::new();
48                obj.insert("alt".to_string(), Value::String(caps[1].to_string()));
49                obj.insert("src".to_string(), Value::String(caps[2].to_string()));
50                Value::Object(obj)
51            })
52            .collect();
53        Ok(Value::Array(images))
54    });
55
56    rp.register_builtin("markdown.extractCodeBlocks", |args, _| {
57        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
58        let re = regex::Regex::new(r"(?ms)```(\w*)\n(.*?)```").unwrap();
59        let blocks: Vec<Value> = re
60            .captures_iter(&md)
61            .map(|caps| {
62                let mut obj = indexmap::IndexMap::new();
63                obj.insert("language".to_string(), Value::String(caps[1].to_string()));
64                obj.insert("code".to_string(), Value::String(caps[2].to_string()));
65                Value::Object(obj)
66            })
67            .collect();
68        Ok(Value::Array(blocks))
69    });
70
71    rp.register_builtin("markdown.stripMarkdown", |args, _| {
72        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
73        let mut s = md;
74        // Remove code blocks
75        let code_re = regex::Regex::new(r"(?ms)```.*?```").unwrap();
76        s = code_re.replace_all(&s, "").to_string();
77        // Remove inline code
78        let inline_re = regex::Regex::new(r"`[^`]+`").unwrap();
79        s = inline_re.replace_all(&s, "").to_string();
80        // Remove images
81        let img_re = regex::Regex::new(r"!\[([^\]]*)\]\([^)]+\)").unwrap();
82        s = img_re.replace_all(&s, "$1").to_string();
83        // Remove links but keep text
84        let link_re = regex::Regex::new(r"\[([^\]]*)\]\([^)]+\)").unwrap();
85        s = link_re.replace_all(&s, "$1").to_string();
86        // Remove headings markers
87        let h_re = regex::Regex::new(r"^#{1,6}\s+").unwrap();
88        s = s.lines().map(|l| h_re.replace(l, "").to_string()).collect::<Vec<_>>().join("\n");
89        // Remove bold/italic
90        let bold_re = regex::Regex::new(r"\*{1,3}([^*]+)\*{1,3}").unwrap();
91        s = bold_re.replace_all(&s, "$1").to_string();
92        let under_re = regex::Regex::new(r"_{1,3}([^_]+)_{1,3}").unwrap();
93        s = under_re.replace_all(&s, "$1").to_string();
94        // Remove strikethrough
95        let strike_re = regex::Regex::new(r"~~([^~]+)~~").unwrap();
96        s = strike_re.replace_all(&s, "$1").to_string();
97        Ok(Value::String(s.trim().to_string()))
98    });
99
100    rp.register_builtin("markdown.extractTodos", |args, _| {
101        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
102        let re = regex::Regex::new(r"^-\s+\[([ xX])\]\s+(.+)$").unwrap();
103        let todos: Vec<Value> = md
104            .lines()
105            .filter_map(|line| {
106                re.captures(line.trim()).map(|caps| {
107                    let mut obj = indexmap::IndexMap::new();
108                    obj.insert("text".to_string(), Value::String(caps[2].to_string()));
109                    obj.insert("checked".to_string(), Value::Bool(&caps[1] != " "));
110                    Value::Object(obj)
111                })
112            })
113            .collect();
114        Ok(Value::Array(todos))
115    });
116
117    rp.register_builtin("markdown.tableToArray", |args, _| {
118        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
119        let lines: Vec<&str> = md.lines().filter(|l| l.contains('|')).collect();
120        if lines.len() < 2 {
121            return Ok(Value::Array(vec![]));
122        }
123        let headers: Vec<String> = lines[0]
124            .split('|')
125            .map(|s| s.trim().to_string())
126            .filter(|s| !s.is_empty())
127            .collect();
128        // Skip separator line (line 1)
129        let rows: Vec<Value> = lines[2..]
130            .iter()
131            .map(|line| {
132                let cells: Vec<String> = line
133                    .split('|')
134                    .map(|s| s.trim().to_string())
135                    .filter(|s| !s.is_empty())
136                    .collect();
137                let mut obj = indexmap::IndexMap::new();
138                for (i, header) in headers.iter().enumerate() {
139                    obj.insert(
140                        header.clone(),
141                        Value::String(cells.get(i).cloned().unwrap_or_default()),
142                    );
143                }
144                Value::Object(obj)
145            })
146            .collect();
147        Ok(Value::Array(rows))
148    });
149
150    rp.register_builtin("markdown.wordCount", |args, _| {
151        let md = args.first().map(|v| v.to_display_string()).unwrap_or_default();
152        // Strip markdown first
153        let code_re = regex::Regex::new(r"(?ms)```.*?```").unwrap();
154        let cleaned = code_re.replace_all(&md, "");
155        let tag_re = regex::Regex::new(r"[#*_`~\[\]()!]").unwrap();
156        let stripped = tag_re.replace_all(&cleaned, "");
157        let count = stripped.split_whitespace().count();
158        Ok(Value::Number(count as f64))
159    });
160}
161
162fn markdown_to_html(md: &str) -> String {
163    let mut html = String::new();
164    let mut in_code_block = false;
165
166    for line in md.lines() {
167        if line.starts_with("```") {
168            if in_code_block {
169                html.push_str("</code></pre>\n");
170                in_code_block = false;
171            } else {
172                let lang = line.trim_start_matches('`').trim();
173                if lang.is_empty() {
174                    html.push_str("<pre><code>");
175                } else {
176                    html.push_str(&format!("<pre><code class=\"language-{}\">", lang));
177                }
178                in_code_block = true;
179            }
180            continue;
181        }
182        if in_code_block {
183            html.push_str(&escape_html_basic(line));
184            html.push('\n');
185            continue;
186        }
187
188        let mut processed = line.to_string();
189
190        // Headings
191        if let Some(caps) = regex::Regex::new(r"^(#{1,6})\s+(.+)$").unwrap().captures(&processed) {
192            let level = caps[1].len();
193            html.push_str(&format!("<h{}>{}</h{}>\n", level, &caps[2], level));
194            continue;
195        }
196
197        // Bold
198        let bold_re = regex::Regex::new(r"\*\*([^*]+)\*\*").unwrap();
199        processed = bold_re.replace_all(&processed, "<strong>$1</strong>").to_string();
200        // Italic
201        let italic_re = regex::Regex::new(r"\*([^*]+)\*").unwrap();
202        processed = italic_re.replace_all(&processed, "<em>$1</em>").to_string();
203        // Inline code
204        let code_re = regex::Regex::new(r"`([^`]+)`").unwrap();
205        processed = code_re.replace_all(&processed, "<code>$1</code>").to_string();
206        // Links
207        let link_re = regex::Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap();
208        processed = link_re.replace_all(&processed, "<a href=\"$2\">$1</a>").to_string();
209        // Images
210        let img_re = regex::Regex::new(r"!\[([^\]]*)\]\(([^)]+)\)").unwrap();
211        processed = img_re.replace_all(&processed, "<img src=\"$2\" alt=\"$1\">").to_string();
212
213        if processed.trim().is_empty() {
214            html.push('\n');
215        } else {
216            html.push_str(&format!("<p>{}</p>\n", processed));
217        }
218    }
219    html.trim().to_string()
220}
221
/// Escapes the characters `&`, `<` and `>` as HTML entities.
///
/// Quotes are left untouched, so the result is suitable for element content
/// but not for attribute values.
fn escape_html_basic(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}