buup/transformers/
markdown_to_html.rs1use crate::{Transform, TransformError, TransformerCategory};
2
/// Transformer that renders Markdown text as HTML.
///
/// The type carries no state; all behaviour lives in its `Transform`
/// implementation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct MarkdownToHtml;
6
7impl Transform for MarkdownToHtml {
8 fn name(&self) -> &'static str {
9 "Markdown to HTML"
10 }
11
12 fn id(&self) -> &'static str {
13 "markdowntohtml"
14 }
15
16 fn category(&self) -> TransformerCategory {
17 TransformerCategory::Formatter
18 }
19
20 fn description(&self) -> &'static str {
21 "Converts Markdown text to HTML format"
22 }
23
24 fn transform(&self, input: &str) -> Result<String, TransformError> {
25 let mut html = String::new();
26 let mut in_code_block = false;
27 let mut code_language = String::new();
28 let mut in_list = false;
29 let mut in_ordered_list = false;
30 let mut in_blockquote = false;
31 let lines = input.lines();
32
33 for line in lines {
34 if line.trim().starts_with("```") {
36 if in_code_block {
37 html.push_str("</code></pre>\n");
38 in_code_block = false;
39 code_language.clear();
40 } else {
41 in_code_block = true;
42 code_language.clear();
43 let language_start = line.trim_start().chars().skip(3).collect::<String>();
45 if !language_start.is_empty() {
46 code_language = language_start.trim().to_string();
47 if !code_language.is_empty() {
48 html.push_str(&format!(
49 "<pre><code class=\"language-{}\">",
50 code_language
51 ));
52 } else {
53 html.push_str("<pre><code>");
54 }
55 } else {
56 html.push_str("<pre><code>");
57 }
58 }
59 continue;
60 }
61
62 if in_code_block {
63 html.push_str(&line.replace('<', "<").replace('>', ">"));
64 html.push('\n');
65 continue;
66 }
67
68 if line.trim() == "---" || line.trim() == "***" || line.trim() == "___" {
70 html.push_str("<hr>\n");
71 continue;
72 }
73
74 if line.trim().starts_with('>') {
76 if !in_blockquote {
77 html.push_str("<blockquote>\n");
78 in_blockquote = true;
79 }
80 let content = line.trim()[1..].trim_start();
81 let processed_content = process_inline_markdown(content);
82 html.push_str(&format!("<p>{}</p>\n", processed_content));
83 continue;
84 } else if in_blockquote && line.trim().is_empty() {
85 html.push_str("</blockquote>\n");
86 in_blockquote = false;
87 continue;
88 }
89
90 let level = line.chars().take_while(|&c| c == '#').count();
92 if level > 0 && level <= 6 && line.chars().nth(level) == Some(' ') {
93 let content = line[level..].trim();
94 let processed_content = process_inline_markdown(content);
95 html.push_str(&format!("<h{}>{}</h{}>\n", level, processed_content, level));
96 continue;
97 }
98
99 if let Some(content) = line.trim().strip_prefix("1. ") {
101 if !in_ordered_list {
102 if in_list {
103 html.push_str("</ul>\n");
104 in_list = false;
105 }
106 html.push_str("<ol>\n");
107 in_ordered_list = true;
108 }
109 let processed_content = process_inline_markdown(content);
110 html.push_str(&format!("<li>{}</li>\n", processed_content));
111 continue;
112 } else if in_ordered_list && line.trim().len() >= 3 {
113 let parts: Vec<&str> = line.trim().splitn(2, ". ").collect();
115 if parts.len() == 2 && parts[0].parse::<usize>().is_ok() {
116 let processed_content = process_inline_markdown(parts[1]);
117 html.push_str(&format!("<li>{}</li>\n", processed_content));
118 continue;
119 } else if in_ordered_list {
120 html.push_str("</ol>\n");
121 in_ordered_list = false;
122 }
123 } else if in_ordered_list && line.trim().is_empty() {
124 html.push_str("</ol>\n");
125 in_ordered_list = false;
126 }
127
128 if line.trim().starts_with("- ") || line.trim().starts_with("* ") {
130 if !in_list {
131 if in_ordered_list {
132 html.push_str("</ol>\n");
133 in_ordered_list = false;
134 }
135 html.push_str("<ul>\n");
136 in_list = true;
137 }
138 let marker_len = 2; let content = line.trim()[marker_len..].trim();
140 let processed_content = process_inline_markdown(content);
141 html.push_str(&format!("<li>{}</li>\n", processed_content));
142 continue;
143 } else if in_list && line.trim().is_empty() {
144 html.push_str("</ul>\n");
145 in_list = false;
146 continue;
147 }
148
149 if !line.trim().is_empty() {
151 let processed_line = process_inline_markdown(line);
152
153 if !processed_line.starts_with("<h")
155 && !processed_line.starts_with("<ul")
156 && !processed_line.starts_with("<ol")
157 && !processed_line.starts_with("<li")
158 && !processed_line.starts_with("<blockquote")
159 {
160 html.push_str("<p>");
161 html.push_str(&processed_line);
162 html.push_str("</p>\n");
163 } else {
164 html.push_str(&processed_line);
165 html.push('\n');
166 }
167 } else if !in_list && !in_ordered_list && !in_blockquote && !line.trim().is_empty() {
168 html.push('\n');
169 }
170 }
171
172 if in_list {
174 html.push_str("</ul>\n");
175 }
176 if in_ordered_list {
177 html.push_str("</ol>\n");
178 }
179 if in_blockquote {
180 html.push_str("</blockquote>\n");
181 }
182 if in_code_block {
183 html.push_str("</code></pre>\n");
184 }
185
186 Ok(html)
187 }
188
189 fn default_test_input(&self) -> &'static str {
190 "# Hello World\n\nThis is a **bold** and *italic* text with ~~strikethrough~~ and `inline code`.\n\n- List item 1\n- List item 2\n\n1. Ordered item 1\n2. Ordered item 2\n\n> This is a blockquote\n\n[Link text](https://example.com)\n\n---\n\n```rust\nfn main() {\n println!(\"Hello, world!\");\n}\n```"
191 }
192}
193
/// Converts the inline Markdown of a single line to HTML.
///
/// Handled constructs, applied in this order over the whole line:
/// `` `code` `` → `<code>`, `**bold**` → `<strong>`, `*italic*` → `<em>`,
/// `~~strike~~` → `<del>`, `[text](url)` → `<a href="url">`.
///
/// Afterwards every `<` that does not start one of the tags accepted by
/// [`is_start_of_html_tag`], and every `>` outside such a tag, is escaped as
/// `&lt;` / `&gt;`. The scan works on `str` slices, so non-ASCII text is
/// preserved exactly.
///
/// NOTE: accepted tags that are typed literally in `input` (for example
/// `<em>` or `<a href="…">`) are indistinguishable from generated ones and
/// are passed through verbatim, attributes included. `&` is not escaped.
fn process_inline_markdown(input: &str) -> String {
    let mut text = input.to_string();

    // `**` has to be consumed before the single-`*` pass can see it.
    replace_delimited(&mut text, "`", "<code>", "</code>");
    replace_delimited(&mut text, "**", "<strong>", "</strong>");
    replace_delimited(&mut text, "*", "<em>", "</em>");
    replace_delimited(&mut text, "~~", "<del>", "</del>");
    replace_links(&mut text);

    escape_stray_angle_brackets(&text)
}

/// Rewrites each `delim … delim` pair in `text` as `open … close`, left to
/// right. An unpaired trailing delimiter is left untouched.
fn replace_delimited(text: &mut String, delim: &str, open: &str, close: &str) {
    while let Some(start) = text.find(delim) {
        let inner_start = start + delim.len();
        let inner_end = match text[inner_start..].find(delim) {
            Some(len) => inner_start + len,
            None => break,
        };
        let replacement = format!("{}{}{}", open, &text[inner_start..inner_end], close);
        text.replace_range(start..inner_end + delim.len(), &replacement);
    }
}

/// Rewrites every `[label](url)` in `text` as `<a href="url">label</a>`.
///
/// A `[` that does not begin a link is skipped instead of ending the scan, so
/// later links on the same line are still converted. `"`, `<` and `>` in the
/// URL are escaped so the URL cannot terminate the `href` attribute or the tag.
fn replace_links(text: &mut String) {
    let mut from = 0;
    while let Some(offset) = text[from..].find('[') {
        let open = from + offset;

        // Locate `](` and the closing `)`; any miss means "not a link here".
        let label_end = text[open..].find(']').map(|off| open + off);
        let url_end = label_end
            .filter(|&end| text.as_bytes().get(end + 1) == Some(&b'('))
            .and_then(|end| text[end + 1..].find(')').map(|off| end + 1 + off));

        let (label_end, url_end) = match (label_end, url_end) {
            (Some(label_end), Some(url_end)) => (label_end, url_end),
            _ => {
                from = open + 1;
                continue;
            }
        };

        let url = text[label_end + 2..url_end]
            .replace('"', "&quot;")
            .replace('<', "&lt;")
            .replace('>', "&gt;");
        let anchor = format!("<a href=\"{}\">{}</a>", url, &text[open + 1..label_end]);
        text.replace_range(open..=url_end, &anchor);

        // Resume after the generated anchor so its own text is not re-parsed.
        from = open + anchor.len();
    }
}

/// Copies `text`, keeping accepted tags verbatim and escaping every other
/// `<` / `>` as `&lt;` / `&gt;`.
fn escape_stray_angle_brackets(text: &str) -> String {
    let mut escaped = String::with_capacity(text.len());
    let mut rest = text;

    while let Some(pos) = rest.find(|c: char| c == '<' || c == '>') {
        escaped.push_str(&rest[..pos]);
        let tail = &rest[pos..];
        let is_open_bracket = tail.starts_with('<');

        // An accepted tag counts only if it is actually terminated by `>`.
        let tag_end = if is_open_bracket && is_start_of_html_tag(tail[1..].as_bytes()) {
            tail.find('>')
        } else {
            None
        };

        rest = match tag_end {
            Some(end) => {
                escaped.push_str(&tail[..=end]);
                &tail[end + 1..]
            }
            None => {
                escaped.push_str(if is_open_bracket { "&lt;" } else { "&gt;" });
                &tail[1..]
            }
        };
    }

    escaped.push_str(rest);
    escaped
}

/// Reports whether `bytes` (the input right after a `<`) begins one of the
/// tags that are emitted unescaped: `a`, `strong`, `em`, `del`, `code`, `p`.
///
/// The tag name must be followed by `>`. Opening tags other than `p` may
/// instead be followed by ASCII whitespace (attributes). Matching on the bare
/// name prefix is not enough: it would accept `<embed>` as `em` or `<delete>`
/// as `del`.
fn is_start_of_html_tag(bytes: &[u8]) -> bool {
    // (tag name, whether the opening form may carry attributes)
    const ACCEPTED: [(&str, bool); 6] = [
        ("a", true),
        ("strong", true),
        ("em", true),
        ("del", true),
        ("code", true),
        ("p", false),
    ];

    let (name_part, is_closing) = match bytes.split_first() {
        Some((&b'/', after_slash)) => (after_slash, true),
        _ => (bytes, false),
    };

    ACCEPTED.iter().any(|&(name, allows_attributes)| {
        name_part.starts_with(name.as_bytes())
            && match name_part.get(name.len()) {
                Some(&b'>') => true,
                Some(next) => !is_closing && allows_attributes && next.is_ascii_whitespace(),
                None => false,
            }
    })
}
336
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs `markdown` through `MarkdownToHtml` and compares the result with
    /// `expected_html`.
    #[track_caller]
    fn assert_renders(markdown: &str, expected_html: &str) {
        let rendered = MarkdownToHtml.transform(markdown).unwrap();
        assert_eq!(rendered, expected_html);
    }

    // Heading, emphasis, unordered list and link in one document.
    #[test]
    fn test_markdown_to_html() {
        assert_renders(
            "# Title\n\nThis is **bold** and *italic*.\n\n- Item 1\n- Item 2\n\n[Link](https://example.com)",
            "<h1>Title</h1>\n<p>This is <strong>bold</strong> and <em>italic</em>.</p>\n<ul>\n<li>Item 1</li>\n<li>Item 2</li>\n</ul>\n<p><a href=\"https://example.com\">Link</a></p>\n",
        );
    }

    // Fence without a language tag.
    #[test]
    fn test_code_block() {
        assert_renders(
            "```\ncode here\n```",
            "<pre><code>code here\n</code></pre>\n",
        );
    }

    // Fence with a language tag becomes a `language-…` class.
    #[test]
    fn test_code_block_with_language() {
        assert_renders(
            "```rust\nfn main() {\n println!(\"Hello!\");\n}\n```",
            "<pre><code class=\"language-rust\">fn main() {\n println!(\"Hello!\");\n}\n</code></pre>\n",
        );
    }

    #[test]
    fn test_ordered_list() {
        assert_renders(
            "1. First item\n2. Second item",
            "<ol>\n<li>First item</li>\n<li>Second item</li>\n</ol>\n",
        );
    }

    #[test]
    fn test_blockquote() {
        assert_renders(
            "> This is a quote",
            "<blockquote>\n<p>This is a quote</p>\n</blockquote>\n",
        );
    }

    #[test]
    fn test_horizontal_rule() {
        assert_renders(
            "Before\n\n---\n\nAfter",
            "<p>Before</p>\n<hr>\n<p>After</p>\n",
        );
    }

    #[test]
    fn test_strikethrough() {
        assert_renders(
            "This is ~~strikethrough~~ text",
            "<p>This is <del>strikethrough</del> text</p>\n",
        );
    }

    #[test]
    fn test_inline_code() {
        assert_renders(
            "This is `inline code` text",
            "<p>This is <code>inline code</code> text</p>\n",
        );
    }
}