1use regex::Regex;
11use std::collections::HashMap;
12use std::rc::Rc;
13
/// Escapes the characters that are significant in HTML text content.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`; every other
/// character is copied through unchanged. Quotes are not escaped, so the result
/// is suitable for element content but not for attribute values.
pub fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    // A single pass guarantees that the `&` of an entity we just emitted is
    // never escaped a second time.
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
24
/// Removes every `<...>` tag from `s`, keeping the text between tags.
///
/// A tag is everything from a `<` up to and including the next `>`, and may
/// span several lines. A `<` that has no `>` anywhere after it is not a tag:
/// it is copied through unchanged together with the rest of the input.
///
/// This is a purely textual strip, not an HTML parser: comments and `>`
/// characters inside quoted attribute values get no special treatment.
pub fn strip_html_tags(s: &str) -> String {
    let mut stripped = String::with_capacity(s.len());
    let mut rest = s;

    while let Some(open) = rest.find('<') {
        match rest[open..].find('>') {
            Some(close) => {
                stripped.push_str(&rest[..open]);
                // `close` is relative to `open`; `>` is one byte wide, so the
                // `+ 1` lands on a character boundary.
                rest = &rest[open + close + 1..];
            }
            // No closing `>` remains, so nothing further can be a tag.
            None => break,
        }
    }

    stripped.push_str(rest);
    stripped
}
30
31pub fn replace_with_placeholders(
34 s: &str,
35 pattern: &str,
36 placeholder: &str,
37) -> (String, HashMap<String, String>) {
38 let re = Regex::new(pattern).unwrap();
39 let mut placeholders = HashMap::new();
40 let mut counter: usize = 0;
41
42 let result = re
43 .replace_all(s, |caps: ®ex::Captures<'_>| {
44 let full = caps.get(0).unwrap().as_str().to_string();
45 let ph = format!("#{placeholder}{counter}#");
46 counter += 1;
47 placeholders.insert(ph.clone(), full);
48 ph
49 })
50 .to_string();
51
52 (result, placeholders)
53}
54
/// Substitutes the stored original text for every placeholder found in `s`.
///
/// Each key of `placeholders` is replaced, wherever it occurs, by the value it
/// maps to; keys that do not occur in the text are ignored. The substitutions
/// are applied one key at a time, in the map's iteration order.
pub fn restore_from_placeholders(s: &str, placeholders: &HashMap<String, String>) -> String {
    placeholders
        .iter()
        .fold(s.to_string(), |text, (token, original)| {
            text.replace(token.as_str(), original)
        })
}
63
/// One possible outcome of running a parser on an input string.
#[derive(Debug, Clone)]
struct ParseResult {
    /// Output produced for the part of the input the parser recognised.
    consumed: String,
    /// The part of the input that comes after what was recognised.
    left: String,
}
75
/// Opening HTML tag emitted for each supported emphasis delimiter.
static OPEN_TAGS: &[(&str, &str)] = &[
    ("*", "<i>"),
    ("**", "<b>"),
    ("_", "<i>"),
    ("__", "<b>"),
];

/// Closing HTML tag emitted for each supported emphasis delimiter.
static CLOSE_TAGS: &[(&str, &str)] = &[
    ("*", "</i>"),
    ("**", "</b>"),
    ("_", "</i>"),
    ("__", "</b>"),
];
82
83fn open_tag(token: &str) -> &'static str {
84 OPEN_TAGS
85 .iter()
86 .find(|(k, _)| *k == token)
87 .map(|(_, v)| *v)
88 .unwrap_or("")
89}
90
91fn close_tag(token: &str) -> &'static str {
92 CLOSE_TAGS
93 .iter()
94 .find(|(k, _)| *k == token)
95 .map(|(_, v)| *v)
96 .unwrap_or("")
97}
98
99type Parser = Rc<dyn Fn(&str) -> Vec<ParseResult>>;
101
102fn parse_open(token: &'static str) -> Parser {
105 Rc::new(move |input: &str| {
106 if let Some(rest) = input.strip_prefix(token) {
107 vec![ParseResult {
108 consumed: open_tag(token).to_string(),
109 left: rest.to_string(),
110 }]
111 } else {
112 vec![]
113 }
114 })
115}
116
117fn parse_close(token: &'static str) -> Parser {
120 Rc::new(move |input: &str| {
121 if let Some(rest) = input.strip_prefix(token) {
122 vec![ParseResult {
123 consumed: close_tag(token).to_string(),
124 left: rest.to_string(),
125 }]
126 } else {
127 vec![]
128 }
129 })
130}
131
132fn parse_not_markdown() -> Parser {
134 Rc::new(|input: &str| {
135 for (i, ch) in input.char_indices() {
136 if ch == '*' || ch == '_' {
137 return vec![ParseResult {
138 consumed: input[..i].to_string(),
139 left: input[i..].to_string(),
140 }];
141 }
142 }
143 if !input.is_empty() {
144 vec![ParseResult {
145 consumed: input.to_string(),
146 left: String::new(),
147 }]
148 } else {
149 vec![]
150 }
151 })
152}
153
154fn parse_or(parsers: Vec<Parser>) -> Parser {
156 Rc::new(move |input: &str| {
157 let mut results = Vec::new();
158 for p in &parsers {
159 results.extend(p(input));
160 }
161 results
162 })
163}
164
165fn parse_and(parsers: Vec<Parser>) -> Parser {
167 Rc::new(move |input: &str| {
168 let mut results = vec![ParseResult {
169 consumed: String::new(),
170 left: input.to_string(),
171 }];
172
173 for p in &parsers {
174 let mut new_results = Vec::new();
175 for r in &results {
176 for parsed in p(&r.left) {
177 if !parsed.consumed.is_empty() {
178 new_results.push(ParseResult {
179 consumed: format!("{}{}", r.consumed, parsed.consumed),
180 left: parsed.left.clone(),
181 });
182 }
183 }
184 }
185 if new_results.is_empty() {
186 return vec![];
187 }
188 results = new_results;
189 }
190 results
191 })
192}
193
194fn parse_some(parser: Parser) -> Parser {
196 Rc::new(move |input: &str| recursive(input, &parser, 0))
197}
198
199fn recursive(input: &str, parser: &Parser, depth: usize) -> Vec<ParseResult> {
200 let mut results = Vec::new();
201 let mut empty = true;
202
203 for item in parser(input) {
204 if item.consumed.is_empty() {
205 continue;
206 }
207 empty = false;
208 for child in recursive(&item.left, parser, depth + 1) {
209 results.push(ParseResult {
210 consumed: format!("{}{}", item.consumed, child.consumed),
211 left: child.left,
212 });
213 }
214 }
215
216 if empty && depth != 0 {
217 results.push(ParseResult {
218 consumed: String::new(),
219 left: input.to_string(),
220 });
221 }
222
223 results
224}
225
226fn markdown_parser() -> Parser {
233 let text = parse_not_markdown();
235
236 let italic_no_bold = parse_or(vec![
241 parse_and(vec![
242 parse_open("*"),
243 parse_not_markdown(),
244 parse_close("*"),
245 ]),
246 parse_and(vec![
247 parse_open("_"),
248 parse_not_markdown(),
249 parse_close("_"),
250 ]),
251 ]);
252
253 let bold = parse_or(vec![
258 parse_and(vec![
259 parse_open("**"),
260 parse_some(parse_or(vec![parse_not_markdown(), italic_no_bold.clone()])),
261 parse_close("**"),
262 ]),
263 parse_and(vec![
264 parse_open("__"),
265 parse_some(parse_or(vec![parse_not_markdown(), italic_no_bold])),
266 parse_close("__"),
267 ]),
268 ]);
269
270 let italic = parse_or(vec![
275 parse_and(vec![
276 parse_open("*"),
277 parse_some(parse_or(vec![parse_not_markdown(), bold.clone()])),
278 parse_close("*"),
279 ]),
280 parse_and(vec![
281 parse_open("_"),
282 parse_some(parse_or(vec![parse_not_markdown(), bold.clone()])),
283 parse_close("_"),
284 ]),
285 ]);
286
287 parse_some(parse_or(vec![bold, italic, text]))
290}
291
292pub fn markdown_to_html(md: &str) -> String {
301 let md_without_code = escape_html(md);
302
303 let (md_without_code, code_placeholders) =
305 replace_with_placeholders(&md_without_code, r"(?s)```.*?```", "c0debl0ck");
306 let (md_without_code, inline_placeholders) =
307 replace_with_placeholders(&md_without_code, r"`[^`]+`", "inl1ne");
308
309 let re_newlines = Regex::new(r"\n{2,}").unwrap();
311 let segments = re_newlines.split(&md_without_code);
312 let processed: Vec<String> = segments
313 .map(|segment| {
314 let parser = markdown_parser();
315 let docs = parser(segment);
316 if !docs.is_empty() {
317 format!("{}{}", docs[0].consumed, docs[0].left)
318 } else {
319 segment.to_string()
320 }
321 })
322 .collect();
323 let md_without_code = processed.join("\n\n");
324
325 let mut result = restore_from_placeholders(&md_without_code, &code_placeholders);
327 result = restore_from_placeholders(&result, &inline_placeholders);
328
329 let re_code_block = Regex::new(r"(?s)```(.+?)```").unwrap();
331 result = re_code_block
332 .replace_all(&result, |caps: ®ex::Captures<'_>| {
333 let inner = caps.get(1).unwrap().as_str().trim();
334 format!("<pre>{inner}</pre>")
335 })
336 .to_string();
337
338 let re_inline_code = Regex::new(r"`([^`]+?)`").unwrap();
340 result = re_inline_code
341 .replace_all(&result, "<code>$1</code>")
342 .to_string();
343
344 let re_header = Regex::new(r"(?m)^#+\s*(.+)").unwrap();
346 result = re_header.replace_all(&result, "<b>$1</b>").to_string();
347
348 result
349}
350
#[cfg(test)]
mod tests {
    //! Unit tests for the HTML helpers, the placeholder round trip, the
    //! Markdown conversion entry point and the individual parser combinators.
    use super::*;

    #[test]
    fn test_escape_html() {
        assert_eq!(escape_html("a & b < c > d"), "a &amp; b &lt; c &gt; d");
        assert_eq!(escape_html("plain"), "plain");
    }

    #[test]
    fn test_strip_html_tags() {
        assert_eq!(strip_html_tags("<b>hello</b>"), "hello");
        assert_eq!(strip_html_tags("no tags"), "no tags");
        assert_eq!(
            strip_html_tags("<b>bold</b> and <i>italic</i>"),
            "bold and italic"
        );
    }

    #[test]
    fn test_replace_and_restore_placeholders() {
        let input = "some ```code``` here";
        let (modified, phs) = replace_with_placeholders(input, r"(?s)```.*?```", "c0de");
        assert!(modified.contains("c0de"));
        let restored = restore_from_placeholders(&modified, &phs);
        assert_eq!(restored, input);
    }

    #[test]
    fn test_markdown_to_html_italic() {
        let result = markdown_to_html("hello *world*");
        assert!(result.contains("<i>world</i>"));
        assert!(result.contains("hello"));
    }

    #[test]
    fn test_markdown_to_html_bold() {
        let result = markdown_to_html("hello **world**");
        assert!(result.contains("<b>world</b>"));
    }

    #[test]
    fn test_markdown_to_html_bold_underscore() {
        let result = markdown_to_html("hello __world__");
        assert!(result.contains("<b>world</b>"));
    }

    #[test]
    fn test_markdown_to_html_italic_underscore() {
        let result = markdown_to_html("hello _world_");
        assert!(result.contains("<i>world</i>"));
    }

    #[test]
    fn test_markdown_to_html_nested_bold_italic() {
        let result = markdown_to_html("**bold *italic* bold**");
        assert!(result.contains("<b>"));
        assert!(result.contains("<i>italic</i>"));
        assert!(result.contains("</b>"));
    }

    #[test]
    fn test_markdown_to_html_code_block() {
        let result = markdown_to_html("```\ncode\n```");
        assert!(result.contains("<pre>code</pre>"));
    }

    #[test]
    fn test_markdown_to_html_inline_code() {
        let result = markdown_to_html("use `foo` here");
        assert!(result.contains("<code>foo</code>"));
    }

    #[test]
    fn test_markdown_to_html_header() {
        let result = markdown_to_html("# Title");
        assert!(result.contains("<b>Title</b>"));
    }

    #[test]
    fn test_markdown_to_html_header_h3() {
        let result = markdown_to_html("### Subtitle");
        assert!(result.contains("<b>Subtitle</b>"));
    }

    #[test]
    fn test_markdown_to_html_plain_text_unchanged() {
        let result = markdown_to_html("just plain text");
        assert_eq!(result, "just plain text");
    }

    #[test]
    fn test_markdown_to_html_html_chars_escaped() {
        let result = markdown_to_html("a < b & c > d");
        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
        assert!(result.contains("&amp;"));
    }

    #[test]
    fn test_markdown_to_html_mixed() {
        let result = markdown_to_html("**bold** and *italic* and `code`");
        assert!(result.contains("<b>bold</b>"));
        assert!(result.contains("<i>italic</i>"));
        assert!(result.contains("<code>code</code>"));
    }

    #[test]
    fn test_parser_not_markdown() {
        let p = parse_not_markdown();
        let results = p("hello*world");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "hello");
        assert_eq!(results[0].left, "*world");
    }

    #[test]
    fn test_parser_not_markdown_no_special() {
        let p = parse_not_markdown();
        let results = p("hello world");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "hello world");
        assert_eq!(results[0].left, "");
    }

    #[test]
    fn test_parser_open_close() {
        let p = parse_open("**");
        let results = p("**bold**");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "<b>");
        assert_eq!(results[0].left, "bold**");

        let p = parse_close("**");
        let results = p("**rest");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "</b>");
        assert_eq!(results[0].left, "rest");
    }

    #[test]
    fn test_parser_and() {
        let p = parse_and(vec![
            parse_open("*"),
            parse_not_markdown(),
            parse_close("*"),
        ]);
        let results = p("*hello*");
        assert!(!results.is_empty());
        assert_eq!(results[0].consumed, "<i>hello</i>");
    }
}