use regex::Regex;
use std::collections::HashMap;
use std::rc::Rc;
/// Escapes the characters that are significant in HTML text content.
///
/// `&` becomes `&amp;`, `<` becomes `&lt;` and `>` becomes `&gt;`; every other
/// character is copied through unchanged.
///
/// The input is processed in a single pass, so an ampersand introduced by an
/// earlier replacement can never be escaped a second time.
pub fn escape_html(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '>' => escaped.push_str("&gt;"),
            other => escaped.push(other),
        }
    }
    escaped
}
/// Removes every `<...>` span from `s` and returns the remaining text.
///
/// A span starts at a `<` and ends at the first `>` that follows it (newlines
/// included). A `<` with no later `>` is left untouched, as is everything
/// after it.
///
/// The scan uses only `str::find`, so no regular expression has to be compiled
/// on each call.
pub fn strip_html_tags(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(open) = rest.find('<') {
        match rest[open..].find('>') {
            Some(close) => {
                out.push_str(&rest[..open]);
                // `>` is a single byte, so skipping one byte stays on a char boundary.
                rest = &rest[open + close + 1..];
            }
            // No closing `>` anywhere after this `<`: nothing else can be a tag.
            None => break,
        }
    }
    out.push_str(rest);
    out
}
pub fn replace_with_placeholders(
s: &str,
pattern: &str,
placeholder: &str,
) -> (String, HashMap<String, String>) {
let re = Regex::new(pattern).unwrap();
let mut placeholders = HashMap::new();
let mut counter: usize = 0;
let result = re
.replace_all(s, |caps: ®ex::Captures<'_>| {
let full = caps.get(0).unwrap().as_str().to_string();
let ph = format!("#{placeholder}{counter}#");
counter += 1;
placeholders.insert(ph.clone(), full);
ph
})
.to_string();
(result, placeholders)
}
/// Substitutes every placeholder token found in `s` with its original text.
///
/// Each entry of `placeholders` maps a token to the text it stands for; all
/// occurrences of each token are replaced. Entries are applied one after
/// another in the map's iteration order.
pub fn restore_from_placeholders(s: &str, placeholders: &HashMap<String, String>) -> String {
    placeholders
        .iter()
        .fold(s.to_string(), |text, (token, original)| {
            text.replace(token, original)
        })
}
/// One candidate outcome of running a parser on an input string.
#[derive(Debug, Clone)]
struct ParseResult {
    /// Output text produced for the part of the input that was parsed.
    consumed: String,
    /// Remainder of the input that has not been parsed yet.
    left: String,
}
/// Markdown emphasis markers paired with the HTML tag that opens them.
static OPEN_TAGS: &[(&str, &str)] = &[
    ("*", "<i>"),
    ("**", "<b>"),
    ("_", "<i>"),
    ("__", "<b>"),
];
/// Markdown emphasis markers paired with the HTML tag that closes them.
static CLOSE_TAGS: &[(&str, &str)] = &[
    ("*", "</i>"),
    ("**", "</b>"),
    ("_", "</i>"),
    ("__", "</b>"),
];
/// Looks up the opening HTML tag for a Markdown marker in `OPEN_TAGS`.
///
/// Returns the empty string when `token` is not one of the listed markers.
fn open_tag(token: &str) -> &'static str {
    for &(marker, html) in OPEN_TAGS {
        if marker == token {
            return html;
        }
    }
    ""
}
/// Looks up the closing HTML tag for a Markdown marker in `CLOSE_TAGS`.
///
/// Returns the empty string when `token` is not one of the listed markers.
fn close_tag(token: &str) -> &'static str {
    for &(marker, html) in CLOSE_TAGS {
        if marker == token {
            return html;
        }
    }
    ""
}
/// A shareable parser: a function from the input text to every alternative
/// `ParseResult` it can produce. An empty vector means the parser did not match.
type Parser = Rc<dyn Fn(&str) -> Vec<ParseResult>>;
/// Builds a parser that recognises `token` at the very start of the input.
///
/// On a match it yields a single result whose `consumed` is the opening HTML
/// tag for `token` and whose `left` is the input after the marker; otherwise
/// it yields no results.
fn parse_open(token: &'static str) -> Parser {
    Rc::new(move |input: &str| match input.strip_prefix(token) {
        Some(rest) => vec![ParseResult {
            consumed: open_tag(token).to_string(),
            left: rest.to_string(),
        }],
        None => Vec::new(),
    })
}
/// Builds a parser that recognises `token` at the very start of the input.
///
/// On a match it yields a single result whose `consumed` is the closing HTML
/// tag for `token` and whose `left` is the input after the marker; otherwise
/// it yields no results.
fn parse_close(token: &'static str) -> Parser {
    Rc::new(move |input: &str| match input.strip_prefix(token) {
        Some(rest) => vec![ParseResult {
            consumed: close_tag(token).to_string(),
            left: rest.to_string(),
        }],
        None => Vec::new(),
    })
}
/// Builds a parser for plain text: everything up to the first `*` or `_`.
///
/// * If a marker character is found, the text before it is `consumed` (this is
///   the empty string when the input starts with a marker) and the rest,
///   marker included, is `left`.
/// * If no marker is found and the input is non-empty, the whole input is
///   consumed.
/// * Empty input yields no results.
fn parse_not_markdown() -> Parser {
    Rc::new(|input: &str| {
        let marker_at = input.find(|ch: char| ch == '*' || ch == '_');
        match marker_at {
            Some(idx) => vec![ParseResult {
                consumed: input[..idx].to_string(),
                left: input[idx..].to_string(),
            }],
            None if input.is_empty() => Vec::new(),
            None => vec![ParseResult {
                consumed: input.to_string(),
                left: String::new(),
            }],
        }
    })
}
fn parse_or(parsers: Vec<Parser>) -> Parser {
Rc::new(move |input: &str| {
let mut results = Vec::new();
for p in &parsers {
results.extend(p(input));
}
results
})
}
fn parse_and(parsers: Vec<Parser>) -> Parser {
Rc::new(move |input: &str| {
let mut results = vec![ParseResult {
consumed: String::new(),
left: input.to_string(),
}];
for p in &parsers {
let mut new_results = Vec::new();
for r in &results {
for parsed in p(&r.left) {
if !parsed.consumed.is_empty() {
new_results.push(ParseResult {
consumed: format!("{}{}", r.consumed, parsed.consumed),
left: parsed.left.clone(),
});
}
}
}
if new_results.is_empty() {
return vec![];
}
results = new_results;
}
results
})
}
/// Builds a parser that applies `parser` repeatedly, one or more times.
///
/// The repetition itself is carried out by `recursive`, started at depth 0.
fn parse_some(parser: Parser) -> Parser {
    let repeated = move |input: &str| recursive(input, &parser, 0);
    Rc::new(repeated)
}
/// Applies `parser` to `input` and then to whatever each match leaves behind.
///
/// Only matches with non-empty output count. For each such match the function
/// recurses on its `left` and prepends the match's output to every result of
/// that recursion.
///
/// When `parser` yields no non-empty match:
/// * at `depth` 0 the result is empty, so at least one match is required;
/// * at any deeper level a single result with empty `consumed` and `left` set
///   to `input` is returned, which ends the chain.
fn recursive(input: &str, parser: &Parser, depth: usize) -> Vec<ParseResult> {
    let matches: Vec<ParseResult> = parser(input)
        .into_iter()
        .filter(|step| !step.consumed.is_empty())
        .collect();

    if matches.is_empty() {
        return if depth == 0 {
            Vec::new()
        } else {
            vec![ParseResult {
                consumed: String::new(),
                left: input.to_string(),
            }]
        };
    }

    let mut chains = Vec::new();
    for step in matches {
        for tail in recursive(&step.left, parser, depth + 1) {
            chains.push(ParseResult {
                consumed: format!("{}{}", step.consumed, tail.consumed),
                left: tail.left,
            });
        }
    }
    chains
}
/// Assembles the parser for inline emphasis.
///
/// Grammar, built from the combinators in this file:
/// * plain emphasis: `*text*` or `_text_`, where `text` contains no markers;
/// * bold: `**...**` or `__...__`, whose body is one or more pieces of plain
///   text or plain emphasis;
/// * italic: `*...*` or `_..._`, whose body is one or more pieces of plain
///   text or bold;
/// * document: one or more of bold, italic or plain text, tried in that order.
fn markdown_parser() -> Parser {
    /// Wraps `body` between the opening and closing parser for `marker`.
    fn delimited(marker: &'static str, body: Parser) -> Parser {
        parse_and(vec![parse_open(marker), body, parse_close(marker)])
    }

    let plain_emphasis = parse_or(vec![
        delimited("*", parse_not_markdown()),
        delimited("_", parse_not_markdown()),
    ]);

    let bold_body =
        || parse_some(parse_or(vec![parse_not_markdown(), plain_emphasis.clone()]));
    let strong = parse_or(vec![
        delimited("**", bold_body()),
        delimited("__", bold_body()),
    ]);

    let italic_body = || parse_some(parse_or(vec![parse_not_markdown(), strong.clone()]));
    let emphasis = parse_or(vec![
        delimited("*", italic_body()),
        delimited("_", italic_body()),
    ]);

    parse_some(parse_or(vec![strong, emphasis, parse_not_markdown()]))
}
pub fn markdown_to_html(md: &str) -> String {
let md_without_code = escape_html(md);
let (md_without_code, code_placeholders) =
replace_with_placeholders(&md_without_code, r"(?s)```.*?```", "c0debl0ck");
let (md_without_code, inline_placeholders) =
replace_with_placeholders(&md_without_code, r"`[^`]+`", "inl1ne");
let re_newlines = Regex::new(r"\n{2,}").unwrap();
let segments = re_newlines.split(&md_without_code);
let processed: Vec<String> = segments
.map(|segment| {
let parser = markdown_parser();
let docs = parser(segment);
if !docs.is_empty() {
format!("{}{}", docs[0].consumed, docs[0].left)
} else {
segment.to_string()
}
})
.collect();
let md_without_code = processed.join("\n\n");
let mut result = restore_from_placeholders(&md_without_code, &code_placeholders);
result = restore_from_placeholders(&result, &inline_placeholders);
let re_code_block = Regex::new(r"(?s)```(.+?)```").unwrap();
result = re_code_block
.replace_all(&result, |caps: ®ex::Captures<'_>| {
let inner = caps.get(1).unwrap().as_str().trim();
format!("<pre>{inner}</pre>")
})
.to_string();
let re_inline_code = Regex::new(r"`([^`]+?)`").unwrap();
result = re_inline_code
.replace_all(&result, "<code>$1</code>")
.to_string();
let re_header = Regex::new(r"(?m)^#+\s*(.+)").unwrap();
result = re_header.replace_all(&result, "<b>$1</b>").to_string();
result
}
#[cfg(test)]
mod tests {
    use super::*;

    // --- HTML helpers -----------------------------------------------------

    #[test]
    fn test_escape_html() {
        // The expected value must contain the entities, not the raw characters;
        // comparing against the unescaped input would only prove a no-op.
        assert_eq!(
            escape_html("a & b < c > d"),
            "a &amp; b &lt; c &gt; d"
        );
        assert_eq!(escape_html("plain"), "plain");
    }

    #[test]
    fn test_strip_html_tags() {
        assert_eq!(strip_html_tags("<b>hello</b>"), "hello");
        assert_eq!(strip_html_tags("no tags"), "no tags");
        assert_eq!(
            strip_html_tags("<b>bold</b> and <i>italic</i>"),
            "bold and italic"
        );
    }

    #[test]
    fn test_replace_and_restore_placeholders() {
        let input = "some ```code``` here";
        let (modified, phs) = replace_with_placeholders(input, r"(?s)```.*?```", "c0de");
        assert!(modified.contains("c0de"));
        let restored = restore_from_placeholders(&modified, &phs);
        assert_eq!(restored, input);
    }

    // --- End-to-end conversion ---------------------------------------------

    #[test]
    fn test_markdown_to_html_italic() {
        let result = markdown_to_html("hello *world*");
        assert!(result.contains("<i>world</i>"));
        assert!(result.contains("hello"));
    }

    #[test]
    fn test_markdown_to_html_bold() {
        let result = markdown_to_html("hello **world**");
        assert!(result.contains("<b>world</b>"));
    }

    #[test]
    fn test_markdown_to_html_bold_underscore() {
        let result = markdown_to_html("hello __world__");
        assert!(result.contains("<b>world</b>"));
    }

    #[test]
    fn test_markdown_to_html_italic_underscore() {
        let result = markdown_to_html("hello _world_");
        assert!(result.contains("<i>world</i>"));
    }

    #[test]
    fn test_markdown_to_html_nested_bold_italic() {
        let result = markdown_to_html("**bold *italic* bold**");
        assert!(result.contains("<b>"));
        assert!(result.contains("<i>italic</i>"));
        assert!(result.contains("</b>"));
    }

    #[test]
    fn test_markdown_to_html_code_block() {
        let result = markdown_to_html("```\ncode\n```");
        assert!(result.contains("<pre>code</pre>"));
    }

    #[test]
    fn test_markdown_to_html_inline_code() {
        let result = markdown_to_html("use `foo` here");
        assert!(result.contains("<code>foo</code>"));
    }

    #[test]
    fn test_markdown_to_html_header() {
        let result = markdown_to_html("# Title");
        assert!(result.contains("<b>Title</b>"));
    }

    #[test]
    fn test_markdown_to_html_header_h3() {
        let result = markdown_to_html("### Subtitle");
        assert!(result.contains("<b>Subtitle</b>"));
    }

    #[test]
    fn test_markdown_to_html_plain_text_unchanged() {
        let result = markdown_to_html("just plain text");
        assert_eq!(result, "just plain text");
    }

    #[test]
    fn test_markdown_to_html_html_chars_escaped() {
        // Raw HTML metacharacters in the input must come out as entities.
        let result = markdown_to_html("a < b & c > d");
        assert!(result.contains("&lt;"));
        assert!(result.contains("&gt;"));
        assert!(result.contains("&amp;"));
    }

    #[test]
    fn test_markdown_to_html_mixed() {
        let result = markdown_to_html("**bold** and *italic* and `code`");
        assert!(result.contains("<b>bold</b>"));
        assert!(result.contains("<i>italic</i>"));
        assert!(result.contains("<code>code</code>"));
    }

    // --- Parser combinators -------------------------------------------------

    #[test]
    fn test_parser_not_markdown() {
        let p = parse_not_markdown();
        let results = p("hello*world");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "hello");
        assert_eq!(results[0].left, "*world");
    }

    #[test]
    fn test_parser_not_markdown_no_special() {
        let p = parse_not_markdown();
        let results = p("hello world");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "hello world");
        assert_eq!(results[0].left, "");
    }

    #[test]
    fn test_parser_open_close() {
        let p = parse_open("**");
        let results = p("**bold**");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "<b>");
        assert_eq!(results[0].left, "bold**");

        let p = parse_close("**");
        let results = p("**rest");
        assert_eq!(results.len(), 1);
        assert_eq!(results[0].consumed, "</b>");
        assert_eq!(results[0].left, "rest");
    }

    #[test]
    fn test_parser_and() {
        let p = parse_and(vec![
            parse_open("*"),
            parse_not_markdown(),
            parse_close("*"),
        ]);
        let results = p("*hello*");
        assert!(!results.is_empty());
        assert_eq!(results[0].consumed, "<i>hello</i>");
    }
}