tetratto_shared/
markdown.rs

1use ammonia::Builder;
2use pulldown_cmark::{Parser, Options, html::push_html};
3use std::collections::HashSet;
4
5pub fn render_markdown_dirty(input: &str) -> String {
6    let input = &autolinks(&parse_alignment(&parse_backslash_breaks(input)));
7
8    let mut options = Options::empty();
9    options.insert(Options::ENABLE_STRIKETHROUGH);
10    options.insert(Options::ENABLE_GFM);
11    options.insert(Options::ENABLE_FOOTNOTES);
12    options.insert(Options::ENABLE_TABLES);
13    options.insert(Options::ENABLE_HEADING_ATTRIBUTES);
14    options.insert(Options::ENABLE_SUBSCRIPT);
15    options.insert(Options::ENABLE_SUPERSCRIPT);
16
17    let parser = Parser::new_ext(input, options);
18
19    let mut html = String::new();
20    push_html(&mut html, parser);
21
22    html
23}
24
25pub fn clean_html(html: String, allowed_attributes: HashSet<&str>) -> String {
26    Builder::default()
27        .generic_attributes(allowed_attributes)
28        .add_tags(&[
29            "video", "source", "img", "b", "span", "p", "i", "strong", "em", "a", "align",
30        ])
31        .rm_tags(&["script", "style", "link", "canvas"])
32        .add_tag_attributes("a", &["href", "target"])
33        .add_url_schemes(&["atto"])
34        .clean(&html.replace("<video ", "<video controls "))
35        .to_string()
36}
37
38/// Render markdown input into HTML
39pub fn render_markdown(input: &str, proxy_images: bool) -> String {
40    let html = render_markdown_dirty(input);
41
42    let mut allowed_attributes = HashSet::new();
43    allowed_attributes.insert("id");
44    allowed_attributes.insert("class");
45    allowed_attributes.insert("ref");
46    allowed_attributes.insert("aria-label");
47    allowed_attributes.insert("lang");
48    allowed_attributes.insert("title");
49    allowed_attributes.insert("align");
50    allowed_attributes.insert("src");
51
52    let output = clean_html(html, allowed_attributes);
53
54    if proxy_images {
55        output.replace(
56            "src=\"http",
57            "loading=\"lazy\" src=\"/api/v1/util/proxy?url=http",
58        )
59    } else {
60        output
61    }
62}
63
/// Processes one line of input for the custom alignment syntax and appends
/// the result, followed by `\n`, to `output`.
///
/// Syntax handled on a single line:
///
/// * `->text<-` becomes `<align class="center">text</align>`
/// * `->text->` becomes `<align class="right">text</align>`
/// * `\-`, `\<` and `\>` emit the character literally without treating it as
///   part of a marker.
///
/// Every backslash is consumed and never written to the output, regardless of
/// the character that follows it.
///
/// Marker characters that turn out not to be part of an alignment span are
/// written back literally: a `<` or `-` that is still pending when the line
/// ends, and a `->` opener that is never closed on this line.
///
/// # Parameters
///
/// * `line` - the line to process, without its trailing newline.
/// * `output` - the string the processed line is appended to.
/// * `buffer` - scratch space for text that has not been committed to
///   `output` yet; it is cleared before this function returns (it is left
///   untouched when `is_in_pre` is set).
/// * `is_in_pre` - when `true` the line is copied to `output` verbatim.
fn parse_alignment_line(line: &str, output: &mut String, buffer: &mut String, is_in_pre: bool) {
    if is_in_pre {
        output.push_str(line);
        output.push('\n');
        return;
    }

    // an opening `->` was seen and we are collecting the aligned text
    let mut is_alignment_waiting = false;
    // the previous char was an unescaped `<` that has not been emitted yet
    let mut alignment_center = false;
    // the previous char was an unescaped `-` that has not been emitted yet
    let mut has_dash = false;
    // the previous char was a backslash
    let mut escape = false;

    for ch in line.chars() {
        // A pending `<` only stays pending when it can become the `<-` that
        // closes a centered span. In every other case it is a literal and must
        // be emitted now, before anything that follows it, to keep the order.
        if alignment_center && !(ch == '-' && is_alignment_waiting) {
            alignment_center = false;
            buffer.push('<');
        }

        // A pending `-` is a literal unless it is directly followed by `>`.
        if has_dash && ch != '>' {
            has_dash = false;
            buffer.push('-');
        }

        match ch {
            '\\' => {
                escape = true;
                continue;
            }
            '-' | '<' | '>' if escape => {
                // escaped marker character: emit it verbatim
                buffer.push(ch);
                escape = false;
                continue;
            }
            '-' => {
                if alignment_center && is_alignment_waiting {
                    // `<-` closes a centered span
                    alignment_center = false;
                    is_alignment_waiting = false;
                    output.push_str("<align class=\"center\">");
                    output.push_str(buffer.as_str());
                    output.push_str("</align>");
                    buffer.clear();
                    continue;
                }

                has_dash = true;

                if !is_alignment_waiting {
                    // Commit everything before this dash so that, if it turns
                    // out to open a span, the span does not capture it.
                    output.push_str(buffer.as_str());
                    buffer.clear();
                }
            }
            '<' => {
                alignment_center = true;
                continue;
            }
            '>' => {
                if !has_dash {
                    buffer.push('>');
                } else {
                    has_dash = false;

                    if is_alignment_waiting {
                        // second `->`: closes a right-aligned span
                        is_alignment_waiting = false;
                        output.push_str("<align class=\"right\">");
                        output.push_str(buffer.as_str());
                        output.push_str("</align>");
                        buffer.clear();
                    } else {
                        // first `->`: start collecting the aligned text
                        is_alignment_waiting = true;
                    }

                    continue;
                }
            }
            _ => buffer.push(ch),
        }

        escape = false;
    }

    // The line ended while a marker character was still pending; it was not
    // part of a marker after all, so keep it (e.g. the last dash of `---`).
    if alignment_center {
        buffer.push('<');
    }

    if has_dash {
        buffer.push('-');
    }

    // The opener was never closed on this line; restore it as literal text.
    // Everything before it has already been committed to `output`.
    if is_alignment_waiting {
        output.push_str("->");
    }

    output.push_str(buffer.as_str());
    output.push('\n');
    buffer.clear();
}
162
163pub fn parse_alignment(input: &str) -> String {
164    let lines = input.split("\n");
165
166    let mut is_in_pre: bool = false;
167    let mut output = String::new();
168    let mut buffer = String::new();
169
170    for line in lines {
171        if line.starts_with("```") {
172            is_in_pre = !is_in_pre;
173            output.push_str(&format!("{line}\n"));
174        } else {
175            parse_alignment_line(line, &mut output, &mut buffer, is_in_pre)
176        }
177    }
178
179    output.push_str(&buffer);
180    output
181}
182
/// Appends `<br />` to every line that ends with a backslash.
///
/// The input is split on `\n` and each line is written back followed by
/// `\n`. Trailing whitespace is ignored when looking for the backslash, but
/// the line itself is written back untrimmed. Lines starting with three
/// backticks toggle a code fence; the fence lines and everything between
/// them are copied through without modification.
pub fn parse_backslash_breaks(input: &str) -> String {
    let mut inside_fence = false;
    let mut result = String::new();

    for line in input.split('\n') {
        let is_fence = line.starts_with("```");
        if is_fence {
            inside_fence = !inside_fence;
        }

        result.push_str(line);

        // only plain lines outside of a fence can receive a break
        let wants_break = !is_fence && !inside_fence && line.trim_end().ends_with('\\');
        if wants_break {
            result.push_str("<br />");
        }

        result.push('\n');
    }

    result
}
208
209/// Adapted from <https://git.cypr.io/oz/autolink-rust>.
210///
211/// The only real change here is that autolinks require a whitespace OR end the
212/// end of the pattern to match here.
213pub fn autolinks(input: &str) -> String {
214    if input.len() == 0 {
215        return String::new();
216    }
217
218    let pattern = regex::Regex::new(
219        r"(?ix)\b(([\w-]+://?|www[.])[^\s()<>]+(?:\([\w\d]+\)|([^[:punct:]\s]|/)))(\s|$)",
220    )
221    .unwrap();
222
223    pattern
224        .replace_all(input, "<a href=\"$0\">$0</a> ")
225        .to_string()
226}