umark_lib/
lib.rs

1use std::collections::{HashMap, HashSet};
2use std::error::Error;
3use std::fmt;
4use std::fs;
5
/// Markdown dialect accepted by the public entry points.
///
/// The flavor is translated into a set of feature switches by `ParserConfig::from_flavor`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MarkdownFlavor {
    /// Core syntax only: every extension switch is turned off.
    CommonMark,
    /// Turns on tables, task lists, strikethrough, autolink literals, footnotes and charts.
    Gfm,
}
11
/// Error produced by the `safe_*` entry points when the input contains a script tag or
/// any raw HTML.
#[derive(Debug)]
struct MarkdownSecurityError;

impl Error for MarkdownSecurityError {}

impl fmt::Display for MarkdownSecurityError {
    /// Writes the fixed, user-facing rejection message.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("raw html tag is not allowed in safe_parse")
    }
}
22
/// Placeholder written in place of raw HTML when `omit_raw_html` is set.
/// `safe_parse_with_flavor` treats its presence in the rendered output as a rejection signal.
const RAW_HTML_OMITTED_MARKER: &str = "<!-- raw HTML omitted -->";
/// Script tags appended to file output that contains a rendered mermaid block: the first loads
/// mermaid from the jsDelivr CDN, the second initializes it if the library is defined.
const MERMAID_BOOTSTRAP: &str = "<script src=\"https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js\"></script>\n<script>if (typeof mermaid !== \"undefined\") { mermaid.initialize({ startOnLoad: true }); }</script>\n";
25
26#[derive(Debug, Clone, Copy)]
27struct ParserConfig {
28    omit_raw_html: bool,
29    enable_tables: bool,
30    enable_task_list: bool,
31    enable_strikethrough: bool,
32    enable_autolink_literals: bool,
33    enable_footnotes: bool,
34    enable_charts: bool,
35}
36
37impl ParserConfig {
38    fn from_flavor(flavor: MarkdownFlavor) -> Self {
39        match flavor {
40            MarkdownFlavor::CommonMark => Self {
41                omit_raw_html: false,
42                enable_tables: false,
43                enable_task_list: false,
44                enable_strikethrough: false,
45                enable_autolink_literals: false,
46                enable_footnotes: false,
47                enable_charts: false,
48            },
49            MarkdownFlavor::Gfm => Self {
50                omit_raw_html: false,
51                enable_tables: true,
52                enable_task_list: true,
53                enable_strikethrough: true,
54                enable_autolink_literals: true,
55                enable_footnotes: true,
56                enable_charts: true,
57            },
58        }
59    }
60
61    fn with_raw_html_omitted(mut self) -> Self {
62        self.omit_raw_html = true;
63        self
64    }
65}
66
/// Definitions gathered by `collect_definitions` before block parsing starts.
#[derive(Default, Clone)]
struct DefinitionStore {
    /// Link reference definitions: normalized label -> URL.
    links: HashMap<String, String>,
    /// Footnote definitions: normalized id -> text (continuation lines joined with spaces).
    footnotes: HashMap<String, String>,
    /// Indices of the lines that held a definition. They index the line vector the
    /// definitions were collected from.
    skip_lines: HashSet<usize>,
}
73
/// Parsing state for one document (or one nested sub-document).
struct Parser<'a> {
    /// Input split into lines after newline normalization.
    lines: Vec<&'a str>,
    /// Link and footnote definitions available to this parser.
    defs: DefinitionStore,
    /// Footnote ids in order of first reference; position + 1 is the displayed number.
    footnote_order: Vec<String>,
    /// Active feature switches.
    config: ParserConfig,
}
80
81/// Parse markdown with default flavor (GFM) and return HTML.
82pub fn parse(input: &str) -> String {
83    parse_with_flavor(input, MarkdownFlavor::Gfm)
84}
85
86/// Parse markdown using the requested flavor and return HTML.
87pub fn parse_with_flavor(input: &str, flavor: MarkdownFlavor) -> String {
88    parse_internal(input, ParserConfig::from_flavor(flavor))
89}
90
91pub fn safe_parse(input: &str) -> Result<String, Box<dyn Error>> {
92    safe_parse_with_flavor(input, MarkdownFlavor::Gfm)
93}
94
95pub fn safe_parse_with_flavor(
96    input: &str,
97    flavor: MarkdownFlavor,
98) -> Result<String, Box<dyn Error>> {
99    reject_script_tag(input)?;
100    let rendered = parse_internal(
101        input,
102        ParserConfig::from_flavor(flavor).with_raw_html_omitted(),
103    );
104    if rendered.contains(RAW_HTML_OMITTED_MARKER) {
105        return Err(Box::new(MarkdownSecurityError));
106    }
107    Ok(rendered)
108}
109
110pub fn parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
111    parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
112}
113
114pub fn parse_from_file_with_flavor(
115    path: &str,
116    output_path: &str,
117    flavor: MarkdownFlavor,
118) -> Result<(), Box<dyn Error>> {
119    let content = fs::read_to_string(path)?;
120    let rendered = parse_with_flavor(&content, flavor);
121    let rendered = with_chart_runtime_if_needed(rendered, flavor);
122    fs::write(output_path, rendered)?;
123    Ok(())
124}
125
126pub fn safe_parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
127    safe_parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
128}
129
130pub fn safe_parse_from_file_with_flavor(
131    path: &str,
132    output_path: &str,
133    flavor: MarkdownFlavor,
134) -> Result<(), Box<dyn Error>> {
135    let content = fs::read_to_string(path)?;
136    let rendered = safe_parse_with_flavor(&content, flavor)?;
137    fs::write(output_path, rendered)?;
138    Ok(())
139}
140
141fn parse_internal(input: &str, config: ParserConfig) -> String {
142    let normalized = normalize_newlines(input);
143    let lines: Vec<&str> = normalized.lines().collect();
144    let defs = collect_definitions(&lines, config);
145    let mut parser = Parser {
146        lines,
147        defs,
148        footnote_order: Vec::new(),
149        config,
150    };
151    parser.parse_blocks()
152}
153
154fn with_chart_runtime_if_needed(mut rendered: String, flavor: MarkdownFlavor) -> String {
155    if flavor == MarkdownFlavor::Gfm
156        && rendered.contains("<pre class=\"mermaid\">")
157        && !rendered.contains("mermaid.initialize(")
158    {
159        rendered.push('\n');
160        rendered.push_str(MERMAID_BOOTSTRAP);
161    }
162    rendered
163}
164
165fn reject_script_tag(input: &str) -> Result<(), Box<dyn Error>> {
166    if contains_script_tag(input) {
167        return Err(Box::new(MarkdownSecurityError));
168    }
169    Ok(())
170}
171
/// Report whether `input` contains something that looks like an opening or closing
/// `script` tag.
///
/// A match is a `<`, optional ASCII whitespace, an optional `/` (itself followed by
/// optional whitespace), the letters `script` in any ASCII case, and then ASCII
/// whitespace, `>`, `/` or the end of the input.
fn contains_script_tag(input: &str) -> bool {
    const TAG_NAME: &[u8] = b"script";

    // Compare raw bytes rather than slicing the `str`: the six bytes after `<` may end in
    // the middle of a multi-byte character, and slicing a `str` there panics.
    let bytes = input.as_bytes();
    let skip_whitespace = |mut idx: usize| {
        while idx < bytes.len() && bytes[idx].is_ascii_whitespace() {
            idx += 1;
        }
        idx
    };

    for open in (0..bytes.len()).filter(|&idx| bytes[idx] == b'<') {
        let mut name_start = skip_whitespace(open + 1);
        if bytes.get(name_start) == Some(&b'/') {
            name_start = skip_whitespace(name_start + 1);
        }

        let name_end = name_start + TAG_NAME.len();
        let Some(name) = bytes.get(name_start..name_end) else {
            // Fewer than six bytes remain after this `<`.
            continue;
        };
        if !name.eq_ignore_ascii_case(TAG_NAME) {
            continue;
        }

        // The name must be terminated, so `<scripts>` is not a match. End of input counts
        // as a terminator.
        let next = bytes.get(name_end).copied().unwrap_or(b'>');
        if next.is_ascii_whitespace() || next == b'>' || next == b'/' {
            return true;
        }
    }
    false
}
206
207impl<'a> Parser<'a> {
208    fn parse_blocks(&mut self) -> String {
209        let mut pos = 0usize;
210        let mut out = String::new();
211
212        while pos < self.lines.len() {
213            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
214                pos += 1;
215                continue;
216            }
217
218            if let Some((level, text, next)) = parse_setext_heading(&self.lines, pos) {
219                let heading_text = text.trim().to_string();
220                out.push_str(&format!(
221                    "<h{level}>{}</h{level}>\n",
222                    self.parse_inlines(&heading_text)
223                ));
224                pos = next;
225                continue;
226            }
227
228            if is_thematic_break(self.lines[pos]) {
229                out.push_str("<hr />\n");
230                pos += 1;
231                continue;
232            }
233
234            if let Some((level, text)) = parse_atx_heading(self.lines[pos]) {
235                out.push_str(&format!(
236                    "<h{level}>{}</h{level}>\n",
237                    self.parse_inlines(text.trim())
238                ));
239                pos += 1;
240                continue;
241            }
242
243            if is_fence_start(self.lines[pos]) {
244                let (html, next) = self.parse_fenced_code(pos);
245                out.push_str(&html);
246                pos = next;
247                continue;
248            }
249
250            if is_indented_code_line(self.lines[pos]) {
251                let (html, next) = self.parse_indented_code(pos);
252                out.push_str(&html);
253                pos = next;
254                continue;
255            }
256
257            if is_blockquote_line(self.lines[pos]) {
258                let (html, next) = self.parse_blockquote(pos);
259                out.push_str(&html);
260                pos = next;
261                continue;
262            }
263
264            if is_html_line(self.lines[pos]) {
265                let (html, next) = self.parse_html_block(pos);
266                out.push_str(&html);
267                pos = next;
268                continue;
269            }
270
271            if self.config.enable_tables && is_table_header(&self.lines, pos) {
272                let (html, next) = self.parse_table(pos);
273                out.push_str(&html);
274                pos = next;
275                continue;
276            }
277
278            if parse_list_prefix(self.lines[pos]).is_some() {
279                let (html, next) = self.parse_list(pos);
280                out.push_str(&html);
281                pos = next;
282                continue;
283            }
284
285            let (html, next) = self.parse_paragraph(pos);
286            out.push_str(&html);
287            pos = next;
288        }
289
290        if self.config.enable_footnotes && !self.footnote_order.is_empty() {
291            out.push_str(&self.render_footnotes());
292        }
293
294        out
295    }
296
297    fn parse_subdocument(&mut self, markdown: &str) -> String {
298        let normalized = normalize_newlines(markdown);
299        let lines: Vec<&str> = normalized.lines().collect();
300        let mut nested = Parser {
301            lines,
302            defs: self.defs.clone(),
303            footnote_order: Vec::new(),
304            config: self.config,
305        };
306        let html = nested.parse_blocks();
307        for id in nested.footnote_order {
308            self.note_footnote(id);
309        }
310        html
311    }
312
313    fn parse_fenced_code(&self, start: usize) -> (String, usize) {
314        let first = self.lines[start].trim_start();
315        let fence_char = first.chars().next().unwrap_or('`');
316        let fence_len = first.chars().take_while(|c| *c == fence_char).count();
317        let info = first[fence_len..].trim();
318        let mut pos = start + 1;
319        let mut code_lines = Vec::new();
320
321        while pos < self.lines.len() {
322            let line = self.lines[pos].trim_start();
323            if is_fence_closing_line(line, fence_char, fence_len) {
324                pos += 1;
325                break;
326            }
327            code_lines.push(self.lines[pos]);
328            pos += 1;
329        }
330
331        let code_raw = code_lines.join("\n");
332        let code = html_escape(&code_raw);
333        let lang = info.split_whitespace().next().unwrap_or("");
334        let is_mermaid = self.config.enable_charts && lang.eq_ignore_ascii_case("mermaid");
335
336        let html = if is_mermaid {
337            format!("<pre class=\"mermaid\">{}</pre>\n", code)
338        } else if info.is_empty() {
339            format!("<pre><code>{}</code></pre>\n", code)
340        } else {
341            format!(
342                "<pre><code class=\"language-{}\">{}</code></pre>\n",
343                html_attr_escape(lang),
344                code
345            )
346        };
347        (html, pos)
348    }
349
350    fn parse_indented_code(&self, start: usize) -> (String, usize) {
351        let mut pos = start;
352        let mut code_lines = Vec::new();
353
354        while pos < self.lines.len() {
355            let line = self.lines[pos];
356            if line.trim().is_empty() {
357                code_lines.push("");
358                pos += 1;
359                continue;
360            }
361
362            if let Some(stripped) = strip_indented_code_prefix(line) {
363                code_lines.push(stripped);
364                pos += 1;
365            } else {
366                break;
367            }
368        }
369
370        let code = html_escape(&code_lines.join("\n"));
371        (format!("<pre><code>{}</code></pre>\n", code), pos)
372    }
373
374    fn parse_blockquote(&mut self, start: usize) -> (String, usize) {
375        let mut pos = start;
376        let mut parts = Vec::new();
377
378        while pos < self.lines.len() {
379            let line = self.lines[pos];
380            if line.trim().is_empty() {
381                parts.push(String::new());
382                pos += 1;
383                continue;
384            }
385            if !is_blockquote_line(line) {
386                break;
387            }
388            parts.push(strip_blockquote_prefix(line).to_string());
389            pos += 1;
390        }
391
392        let body = parts.join("\n");
393        let inner = self.parse_subdocument(&body);
394        (format!("<blockquote>\n{}</blockquote>\n", inner), pos)
395    }
396
397    fn parse_html_block(&self, start: usize) -> (String, usize) {
398        if !self.config.omit_raw_html {
399            let mut pos = start;
400            while pos < self.lines.len() {
401                if self.lines[pos].trim().is_empty() {
402                    break;
403                }
404                pos += 1;
405            }
406            let raw = self.lines[start..pos].join("\n");
407            return (format!("{raw}\n"), pos);
408        }
409
410        let mut pos = start;
411        while pos < self.lines.len() {
412            if self.lines[pos].trim().is_empty() {
413                break;
414            }
415            pos += 1;
416        }
417        (format!("{RAW_HTML_OMITTED_MARKER}\n"), pos)
418    }
419
420    fn parse_table(&mut self, start: usize) -> (String, usize) {
421        let headers = split_table_row(self.lines[start]);
422        let aligns = parse_table_alignments(self.lines[start + 1]);
423        let mut pos = start + 2;
424        let mut rows: Vec<Vec<String>> = Vec::new();
425
426        while pos < self.lines.len() {
427            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
428                break;
429            }
430            if !self.lines[pos].contains('|') {
431                break;
432            }
433            rows.push(split_table_row(self.lines[pos]));
434            pos += 1;
435        }
436
437        let mut out = String::new();
438        out.push_str("<table>\n<thead>\n<tr>");
439        for (idx, cell) in headers.into_iter().enumerate() {
440            push_table_cell_open(&mut out, "th", aligns.get(idx).copied().flatten());
441            out.push_str(&self.parse_inlines(cell.trim()));
442            out.push_str("</th>");
443        }
444        out.push_str("</tr>\n</thead>\n<tbody>\n");
445
446        for row in rows {
447            out.push_str("<tr>");
448            for (idx, cell) in row.into_iter().enumerate() {
449                push_table_cell_open(&mut out, "td", aligns.get(idx).copied().flatten());
450                out.push_str(&self.parse_inlines(cell.trim()));
451                out.push_str("</td>");
452            }
453            out.push_str("</tr>\n");
454        }
455
456        out.push_str("</tbody>\n</table>\n");
457        (out, pos)
458    }
459
    /// Render the list whose first item is at line `start`.
    ///
    /// Items belong to the list while they share the first item's indentation and kind.
    /// Each item's lines are gathered, dedented and rendered as a nested document.
    /// Returns the HTML and the index of the first line after the list.
    fn parse_list(&mut self, start: usize) -> (String, usize) {
        // Falls back to an unordered list at indent 0 if the first line has no list prefix.
        let (first_kind, _, base_indent) = parse_list_prefix_with_indent(self.lines[start])
            .unwrap_or((ListKind::Unordered, "", 0));
        let mut pos = start;
        let mut out = String::new();

        match first_kind {
            ListKind::Unordered => out.push_str("<ul>\n"),
            ListKind::Ordered(start_num) => {
                // The `start` attribute is only written when numbering does not begin at 1.
                if start_num != 1 {
                    out.push_str(&format!("<ol start=\"{start_num}\">\n"));
                } else {
                    out.push_str("<ol>\n");
                }
            }
        }

        // Outer loop: one iteration per list item.
        while pos < self.lines.len() {
            // A definition line ends the list.
            if self.is_skipped(pos) {
                break;
            }

            let Some((kind, item_line, indent)) = parse_list_prefix_with_indent(self.lines[pos])
            else {
                break;
            };
            // A different indent or kind means this line is not an item of this list.
            if indent != base_indent || !same_kind_value(kind, first_kind) {
                break;
            }

            let mut item_parts = vec![item_line.to_string()];
            pos += 1;
            let mut loose = false;

            // Inner loop: gather the continuation lines of the current item.
            while pos < self.lines.len() {
                if self.is_skipped(pos) {
                    break;
                }

                let line = self.lines[pos];
                if line.trim().is_empty() {
                    // NOTE(review): any blank line after the item's first line marks it
                    // loose, including blank lines that trail the item.
                    loose = true;
                    item_parts.push(String::new());
                    pos += 1;
                    continue;
                }

                if let Some((next_kind, _, next_indent)) = parse_list_prefix_with_indent(line) {
                    // Sibling item of this list: the current item ends here.
                    if next_indent == base_indent && same_kind_value(next_kind, first_kind) {
                        break;
                    }
                    // Item of another kind at the same or a shallower indent: ends the item.
                    if next_indent <= base_indent && !same_kind_value(next_kind, first_kind) {
                        break;
                    }
                }

                // A new block that is not indented past the list marker ends the item.
                if leading_indent(line) <= base_indent
                    && is_block_start(&self.lines, pos, self.config)
                {
                    break;
                }

                item_parts.push(dedent_list_continuation(line, base_indent).to_string());
                pos += 1;
            }

            out.push_str("<li>");

            // Task-list checkboxes are only recognised on unordered lists.
            let mut checkbox: Option<bool> = None;
            if self.config.enable_task_list && matches!(first_kind, ListKind::Unordered) {
                if let Some((checked, rest)) = parse_task_item(&item_parts[0]) {
                    checkbox = Some(checked);
                    // Drop the checkbox marker from the text that gets rendered.
                    item_parts[0] = rest.to_string();
                }
            }

            if let Some(checked) = checkbox {
                if checked {
                    out.push_str("<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
                } else {
                    out.push_str("<input type=\"checkbox\" disabled=\"\" /> ");
                }
            }

            let item_markdown = item_parts.join("\n");
            let rendered = self.parse_subdocument(&item_markdown);
            if !loose {
                // Tight item: if the content is a single paragraph, emit it without `<p>`.
                if let Some(stripped) = strip_single_paragraph_wrapper(&rendered) {
                    out.push_str(stripped);
                } else {
                    out.push_str(&rendered);
                }
            } else {
                out.push_str(&rendered);
            }
            out.push_str("</li>\n");
        }

        match first_kind {
            ListKind::Unordered => out.push_str("</ul>\n"),
            ListKind::Ordered(_) => out.push_str("</ol>\n"),
        }

        (out, pos)
    }
565
566    fn parse_paragraph(&mut self, start: usize) -> (String, usize) {
567        let mut pos = start;
568        let mut parts = Vec::new();
569
570        while pos < self.lines.len() {
571            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
572                break;
573            }
574            if pos != start && is_block_start(&self.lines, pos, self.config) {
575                break;
576            }
577            parts.push(self.lines[pos]);
578            pos += 1;
579        }
580
581        let text = parts.join("\n");
582        (format!("<p>{}</p>\n", self.parse_inlines(&text)), pos)
583    }
584
585    fn parse_inlines(&mut self, text: &str) -> String {
586        let mut out = String::new();
587        let mut i = 0usize;
588
589        while i < text.len() {
590            let rest = &text[i..];
591
592            if rest.starts_with("\\\n") {
593                out.push_str("<br />\n");
594                i += 2;
595                continue;
596            }
597
598            if rest.starts_with('\n') {
599                match detect_hard_break(text, i) {
600                    HardBreak::Spaces => {
601                        trim_trailing_spaces(&mut out);
602                        out.push_str("<br />\n");
603                    }
604                    HardBreak::Backslash => {
605                        if out.ends_with('\\') {
606                            out.pop();
607                        }
608                        out.push_str("<br />\n");
609                    }
610                    HardBreak::None => out.push('\n'),
611                }
612                i += 1;
613                continue;
614            }
615
616            if let Some((ch, consumed)) = parse_escaped_char(rest) {
617                push_escaped_char(&mut out, ch);
618                i += consumed;
619                continue;
620            }
621
622            if rest.starts_with('`') {
623                if let Some((content, consumed)) = parse_code_span(rest) {
624                    out.push_str("<code>");
625                    out.push_str(&html_escape(content));
626                    out.push_str("</code>");
627                    i += consumed;
628                    continue;
629                }
630            }
631
632            if self.config.enable_footnotes && rest.starts_with("[^") {
633                if let Some(end) = rest.find(']') {
634                    let raw_id = &rest[2..end];
635                    let key = normalize_key(raw_id);
636                    if self.defs.footnotes.contains_key(&key) {
637                        let index = self.note_footnote(key.clone());
638                        let safe = footnote_id(&key);
639                        out.push_str(&format!(
640                            "<sup class=\"footnote-ref\"><a href=\"#fn-{safe}\" id=\"fnref-{safe}\">{index}</a></sup>"
641                        ));
642                        i += end + 1;
643                        continue;
644                    }
645                }
646            }
647
648            if rest.starts_with("![") {
649                if let Some((html, consumed)) = self.parse_image(rest) {
650                    out.push_str(&html);
651                    i += consumed;
652                    continue;
653                }
654            }
655
656            if rest.starts_with('[') {
657                if let Some((html, consumed)) = self.parse_link_like(rest) {
658                    out.push_str(&html);
659                    i += consumed;
660                    continue;
661                }
662            }
663
664            if let Some((html, consumed)) = parse_angle_autolink(rest) {
665                out.push_str(&html);
666                i += consumed;
667                continue;
668            }
669
670            if let Some((raw, consumed)) = parse_inline_html(rest) {
671                if !self.config.omit_raw_html {
672                    out.push_str(raw);
673                } else {
674                    out.push_str(RAW_HTML_OMITTED_MARKER);
675                }
676                i += consumed;
677                continue;
678            }
679
680            if self.config.enable_autolink_literals {
681                if let Some((href, text_value, consumed)) = parse_autolink_literal(rest) {
682                    let href_escaped = html_escape(&href);
683                    let text_escaped = html_escape(&text_value);
684                    out.push_str(&format!("<a href=\"{href_escaped}\">{text_escaped}</a>"));
685                    i += consumed;
686                    continue;
687                }
688            }
689
690            if let Some((content, consumed)) = wrapped(rest, "**") {
691                out.push_str("<strong>");
692                out.push_str(&self.parse_inlines(content));
693                out.push_str("</strong>");
694                i += consumed;
695                continue;
696            }
697
698            if let Some((content, consumed)) = wrapped(rest, "__") {
699                out.push_str("<strong>");
700                out.push_str(&self.parse_inlines(content));
701                out.push_str("</strong>");
702                i += consumed;
703                continue;
704            }
705
706            if self.config.enable_strikethrough {
707                if let Some((content, consumed)) = wrapped(rest, "~~") {
708                    out.push_str("<del>");
709                    out.push_str(&self.parse_inlines(content));
710                    out.push_str("</del>");
711                    i += consumed;
712                    continue;
713                }
714            }
715
716            if let Some((content, consumed)) = wrapped(rest, "*") {
717                out.push_str("<em>");
718                out.push_str(&self.parse_inlines(content));
719                out.push_str("</em>");
720                i += consumed;
721                continue;
722            }
723
724            if let Some((content, consumed)) = wrapped(rest, "_") {
725                out.push_str("<em>");
726                out.push_str(&self.parse_inlines(content));
727                out.push_str("</em>");
728                i += consumed;
729                continue;
730            }
731
732            if let Some(ch) = rest.chars().next() {
733                push_escaped_char(&mut out, ch);
734                i += ch.len_utf8();
735            } else {
736                break;
737            }
738        }
739
740        out
741    }
742
743    fn parse_image(&mut self, rest: &str) -> Option<(String, usize)> {
744        let (alt, consumed_label) = parse_bracketed_label(&rest[1..])?;
745        let after = &rest[1 + consumed_label..];
746
747        let (url, consumed_after) = parse_inline_link_target(after)?;
748        let html = format!(
749            "<img src=\"{}\" alt=\"{}\" />",
750            html_attr_escape(&url),
751            html_attr_escape(alt)
752        );
753        Some((html, 1 + consumed_label + consumed_after))
754    }
755
756    fn parse_link_like(&mut self, rest: &str) -> Option<(String, usize)> {
757        let (label, consumed_label) = parse_bracketed_label(rest)?;
758        let after = &rest[consumed_label..];
759
760        if let Some((url, consumed_after)) = parse_inline_link_target(after) {
761            let html = format!(
762                "<a href=\"{}\">{}</a>",
763                html_attr_escape(&url),
764                self.parse_inlines(label)
765            );
766            return Some((html, consumed_label + consumed_after));
767        }
768
769        if after.starts_with('[') {
770            let (raw_ref, consumed_ref) = parse_bracketed_label(after)?;
771            let key = if raw_ref.trim().is_empty() {
772                normalize_key(label)
773            } else {
774                normalize_key(raw_ref)
775            };
776            if let Some(url) = self.defs.links.get(&key) {
777                let html = format!(
778                    "<a href=\"{}\">{}</a>",
779                    html_attr_escape(url),
780                    self.parse_inlines(label)
781                );
782                return Some((html, consumed_label + consumed_ref));
783            }
784        }
785
786        let key = normalize_key(label);
787        if let Some(url) = self.defs.links.get(&key) {
788            let html = format!(
789                "<a href=\"{}\">{}</a>",
790                html_attr_escape(url),
791                self.parse_inlines(label)
792            );
793            return Some((html, consumed_label));
794        }
795
796        None
797    }
798
799    fn note_footnote(&mut self, id: String) -> usize {
800        if let Some(idx) = self.footnote_order.iter().position(|x| x == &id) {
801            idx + 1
802        } else {
803            self.footnote_order.push(id);
804            self.footnote_order.len()
805        }
806    }
807
808    fn render_footnotes(&mut self) -> String {
809        let mut out = String::new();
810        out.push_str("<section class=\"footnotes\">\n<ol>\n");
811
812        let footnote_ids = self.footnote_order.clone();
813        for id in footnote_ids {
814            let safe = footnote_id(&id);
815            let text = self.defs.footnotes.get(&id).cloned().unwrap_or_default();
816            out.push_str(&format!(
817                "<li id=\"fn-{safe}\">{} <a href=\"#fnref-{safe}\" class=\"footnote-backref\">↩</a></li>\n",
818                self.parse_inlines(text.trim())
819            ));
820        }
821
822        out.push_str("</ol>\n</section>\n");
823        out
824    }
825
826    fn is_skipped(&self, line: usize) -> bool {
827        self.defs.skip_lines.contains(&line)
828    }
829}
830
/// Kind of list a list-item prefix belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ListKind {
    /// Bulleted list, rendered as `<ul>`.
    Unordered,
    /// Numbered list, rendered as `<ol>`. The payload is the item's number; the first
    /// item's number becomes the `start` attribute when it is not 1.
    Ordered(usize),
}
836
/// Convert Windows (`\r\n`) and old Mac (`\r`) line endings to `\n`.
fn normalize_newlines(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\r' {
            out.push(ch);
            continue;
        }
        // A CR followed by LF is one line ending: swallow the LF.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        out.push('\n');
    }
    out
}
840
841fn collect_definitions(lines: &[&str], config: ParserConfig) -> DefinitionStore {
842    let mut defs = DefinitionStore::default();
843    let mut i = 0usize;
844
845    while i < lines.len() {
846        let line = lines[i].trim();
847
848        if let Some((id, url)) = parse_link_definition(line) {
849            defs.links.insert(normalize_key(id), url.to_string());
850            defs.skip_lines.insert(i);
851            i += 1;
852            continue;
853        }
854
855        if config.enable_footnotes {
856            if let Some((id, first_text)) = parse_footnote_definition(line) {
857                let mut text_parts = vec![first_text.to_string()];
858                defs.skip_lines.insert(i);
859                i += 1;
860
861                while i < lines.len() {
862                    let next = lines[i];
863                    if next.starts_with("    ") || next.starts_with('\t') {
864                        text_parts.push(next.trim().to_string());
865                        defs.skip_lines.insert(i);
866                        i += 1;
867                    } else {
868                        break;
869                    }
870                }
871
872                defs.footnotes
873                    .insert(normalize_key(id), text_parts.join(" "));
874                continue;
875            }
876        }
877
878        i += 1;
879    }
880
881    defs
882}
883
/// Parse an ATX heading (`# Title`, up to six `#`).
///
/// Returns the heading level and its text, or `None` when `line` is not a heading:
/// - the opening `#` run must be 1 to 6 characters long and be followed by a space or tab,
///   so `#hashtag` is not a heading;
/// - a closing run of `#` is removed only when a space or tab precedes it, so `# C#`
///   keeps its `#`; trailing whitespace after the closing run is ignored;
/// - a heading with nothing after the opening run is not recognised.
fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
    let is_blank = |c: char| c == ' ' || c == '\t';

    let trimmed = line.trim_start();
    let level = trimmed.bytes().take_while(|b| *b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // `#` is ASCII, so the count of leading `#` is also a valid byte offset.
    let after_marker = &trimmed[level..];
    if !after_marker.is_empty() && !after_marker.starts_with(is_blank) {
        return None;
    }

    let content = after_marker.trim();
    if content.is_empty() {
        return None;
    }

    let without_closer = content.trim_end_matches('#');
    let text = if without_closer.is_empty() {
        // The content was nothing but a closing sequence, e.g. `# ##`.
        ""
    } else if without_closer.ends_with(is_blank) {
        without_closer.trim_end()
    } else {
        // The trailing `#` are glued to the text, so they are part of it.
        content
    };
    Some((level, text))
}
903
904fn parse_setext_heading<'a>(lines: &'a [&str], pos: usize) -> Option<(usize, &'a str, usize)> {
905    if pos + 1 >= lines.len() {
906        return None;
907    }
908    if lines[pos].trim().is_empty() {
909        return None;
910    }
911    if !can_be_setext_content_line(lines[pos]) {
912        return None;
913    }
914
915    let underline = lines[pos + 1].trim();
916    if is_setext_underline(underline, '=') {
917        return Some((1, lines[pos], pos + 2));
918    }
919    if is_setext_underline(underline, '-') {
920        return Some((2, lines[pos], pos + 2));
921    }
922    None
923}
924
925fn can_be_setext_content_line(line: &str) -> bool {
926    !line.trim().is_empty()
927        && !is_thematic_break(line)
928        && parse_atx_heading(line).is_none()
929        && !is_fence_start(line)
930        && !is_indented_code_line(line)
931        && !is_blockquote_line(line)
932        && !is_html_line(line)
933        && parse_list_prefix(line).is_none()
934}
935
/// Returns `true` when `line`, ignoring surrounding whitespace, is at least
/// three bytes long and made up solely of `marker`.
fn is_setext_underline(line: &str, marker: char) -> bool {
    let run = line.trim();
    // A length of three or more already implies the run is non-empty.
    run.len() >= 3 && run.chars().all(|ch| ch == marker)
}
940
/// Returns `true` when `line` is a thematic break: ignoring all whitespace,
/// it consists of three or more copies of a single `-`, `*` or `_`.
fn is_thematic_break(line: &str) -> bool {
    let mut glyphs = line.chars().filter(|c| !c.is_whitespace());
    let marker = match glyphs.next() {
        Some(c) if matches!(c, '-' | '*' | '_') => c,
        _ => return false,
    };

    let mut total = 1usize;
    for glyph in glyphs {
        if glyph != marker {
            return false;
        }
        total += 1;
    }
    total >= 3
}
954
/// Returns `true` when `line`, after leading whitespace, begins with a code
/// fence (three backticks or three tildes).
fn is_fence_start(line: &str) -> bool {
    let body = line.trim_start();
    ["```", "~~~"].iter().any(|fence| body.starts_with(*fence))
}
959
/// Returns `true` when `line` starts with an indented-code prefix: four
/// spaces or a single tab.
fn is_indented_code_line(line: &str) -> bool {
    line.starts_with("    ") || line.starts_with('\t')
}
963
/// Removes one level of code indentation (four spaces, otherwise one tab)
/// from the start of `line`. Returns `None` when neither prefix is present.
fn strip_indented_code_prefix(line: &str) -> Option<&str> {
    line.strip_prefix("    ")
        .or_else(|| line.strip_prefix('\t'))
}
970
/// Returns `true` when the first non-whitespace character of `line` is `>`.
fn is_blockquote_line(line: &str) -> bool {
    line.trim_start().chars().next() == Some('>')
}
974
/// Strips leading whitespace, then one `>` marker if present, then at most
/// one space, and returns what remains of `line`.
fn strip_blockquote_prefix(line: &str) -> &str {
    let body = line.trim_start();
    let after_marker = match body.strip_prefix('>') {
        Some(rest) => rest,
        None => body,
    };
    match after_marker.strip_prefix(' ') {
        Some(rest) => rest,
        None => after_marker,
    }
}
980
/// Returns `true` when the first non-whitespace character of `line` is `<`.
fn is_html_line(line: &str) -> bool {
    matches!(line.trim_start().as_bytes().first(), Some(b'<'))
}
984
985fn is_table_header(lines: &[&str], pos: usize) -> bool {
986    if pos + 1 >= lines.len() {
987        return false;
988    }
989    if !lines[pos].contains('|') {
990        return false;
991    }
992    is_table_separator(lines[pos + 1])
993}
994
/// Returns `true` when `line` is a table delimiter row such as
/// `| --- | :-: | --: |`.
///
/// The row is split on `|` after removing surrounding whitespace and outer
/// pipes (the same splitting `split_table_row` performs). Every cell must be
/// at least three characters long and have the shape `:?-+:?`: a run of
/// hyphens with an optional colon at either end. Colons anywhere else
/// (`:::`, `-:-`) make the row invalid.
fn is_table_separator(line: &str) -> bool {
    let trimmed = line.trim();
    if !trimmed.contains('-') {
        return false;
    }
    trimmed.trim_matches('|').split('|').all(|cell| {
        let cell = cell.trim();
        if cell.len() < 3 {
            return false;
        }
        // Peel the optional alignment colons; what remains must be hyphens only.
        let dashes = cell.strip_prefix(':').unwrap_or(cell);
        let dashes = dashes.strip_suffix(':').unwrap_or(dashes);
        !dashes.is_empty() && dashes.bytes().all(|b| b == b'-')
    })
}
1009
/// Splits a table row into its cells.
///
/// Surrounding whitespace and any leading/trailing `|` characters are removed
/// first; the remainder is split on `|` and each cell is trimmed.
fn split_table_row(line: &str) -> Vec<String> {
    let inner = line.trim().trim_matches('|');
    let mut cells = Vec::new();
    for piece in inner.split('|') {
        cells.push(piece.trim().to_owned());
    }
    cells
}
1017
1018fn parse_list_prefix(line: &str) -> Option<(ListKind, &str)> {
1019    parse_list_prefix_with_indent(line).map(|(kind, rest, _)| (kind, rest))
1020}
1021
1022fn parse_list_prefix_with_indent(line: &str) -> Option<(ListKind, &str, usize)> {
1023    let indent = leading_indent(line);
1024    let trimmed = line.trim_start_matches([' ', '\t']);
1025    if trimmed.len() < 2 {
1026        return None;
1027    }
1028
1029    if (trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ "))
1030        && trimmed.len() > 2
1031    {
1032        return Some((ListKind::Unordered, &trimmed[2..], indent));
1033    }
1034
1035    let mut digits_end = 0usize;
1036    for (idx, ch) in trimmed.char_indices() {
1037        if ch.is_ascii_digit() {
1038            digits_end = idx + ch.len_utf8();
1039        } else {
1040            break;
1041        }
1042    }
1043
1044    if digits_end == 0 || digits_end + 2 > trimmed.len() {
1045        return None;
1046    }
1047
1048    let marker = trimmed.as_bytes()[digits_end] as char;
1049    if marker != '.' && marker != ')' {
1050        return None;
1051    }
1052    if trimmed.as_bytes()[digits_end + 1] != b' ' {
1053        return None;
1054    }
1055
1056    let start = trimmed[..digits_end].parse::<usize>().ok()?;
1057    Some((ListKind::Ordered(start), &trimmed[digits_end + 2..], indent))
1058}
1059
1060fn same_kind_value(current: ListKind, expected: ListKind) -> bool {
1061    matches!(
1062        (current, expected),
1063        (ListKind::Unordered, ListKind::Unordered) | (ListKind::Ordered(_), ListKind::Ordered(_))
1064    )
1065}
1066
/// Measures the indentation of `line` in columns, counting each leading
/// space as 1 and each leading tab as 4.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .take_while(|ch| matches!(ch, ' ' | '\t'))
        .map(|ch| if ch == '\t' { 4 } else { 1 })
        .sum()
}
1078
1079fn dedent_list_continuation(line: &str, base_indent: usize) -> &str {
1080    if leading_indent(line) <= base_indent {
1081        return line.trim_start();
1082    }
1083    let mut removed_cols = 0usize;
1084    let mut byte_idx = 0usize;
1085    for (idx, ch) in line.char_indices() {
1086        match ch {
1087            ' ' => {
1088                removed_cols += 1;
1089                byte_idx = idx + 1;
1090            }
1091            '\t' => {
1092                removed_cols += 4;
1093                byte_idx = idx + 1;
1094            }
1095            _ => break,
1096        }
1097        if removed_cols >= base_indent + 2 {
1098            break;
1099        }
1100    }
1101    &line[byte_idx..]
1102}
1103
/// If `html` is exactly one `<p>…</p>\n` paragraph, returns its inner HTML.
///
/// Returns `None` when the wrapper is missing or when the inner text contains
/// `"\n<p>"`, i.e. a further paragraph starts inside.
fn strip_single_paragraph_wrapper(html: &str) -> Option<&str> {
    let inner = html.strip_prefix("<p>")?.strip_suffix("</p>\n")?;
    if inner.contains("\n<p>") {
        None
    } else {
        Some(inner)
    }
}
1113
/// Returns `true` when `line` closes a fenced code block opened with
/// `min_len` copies of `marker`.
///
/// A closing fence may be indented by up to three spaces, must consist of at
/// least `min_len` marker characters, and may be followed only by
/// whitespace. Four or more leading spaces make the line code content, not a
/// closing fence.
fn is_fence_closing_line(line: &str, marker: char, min_len: usize) -> bool {
    let indent = line.bytes().take_while(|&b| b == b' ').count();
    if indent > 3 {
        return false;
    }

    let body = line[indent..].trim_end();
    let run = body.chars().take_while(|&c| c == marker).count();
    if run < min_len {
        return false;
    }
    // Convert the character count to a byte offset before slicing so the
    // slice stays valid for any marker.
    body[run * marker.len_utf8()..].trim().is_empty()
}
1122
1123fn parse_table_alignments(separator_line: &str) -> Vec<Option<&'static str>> {
1124    split_table_row(separator_line)
1125        .into_iter()
1126        .map(|cell| {
1127            let c = cell.trim();
1128            let starts = c.starts_with(':');
1129            let ends = c.ends_with(':');
1130            match (starts, ends) {
1131                (true, true) => Some("center"),
1132                (true, false) => Some("left"),
1133                (false, true) => Some("right"),
1134                (false, false) => None,
1135            }
1136        })
1137        .collect()
1138}
1139
/// Appends the opening tag `<tag>` to `out`, adding an `align="…"` attribute
/// when an alignment is given.
fn push_table_cell_open(out: &mut String, tag: &str, align: Option<&str>) {
    out.push('<');
    out.push_str(tag);
    if let Some(value) = align {
        out.push_str(" align=\"");
        out.push_str(value);
        out.push('"');
    }
    out.push('>');
}
1147
1148fn is_block_start(lines: &[&str], pos: usize, config: ParserConfig) -> bool {
1149    parse_setext_heading(lines, pos).is_some()
1150        || is_thematic_break(lines[pos])
1151        || parse_atx_heading(lines[pos]).is_some()
1152        || is_fence_start(lines[pos])
1153        || is_indented_code_line(lines[pos])
1154        || is_blockquote_line(lines[pos])
1155        || is_html_line(lines[pos])
1156        || parse_list_prefix(lines[pos]).is_some()
1157        || (config.enable_tables && is_table_header(lines, pos))
1158}
1159
/// Parses a task-list marker at the start of a list item.
///
/// Recognises `[ ]` (unchecked) and `[x]` / `[X]` (checked) and returns the
/// checked state together with the remaining item text. The marker must be
/// followed by whitespace; this keeps an item that merely starts with a link
/// such as `[x](https://example.com)` from being mistaken for a task.
///
/// Returns `None` when the item is shorter than four bytes, does not start
/// with `[`, has any other marker content, or lacks the whitespace after `]`.
fn parse_task_item(item: &str) -> Option<(bool, &str)> {
    let trimmed = item.trim_start();
    if trimmed.len() < 4 || !trimmed.starts_with('[') {
        return None;
    }
    let close = trimmed.find(']')?;
    let checked = match &trimmed[1..close] {
        "x" | "X" => true,
        " " => false,
        _ => return None,
    };

    let after = &trimmed[close + 1..];
    if !after.starts_with(char::is_whitespace) {
        return None;
    }
    Some((checked, after.trim_start()))
}
1175
/// Parses a link reference definition of the form `[id]: url "optional title"`.
///
/// Returns the trimmed label and the link destination. The destination is
/// either the text between `<` and `>` when written in angle brackets, or the
/// first whitespace-delimited token after `]:`. Anything after the
/// destination (such as a title) is not part of the returned URL.
///
/// Returns `None` when the line does not start with `[`, is a footnote
/// definition (`[^`), lacks `]:`, or has an empty label or destination.
fn parse_link_definition(line: &str) -> Option<(&str, &str)> {
    if !line.starts_with('[') || line.starts_with("[^") {
        return None;
    }
    let close = line.find("]:")?;
    let id = line[1..close].trim();
    let target = line[close + 2..].trim();

    // `<...>` destinations may contain spaces; take everything up to `>`.
    let bracketed = target
        .strip_prefix('<')
        .and_then(|inner| inner.find('>').map(|end| &inner[..end]));
    let url = match bracketed {
        Some(inner) => inner,
        // Bare destination: stop at the first whitespace so a trailing
        // title does not leak into the URL.
        None => target.split_whitespace().next().unwrap_or(""),
    };

    if id.is_empty() || url.is_empty() {
        return None;
    }
    Some((id, url))
}
1188
/// Parses a footnote definition line of the form `[^id]: text`.
///
/// Returns the trimmed identifier and the trimmed text after `]:` (which may
/// be empty). Returns `None` when the line does not start with `[^`, has no
/// `]:`, or has an empty identifier.
fn parse_footnote_definition(line: &str) -> Option<(&str, &str)> {
    let after_open = line.strip_prefix("[^")?;
    let close = after_open.find("]:")?;
    let id = after_open[..close].trim();
    if id.is_empty() {
        return None;
    }
    let text = after_open[close + 2..].trim();
    Some((id, text))
}
1201
/// The kind of hard line break, if any, that precedes a newline.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HardBreak {
    /// The newline is an ordinary (soft) line ending.
    None,
    /// The newline is preceded by two or more spaces.
    Spaces,
    /// The newline is immediately preceded by an unescaped backslash.
    Backslash,
}

/// Classifies the newline at byte offset `newline_idx` of `text`.
///
/// * Two or more spaces directly before the newline give [`HardBreak::Spaces`].
/// * A backslash directly before the newline gives [`HardBreak::Backslash`],
///   but only when it is not itself escaped: an even-length run of
///   backslashes is a sequence of literal backslashes, not a break marker.
/// * A backslash separated from the newline by a space is not a hard break.
///
/// Returns [`HardBreak::None`] in every other case, including when
/// `newline_idx` is zero or lies outside `text`.
fn detect_hard_break(text: &str, newline_idx: usize) -> HardBreak {
    let before = match text.as_bytes().get(..newline_idx) {
        Some(prefix) => prefix,
        None => return HardBreak::None,
    };

    let spaces = before.iter().rev().take_while(|&&b| b == b' ').count();
    if spaces >= 2 {
        return HardBreak::Spaces;
    }

    if spaces == 0 {
        let backslashes = before.iter().rev().take_while(|&&b| b == b'\\').count();
        // An odd run leaves one unpaired backslash to escape the newline.
        if backslashes % 2 == 1 {
            return HardBreak::Backslash;
        }
    }
    HardBreak::None
}
1230
/// Removes every trailing space character (and only spaces) from `out`,
/// in place.
fn trim_trailing_spaces(out: &mut String) {
    let kept = out.trim_end_matches(' ').len();
    out.truncate(kept);
}
1236
/// Parses the parenthesised target that follows a link or image label, i.e.
/// the `(url "title")` part of `[text](url "title")`.
///
/// `after` must begin at the opening `(`. On success returns the URL text,
/// copied verbatim from the input, together with the number of bytes
/// consumed up to and including the closing `)`. An optional title is
/// scanned past but not returned.
///
/// Returns `None` when `after` does not start with `(`, when nothing follows
/// the opening whitespace, when a `<…>` URL is unterminated or contains a
/// newline, when a bare URL is empty, when a title is left unclosed, or when
/// the target is not terminated by `)`.
fn parse_inline_link_target(after: &str) -> Option<(String, usize)> {
    if !after.starts_with('(') {
        return None;
    }
    let bytes = after.as_bytes();
    // Byte cursor; starts just past the opening parenthesis.
    let mut i = 1usize;

    // Skip whitespace between `(` and the URL.
    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
        i += 1;
    }
    if i >= bytes.len() {
        return None;
    }

    let url_start = i;
    let url: String;

    if bytes[i] == b'<' {
        // Angle-bracket form: the URL is everything up to the matching `>`
        // on the same line.
        i += 1;
        let start = i;
        while i < bytes.len() && bytes[i] != b'>' {
            if bytes[i] == b'\n' {
                return None;
            }
            i += 1;
        }
        if i >= bytes.len() {
            return None;
        }
        url = after[start..i].to_string();
        i += 1;
    } else {
        // Bare form: the URL ends at whitespace or at a `)` that is not
        // balanced by an earlier `(`. `depth` tracks nested parentheses.
        let mut depth = 0usize;
        while i < bytes.len() {
            let ch = bytes[i] as char;
            // A backslash skips the following byte so that escaped
            // parentheses do not affect the nesting depth.
            if ch == '\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if ch == '(' {
                depth += 1;
                i += 1;
                continue;
            }
            if ch == ')' {
                if depth == 0 {
                    break;
                }
                depth -= 1;
                i += 1;
                continue;
            }
            if ch.is_ascii_whitespace() && depth == 0 {
                break;
            }
            i += 1;
        }
        // An empty bare URL is rejected.
        if i <= url_start {
            return None;
        }
        url = after[url_start..i].to_string();
    }

    // Skip whitespace between the URL and an optional title.
    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
        i += 1;
    }

    // Optional title delimited by "...", '...' or (...). Its text is
    // skipped, not captured.
    if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'' || bytes[i] == b'(') {
        let quote = bytes[i];
        let closing = if quote == b'(' { b')' } else { quote };
        i += 1;
        while i < bytes.len() && bytes[i] != closing {
            // A backslash skips the next byte, so an escaped delimiter does
            // not end the title.
            if bytes[i] == b'\\' && i + 1 < bytes.len() {
                i += 2;
            } else {
                i += 1;
            }
        }
        if i >= bytes.len() {
            return None;
        }
        i += 1;
        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
    }

    // The target must end with the closing parenthesis.
    if i >= bytes.len() || bytes[i] != b')' {
        return None;
    }

    Some((url, i + 1))
}
1330
1331fn parse_autolink_literal(text: &str) -> Option<(String, String, usize)> {
1332    if text.starts_with("https://") || text.starts_with("http://") {
1333        let link = parse_url_like_token(text)?;
1334        return Some((link.to_string(), link.to_string(), link.len()));
1335    }
1336    if text.starts_with("www.") {
1337        let link = parse_url_like_token(text)?;
1338        return Some((format!("http://{link}"), link.to_string(), link.len()));
1339    }
1340    if let Some((email, consumed)) = parse_email_literal(text) {
1341        return Some((format!("mailto:{email}"), email, consumed));
1342    }
1343    None
1344}
1345
/// Extracts a URL-like token from the start of `text`.
///
/// The token runs up to the first whitespace character or `<`; trailing
/// `.`, `,`, `;`, `:`, `!` and `?` are then removed. Returns `None` when
/// nothing is left.
fn parse_url_like_token(text: &str) -> Option<&str> {
    let token_len = text
        .find(|ch: char| ch.is_whitespace() || ch == '<')
        .unwrap_or(text.len());
    let link = text[..token_len]
        .trim_end_matches(|ch: char| matches!(ch, '.' | ',' | ';' | ':' | '!' | '?'));
    if link.is_empty() {
        None
    } else {
        Some(link)
    }
}
1372
1373fn parse_email_literal(text: &str) -> Option<(String, usize)> {
1374    let mut end = 0usize;
1375    let mut at_pos: Option<usize> = None;
1376
1377    for (idx, ch) in text.char_indices() {
1378        if ch.is_whitespace() || ch == '<' {
1379            break;
1380        }
1381        if ch == '@' {
1382            at_pos = Some(idx);
1383        }
1384        end = idx + ch.len_utf8();
1385    }
1386
1387    if end == 0 {
1388        return None;
1389    }
1390    let mut candidate_end = end;
1391    while candidate_end > 0 {
1392        let ch = text[..candidate_end].chars().next_back().unwrap_or('\0');
1393        if matches!(ch, '.' | ',' | ';' | ':' | '!' | '?') {
1394            candidate_end -= ch.len_utf8();
1395        } else {
1396            break;
1397        }
1398    }
1399    if candidate_end == 0 {
1400        return None;
1401    }
1402
1403    let candidate = &text[..candidate_end];
1404    let at = at_pos?;
1405    if at == 0 || at >= candidate.len() - 1 {
1406        return None;
1407    }
1408
1409    let local = &candidate[..at];
1410    let domain = &candidate[at + 1..];
1411    if !is_email_local(local) || !is_email_domain(domain) {
1412        return None;
1413    }
1414    Some((candidate.to_string(), candidate_end))
1415}
1416
/// Returns `true` when `local` is a non-empty e-mail local part made up only
/// of ASCII letters, digits and the punctuation listed in `SPECIALS`.
fn is_email_local(local: &str) -> bool {
    const SPECIALS: &str = "!#$%&'*+-/=?^_`{|}~.";
    !local.is_empty()
        && local
            .chars()
            .all(|ch| ch.is_ascii_alphanumeric() || SPECIALS.contains(ch))
}
1445
/// Returns `true` when `domain` contains at least one dot and every
/// dot-separated label is non-empty, consists of ASCII letters, digits and
/// hyphens, and neither starts nor ends with a hyphen.
fn is_email_domain(domain: &str) -> bool {
    // An empty domain has no dot, so this also rejects the empty string.
    if !domain.contains('.') {
        return false;
    }
    domain.split('.').all(|label| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
    })
}
1463
1464fn parse_angle_autolink(text: &str) -> Option<(String, usize)> {
1465    if !text.starts_with('<') {
1466        return None;
1467    }
1468    let end = text.find('>')?;
1469    let inner = &text[1..end];
1470    if inner.starts_with("http://") || inner.starts_with("https://") {
1471        let esc = html_escape(inner);
1472        return Some((format!("<a href=\"{esc}\">{esc}</a>"), end + 1));
1473    }
1474    if inner.contains('@') && !inner.contains(' ') {
1475        let esc = html_escape(inner);
1476        return Some((format!("<a href=\"mailto:{esc}\">{esc}</a>"), end + 1));
1477    }
1478    None
1479}
1480
/// Recognises a raw inline HTML construct at the start of `text`.
///
/// Handles comments (`<!-- … -->`), processing instructions (`<? … ?>`),
/// declarations (`<! … >`) and opening/closing tags whose name is made of
/// ASCII letters, digits and `-`. A tag must close with `>` before any
/// newline. Returns the matched slice and its length in bytes.
fn parse_inline_html(text: &str) -> Option<(&str, usize)> {
    if !text.starts_with('<') {
        return None;
    }

    // Constructs delimited by a fixed opener/closer pair. Order matters:
    // `<!--` must be tried before the more general `<!`.
    const DELIMITED: [(&str, &str); 3] = [("<!--", "-->"), ("<?", "?>"), ("<!", ">")];
    for &(opener, closer) in DELIMITED.iter() {
        if text.starts_with(opener) {
            let consumed = text.find(closer)? + closer.len();
            return Some((&text[..consumed], consumed));
        }
    }

    if text.len() < 3 {
        return None;
    }

    // Optional `/` for closing tags, then the tag name.
    let name_start = if text[1..].starts_with('/') { 2 } else { 1 };
    let name_len = text.as_bytes()[name_start..]
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || **b == b'-')
        .count();
    if name_len == 0 {
        return None;
    }

    // The tag ends at the first `>`; a newline reached first aborts the match.
    let name_end = name_start + name_len;
    let tail = &text[name_end..];
    match tail.find(|ch: char| ch == '>' || ch == '\n') {
        Some(offset) if tail[offset..].starts_with('>') => {
            let consumed = name_end + offset + 1;
            Some((&text[..consumed], consumed))
        }
        _ => None,
    }
}
1534
/// Parses a code span at the start of `text`.
///
/// The span opens with a run of N backticks and closes at the next run of
/// exactly N backticks; longer or shorter runs in between are part of the
/// content. Returns the raw content between the delimiters and the total
/// number of bytes consumed, or `None` when `text` does not start with a
/// backtick or no matching closing run exists.
fn parse_code_span(text: &str) -> Option<(&str, usize)> {
    let ticks = text.bytes().take_while(|&b| b == b'`').count();
    if ticks == 0 {
        return None;
    }

    let rest = &text[ticks..];
    let bytes = rest.as_bytes();
    let mut i = 0usize;
    while i < bytes.len() {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }
        // Measure the whole backtick run so a longer run is never mistaken
        // for the closer.
        let run_start = i;
        while i < bytes.len() && bytes[i] == b'`' {
            i += 1;
        }
        if i - run_start == ticks {
            return Some((&rest[..run_start], ticks + i));
        }
    }
    None
}
1545
/// Reads a backslash escape at the start of `text`.
///
/// Returns the character following the backslash and the number of bytes
/// consumed (the backslash plus that character), or `None` when `text` does
/// not start with a backslash or nothing follows it.
fn parse_escaped_char(text: &str) -> Option<(char, usize)> {
    let escaped = text.strip_prefix('\\')?.chars().next()?;
    Some((escaped, 1 + escaped.len_utf8()))
}
1555
/// Parses a `[label]` at the start of `text`, honouring nested brackets and
/// backslash escapes.
///
/// Returns the text between the outer brackets and the number of bytes
/// consumed including the closing `]`, or `None` when `text` does not start
/// with `[` or the label is never closed.
fn parse_bracketed_label(text: &str) -> Option<(&str, usize)> {
    if !text.starts_with('[') {
        return None;
    }

    let raw = text.as_bytes();
    let mut nesting = 0usize;
    let mut cursor = 1usize;
    while let Some(&byte) = raw.get(cursor) {
        // From here on `cursor` points just past the byte under inspection.
        cursor += 1;
        match byte {
            b'\\' => {
                // Skip the escaped byte, if there is one.
                if cursor < raw.len() {
                    cursor += 1;
                }
            }
            b'[' => nesting += 1,
            b']' if nesting == 0 => return Some((&text[1..cursor - 1], cursor)),
            b']' => nesting -= 1,
            _ => {}
        }
    }
    None
}
1590
/// Matches `marker … marker` at the start of `text`.
///
/// Returns the non-empty text enclosed by the first two occurrences of
/// `marker` and the number of bytes consumed including both markers.
/// Returns `None` when `text` does not start with `marker`, is too short to
/// hold content, has no closing marker, or the enclosed text is empty.
fn wrapped<'a>(text: &'a str, marker: &str) -> Option<(&'a str, usize)> {
    let tail = text.strip_prefix(marker)?;
    if text.len() <= marker.len() * 2 {
        return None;
    }
    match tail.find(marker) {
        Some(0) | None => None,
        Some(end) => Some((&tail[..end], end + 2 * marker.len())),
    }
}
1605
/// Normalises a link or footnote label for lookup.
///
/// Leading and trailing whitespace is removed, every internal run of
/// whitespace is collapsed to a single space, and the result is lowercased
/// using Unicode rules. Labels that differ only in case or spacing therefore
/// map to the same key.
fn normalize_key(text: &str) -> String {
    text.split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}
1609
/// Builds an identifier from a footnote key: ASCII letters, digits, `-` and
/// `_` are kept, every other character is replaced by `-`.
fn footnote_id(key: &str) -> String {
    key.chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') {
                ch
            } else {
                '-'
            }
        })
        .collect()
}
1621
/// Appends `ch` to `out`, replacing the HTML-significant characters
/// `&`, `<`, `>`, `"` and `'` with their entity forms.
fn push_escaped_char(out: &mut String, ch: char) {
    let entity = match ch {
        '&' => "&amp;",
        '<' => "&lt;",
        '>' => "&gt;",
        '"' => "&quot;",
        '\'' => "&#39;",
        _ => {
            // Nothing to escape: copy the character through unchanged.
            out.push(ch);
            return;
        }
    };
    out.push_str(entity);
}
1632
1633fn html_escape(text: &str) -> String {
1634    let mut out = String::with_capacity(text.len());
1635    for ch in text.chars() {
1636        push_escaped_char(&mut out, ch);
1637    }
1638    out
1639}
1640
/// Escapes `text` for use inside an HTML attribute value.
///
/// This is a thin alias for `html_escape`; attribute values and element text
/// are escaped identically.
fn html_attr_escape(text: &str) -> String {
    html_escape(text)
}
1644
// Unit tests for the public parsing entry points. Most tests render a small
// Markdown snippet and check the resulting HTML for an expected fragment,
// comparing the GFM and CommonMark flavors where the behavior differs.
#[cfg(test)]
mod tests {
    use super::{parse, parse_with_flavor, safe_parse, safe_parse_with_flavor, MarkdownFlavor};

    // --- GFM-only extensions: enabled for Gfm, disabled for CommonMark ---

    #[test]
    fn renders_table_in_gfm() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse(md);
        assert!(html.contains("<table>"));
        assert!(html.contains("<thead>"));
        assert!(html.contains("<tbody>"));
    }

    #[test]
    fn does_not_render_table_in_commonmark() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(!html.contains("<table>"));
    }

    #[test]
    fn renders_strikethrough_only_in_gfm() {
        let gfm = parse_with_flavor("~~done~~", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("~~done~~", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<del>done</del>"));
        assert!(!cm.contains("<del>done</del>"));
    }

    #[test]
    fn renders_task_list_only_in_gfm() {
        let gfm = parse_with_flavor("- [x] finish", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("- [x] finish", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("type=\"checkbox\""));
        assert!(!cm.contains("type=\"checkbox\""));
    }

    #[test]
    fn renders_autolink_literal_only_in_gfm() {
        let gfm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<a href=\"https://example.com\">https://example.com</a>"));
        assert!(!cm.contains("<a href=\"https://example.com\">https://example.com</a>"));
    }

    #[test]
    fn renders_footnotes_only_in_gfm() {
        let md = "note[^1]\n\n[^1]: footnote";
        let gfm = parse_with_flavor(md, MarkdownFlavor::Gfm);
        let cm = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(gfm.contains("footnote-ref"));
        assert!(gfm.contains("footnotes"));
        assert!(!cm.contains("footnote-ref"));
    }

    // --- Links, lists and safe parsing basics ---

    #[test]
    fn renders_reference_links() {
        let md = "[Rust]\n\n[Rust]: https://www.rust-lang.org/";
        let html = parse(md);
        assert!(html.contains("<a href=\"https://www.rust-lang.org/\">Rust</a>"));
    }

    #[test]
    fn blocks_script_in_safe_parse() {
        let md = "<script>alert(1)</script>";
        assert!(safe_parse(md).is_err());
    }

    #[test]
    fn safe_parse_flavor_works() {
        let html = safe_parse_with_flavor("~~x~~", MarkdownFlavor::CommonMark).unwrap();
        assert!(!html.contains("<del>x</del>"));
    }

    #[test]
    fn renders_ordered_list_with_start() {
        let html = parse("3. three\n4. four");
        assert!(html.contains("<ol start=\"3\">"));
        assert!(html.contains("<li>three</li>"));
    }

    #[test]
    fn renders_nested_list() {
        let html = parse("- parent\n  - child\n- next");
        assert!(html.matches("<ul>").count() >= 2);
        assert!(html.contains("child"));
    }

    #[test]
    fn parses_link_with_title_and_parentheses() {
        // Balanced parentheses belong to the URL; the quoted title does not.
        let html = parse("[x](https://example.com/a_(b) \"title\")");
        assert!(html.contains("href=\"https://example.com/a_(b)\""));
    }

    #[test]
    fn renders_gfm_literal_www_and_email_autolinks() {
        let html = parse_with_flavor(
            "visit www.example.com or me@example.com",
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("href=\"http://www.example.com\""));
        assert!(html.contains("href=\"mailto:me@example.com\""));
    }

    #[test]
    fn renders_hard_line_breaks() {
        // Both two trailing spaces and a trailing backslash force a <br />.
        let html_spaces = parse("a  \nb");
        let html_backslash = parse("a\\\nb");
        assert!(html_spaces.contains("a<br />\nb"));
        assert!(html_backslash.contains("a<br />\nb"));
    }

    // --- Raw HTML: passed through by parse, rejected by safe_parse ---

    #[test]
    fn parse_preserves_inline_html_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<span>y</span>"));
        assert!(gfm.contains("<span>y</span>"));
    }

    #[test]
    fn parse_preserves_html_block_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<div>"));
        assert!(cm.contains("</div>"));
        assert!(gfm.contains("<div>"));
        assert!(gfm.contains("</div>"));
    }

    #[test]
    fn safe_parse_rejects_inline_html() {
        let cm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    #[test]
    fn safe_parse_rejects_html_block() {
        let cm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    // --- Block constructs ---

    #[test]
    fn supports_setext_heading_and_blockquote() {
        let html = parse("Title\n---\n\n> quote");
        assert!(html.contains("<h2>Title</h2>"));
        assert!(html.contains("<blockquote>"));
    }

    #[test]
    fn supports_table_alignment_in_gfm() {
        let md = "| a | b | c |\n| :-- | :-: | --: |\n| 1 | 2 | 3 |";
        let html = parse(md);
        assert!(html.contains("<th align=\"left\">a</th>"));
        assert!(html.contains("<th align=\"center\">b</th>"));
        assert!(html.contains("<th align=\"right\">c</th>"));
    }

    // --- Mermaid charts ---

    #[test]
    fn renders_mermaid_chart_in_gfm() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::Gfm);
        assert!(html.contains("<pre class=\"mermaid\">flowchart TD\nA--&gt;B</pre>"));
    }

    #[test]
    fn keeps_mermaid_as_code_in_commonmark() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(html.contains("<pre><code class=\"language-mermaid\">flowchart TD\nA--&gt;B</code></pre>"));
    }

    #[test]
    fn appends_mermaid_runtime_for_gfm_file_output() {
        let html = super::with_chart_runtime_if_needed(
            "<pre class=\"mermaid\">graph TD\nA--&gt;B</pre>\n".to_string(),
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("mermaid.min.js"));
        assert!(html.contains("mermaid.initialize({ startOnLoad: true })"));
    }

    #[test]
    fn does_not_append_mermaid_runtime_for_commonmark() {
        let html = super::with_chart_runtime_if_needed(
            "<pre><code class=\"language-mermaid\">graph TD\nA--&gt;B</code></pre>\n".to_string(),
            MarkdownFlavor::CommonMark,
        );
        assert!(!html.contains("mermaid.min.js"));
    }

    // --- Regression cases ---

    #[test]
    fn safe_parse_blocks_script_variants() {
        // Mixed case and stray whitespace inside the tag must not slip through.
        assert!(safe_parse("<script>alert(1)</script>").is_err());
        assert!(safe_parse("<ScRiPt src=x></ScRiPt>").is_err());
        assert!(safe_parse("< / script >").is_err());
        assert!(safe_parse("<  script>").is_err());
    }

    #[test]
    fn renders_link_wrapped_image_badge() {
        // An image nested inside a link label; `&` in the image URL is escaped.
        let md = "[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white)](https://t.me/+Ka9i6CNwe71hMWQy)";
        let html = parse(md);
        assert!(html.contains(
            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&amp;logoColor=white\" alt=\"Telegram\" /></a>"
        ));
    }

    #[test]
    fn renders_discord_and_telegram_badges_together() {
        // The two badges are separated by U+2800 (braille pattern blank), not an ASCII space.
        let md = "![Discord](https://discord.gg/2xrMh7qX6m)⠀[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white)](https://t.me/+Ka9i6CNwe71hMWQy)";
        let html = parse(md);
        assert!(html.contains("<img src=\"https://discord.gg/2xrMh7qX6m\" alt=\"Discord\" />"));
        assert!(html.contains(
            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&amp;logoColor=white\" alt=\"Telegram\" /></a>"
        ));
    }
}
umark_lib/lib.rs

umark_lib/
lib.rs