1use std::collections::{HashMap, HashSet};
29use std::error::Error;
30use std::fmt;
31use std::fs;
32
/// Markdown dialect that selects which syntax extensions the parser enables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MarkdownFlavor {
    /// Core syntax only: every extension flag in `ParserConfig` is off.
    CommonMark,
    /// Enables tables, task lists, strikethrough, autolink literals,
    /// footnotes and Mermaid chart fences.
    Gfm,
}
41
/// Error returned by the `safe_*` entry points when the input contains a
/// `<script>` tag or the rendered output contains the raw-HTML omission marker.
#[derive(Debug)]
struct MarkdownSecurityError;
44
45impl fmt::Display for MarkdownSecurityError {
46 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
47 write!(f, "raw html tag is not allowed in safe_parse")
48 }
49}
50
// Marker impl so the error can be returned as `Box<dyn Error>` by the safe entry points.
impl Error for MarkdownSecurityError {}
52
/// Placeholder emitted instead of raw HTML when `omit_raw_html` is set. The
/// safe entry points also search the rendered output for it to detect raw HTML.
const RAW_HTML_OMITTED_MARKER: &str = "<!-- raw HTML omitted -->";
/// Script tags appended to file output that contains `<pre class="mermaid">`
/// blocks: they load Mermaid from the jsDelivr CDN and initialize it on load.
const MERMAID_BOOTSTRAP: &str = "<script src=\"https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js\"></script>\n<script>if (typeof mermaid !== \"undefined\") { mermaid.initialize({ startOnLoad: true }); }</script>\n";
55
/// Feature switches consulted by the block and inline parsers.
#[derive(Debug, Clone, Copy)]
struct ParserConfig {
    /// Replace raw HTML (block and inline) with `RAW_HTML_OMITTED_MARKER`.
    omit_raw_html: bool,
    /// Recognize pipe tables in the block parser.
    enable_tables: bool,
    /// Render `[ ]` / `[x]` prefixes of unordered list items as checkboxes.
    enable_task_list: bool,
    /// Render `~~text~~` as `<del>`.
    enable_strikethrough: bool,
    /// Turn bare URLs recognized by `parse_autolink_literal` into links.
    enable_autolink_literals: bool,
    /// Collect footnote definitions and render `[^id]` references.
    enable_footnotes: bool,
    /// Render fenced blocks whose language is `mermaid` as `<pre class="mermaid">`.
    enable_charts: bool,
}
66
67impl ParserConfig {
68 fn from_flavor(flavor: MarkdownFlavor) -> Self {
69 match flavor {
70 MarkdownFlavor::CommonMark => Self {
71 omit_raw_html: false,
72 enable_tables: false,
73 enable_task_list: false,
74 enable_strikethrough: false,
75 enable_autolink_literals: false,
76 enable_footnotes: false,
77 enable_charts: false,
78 },
79 MarkdownFlavor::Gfm => Self {
80 omit_raw_html: false,
81 enable_tables: true,
82 enable_task_list: true,
83 enable_strikethrough: true,
84 enable_autolink_literals: true,
85 enable_footnotes: true,
86 enable_charts: true,
87 },
88 }
89 }
90
91 fn with_raw_html_omitted(mut self) -> Self {
92 self.omit_raw_html = true;
93 self
94 }
95}
96
/// Definitions harvested from the document before block parsing starts.
#[derive(Default, Clone)]
struct DefinitionStore {
    /// Link reference definitions: normalized label -> URL text.
    links: HashMap<String, String>,
    /// Footnote definitions: normalized id -> footnote text.
    footnotes: HashMap<String, String>,
    /// Indices of the lines that make up definitions; the block parser skips them.
    skip_lines: HashSet<usize>,
}
103
/// Parsing state for one document (or one nested sub-document).
struct Parser<'a> {
    /// Input split into lines after newline normalization.
    lines: Vec<&'a str>,
    /// Link and footnote definitions plus the line indices they occupy.
    defs: DefinitionStore,
    /// Footnote ids in the order they were first referenced.
    footnote_order: Vec<String>,
    /// Feature switches for this parse.
    config: ParserConfig,
}
110
/// Renders `input` to HTML using the GFM flavor.
pub fn parse(input: &str) -> String {
    parse_with_flavor(input, MarkdownFlavor::Gfm)
}
123
/// Renders `input` to HTML with the extensions selected by `flavor`.
///
/// Raw HTML in the input is passed through unchanged.
pub fn parse_with_flavor(input: &str, flavor: MarkdownFlavor) -> String {
    parse_internal(input, ParserConfig::from_flavor(flavor))
}
139
/// Renders `input` to HTML using the GFM flavor, rejecting raw HTML.
///
/// # Errors
///
/// Returns an error under the same conditions as [`safe_parse_with_flavor`].
pub fn safe_parse(input: &str) -> Result<String, Box<dyn Error>> {
    safe_parse_with_flavor(input, MarkdownFlavor::Gfm)
}
156
157pub fn safe_parse_with_flavor(
167 input: &str,
168 flavor: MarkdownFlavor,
169) -> Result<String, Box<dyn Error>> {
170 reject_script_tag(input)?;
171 let rendered = parse_internal(
172 input,
173 ParserConfig::from_flavor(flavor).with_raw_html_omitted(),
174 );
175 if rendered.contains(RAW_HTML_OMITTED_MARKER) {
176 return Err(Box::new(MarkdownSecurityError));
177 }
178 Ok(rendered)
179}
180
/// Reads Markdown from `path`, renders it with the GFM flavor and writes the
/// HTML to `output_path`.
///
/// # Errors
///
/// Returns an error under the same conditions as [`parse_from_file_with_flavor`].
pub fn parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
    parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
}
209
210pub fn parse_from_file_with_flavor(
239 path: &str,
240 output_path: &str,
241 flavor: MarkdownFlavor,
242) -> Result<(), Box<dyn Error>> {
243 let content = fs::read_to_string(path)?;
244 let rendered = parse_with_flavor(&content, flavor);
245 let rendered = with_chart_runtime_if_needed(rendered, flavor);
246 fs::write(output_path, rendered)?;
247 Ok(())
248}
249
/// Reads Markdown from `path`, renders it with the GFM flavor while rejecting
/// raw HTML, and writes the HTML to `output_path`.
///
/// # Errors
///
/// Returns an error under the same conditions as
/// [`safe_parse_from_file_with_flavor`].
pub fn safe_parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
    safe_parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
}
274
275pub fn safe_parse_from_file_with_flavor(
300 path: &str,
301 output_path: &str,
302 flavor: MarkdownFlavor,
303) -> Result<(), Box<dyn Error>> {
304 let content = fs::read_to_string(path)?;
305 let rendered = safe_parse_with_flavor(&content, flavor)?;
306 fs::write(output_path, rendered)?;
307 Ok(())
308}
309
310fn parse_internal(input: &str, config: ParserConfig) -> String {
311 let normalized = normalize_newlines(input);
312 let lines: Vec<&str> = normalized.lines().collect();
313 let defs = collect_definitions(&lines, config);
314 let mut parser = Parser {
315 lines,
316 defs,
317 footnote_order: Vec::new(),
318 config,
319 };
320 parser.parse_blocks()
321}
322
323fn with_chart_runtime_if_needed(mut rendered: String, flavor: MarkdownFlavor) -> String {
324 if flavor == MarkdownFlavor::Gfm
325 && rendered.contains("<pre class=\"mermaid\">")
326 && !rendered.contains("mermaid.initialize(")
327 {
328 rendered.push('\n');
329 rendered.push_str(MERMAID_BOOTSTRAP);
330 }
331 rendered
332}
333
334fn reject_script_tag(input: &str) -> Result<(), Box<dyn Error>> {
335 if contains_script_tag(input) {
336 return Err(Box::new(MarkdownSecurityError));
337 }
338 Ok(())
339}
340
/// Reports whether `input` contains an opening or closing `<script>` tag.
///
/// The scan is ASCII case-insensitive and tolerates whitespace after `<` and
/// around a leading `/`, so `< / SCRIPT >` is detected. The tag name must be
/// followed by whitespace, `>`, `/` or the end of the input, so `<scripts>`
/// does not match.
///
/// The tag name is compared on bytes rather than by slicing the `&str`:
/// a six-byte window after `<` can end inside a multi-byte character (for
/// example in `"<abcdeé"`), and slicing a `str` there panics.
fn contains_script_tag(input: &str) -> bool {
    const TAG_NAME: &[u8] = b"script";

    let bytes = input.as_bytes();
    // Returns the first index at or after `at` that is not ASCII whitespace.
    let skip_whitespace = |mut at: usize| {
        while at < bytes.len() && bytes[at].is_ascii_whitespace() {
            at += 1;
        }
        at
    };

    for (open, _) in bytes.iter().enumerate().filter(|&(_, &b)| b == b'<') {
        let mut name_start = skip_whitespace(open + 1);
        if bytes.get(name_start) == Some(&b'/') {
            name_start = skip_whitespace(name_start + 1);
        }

        let name_end = name_start + TAG_NAME.len();
        let Some(name) = bytes.get(name_start..name_end) else {
            // Fewer than six bytes remain after this `<`.
            continue;
        };
        if !name.eq_ignore_ascii_case(TAG_NAME) {
            continue;
        }

        // Reaching the end of the input right after the name counts as a match.
        let terminator = bytes.get(name_end).copied().unwrap_or(b'>');
        if terminator.is_ascii_whitespace() || matches!(terminator, b'>' | b'/') {
            return true;
        }
    }
    false
}
375
376impl<'a> Parser<'a> {
    /// Renders every block of `self.lines` to HTML.
    ///
    /// Lines are consumed top to bottom. At each position the first matching
    /// block kind wins, so the order of the checks below is significant; a
    /// line that matches nothing else starts a paragraph. When footnotes are
    /// enabled and at least one was referenced, the footnote section is
    /// appended after the last block.
    fn parse_blocks(&mut self) -> String {
        let mut pos = 0usize;
        let mut out = String::new();

        while pos < self.lines.len() {
            // Definition lines were harvested up front; blank lines only separate blocks.
            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
                pos += 1;
                continue;
            }

            // A setext heading consumes its underline too (`next` is two lines
            // ahead), so the underline is never examined as a thematic break.
            if let Some((level, text, next)) = parse_setext_heading(&self.lines, pos) {
                // Copy the text out so the borrow of `self.lines` ends before
                // `parse_inlines` takes `&mut self`.
                let heading_text = text.trim().to_string();
                out.push_str(&format!(
                    "<h{level}>{}</h{level}>\n",
                    self.parse_inlines(&heading_text)
                ));
                pos = next;
                continue;
            }

            if is_thematic_break(self.lines[pos]) {
                out.push_str("<hr />\n");
                pos += 1;
                continue;
            }

            if let Some((level, text)) = parse_atx_heading(self.lines[pos]) {
                out.push_str(&format!(
                    "<h{level}>{}</h{level}>\n",
                    self.parse_inlines(text.trim())
                ));
                pos += 1;
                continue;
            }

            if is_fence_start(self.lines[pos]) {
                let (html, next) = self.parse_fenced_code(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_indented_code_line(self.lines[pos]) {
                let (html, next) = self.parse_indented_code(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_blockquote_line(self.lines[pos]) {
                let (html, next) = self.parse_blockquote(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_html_line(self.lines[pos]) {
                let (html, next) = self.parse_html_block(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if self.config.enable_tables && is_table_header(&self.lines, pos) {
                let (html, next) = self.parse_table(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if parse_list_prefix(self.lines[pos]).is_some() {
                let (html, next) = self.parse_list(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            // Fallback: anything else is paragraph text.
            let (html, next) = self.parse_paragraph(pos);
            out.push_str(&html);
            pos = next;
        }

        if self.config.enable_footnotes && !self.footnote_order.is_empty() {
            out.push_str(&self.render_footnotes());
        }

        out
    }
465
466 fn parse_subdocument(&mut self, markdown: &str) -> String {
467 let normalized = normalize_newlines(markdown);
468 let lines: Vec<&str> = normalized.lines().collect();
469 let mut nested = Parser {
470 lines,
471 defs: self.defs.clone(),
472 footnote_order: Vec::new(),
473 config: self.config,
474 };
475 let html = nested.parse_blocks();
476 for id in nested.footnote_order {
477 self.note_footnote(id);
478 }
479 html
480 }
481
482 fn parse_fenced_code(&self, start: usize) -> (String, usize) {
483 let first = self.lines[start].trim_start();
484 let fence_char = first.chars().next().unwrap_or('`');
485 let fence_len = first.chars().take_while(|c| *c == fence_char).count();
486 let info = first[fence_len..].trim();
487 let mut pos = start + 1;
488 let mut code_lines = Vec::new();
489
490 while pos < self.lines.len() {
491 let line = self.lines[pos].trim_start();
492 if is_fence_closing_line(line, fence_char, fence_len) {
493 pos += 1;
494 break;
495 }
496 code_lines.push(self.lines[pos]);
497 pos += 1;
498 }
499
500 let code_raw = code_lines.join("\n");
501 let code = html_escape(&code_raw);
502 let lang = info.split_whitespace().next().unwrap_or("");
503 let is_mermaid = self.config.enable_charts && lang.eq_ignore_ascii_case("mermaid");
504
505 let html = if is_mermaid {
506 format!("<pre class=\"mermaid\">{}</pre>\n", code)
507 } else if info.is_empty() {
508 format!("<pre><code>{}</code></pre>\n", code)
509 } else {
510 format!(
511 "<pre><code class=\"language-{}\">{}</code></pre>\n",
512 html_attr_escape(lang),
513 code
514 )
515 };
516 (html, pos)
517 }
518
519 fn parse_indented_code(&self, start: usize) -> (String, usize) {
520 let mut pos = start;
521 let mut code_lines = Vec::new();
522
523 while pos < self.lines.len() {
524 let line = self.lines[pos];
525 if line.trim().is_empty() {
526 code_lines.push("");
527 pos += 1;
528 continue;
529 }
530
531 if let Some(stripped) = strip_indented_code_prefix(line) {
532 code_lines.push(stripped);
533 pos += 1;
534 } else {
535 break;
536 }
537 }
538
539 let code = html_escape(&code_lines.join("\n"));
540 (format!("<pre><code>{}</code></pre>\n", code), pos)
541 }
542
543 fn parse_blockquote(&mut self, start: usize) -> (String, usize) {
544 let mut pos = start;
545 let mut parts = Vec::new();
546
547 while pos < self.lines.len() {
548 let line = self.lines[pos];
549 if line.trim().is_empty() {
550 parts.push(String::new());
551 pos += 1;
552 continue;
553 }
554 if !is_blockquote_line(line) {
555 break;
556 }
557 parts.push(strip_blockquote_prefix(line).to_string());
558 pos += 1;
559 }
560
561 let body = parts.join("\n");
562 let inner = self.parse_subdocument(&body);
563 (format!("<blockquote>\n{}</blockquote>\n", inner), pos)
564 }
565
566 fn parse_html_block(&self, start: usize) -> (String, usize) {
567 if !self.config.omit_raw_html {
568 let mut pos = start;
569 while pos < self.lines.len() {
570 if self.lines[pos].trim().is_empty() {
571 break;
572 }
573 pos += 1;
574 }
575 let raw = self.lines[start..pos].join("\n");
576 return (format!("{raw}\n"), pos);
577 }
578
579 let mut pos = start;
580 while pos < self.lines.len() {
581 if self.lines[pos].trim().is_empty() {
582 break;
583 }
584 pos += 1;
585 }
586 (format!("{RAW_HTML_OMITTED_MARKER}\n"), pos)
587 }
588
    /// Renders the pipe table whose header row is at line `start`.
    ///
    /// The caller has already verified (via `is_table_header`) that line
    /// `start + 1` exists and is a separator row. Body rows continue until a
    /// skipped line, a blank line, or a line without `|`. Returns the HTML and
    /// the index of the first line after the table.
    fn parse_table(&mut self, start: usize) -> (String, usize) {
        let headers = split_table_row(self.lines[start]);
        let aligns = parse_table_alignments(self.lines[start + 1]);
        // Skip the header and separator rows.
        let mut pos = start + 2;
        let mut rows: Vec<Vec<String>> = Vec::new();

        while pos < self.lines.len() {
            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
                break;
            }
            if !self.lines[pos].contains('|') {
                break;
            }
            rows.push(split_table_row(self.lines[pos]));
            pos += 1;
        }

        let mut out = String::new();
        out.push_str("<table>\n<thead>\n<tr>");
        for (idx, cell) in headers.into_iter().enumerate() {
            // Columns beyond the separator row's width get no alignment.
            push_table_cell_open(&mut out, "th", aligns.get(idx).copied().flatten());
            out.push_str(&self.parse_inlines(cell.trim()));
            out.push_str("</th>");
        }
        out.push_str("</tr>\n</thead>\n<tbody>\n");

        for row in rows {
            out.push_str("<tr>");
            for (idx, cell) in row.into_iter().enumerate() {
                push_table_cell_open(&mut out, "td", aligns.get(idx).copied().flatten());
                out.push_str(&self.parse_inlines(cell.trim()));
                out.push_str("</td>");
            }
            out.push_str("</tr>\n");
        }

        out.push_str("</tbody>\n</table>\n");
        (out, pos)
    }
628
    /// Renders the list whose first item is at line `start`.
    ///
    /// Items belong to the list while they have the same indentation and the
    /// same kind (ordered or unordered) as the first item. Each item's lines
    /// are collected, dedented and rendered as a nested document. Returns the
    /// HTML and the index of the first line after the list.
    fn parse_list(&mut self, start: usize) -> (String, usize) {
        // The fallback is only reached if the caller passes a non-list line.
        let (first_kind, _, base_indent) = parse_list_prefix_with_indent(self.lines[start])
            .unwrap_or((ListKind::Unordered, "", 0));
        let mut pos = start;
        let mut out = String::new();

        match first_kind {
            ListKind::Unordered => out.push_str("<ul>\n"),
            ListKind::Ordered(start_num) => {
                // `start` is only emitted when numbering does not begin at 1.
                if start_num != 1 {
                    out.push_str(&format!("<ol start=\"{start_num}\">\n"));
                } else {
                    out.push_str("<ol>\n");
                }
            }
        }

        // One iteration per list item.
        while pos < self.lines.len() {
            if self.is_skipped(pos) {
                break;
            }

            let Some((kind, item_line, indent)) = parse_list_prefix_with_indent(self.lines[pos])
            else {
                break;
            };
            if indent != base_indent || !same_kind_value(kind, first_kind) {
                break;
            }

            let mut item_parts = vec![item_line.to_string()];
            pos += 1;
            // Set as soon as a blank line is seen while collecting this item.
            let mut loose = false;

            // Collect the continuation lines of the current item.
            while pos < self.lines.len() {
                if self.is_skipped(pos) {
                    break;
                }

                let line = self.lines[pos];
                if line.trim().is_empty() {
                    loose = true;
                    item_parts.push(String::new());
                    pos += 1;
                    continue;
                }

                if let Some((next_kind, _, next_indent)) = parse_list_prefix_with_indent(line) {
                    // A sibling item of this list ends the current item.
                    if next_indent == base_indent && same_kind_value(next_kind, first_kind) {
                        break;
                    }
                    // A marker of the other kind at or above this level ends the list.
                    if next_indent <= base_indent && !same_kind_value(next_kind, first_kind) {
                        break;
                    }
                }

                // Any other block that is not indented past the list ends the item.
                if leading_indent(line) <= base_indent
                    && is_block_start(&self.lines, pos, self.config)
                {
                    break;
                }

                item_parts.push(dedent_list_continuation(line, base_indent).to_string());
                pos += 1;
            }

            out.push_str("<li>");

            // Task-list checkboxes are only recognized in unordered lists.
            let mut checkbox: Option<bool> = None;
            if self.config.enable_task_list && matches!(first_kind, ListKind::Unordered) {
                if let Some((checked, rest)) = parse_task_item(&item_parts[0]) {
                    checkbox = Some(checked);
                    item_parts[0] = rest.to_string();
                }
            }

            if let Some(checked) = checkbox {
                if checked {
                    out.push_str("<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
                } else {
                    out.push_str("<input type=\"checkbox\" disabled=\"\" /> ");
                }
            }

            let item_markdown = item_parts.join("\n");
            let rendered = self.parse_subdocument(&item_markdown);
            // Tight items whose content is a single paragraph drop the `<p>` wrapper.
            if !loose {
                if let Some(stripped) = strip_single_paragraph_wrapper(&rendered) {
                    out.push_str(stripped);
                } else {
                    out.push_str(&rendered);
                }
            } else {
                out.push_str(&rendered);
            }
            out.push_str("</li>\n");
        }

        match first_kind {
            ListKind::Unordered => out.push_str("</ul>\n"),
            ListKind::Ordered(_) => out.push_str("</ol>\n"),
        }

        (out, pos)
    }
734
735 fn parse_paragraph(&mut self, start: usize) -> (String, usize) {
736 let mut pos = start;
737 let mut parts = Vec::new();
738
739 while pos < self.lines.len() {
740 if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
741 break;
742 }
743 if pos != start && is_block_start(&self.lines, pos, self.config) {
744 break;
745 }
746 parts.push(self.lines[pos]);
747 pos += 1;
748 }
749
750 let text = parts.join("\n");
751 (format!("<p>{}</p>\n", self.parse_inlines(&text)), pos)
752 }
753
    /// Renders inline Markdown in `text` to HTML.
    ///
    /// The text is scanned left to right by byte offset. At each position the
    /// constructs below are tried in order and the first one that matches
    /// consumes its input; a character that matches nothing is emitted through
    /// `push_escaped_char`. Emphasis, strong and strikethrough content is
    /// rendered recursively.
    fn parse_inlines(&mut self, text: &str) -> String {
        let mut out = String::new();
        let mut i = 0usize;

        while i < text.len() {
            let rest = &text[i..];

            // A backslash directly before a newline is a hard line break.
            if rest.starts_with("\\\n") {
                out.push_str("<br />\n");
                i += 2;
                continue;
            }

            if rest.starts_with('\n') {
                match detect_hard_break(text, i) {
                    HardBreak::Spaces => {
                        // Drop the trailing spaces that requested the break.
                        trim_trailing_spaces(&mut out);
                        out.push_str("<br />\n");
                    }
                    HardBreak::Backslash => {
                        if out.ends_with('\\') {
                            out.pop();
                        }
                        out.push_str("<br />\n");
                    }
                    HardBreak::None => out.push('\n'),
                }
                i += 1;
                continue;
            }

            if let Some((ch, consumed)) = parse_escaped_char(rest) {
                push_escaped_char(&mut out, ch);
                i += consumed;
                continue;
            }

            if rest.starts_with('`') {
                if let Some((content, consumed)) = parse_code_span(rest) {
                    out.push_str("<code>");
                    out.push_str(&html_escape(content));
                    out.push_str("</code>");
                    i += consumed;
                    continue;
                }
            }

            // Footnote reference: only rendered when the id has a definition;
            // otherwise the text falls through to the link handling below.
            if self.config.enable_footnotes && rest.starts_with("[^") {
                if let Some(end) = rest.find(']') {
                    let raw_id = &rest[2..end];
                    let key = normalize_key(raw_id);
                    if self.defs.footnotes.contains_key(&key) {
                        let index = self.note_footnote(key.clone());
                        let safe = footnote_id(&key);
                        out.push_str(&format!(
                            "<sup class=\"footnote-ref\"><a href=\"#fn-{safe}\" id=\"fnref-{safe}\">{index}</a></sup>"
                        ));
                        i += end + 1;
                        continue;
                    }
                }
            }

            if rest.starts_with("![") {
                if let Some((html, consumed)) = self.parse_image(rest) {
                    out.push_str(&html);
                    i += consumed;
                    continue;
                }
            }

            if rest.starts_with('[') {
                if let Some((html, consumed)) = self.parse_link_like(rest) {
                    out.push_str(&html);
                    i += consumed;
                    continue;
                }
            }

            if let Some((html, consumed)) = parse_angle_autolink(rest) {
                out.push_str(&html);
                i += consumed;
                continue;
            }

            // Inline raw HTML is passed through, or replaced by the marker in safe mode.
            if let Some((raw, consumed)) = parse_inline_html(rest) {
                if !self.config.omit_raw_html {
                    out.push_str(raw);
                } else {
                    out.push_str(RAW_HTML_OMITTED_MARKER);
                }
                i += consumed;
                continue;
            }

            if self.config.enable_autolink_literals {
                if let Some((href, text_value, consumed)) = parse_autolink_literal(rest) {
                    let href_escaped = html_escape(&href);
                    let text_escaped = html_escape(&text_value);
                    out.push_str(&format!("<a href=\"{href_escaped}\">{text_escaped}</a>"));
                    i += consumed;
                    continue;
                }
            }

            // Two-character delimiters are tried before their one-character forms.
            if let Some((content, consumed)) = wrapped(rest, "**") {
                out.push_str("<strong>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</strong>");
                i += consumed;
                continue;
            }

            if let Some((content, consumed)) = wrapped(rest, "__") {
                out.push_str("<strong>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</strong>");
                i += consumed;
                continue;
            }

            if self.config.enable_strikethrough {
                if let Some((content, consumed)) = wrapped(rest, "~~") {
                    out.push_str("<del>");
                    out.push_str(&self.parse_inlines(content));
                    out.push_str("</del>");
                    i += consumed;
                    continue;
                }
            }

            if let Some((content, consumed)) = wrapped(rest, "*") {
                out.push_str("<em>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</em>");
                i += consumed;
                continue;
            }

            if let Some((content, consumed)) = wrapped(rest, "_") {
                out.push_str("<em>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</em>");
                i += consumed;
                continue;
            }

            // Plain character: advance by its full UTF-8 width so `i` stays on
            // a character boundary.
            if let Some(ch) = rest.chars().next() {
                push_escaped_char(&mut out, ch);
                i += ch.len_utf8();
            } else {
                break;
            }
        }

        out
    }
911
912 fn parse_image(&mut self, rest: &str) -> Option<(String, usize)> {
913 let (alt, consumed_label) = parse_bracketed_label(&rest[1..])?;
914 let after = &rest[1 + consumed_label..];
915
916 let (url, consumed_after) = parse_inline_link_target(after)?;
917 let html = format!(
918 "<img src=\"{}\" alt=\"{}\" />",
919 html_attr_escape(&url),
920 html_attr_escape(alt)
921 );
922 Some((html, 1 + consumed_label + consumed_after))
923 }
924
    /// Parses a link that starts with `[` at the beginning of `rest`.
    ///
    /// Three forms are tried in order: inline `[text](target)`, reference
    /// `[text][ref]` (an empty `ref` uses the text as the key), and shortcut
    /// `[text]`. Reference forms only match when the key has a link
    /// definition. Returns the `<a>` HTML and the number of bytes consumed.
    fn parse_link_like(&mut self, rest: &str) -> Option<(String, usize)> {
        let (label, consumed_label) = parse_bracketed_label(rest)?;
        let after = &rest[consumed_label..];

        if let Some((url, consumed_after)) = parse_inline_link_target(after) {
            let html = format!(
                "<a href=\"{}\">{}</a>",
                html_attr_escape(&url),
                self.parse_inlines(label)
            );
            return Some((html, consumed_label + consumed_after));
        }

        if after.starts_with('[') {
            // `?` here means an unparseable second bracket abandons the whole
            // link, including the shortcut form below.
            let (raw_ref, consumed_ref) = parse_bracketed_label(after)?;
            let key = if raw_ref.trim().is_empty() {
                normalize_key(label)
            } else {
                normalize_key(raw_ref)
            };
            if let Some(url) = self.defs.links.get(&key) {
                // `url` is last used before `parse_inlines` borrows `self`
                // mutably, so the argument order matters.
                let html = format!(
                    "<a href=\"{}\">{}</a>",
                    html_attr_escape(url),
                    self.parse_inlines(label)
                );
                return Some((html, consumed_label + consumed_ref));
            }
        }

        // Shortcut reference: the label itself is the definition key.
        let key = normalize_key(label);
        if let Some(url) = self.defs.links.get(&key) {
            let html = format!(
                "<a href=\"{}\">{}</a>",
                html_attr_escape(url),
                self.parse_inlines(label)
            );
            return Some((html, consumed_label));
        }

        None
    }
967
968 fn note_footnote(&mut self, id: String) -> usize {
969 if let Some(idx) = self.footnote_order.iter().position(|x| x == &id) {
970 idx + 1
971 } else {
972 self.footnote_order.push(id);
973 self.footnote_order.len()
974 }
975 }
976
977 fn render_footnotes(&mut self) -> String {
978 let mut out = String::new();
979 out.push_str("<section class=\"footnotes\">\n<ol>\n");
980
981 let footnote_ids = self.footnote_order.clone();
982 for id in footnote_ids {
983 let safe = footnote_id(&id);
984 let text = self.defs.footnotes.get(&id).cloned().unwrap_or_default();
985 out.push_str(&format!(
986 "<li id=\"fn-{safe}\">{} <a href=\"#fnref-{safe}\" class=\"footnote-backref\">↩</a></li>\n",
987 self.parse_inlines(text.trim())
988 ));
989 }
990
991 out.push_str("</ol>\n</section>\n");
992 out
993 }
994
    /// Returns `true` when line index `line` is recorded in `skip_lines`,
    /// i.e. it was consumed as part of a link or footnote definition.
    fn is_skipped(&self, line: usize) -> bool {
        self.defs.skip_lines.contains(&line)
    }
998}
999
/// Kind of list marker found at the start of a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ListKind {
    /// Bullet marker: `-`, `*` or `+`.
    Unordered,
    /// Numbered marker; carries the parsed number.
    Ordered(usize),
}
1005
/// Converts CRLF and lone CR line endings to LF.
fn normalize_newlines(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\r' {
            out.push(ch);
            continue;
        }
        // A CR becomes LF; the LF of a CRLF pair is absorbed into it.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        out.push('\n');
    }
    out
}
1009
/// Scans `lines` once for link reference definitions and, when footnotes are
/// enabled, footnote definitions.
///
/// Every line that belongs to a definition is recorded in `skip_lines` so the
/// block parser does not render it. Keys are passed through `normalize_key`.
/// A later definition with the same key replaces an earlier one.
fn collect_definitions(lines: &[&str], config: ParserConfig) -> DefinitionStore {
    let mut defs = DefinitionStore::default();
    let mut i = 0usize;

    while i < lines.len() {
        // Definitions are matched on the trimmed line, so indentation is ignored.
        let line = lines[i].trim();

        if let Some((id, url)) = parse_link_definition(line) {
            defs.links.insert(normalize_key(id), url.to_string());
            defs.skip_lines.insert(i);
            i += 1;
            continue;
        }

        if config.enable_footnotes {
            if let Some((id, first_text)) = parse_footnote_definition(line) {
                let mut text_parts = vec![first_text.to_string()];
                defs.skip_lines.insert(i);
                i += 1;

                // Indented lines directly after the definition continue its text.
                // NOTE(review): the space prefix below reads as a single space;
                // confirm the intended indentation width for continuation lines.
                while i < lines.len() {
                    let next = lines[i];
                    if next.starts_with(" ") || next.starts_with('\t') {
                        text_parts.push(next.trim().to_string());
                        defs.skip_lines.insert(i);
                        i += 1;
                    } else {
                        break;
                    }
                }

                // Continuation lines are joined into one line of text.
                defs.footnotes
                    .insert(normalize_key(id), text_parts.join(" "));
                continue;
            }
        }

        i += 1;
    }

    defs
}
1052
/// Parses an ATX heading (`# Title`) and returns `(level, text)`.
///
/// The line must start, after optional leading whitespace, with one to six
/// `#` characters followed by a space or tab; `#hashtag` is therefore not a
/// heading. An optional closing run of `#` is removed only when it is
/// separated from the text by whitespace, so `# C#` keeps its `#`.
///
/// Returns `None` for non-headings and for headings with no content at all.
fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
    let trimmed = line.trim_start();
    let level = trimmed.bytes().take_while(|&b| b == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    // `#` is a single byte, so `level` is also a valid byte offset.
    let after_marker = &trimmed[level..];
    if !after_marker.starts_with([' ', '\t']) {
        return None;
    }

    let content = after_marker.trim();
    if content.is_empty() {
        return None;
    }

    // A closing sequence counts only if it is the whole content or is
    // preceded by whitespace; otherwise the `#` characters are text.
    let without_closer = content.trim_end_matches('#');
    let text = if without_closer.is_empty() || without_closer.ends_with([' ', '\t']) {
        without_closer.trim_end()
    } else {
        content
    };
    Some((level, text))
}
1072
1073fn parse_setext_heading<'a>(lines: &'a [&str], pos: usize) -> Option<(usize, &'a str, usize)> {
1074 if pos + 1 >= lines.len() {
1075 return None;
1076 }
1077 if lines[pos].trim().is_empty() {
1078 return None;
1079 }
1080 if !can_be_setext_content_line(lines[pos]) {
1081 return None;
1082 }
1083
1084 let underline = lines[pos + 1].trim();
1085 if is_setext_underline(underline, '=') {
1086 return Some((1, lines[pos], pos + 2));
1087 }
1088 if is_setext_underline(underline, '-') {
1089 return Some((2, lines[pos], pos + 2));
1090 }
1091 None
1092}
1093
1094fn can_be_setext_content_line(line: &str) -> bool {
1095 !line.trim().is_empty()
1096 && !is_thematic_break(line)
1097 && parse_atx_heading(line).is_none()
1098 && !is_fence_start(line)
1099 && !is_indented_code_line(line)
1100 && !is_blockquote_line(line)
1101 && !is_html_line(line)
1102 && parse_list_prefix(line).is_none()
1103}
1104
/// Returns `true` when `line`, ignoring surrounding whitespace, is at least
/// three bytes long and consists only of `marker`.
fn is_setext_underline(line: &str, marker: char) -> bool {
    let run = line.trim();
    run.len() >= 3 && run.chars().all(|ch| ch == marker)
}
1109
/// Returns `true` when `line` is a thematic break: ignoring all whitespace,
/// it is three or more of the same character out of `-`, `*` and `_`.
fn is_thematic_break(line: &str) -> bool {
    let mut marks = line.chars().filter(|c| !c.is_whitespace());

    let Some(first) = marks.next() else {
        return false;
    };
    if !matches!(first, '-' | '*' | '_') {
        return false;
    }

    let mut total = 1usize;
    for mark in marks {
        if mark != first {
            return false;
        }
        total += 1;
    }
    total >= 3
}
1123
/// Returns `true` when `line`, after leading whitespace, opens a code fence
/// with three backticks or three tildes.
fn is_fence_start(line: &str) -> bool {
    let body = line.trim_start();
    ["```", "~~~"].iter().any(|&fence| body.starts_with(fence))
}
1128
/// Returns `true` when `strip_indented_code_prefix` finds an indented-code
/// prefix at the start of `line`.
fn is_indented_code_line(line: &str) -> bool {
    strip_indented_code_prefix(line).is_some()
}
1132
/// Removes one level of indented-code indentation from `line`.
///
/// The prefix is either four spaces or a single tab. Returns the remainder,
/// or `None` when the line has neither. Lines indented by one to three spaces
/// are not code lines.
fn strip_indented_code_prefix(line: &str) -> Option<&str> {
    // Width of a space-indented code prefix.
    const INDENT_WIDTH: usize = 4;

    let bytes = line.as_bytes();
    if bytes.len() >= INDENT_WIDTH && bytes[..INDENT_WIDTH].iter().all(|&b| b == b' ') {
        // The prefix is ASCII, so the cut is on a character boundary.
        return Some(&line[INDENT_WIDTH..]);
    }
    line.strip_prefix('\t')
}
1139
/// Returns `true` when the first non-whitespace character of `line` is `>`.
fn is_blockquote_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('>'))
}
1143
/// Removes leading whitespace, one `>` marker and at most one space after the
/// marker. A line without a marker is returned with only its leading
/// whitespace removed.
fn strip_blockquote_prefix(line: &str) -> &str {
    let body = line.trim_start();
    match body.strip_prefix('>') {
        Some(quoted) => quoted.strip_prefix(' ').unwrap_or(quoted),
        // `body` was trimmed, so it cannot start with a space.
        None => body,
    }
}
1149
/// Returns `true` when the first non-whitespace character of `line` is `<`.
fn is_html_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('<'))
}
1153
1154fn is_table_header(lines: &[&str], pos: usize) -> bool {
1155 if pos + 1 >= lines.len() {
1156 return false;
1157 }
1158 if !lines[pos].contains('|') {
1159 return false;
1160 }
1161 is_table_separator(lines[pos + 1])
1162}
1163
1164fn is_table_separator(line: &str) -> bool {
1165 let trimmed = line.trim();
1166 if !trimmed.contains('-') {
1167 return false;
1168 }
1169 let cells = split_table_row(trimmed);
1170 if cells.is_empty() {
1171 return false;
1172 }
1173 cells.into_iter().all(|cell| {
1174 let c = cell.trim();
1175 c.len() >= 3 && c.chars().all(|ch| ch == '-' || ch == ':')
1176 })
1177}
1178
/// Splits a table row into trimmed cell strings.
///
/// Surrounding whitespace and all leading and trailing `|` characters are
/// removed first, then the remainder is split on `|`.
fn split_table_row(line: &str) -> Vec<String> {
    let inner = line.trim().trim_matches('|');

    let mut cells = Vec::new();
    for cell in inner.split('|') {
        cells.push(cell.trim().to_owned());
    }
    cells
}
1186
/// Like `parse_list_prefix_with_indent`, but returns only the list kind and
/// the item text, discarding the indentation.
fn parse_list_prefix(line: &str) -> Option<(ListKind, &str)> {
    parse_list_prefix_with_indent(line).map(|(kind, rest, _)| (kind, rest))
}
1190
1191fn parse_list_prefix_with_indent(line: &str) -> Option<(ListKind, &str, usize)> {
1192 let indent = leading_indent(line);
1193 let trimmed = line.trim_start_matches([' ', '\t']);
1194 if trimmed.len() < 2 {
1195 return None;
1196 }
1197
1198 if (trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ "))
1199 && trimmed.len() > 2
1200 {
1201 return Some((ListKind::Unordered, &trimmed[2..], indent));
1202 }
1203
1204 let mut digits_end = 0usize;
1205 for (idx, ch) in trimmed.char_indices() {
1206 if ch.is_ascii_digit() {
1207 digits_end = idx + ch.len_utf8();
1208 } else {
1209 break;
1210 }
1211 }
1212
1213 if digits_end == 0 || digits_end + 2 > trimmed.len() {
1214 return None;
1215 }
1216
1217 let marker = trimmed.as_bytes()[digits_end] as char;
1218 if marker != '.' && marker != ')' {
1219 return None;
1220 }
1221 if trimmed.as_bytes()[digits_end + 1] != b' ' {
1222 return None;
1223 }
1224
1225 let start = trimmed[..digits_end].parse::<usize>().ok()?;
1226 Some((ListKind::Ordered(start), &trimmed[digits_end + 2..], indent))
1227}
1228
1229fn same_kind_value(current: ListKind, expected: ListKind) -> bool {
1230 matches!(
1231 (current, expected),
1232 (ListKind::Unordered, ListKind::Unordered) | (ListKind::Ordered(_), ListKind::Ordered(_))
1233 )
1234}
1235
/// Measures the indentation of `line` in columns: each leading space counts
/// as one column and each leading tab as four.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1usize),
            '\t' => Some(4),
            _ => None,
        })
        .sum()
}
1247
1248fn dedent_list_continuation(line: &str, base_indent: usize) -> &str {
1249 if leading_indent(line) <= base_indent {
1250 return line.trim_start();
1251 }
1252 let mut removed_cols = 0usize;
1253 let mut byte_idx = 0usize;
1254 for (idx, ch) in line.char_indices() {
1255 match ch {
1256 ' ' => {
1257 removed_cols += 1;
1258 byte_idx = idx + 1;
1259 }
1260 '\t' => {
1261 removed_cols += 4;
1262 byte_idx = idx + 1;
1263 }
1264 _ => break,
1265 }
1266 if removed_cols >= base_indent + 2 {
1267 break;
1268 }
1269 }
1270 &line[byte_idx..]
1271}
1272
/// Returns the inner HTML when `html` is exactly one paragraph of the form
/// `<p>…</p>\n`.
///
/// Returns `None` when the wrapper is missing or when the inner text contains
/// the start of another paragraph.
fn strip_single_paragraph_wrapper(html: &str) -> Option<&str> {
    let inner = html.strip_prefix("<p>")?.strip_suffix("</p>\n")?;
    if inner.contains("\n<p>") {
        None
    } else {
        Some(inner)
    }
}
1282
/// Returns `true` when `line` closes a code fence: it starts with at least
/// `min_len` copies of `marker`, followed by nothing but whitespace.
///
/// The caller passes the line with its leading whitespace already removed.
fn is_fence_closing_line(line: &str, marker: char, min_len: usize) -> bool {
    let candidate = line.trim_end();
    let after_run = candidate.trim_start_matches(marker);
    // Number of leading `marker` characters.
    let run_len = (candidate.len() - after_run.len()) / marker.len_utf8();
    run_len >= min_len && after_run.trim().is_empty()
}
1291
1292fn parse_table_alignments(separator_line: &str) -> Vec<Option<&'static str>> {
1293 split_table_row(separator_line)
1294 .into_iter()
1295 .map(|cell| {
1296 let c = cell.trim();
1297 let starts = c.starts_with(':');
1298 let ends = c.ends_with(':');
1299 match (starts, ends) {
1300 (true, true) => Some("center"),
1301 (true, false) => Some("left"),
1302 (false, true) => Some("right"),
1303 (false, false) => None,
1304 }
1305 })
1306 .collect()
1307}
1308
/// Appends the opening tag for a table cell (`tag` is `th` or `td`) to `out`,
/// adding an `align` attribute when an alignment is given.
fn push_table_cell_open(out: &mut String, tag: &str, align: Option<&str>) {
    out.push('<');
    out.push_str(tag);
    if let Some(al) = align {
        out.push_str(" align=\"");
        out.push_str(al);
        out.push('"');
    }
    out.push('>');
}
1316
1317fn is_block_start(lines: &[&str], pos: usize, config: ParserConfig) -> bool {
1318 parse_setext_heading(lines, pos).is_some()
1319 || is_thematic_break(lines[pos])
1320 || parse_atx_heading(lines[pos]).is_some()
1321 || is_fence_start(lines[pos])
1322 || is_indented_code_line(lines[pos])
1323 || is_blockquote_line(lines[pos])
1324 || is_html_line(lines[pos])
1325 || parse_list_prefix(lines[pos]).is_some()
1326 || (config.enable_tables && is_table_header(lines, pos))
1327}
1328
/// Parses a task-list prefix (`[ ]`, `[x]` or `[X]`) at the start of a list
/// item.
///
/// Returns whether the box is checked, plus the item text after the box with
/// its leading whitespace removed. Returns `None` when the item is shorter
/// than four bytes or does not start with a valid box.
fn parse_task_item(item: &str) -> Option<(bool, &str)> {
    let body = item.trim_start();
    if body.len() < 4 {
        return None;
    }

    let (marker, tail) = body.strip_prefix('[')?.split_once(']')?;
    let checked = match marker {
        "x" | "X" => true,
        " " => false,
        _ => return None,
    };
    Some((checked, tail.trim_start()))
}
1344
/// Parses a link reference definition of the form `[id]: url`.
///
/// The caller passes the line already trimmed. Footnote definitions (`[^…`)
/// are rejected, as are definitions with an empty id or an empty URL.
/// Returns the trimmed `(id, url)` pair.
fn parse_link_definition(line: &str) -> Option<(&str, &str)> {
    let inner = line.strip_prefix('[')?;
    if inner.starts_with('^') {
        return None;
    }

    let (id, url) = inner.split_once("]:")?;
    let (id, url) = (id.trim(), url.trim());
    if id.is_empty() || url.is_empty() {
        None
    } else {
        Some((id, url))
    }
}
1357
/// Parses a footnote definition of the form `[^id]: text`.
///
/// The caller passes the line already trimmed. Returns the trimmed
/// `(id, text)` pair. The text may be empty, but an empty id is rejected.
fn parse_footnote_definition(line: &str) -> Option<(&str, &str)> {
    let (id, text) = line.strip_prefix("[^")?.split_once("]:")?;
    let id = id.trim();
    if id.is_empty() {
        return None;
    }
    Some((id, text.trim()))
}
1370
/// Result of inspecting the text just before a newline for a hard line break.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HardBreak {
    /// No hard break; the newline is kept as a plain newline.
    None,
    /// The newline is preceded by two or more spaces.
    Spaces,
    /// A backslash precedes the newline (after at most one space).
    Backslash,
}
1377
1378fn detect_hard_break(text: &str, newline_idx: usize) -> HardBreak {
1379 if newline_idx == 0 {
1380 return HardBreak::None;
1381 }
1382
1383 let bytes = text.as_bytes();
1384 let mut idx = newline_idx;
1385 let mut spaces = 0usize;
1386 while idx > 0 && bytes[idx - 1] == b' ' {
1387 spaces += 1;
1388 idx -= 1;
1389 }
1390
1391 if spaces >= 2 {
1392 return HardBreak::Spaces;
1393 }
1394 if idx > 0 && bytes[idx - 1] == b'\\' {
1395 return HardBreak::Backslash;
1396 }
1397 HardBreak::None
1398}
1399
/// Removes every trailing U+0020 space from `out` in place.
///
/// Other whitespace (tabs, newlines) is left untouched.
fn trim_trailing_spaces(out: &mut String) {
    let kept = out.trim_end_matches(' ').len();
    out.truncate(kept);
}
1405
/// Parses the parenthesised target that follows a link or image label: `(dest "title")`.
///
/// `after` must start at the opening `(`. The destination is either
///
/// * `<...>`, which may contain spaces but not a newline, or
/// * a bare run that ends at the first unnested `)` or at ASCII whitespace outside
///   nested parentheses. Backslash escapes are stepped over while scanning but are kept
///   verbatim in the returned text.
///
/// An optional title delimited by `"…"`, `'…'` or `(…)` is validated and then discarded.
///
/// An empty destination (`()`) is accepted and yields an empty string, consistent with
/// the `(<>)` form and with CommonMark's `[link]()` example.
///
/// Returns the raw destination and the number of bytes consumed up to and including the
/// closing `)`, or `None` when the target is malformed or unterminated.
fn parse_inline_link_target(after: &str) -> Option<(String, usize)> {
    if !after.starts_with('(') {
        return None;
    }
    let bytes = after.as_bytes();
    let skip_whitespace = |mut at: usize| -> usize {
        while at < bytes.len() && bytes[at].is_ascii_whitespace() {
            at += 1;
        }
        at
    };

    let mut i = skip_whitespace(1);
    if i >= bytes.len() {
        return None;
    }

    let url = if bytes[i] == b'<' {
        let start = i + 1;
        let len = bytes[start..]
            .iter()
            .position(|&b| b == b'>' || b == b'\n')?;
        if bytes[start + len] == b'\n' {
            return None;
        }
        i = start + len + 1;
        &after[start..start + len]
    } else {
        let start = i;
        let mut depth = 0usize;
        while i < bytes.len() {
            match bytes[i] {
                // Step over the escaped byte so `\)` or `\(` does not affect nesting.
                b'\\' if i + 1 < bytes.len() => {
                    i += 2;
                    continue;
                }
                b'(' => depth += 1,
                b')' if depth == 0 => break,
                b')' => depth -= 1,
                b if depth == 0 && b.is_ascii_whitespace() => break,
                _ => {}
            }
            i += 1;
        }
        // The scan only stops on an ASCII byte or at the end, so `i` is a char boundary.
        &after[start..i]
    };

    i = skip_whitespace(i);

    if let Some(&open) = bytes.get(i) {
        if matches!(open, b'"' | b'\'' | b'(') {
            let closing = if open == b'(' { b')' } else { open };
            i += 1;
            while i < bytes.len() && bytes[i] != closing {
                i += if bytes[i] == b'\\' && i + 1 < bytes.len() { 2 } else { 1 };
            }
            if i >= bytes.len() {
                return None;
            }
            i = skip_whitespace(i + 1);
        }
    }

    if bytes.get(i) != Some(&b')') {
        return None;
    }
    Some((url.to_string(), i + 1))
}
1499
1500fn parse_autolink_literal(text: &str) -> Option<(String, String, usize)> {
1501 if text.starts_with("https://") || text.starts_with("http://") {
1502 let link = parse_url_like_token(text)?;
1503 return Some((link.to_string(), link.to_string(), link.len()));
1504 }
1505 if text.starts_with("www.") {
1506 let link = parse_url_like_token(text)?;
1507 return Some((format!("http://{link}"), link.to_string(), link.len()));
1508 }
1509 if let Some((email, consumed)) = parse_email_literal(text) {
1510 return Some((format!("mailto:{email}"), email, consumed));
1511 }
1512 None
1513}
1514
/// Extracts the URL-like token at the start of `text`.
///
/// The token runs up to the first whitespace character or `<`. Trailing characters that
/// belong to the surrounding prose rather than to the link are then removed, following
/// the GFM extended-autolink rules:
///
/// * trailing `.`, `,`, `;`, `:`, `!`, `?`, `*`, `_` and `~`;
/// * a trailing `)` when the token contains more `)` than `(`, so that
///   `(see https://example.com)` does not swallow the closing parenthesis while
///   `https://example.com/a_(b)` keeps its balanced one.
///
/// Returns `None` when nothing remains after trimming.
fn parse_url_like_token(text: &str) -> Option<&str> {
    let token_end = text
        .find(|c: char| c.is_whitespace() || c == '<')
        .unwrap_or(text.len());
    let mut link = &text[..token_end];

    let opens = link.matches('(').count();
    let mut closes = link.matches(')').count();

    while let Some(last) = link.chars().next_back() {
        let strip = match last {
            '.' | ',' | ';' | ':' | '!' | '?' | '*' | '_' | '~' => true,
            ')' if closes > opens => {
                closes -= 1;
                true
            }
            _ => false,
        };
        if !strip {
            break;
        }
        link = &link[..link.len() - last.len_utf8()];
    }

    if link.is_empty() {
        None
    } else {
        Some(link)
    }
}
1541
1542fn parse_email_literal(text: &str) -> Option<(String, usize)> {
1543 let mut end = 0usize;
1544 let mut at_pos: Option<usize> = None;
1545
1546 for (idx, ch) in text.char_indices() {
1547 if ch.is_whitespace() || ch == '<' {
1548 break;
1549 }
1550 if ch == '@' {
1551 at_pos = Some(idx);
1552 }
1553 end = idx + ch.len_utf8();
1554 }
1555
1556 if end == 0 {
1557 return None;
1558 }
1559 let mut candidate_end = end;
1560 while candidate_end > 0 {
1561 let ch = text[..candidate_end].chars().next_back().unwrap_or('\0');
1562 if matches!(ch, '.' | ',' | ';' | ':' | '!' | '?') {
1563 candidate_end -= ch.len_utf8();
1564 } else {
1565 break;
1566 }
1567 }
1568 if candidate_end == 0 {
1569 return None;
1570 }
1571
1572 let candidate = &text[..candidate_end];
1573 let at = at_pos?;
1574 if at == 0 || at >= candidate.len() - 1 {
1575 return None;
1576 }
1577
1578 let local = &candidate[..at];
1579 let domain = &candidate[at + 1..];
1580 if !is_email_local(local) || !is_email_domain(domain) {
1581 return None;
1582 }
1583 Some((candidate.to_string(), candidate_end))
1584}
1585
/// Checks that `local` is a non-empty e-mail local part.
///
/// Every character must be an ASCII letter or digit, or one of the punctuation
/// characters listed in `SYMBOLS`.
fn is_email_local(local: &str) -> bool {
    const SYMBOLS: &str = "!#$%&'*+-/=?^_`{|}~.";

    if local.is_empty() {
        return false;
    }
    local
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || SYMBOLS.contains(c))
}
1614
/// Checks that `domain` looks like a dotted host name.
///
/// The domain must contain at least one `.`; every dot-separated label must be
/// non-empty, consist only of ASCII letters, digits and `-`, and neither start nor end
/// with `-`.
fn is_email_domain(domain: &str) -> bool {
    let label_ok = |label: &str| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
    };

    domain.contains('.') && domain.split('.').all(label_ok)
}
1632
1633fn parse_angle_autolink(text: &str) -> Option<(String, usize)> {
1634 if !text.starts_with('<') {
1635 return None;
1636 }
1637 let end = text.find('>')?;
1638 let inner = &text[1..end];
1639 if inner.starts_with("http://") || inner.starts_with("https://") {
1640 let esc = html_escape(inner);
1641 return Some((format!("<a href=\"{esc}\">{esc}</a>"), end + 1));
1642 }
1643 if inner.contains('@') && !inner.contains(' ') {
1644 let esc = html_escape(inner);
1645 return Some((format!("<a href=\"mailto:{esc}\">{esc}</a>"), end + 1));
1646 }
1647 None
1648}
1649
/// Recognises a raw inline HTML construct at the start of `text`.
///
/// Supported forms:
///
/// * comments `<!-- … -->`,
/// * processing instructions `<? … ?>`,
/// * declarations `<! … >`,
/// * opening and closing tags `<name …>` / `</name …>`, which must close with `>` before
///   any newline.
///
/// A tag name must begin with an ASCII letter, so text such as `<5>` or `<-x>` is not
/// mistaken for markup.
///
/// Returns the matched slice and its length in bytes, or `None` if `text` does not start
/// with one of these constructs.
fn parse_inline_html(text: &str) -> Option<(&str, usize)> {
    /// Returns the prefix of `text` up to and including the first `terminator`.
    fn take_through<'t>(text: &'t str, terminator: &str) -> Option<(&'t str, usize)> {
        let end = text.find(terminator)? + terminator.len();
        Some((&text[..end], end))
    }

    if !text.starts_with('<') {
        return None;
    }
    if text.starts_with("<!--") {
        return take_through(text, "-->");
    }
    if text.starts_with("<?") {
        return take_through(text, "?>");
    }
    if text.starts_with("<!") {
        return take_through(text, ">");
    }

    let bytes = text.as_bytes();
    // Shortest possible tag is `<a>`.
    if bytes.len() < 3 {
        return None;
    }

    let name_start = if bytes[1] == b'/' { 2 } else { 1 };
    if !bytes[name_start].is_ascii_alphabetic() {
        return None;
    }

    // The tag must end on the same line it started on.
    let stop = bytes[name_start..]
        .iter()
        .position(|&b| b == b'>' || b == b'\n')?;
    let end = name_start + stop;
    if bytes[end] == b'\n' {
        return None;
    }
    Some((&text[..=end], end + 1))
}
1703
/// Parses a backtick-delimited code span at the start of `text`.
///
/// The opening delimiter is the full leading run of backticks. The span is closed by the
/// next backtick run of *exactly* the same length; shorter or longer runs are part of
/// the content, so `` `a``b` `` yields the content ``a``b``.
///
/// Returns the raw content between the delimiters and the total number of bytes consumed
/// (both delimiters included), or `None` if `text` does not start with a backtick or no
/// matching closing run exists.
fn parse_code_span(text: &str) -> Option<(&str, usize)> {
    let bytes = text.as_bytes();
    let ticks = bytes.iter().take_while(|&&b| b == b'`').count();
    if ticks == 0 {
        return None;
    }

    let mut i = ticks;
    while i < bytes.len() {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }
        // Measure the whole run so a longer run is never matched by its prefix.
        let run_start = i;
        while i < bytes.len() && bytes[i] == b'`' {
            i += 1;
        }
        if i - run_start == ticks {
            return Some((&text[ticks..run_start], i));
        }
    }
    None
}
1714
/// Reads a backslash escape at the start of `text`.
///
/// Returns the character following the backslash together with the byte length of the
/// whole escape (backslash plus character). Yields `None` when `text` does not start
/// with a backslash or the backslash is the final character.
fn parse_escaped_char(text: &str) -> Option<(char, usize)> {
    let escaped = text.strip_prefix('\\')?.chars().next()?;
    Some((escaped, '\\'.len_utf8() + escaped.len_utf8()))
}
1724
/// Extracts a `[...]` label from the start of `text`.
///
/// Nested bracket pairs are balanced and a backslash hides the byte that follows it, so
/// neither `[a [b] c]` nor `[a\]b]` is cut short.
///
/// Returns the text between the outer brackets (escapes left intact) and the number of
/// bytes consumed including both brackets, or `None` if `text` does not start with `[`
/// or the label is never closed.
fn parse_bracketed_label(text: &str) -> Option<(&str, usize)> {
    let inner = text.strip_prefix('[')?;
    let mut nesting = 0usize;
    let mut skip_next = false;

    for (offset, byte) in inner.bytes().enumerate() {
        if skip_next {
            skip_next = false;
            continue;
        }
        match byte {
            b'\\' => skip_next = true,
            b'[' => nesting += 1,
            // `offset` is relative to `inner`; add both brackets for the consumed length.
            b']' if nesting == 0 => return Some((&inner[..offset], offset + 2)),
            b']' => nesting -= 1,
            _ => {}
        }
    }
    None
}
1759
/// Matches `marker … marker` at the start of `text`.
///
/// The content between the two markers must be non-empty. Returns that content and the
/// total number of bytes consumed (both markers included), or `None` if `text` does not
/// start with `marker`, the closing marker is missing, or the content would be empty.
fn wrapped<'a>(text: &'a str, marker: &str) -> Option<(&'a str, usize)> {
    let delim = marker.len();
    let tail = text.strip_prefix(marker)?;
    // Need at least one content byte in addition to the closing marker.
    if tail.len() <= delim {
        return None;
    }
    match tail.find(marker) {
        None | Some(0) => None,
        Some(close) => Some((&tail[..close], close + 2 * delim)),
    }
}
1774
/// Normalises a link or footnote label for case- and whitespace-insensitive lookup.
///
/// Leading and trailing whitespace is removed, every internal run of whitespace is
/// collapsed to a single space, and the result is lower-cased with Unicode awareness
/// (the standard library's closest equivalent to the case folding CommonMark asks for).
/// As a result `[Foo   Bar]` and `[foo bar]` refer to the same definition.
fn normalize_key(text: &str) -> String {
    text.split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}
1778
/// Derives an identifier fragment from a footnote key.
///
/// ASCII letters, digits, `-` and `_` are kept; every other character is replaced by a
/// single `-`.
fn footnote_id(key: &str) -> String {
    key.chars()
        .map(|c| {
            if c.is_ascii_alphanumeric() || matches!(c, '-' | '_') {
                c
            } else {
                '-'
            }
        })
        .collect()
}
1790
/// Appends `ch` to `out`, replacing the five HTML-significant characters with entities.
///
/// `&`, `<`, `>`, `"` and `'` become `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&#39;`
/// respectively; every other character is appended unchanged. Escaping both quote
/// characters keeps the output safe inside single- and double-quoted attribute values
/// as well as in text content.
fn push_escaped_char(out: &mut String, ch: char) {
    match ch {
        '&' => out.push_str("&amp;"),
        '<' => out.push_str("&lt;"),
        '>' => out.push_str("&gt;"),
        '"' => out.push_str("&quot;"),
        '\'' => out.push_str("&#39;"),
        _ => out.push(ch),
    }
}
1801
1802fn html_escape(text: &str) -> String {
1803 let mut out = String::with_capacity(text.len());
1804 for ch in text.chars() {
1805 push_escaped_char(&mut out, ch);
1806 }
1807 out
1808}
1809
1810fn html_attr_escape(text: &str) -> String {
1811 html_escape(text)
1812}
1813
#[cfg(test)]
mod tests {
    use super::{parse, parse_with_flavor, safe_parse, safe_parse_with_flavor, MarkdownFlavor};

    /// Markdown for a Telegram badge: an image wrapped in a link.
    const TELEGRAM_BADGE_MD: &str = "[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white)](https://t.me/+Ka9i6CNwe71hMWQy)";

    /// Expected rendering of [`TELEGRAM_BADGE_MD`]; the `&` in the image URL is escaped
    /// because it is emitted inside an attribute value.
    const TELEGRAM_BADGE_HTML: &str = "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&amp;logoColor=white\" alt=\"Telegram\" /></a>";

    // --- Flavor-dependent extensions -------------------------------------------------

    #[test]
    fn renders_table_in_gfm() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse(md);
        assert!(html.contains("<table>"));
        assert!(html.contains("<thead>"));
        assert!(html.contains("<tbody>"));
    }

    #[test]
    fn does_not_render_table_in_commonmark() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(!html.contains("<table>"));
    }

    #[test]
    fn renders_strikethrough_only_in_gfm() {
        let gfm = parse_with_flavor("~~done~~", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("~~done~~", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<del>done</del>"));
        assert!(!cm.contains("<del>done</del>"));
    }

    #[test]
    fn renders_task_list_only_in_gfm() {
        let gfm = parse_with_flavor("- [x] finish", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("- [x] finish", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("type=\"checkbox\""));
        assert!(!cm.contains("type=\"checkbox\""));
    }

    #[test]
    fn renders_autolink_literal_only_in_gfm() {
        let gfm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<a href=\"https://example.com\">https://example.com</a>"));
        assert!(!cm.contains("<a href=\"https://example.com\">https://example.com</a>"));
    }

    #[test]
    fn renders_footnotes_only_in_gfm() {
        let md = "note[^1]\n\n[^1]: footnote";
        let gfm = parse_with_flavor(md, MarkdownFlavor::Gfm);
        let cm = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(gfm.contains("footnote-ref"));
        assert!(gfm.contains("footnotes"));
        assert!(!cm.contains("footnote-ref"));
    }

    #[test]
    fn renders_gfm_literal_www_and_email_autolinks() {
        let html = parse_with_flavor(
            "visit www.example.com or me@example.com",
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("href=\"http://www.example.com\""));
        assert!(html.contains("href=\"mailto:me@example.com\""));
    }

    #[test]
    fn supports_table_alignment_in_gfm() {
        let md = "| a | b | c |\n| :-- | :-: | --: |\n| 1 | 2 | 3 |";
        let html = parse(md);
        assert!(html.contains("<th align=\"left\">a</th>"));
        assert!(html.contains("<th align=\"center\">b</th>"));
        assert!(html.contains("<th align=\"right\">c</th>"));
    }

    // --- Core block and inline constructs --------------------------------------------

    #[test]
    fn renders_reference_links() {
        let md = "[Rust]\n\n[Rust]: https://www.rust-lang.org/";
        let html = parse(md);
        assert!(html.contains("<a href=\"https://www.rust-lang.org/\">Rust</a>"));
    }

    #[test]
    fn renders_ordered_list_with_start() {
        let html = parse("3. three\n4. four");
        assert!(html.contains("<ol start=\"3\">"));
        assert!(html.contains("<li>three</li>"));
    }

    #[test]
    fn renders_nested_list() {
        // The child item is indented by two spaces so that it nests under `parent`.
        let html = parse("- parent\n  - child\n- next");
        assert!(html.matches("<ul>").count() >= 2);
        assert!(html.contains("child"));
    }

    #[test]
    fn parses_link_with_title_and_parentheses() {
        let html = parse("[x](https://example.com/a_(b) \"title\")");
        assert!(html.contains("href=\"https://example.com/a_(b)\""));
    }

    #[test]
    fn renders_hard_line_breaks() {
        // Two trailing spaces before the newline, and a trailing backslash.
        let html_spaces = parse("a  \nb");
        let html_backslash = parse("a\\\nb");
        assert!(html_spaces.contains("a<br />\nb"));
        assert!(html_backslash.contains("a<br />\nb"));
    }

    #[test]
    fn supports_setext_heading_and_blockquote() {
        let html = parse("Title\n---\n\n> quote");
        assert!(html.contains("<h2>Title</h2>"));
        assert!(html.contains("<blockquote>"));
    }

    #[test]
    fn renders_link_wrapped_image_badge() {
        let html = parse(TELEGRAM_BADGE_MD);
        assert!(html.contains(TELEGRAM_BADGE_HTML));
    }

    #[test]
    fn renders_discord_and_telegram_badges_together() {
        // The two badges are separated by U+2800 (braille blank), as commonly used in
        // README badge rows.
        let md = format!(
            "![Discord](https://discord.gg/2xrMh7qX6m)\u{2800}{TELEGRAM_BADGE_MD}"
        );
        let html = parse(&md);
        assert!(html.contains("<img src=\"https://discord.gg/2xrMh7qX6m\" alt=\"Discord\" />"));
        assert!(html.contains(TELEGRAM_BADGE_HTML));
    }

    // --- Raw HTML handling -----------------------------------------------------------

    #[test]
    fn parse_preserves_inline_html_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<span>y</span>"));
        assert!(gfm.contains("<span>y</span>"));
    }

    #[test]
    fn parse_preserves_html_block_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<div>"));
        assert!(cm.contains("</div>"));
        assert!(gfm.contains("<div>"));
        assert!(gfm.contains("</div>"));
    }

    #[test]
    fn blocks_script_in_safe_parse() {
        let md = "<script>alert(1)</script>";
        assert!(safe_parse(md).is_err());
    }

    #[test]
    fn safe_parse_flavor_works() {
        let html = safe_parse_with_flavor("~~x~~", MarkdownFlavor::CommonMark).unwrap();
        assert!(!html.contains("<del>x</del>"));
    }

    #[test]
    fn safe_parse_rejects_inline_html() {
        let cm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    #[test]
    fn safe_parse_rejects_html_block() {
        let cm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    #[test]
    fn safe_parse_blocks_script_variants() {
        assert!(safe_parse("<script>alert(1)</script>").is_err());
        assert!(safe_parse("<ScRiPt src=x></ScRiPt>").is_err());
        assert!(safe_parse("< / script >").is_err());
        assert!(safe_parse("< script>").is_err());
    }

    // --- Mermaid charts --------------------------------------------------------------

    #[test]
    fn renders_mermaid_chart_in_gfm() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::Gfm);
        // NOTE(review): accepts the arrow either escaped or raw inside the chart block;
        // tighten to the form the renderer actually emits once confirmed.
        assert!(
            html.contains("<pre class=\"mermaid\">flowchart TD\nA--&gt;B</pre>")
                || html.contains("<pre class=\"mermaid\">flowchart TD\nA-->B</pre>")
        );
    }

    #[test]
    fn keeps_mermaid_as_code_in_commonmark() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        // Code block content is HTML-escaped, so the arrow's `>` appears as an entity.
        assert!(html.contains(
            "<pre><code class=\"language-mermaid\">flowchart TD\nA--&gt;B</code></pre>"
        ));
    }

    #[test]
    fn appends_mermaid_runtime_for_gfm_file_output() {
        let html = super::with_chart_runtime_if_needed(
            "<pre class=\"mermaid\">graph TD\nA-->B</pre>\n".to_string(),
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("mermaid.min.js"));
        assert!(html.contains("mermaid.initialize({ startOnLoad: true })"));
    }

    #[test]
    fn does_not_append_mermaid_runtime_for_commonmark() {
        let html = super::with_chart_runtime_if_needed(
            "<pre><code class=\"language-mermaid\">graph TD\nA-->B</code></pre>\n".to_string(),
            MarkdownFlavor::CommonMark,
        );
        assert!(!html.contains("mermaid.min.js"));
    }
}