1use std::collections::{HashMap, HashSet};
2use std::error::Error;
3use std::fmt;
4use std::fs;
5
/// Markdown dialect that decides which syntax extensions the parser enables.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MarkdownFlavor {
    /// Core syntax only: every extension flag of `ParserConfig` is switched off.
    CommonMark,
    /// Enables tables, task lists, strikethrough, autolink literals, footnotes
    /// and charts on top of the core syntax.
    Gfm,
}
11
/// Error produced by the `safe_*` entry points when the input contains raw HTML.
#[derive(Debug)]
struct MarkdownSecurityError;

impl fmt::Display for MarkdownSecurityError {
    /// Writes the fixed rejection message; the error carries no further detail.
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("raw html tag is not allowed in safe_parse")
    }
}

// The default `Error` methods are sufficient: there is no underlying source.
impl Error for MarkdownSecurityError {}
22
/// Placeholder emitted in place of raw HTML when `omit_raw_html` is set.
/// `safe_parse_with_flavor` looks for it in the rendered output to reject the input.
const RAW_HTML_OMITTED_MARKER: &str = "<!-- raw HTML omitted -->";
/// Script tags appended to file output that contains a mermaid chart block
/// (see `with_chart_runtime_if_needed`).
const MERMAID_BOOTSTRAP: &str = "<script src=\"https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js\"></script>\n<script>if (typeof mermaid !== \"undefined\") { mermaid.initialize({ startOnLoad: true }); }</script>\n";
25
/// Feature switches consulted by the parser while rendering.
#[derive(Debug, Clone, Copy)]
struct ParserConfig {
    /// Replace raw HTML (block and inline) with `RAW_HTML_OMITTED_MARKER`.
    omit_raw_html: bool,
    /// Recognize pipe tables.
    enable_tables: bool,
    /// Render `[ ]` / `[x]` at the start of unordered list items as checkboxes.
    enable_task_list: bool,
    /// Render `~~text~~` as `<del>`.
    enable_strikethrough: bool,
    /// Turn bare URLs into links.
    enable_autolink_literals: bool,
    /// Recognize footnote definitions and `[^id]` references.
    enable_footnotes: bool,
    /// Render fenced blocks tagged `mermaid` as `<pre class="mermaid">`.
    enable_charts: bool,
}
36
37impl ParserConfig {
38 fn from_flavor(flavor: MarkdownFlavor) -> Self {
39 match flavor {
40 MarkdownFlavor::CommonMark => Self {
41 omit_raw_html: false,
42 enable_tables: false,
43 enable_task_list: false,
44 enable_strikethrough: false,
45 enable_autolink_literals: false,
46 enable_footnotes: false,
47 enable_charts: false,
48 },
49 MarkdownFlavor::Gfm => Self {
50 omit_raw_html: false,
51 enable_tables: true,
52 enable_task_list: true,
53 enable_strikethrough: true,
54 enable_autolink_literals: true,
55 enable_footnotes: true,
56 enable_charts: true,
57 },
58 }
59 }
60
61 fn with_raw_html_omitted(mut self) -> Self {
62 self.omit_raw_html = true;
63 self
64 }
65}
66
/// Definitions gathered by `collect_definitions` before block parsing starts.
#[derive(Default, Clone)]
struct DefinitionStore {
    /// Link reference definitions: normalized label -> URL.
    links: HashMap<String, String>,
    /// Footnote definitions: normalized id -> footnote text.
    footnotes: HashMap<String, String>,
    /// Indices of the lines that were consumed as definitions and must not be rendered.
    skip_lines: HashSet<usize>,
}
73
/// Block/inline parser working over the lines of one (sub)document.
struct Parser<'a> {
    /// Lines of the document, newline-normalized by the caller.
    lines: Vec<&'a str>,
    /// Link and footnote definitions available to this parser.
    defs: DefinitionStore,
    /// Footnote ids in order of first reference; the position (plus one) is the displayed number.
    footnote_order: Vec<String>,
    /// Feature switches in effect.
    config: ParserConfig,
}
80
/// Renders `input` to HTML using the GFM flavor.
///
/// Convenience wrapper around `parse_with_flavor`.
pub fn parse(input: &str) -> String {
    parse_with_flavor(input, MarkdownFlavor::Gfm)
}
85
/// Renders `input` to HTML with the extensions selected by `flavor`.
///
/// Raw HTML in the input is passed through to the output unchanged.
pub fn parse_with_flavor(input: &str, flavor: MarkdownFlavor) -> String {
    parse_internal(input, ParserConfig::from_flavor(flavor))
}
90
/// Renders `input` to HTML using the GFM flavor, rejecting raw HTML.
///
/// # Errors
///
/// Returns the error produced by `safe_parse_with_flavor`.
pub fn safe_parse(input: &str) -> Result<String, Box<dyn Error>> {
    safe_parse_with_flavor(input, MarkdownFlavor::Gfm)
}
94
95pub fn safe_parse_with_flavor(
96 input: &str,
97 flavor: MarkdownFlavor,
98) -> Result<String, Box<dyn Error>> {
99 reject_script_tag(input)?;
100 let rendered = parse_internal(
101 input,
102 ParserConfig::from_flavor(flavor).with_raw_html_omitted(),
103 );
104 if rendered.contains(RAW_HTML_OMITTED_MARKER) {
105 return Err(Box::new(MarkdownSecurityError));
106 }
107 Ok(rendered)
108}
109
/// Reads Markdown from `path`, renders it with the GFM flavor and writes the HTML to `output_path`.
///
/// # Errors
///
/// Returns the error produced by `parse_from_file_with_flavor`.
pub fn parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
    parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
}
113
114pub fn parse_from_file_with_flavor(
115 path: &str,
116 output_path: &str,
117 flavor: MarkdownFlavor,
118) -> Result<(), Box<dyn Error>> {
119 let content = fs::read_to_string(path)?;
120 let rendered = parse_with_flavor(&content, flavor);
121 let rendered = with_chart_runtime_if_needed(rendered, flavor);
122 fs::write(output_path, rendered)?;
123 Ok(())
124}
125
/// Reads Markdown from `path`, renders it with the GFM flavor while rejecting
/// raw HTML, and writes the HTML to `output_path`.
///
/// # Errors
///
/// Returns the error produced by `safe_parse_from_file_with_flavor`.
pub fn safe_parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
    safe_parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
}
129
130pub fn safe_parse_from_file_with_flavor(
131 path: &str,
132 output_path: &str,
133 flavor: MarkdownFlavor,
134) -> Result<(), Box<dyn Error>> {
135 let content = fs::read_to_string(path)?;
136 let rendered = safe_parse_with_flavor(&content, flavor)?;
137 fs::write(output_path, rendered)?;
138 Ok(())
139}
140
141fn parse_internal(input: &str, config: ParserConfig) -> String {
142 let normalized = normalize_newlines(input);
143 let lines: Vec<&str> = normalized.lines().collect();
144 let defs = collect_definitions(&lines, config);
145 let mut parser = Parser {
146 lines,
147 defs,
148 footnote_order: Vec::new(),
149 config,
150 };
151 parser.parse_blocks()
152}
153
154fn with_chart_runtime_if_needed(mut rendered: String, flavor: MarkdownFlavor) -> String {
155 if flavor == MarkdownFlavor::Gfm
156 && rendered.contains("<pre class=\"mermaid\">")
157 && !rendered.contains("mermaid.initialize(")
158 {
159 rendered.push('\n');
160 rendered.push_str(MERMAID_BOOTSTRAP);
161 }
162 rendered
163}
164
165fn reject_script_tag(input: &str) -> Result<(), Box<dyn Error>> {
166 if contains_script_tag(input) {
167 return Err(Box::new(MarkdownSecurityError));
168 }
169 Ok(())
170}
171
/// Reports whether `input` contains an opening or closing `script` tag.
///
/// The check is deliberately lenient: ASCII whitespace is allowed after `<`
/// and around an optional `/`, the tag name is matched case-insensitively, and
/// the name must be followed by whitespace, `>`, `/` or the end of the input.
///
/// The scan works on bytes rather than string slices, so non-ASCII text after a
/// `<` can never cause a slice to land inside a multi-byte character.
fn contains_script_tag(input: &str) -> bool {
    const TAG_NAME: &[u8] = b"script";

    let bytes = input.as_bytes();
    let skip_whitespace = |mut idx: usize| {
        while idx < bytes.len() && bytes[idx].is_ascii_whitespace() {
            idx += 1;
        }
        idx
    };

    for (open, &byte) in bytes.iter().enumerate() {
        if byte != b'<' {
            continue;
        }

        let mut name_start = skip_whitespace(open + 1);
        if bytes.get(name_start) == Some(&b'/') {
            name_start = skip_whitespace(name_start + 1);
        }

        let name_end = name_start + TAG_NAME.len();
        // Too close to the end of the input to hold the tag name.
        let Some(candidate) = bytes.get(name_start..name_end) else {
            continue;
        };
        if !candidate.eq_ignore_ascii_case(TAG_NAME) {
            continue;
        }

        // Reject longer names such as `<scripted>`; end of input counts as a match.
        let terminated = match bytes.get(name_end) {
            None => true,
            Some(&next) => next.is_ascii_whitespace() || next == b'>' || next == b'/',
        };
        if terminated {
            return true;
        }
    }
    false
}
206
207impl<'a> Parser<'a> {
    /// Renders every block in `self.lines` to HTML, from the first line to the last.
    ///
    /// On each iteration the block kinds are tried in a fixed precedence order
    /// and the first one that matches consumes one or more lines; anything that
    /// matches nothing is rendered as a paragraph. When footnotes are enabled and
    /// at least one reference was recorded, a footnotes section is appended.
    fn parse_blocks(&mut self) -> String {
        let mut pos = 0usize;
        let mut out = String::new();

        while pos < self.lines.len() {
            // Definition lines and blank lines produce no output of their own.
            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
                pos += 1;
                continue;
            }

            // Setext headings come first because their underline could otherwise
            // be taken for a thematic break.
            if let Some((level, text, next)) = parse_setext_heading(&self.lines, pos) {
                // Copy the text so the borrow of `self.lines` ends before `parse_inlines`.
                let heading_text = text.trim().to_string();
                out.push_str(&format!(
                    "<h{level}>{}</h{level}>\n",
                    self.parse_inlines(&heading_text)
                ));
                pos = next;
                continue;
            }

            if is_thematic_break(self.lines[pos]) {
                out.push_str("<hr />\n");
                pos += 1;
                continue;
            }

            if let Some((level, text)) = parse_atx_heading(self.lines[pos]) {
                out.push_str(&format!(
                    "<h{level}>{}</h{level}>\n",
                    self.parse_inlines(text.trim())
                ));
                pos += 1;
                continue;
            }

            if is_fence_start(self.lines[pos]) {
                let (html, next) = self.parse_fenced_code(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_indented_code_line(self.lines[pos]) {
                let (html, next) = self.parse_indented_code(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_blockquote_line(self.lines[pos]) {
                let (html, next) = self.parse_blockquote(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if is_html_line(self.lines[pos]) {
                let (html, next) = self.parse_html_block(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if self.config.enable_tables && is_table_header(&self.lines, pos) {
                let (html, next) = self.parse_table(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            if parse_list_prefix(self.lines[pos]).is_some() {
                let (html, next) = self.parse_list(pos);
                out.push_str(&html);
                pos = next;
                continue;
            }

            // Fallback: everything else is paragraph text.
            let (html, next) = self.parse_paragraph(pos);
            out.push_str(&html);
            pos = next;
        }

        if self.config.enable_footnotes && !self.footnote_order.is_empty() {
            out.push_str(&self.render_footnotes());
        }

        out
    }
296
297 fn parse_subdocument(&mut self, markdown: &str) -> String {
298 let normalized = normalize_newlines(markdown);
299 let lines: Vec<&str> = normalized.lines().collect();
300 let mut nested = Parser {
301 lines,
302 defs: self.defs.clone(),
303 footnote_order: Vec::new(),
304 config: self.config,
305 };
306 let html = nested.parse_blocks();
307 for id in nested.footnote_order {
308 self.note_footnote(id);
309 }
310 html
311 }
312
313 fn parse_fenced_code(&self, start: usize) -> (String, usize) {
314 let first = self.lines[start].trim_start();
315 let fence_char = first.chars().next().unwrap_or('`');
316 let fence_len = first.chars().take_while(|c| *c == fence_char).count();
317 let info = first[fence_len..].trim();
318 let mut pos = start + 1;
319 let mut code_lines = Vec::new();
320
321 while pos < self.lines.len() {
322 let line = self.lines[pos].trim_start();
323 if is_fence_closing_line(line, fence_char, fence_len) {
324 pos += 1;
325 break;
326 }
327 code_lines.push(self.lines[pos]);
328 pos += 1;
329 }
330
331 let code_raw = code_lines.join("\n");
332 let code = html_escape(&code_raw);
333 let lang = info.split_whitespace().next().unwrap_or("");
334 let is_mermaid = self.config.enable_charts && lang.eq_ignore_ascii_case("mermaid");
335
336 let html = if is_mermaid {
337 format!("<pre class=\"mermaid\">{}</pre>\n", code)
338 } else if info.is_empty() {
339 format!("<pre><code>{}</code></pre>\n", code)
340 } else {
341 format!(
342 "<pre><code class=\"language-{}\">{}</code></pre>\n",
343 html_attr_escape(lang),
344 code
345 )
346 };
347 (html, pos)
348 }
349
350 fn parse_indented_code(&self, start: usize) -> (String, usize) {
351 let mut pos = start;
352 let mut code_lines = Vec::new();
353
354 while pos < self.lines.len() {
355 let line = self.lines[pos];
356 if line.trim().is_empty() {
357 code_lines.push("");
358 pos += 1;
359 continue;
360 }
361
362 if let Some(stripped) = strip_indented_code_prefix(line) {
363 code_lines.push(stripped);
364 pos += 1;
365 } else {
366 break;
367 }
368 }
369
370 let code = html_escape(&code_lines.join("\n"));
371 (format!("<pre><code>{}</code></pre>\n", code), pos)
372 }
373
374 fn parse_blockquote(&mut self, start: usize) -> (String, usize) {
375 let mut pos = start;
376 let mut parts = Vec::new();
377
378 while pos < self.lines.len() {
379 let line = self.lines[pos];
380 if line.trim().is_empty() {
381 parts.push(String::new());
382 pos += 1;
383 continue;
384 }
385 if !is_blockquote_line(line) {
386 break;
387 }
388 parts.push(strip_blockquote_prefix(line).to_string());
389 pos += 1;
390 }
391
392 let body = parts.join("\n");
393 let inner = self.parse_subdocument(&body);
394 (format!("<blockquote>\n{}</blockquote>\n", inner), pos)
395 }
396
397 fn parse_html_block(&self, start: usize) -> (String, usize) {
398 if !self.config.omit_raw_html {
399 let mut pos = start;
400 while pos < self.lines.len() {
401 if self.lines[pos].trim().is_empty() {
402 break;
403 }
404 pos += 1;
405 }
406 let raw = self.lines[start..pos].join("\n");
407 return (format!("{raw}\n"), pos);
408 }
409
410 let mut pos = start;
411 while pos < self.lines.len() {
412 if self.lines[pos].trim().is_empty() {
413 break;
414 }
415 pos += 1;
416 }
417 (format!("{RAW_HTML_OMITTED_MARKER}\n"), pos)
418 }
419
420 fn parse_table(&mut self, start: usize) -> (String, usize) {
421 let headers = split_table_row(self.lines[start]);
422 let aligns = parse_table_alignments(self.lines[start + 1]);
423 let mut pos = start + 2;
424 let mut rows: Vec<Vec<String>> = Vec::new();
425
426 while pos < self.lines.len() {
427 if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
428 break;
429 }
430 if !self.lines[pos].contains('|') {
431 break;
432 }
433 rows.push(split_table_row(self.lines[pos]));
434 pos += 1;
435 }
436
437 let mut out = String::new();
438 out.push_str("<table>\n<thead>\n<tr>");
439 for (idx, cell) in headers.into_iter().enumerate() {
440 push_table_cell_open(&mut out, "th", aligns.get(idx).copied().flatten());
441 out.push_str(&self.parse_inlines(cell.trim()));
442 out.push_str("</th>");
443 }
444 out.push_str("</tr>\n</thead>\n<tbody>\n");
445
446 for row in rows {
447 out.push_str("<tr>");
448 for (idx, cell) in row.into_iter().enumerate() {
449 push_table_cell_open(&mut out, "td", aligns.get(idx).copied().flatten());
450 out.push_str(&self.parse_inlines(cell.trim()));
451 out.push_str("</td>");
452 }
453 out.push_str("</tr>\n");
454 }
455
456 out.push_str("</tbody>\n</table>\n");
457 (out, pos)
458 }
459
    /// Renders the list whose first item is at line `start`.
    ///
    /// The kind (ordered/unordered) and indentation of the first item define the
    /// list: it continues while subsequent items have the same kind and the same
    /// indentation. Each item's lines are collected, dedented and rendered as a
    /// nested document. Returns the HTML and the index of the first line after
    /// the list.
    fn parse_list(&mut self, start: usize) -> (String, usize) {
        // Falls back to an unordered list at indent 0 if `start` is not a list line.
        let (first_kind, _, base_indent) = parse_list_prefix_with_indent(self.lines[start])
            .unwrap_or((ListKind::Unordered, "", 0));
        let mut pos = start;
        let mut out = String::new();

        match first_kind {
            ListKind::Unordered => out.push_str("<ul>\n"),
            ListKind::Ordered(start_num) => {
                // The `start` attribute is only emitted when numbering does not begin at 1.
                if start_num != 1 {
                    out.push_str(&format!("<ol start=\"{start_num}\">\n"));
                } else {
                    out.push_str("<ol>\n");
                }
            }
        }

        // One iteration per list item.
        while pos < self.lines.len() {
            if self.is_skipped(pos) {
                break;
            }

            let Some((kind, item_line, indent)) = parse_list_prefix_with_indent(self.lines[pos])
            else {
                break;
            };
            // A different indent or list kind ends this list.
            if indent != base_indent || !same_kind_value(kind, first_kind) {
                break;
            }

            let mut item_parts = vec![item_line.to_string()];
            pos += 1;
            // Set as soon as the item contains a blank line; loose items keep their <p> wrapper.
            let mut loose = false;

            // Collect the continuation lines belonging to this item.
            while pos < self.lines.len() {
                if self.is_skipped(pos) {
                    break;
                }

                let line = self.lines[pos];
                if line.trim().is_empty() {
                    loose = true;
                    item_parts.push(String::new());
                    pos += 1;
                    continue;
                }

                if let Some((next_kind, _, next_indent)) = parse_list_prefix_with_indent(line) {
                    // Sibling item of this list: handled by the outer loop.
                    if next_indent == base_indent && same_kind_value(next_kind, first_kind) {
                        break;
                    }
                    // Item of another kind at the same or a shallower level: a different list.
                    if next_indent <= base_indent && !same_kind_value(next_kind, first_kind) {
                        break;
                    }
                }

                // A new block that is not indented past the list marker ends the item.
                if leading_indent(line) <= base_indent
                    && is_block_start(&self.lines, pos, self.config)
                {
                    break;
                }

                item_parts.push(dedent_list_continuation(line, base_indent).to_string());
                pos += 1;
            }

            out.push_str("<li>");

            // Task-list checkboxes are only recognized in unordered lists.
            let mut checkbox: Option<bool> = None;
            if self.config.enable_task_list && matches!(first_kind, ListKind::Unordered) {
                if let Some((checked, rest)) = parse_task_item(&item_parts[0]) {
                    checkbox = Some(checked);
                    item_parts[0] = rest.to_string();
                }
            }

            if let Some(checked) = checkbox {
                if checked {
                    out.push_str("<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
                } else {
                    out.push_str("<input type=\"checkbox\" disabled=\"\" /> ");
                }
            }

            let item_markdown = item_parts.join("\n");
            let rendered = self.parse_subdocument(&item_markdown);
            if !loose {
                // Tight items consisting of a single paragraph drop the <p> wrapper.
                if let Some(stripped) = strip_single_paragraph_wrapper(&rendered) {
                    out.push_str(stripped);
                } else {
                    out.push_str(&rendered);
                }
            } else {
                out.push_str(&rendered);
            }
            out.push_str("</li>\n");
        }

        match first_kind {
            ListKind::Unordered => out.push_str("</ul>\n"),
            ListKind::Ordered(_) => out.push_str("</ol>\n"),
        }

        (out, pos)
    }
565
566 fn parse_paragraph(&mut self, start: usize) -> (String, usize) {
567 let mut pos = start;
568 let mut parts = Vec::new();
569
570 while pos < self.lines.len() {
571 if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
572 break;
573 }
574 if pos != start && is_block_start(&self.lines, pos, self.config) {
575 break;
576 }
577 parts.push(self.lines[pos]);
578 pos += 1;
579 }
580
581 let text = parts.join("\n");
582 (format!("<p>{}</p>\n", self.parse_inlines(&text)), pos)
583 }
584
    /// Renders inline Markdown in `text` to HTML.
    ///
    /// The text is scanned left to right. At each position the constructs below
    /// are tried in order and the first one that matches is emitted and skipped
    /// over; a character that starts no construct is emitted through
    /// `push_escaped_char`. Emphasis-like constructs recurse into their content.
    fn parse_inlines(&mut self, text: &str) -> String {
        let mut out = String::new();
        // Byte offset of the scan position within `text`.
        let mut i = 0usize;

        while i < text.len() {
            let rest = &text[i..];

            // Backslash immediately before a newline: hard line break.
            if rest.starts_with("\\\n") {
                out.push_str("<br />\n");
                i += 2;
                continue;
            }

            if rest.starts_with('\n') {
                // Look backwards from the newline to decide between a hard and a soft break.
                match detect_hard_break(text, i) {
                    HardBreak::Spaces => {
                        trim_trailing_spaces(&mut out);
                        out.push_str("<br />\n");
                    }
                    HardBreak::Backslash => {
                        // Drop the backslash that was already emitted as text.
                        if out.ends_with('\\') {
                            out.pop();
                        }
                        out.push_str("<br />\n");
                    }
                    HardBreak::None => out.push('\n'),
                }
                i += 1;
                continue;
            }

            if let Some((ch, consumed)) = parse_escaped_char(rest) {
                push_escaped_char(&mut out, ch);
                i += consumed;
                continue;
            }

            if rest.starts_with('`') {
                if let Some((content, consumed)) = parse_code_span(rest) {
                    out.push_str("<code>");
                    out.push_str(&html_escape(content));
                    out.push_str("</code>");
                    i += consumed;
                    continue;
                }
            }

            // Footnote reference; only ids with a known definition are turned into links.
            if self.config.enable_footnotes && rest.starts_with("[^") {
                if let Some(end) = rest.find(']') {
                    let raw_id = &rest[2..end];
                    let key = normalize_key(raw_id);
                    if self.defs.footnotes.contains_key(&key) {
                        let index = self.note_footnote(key.clone());
                        let safe = footnote_id(&key);
                        out.push_str(&format!(
                            "<sup class=\"footnote-ref\"><a href=\"#fn-{safe}\" id=\"fnref-{safe}\">{index}</a></sup>"
                        ));
                        i += end + 1;
                        continue;
                    }
                }
            }

            if rest.starts_with("![") {
                if let Some((html, consumed)) = self.parse_image(rest) {
                    out.push_str(&html);
                    i += consumed;
                    continue;
                }
            }

            if rest.starts_with('[') {
                if let Some((html, consumed)) = self.parse_link_like(rest) {
                    out.push_str(&html);
                    i += consumed;
                    continue;
                }
            }

            if let Some((html, consumed)) = parse_angle_autolink(rest) {
                out.push_str(&html);
                i += consumed;
                continue;
            }

            // Inline raw HTML is passed through or replaced by the omission marker.
            if let Some((raw, consumed)) = parse_inline_html(rest) {
                if !self.config.omit_raw_html {
                    out.push_str(raw);
                } else {
                    out.push_str(RAW_HTML_OMITTED_MARKER);
                }
                i += consumed;
                continue;
            }

            if self.config.enable_autolink_literals {
                if let Some((href, text_value, consumed)) = parse_autolink_literal(rest) {
                    let href_escaped = html_escape(&href);
                    let text_escaped = html_escape(&text_value);
                    out.push_str(&format!("<a href=\"{href_escaped}\">{text_escaped}</a>"));
                    i += consumed;
                    continue;
                }
            }

            // Two-character delimiters are tried before their one-character
            // counterparts so `**` is not read as two `*`.
            if let Some((content, consumed)) = wrapped(rest, "**") {
                out.push_str("<strong>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</strong>");
                i += consumed;
                continue;
            }

            if let Some((content, consumed)) = wrapped(rest, "__") {
                out.push_str("<strong>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</strong>");
                i += consumed;
                continue;
            }

            if self.config.enable_strikethrough {
                if let Some((content, consumed)) = wrapped(rest, "~~") {
                    out.push_str("<del>");
                    out.push_str(&self.parse_inlines(content));
                    out.push_str("</del>");
                    i += consumed;
                    continue;
                }
            }

            if let Some((content, consumed)) = wrapped(rest, "*") {
                out.push_str("<em>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</em>");
                i += consumed;
                continue;
            }

            if let Some((content, consumed)) = wrapped(rest, "_") {
                out.push_str("<em>");
                out.push_str(&self.parse_inlines(content));
                out.push_str("</em>");
                i += consumed;
                continue;
            }

            // Plain character: advance by its full UTF-8 length.
            if let Some(ch) = rest.chars().next() {
                push_escaped_char(&mut out, ch);
                i += ch.len_utf8();
            } else {
                break;
            }
        }

        out
    }
742
743 fn parse_image(&mut self, rest: &str) -> Option<(String, usize)> {
744 let (alt, consumed_label) = parse_bracketed_label(&rest[1..])?;
745 let after = &rest[1 + consumed_label..];
746
747 let (url, consumed_after) = parse_inline_link_target(after)?;
748 let html = format!(
749 "<img src=\"{}\" alt=\"{}\" />",
750 html_attr_escape(&url),
751 html_attr_escape(alt)
752 );
753 Some((html, 1 + consumed_label + consumed_after))
754 }
755
756 fn parse_link_like(&mut self, rest: &str) -> Option<(String, usize)> {
757 let (label, consumed_label) = parse_bracketed_label(rest)?;
758 let after = &rest[consumed_label..];
759
760 if let Some((url, consumed_after)) = parse_inline_link_target(after) {
761 let html = format!(
762 "<a href=\"{}\">{}</a>",
763 html_attr_escape(&url),
764 self.parse_inlines(label)
765 );
766 return Some((html, consumed_label + consumed_after));
767 }
768
769 if after.starts_with('[') {
770 let (raw_ref, consumed_ref) = parse_bracketed_label(after)?;
771 let key = if raw_ref.trim().is_empty() {
772 normalize_key(label)
773 } else {
774 normalize_key(raw_ref)
775 };
776 if let Some(url) = self.defs.links.get(&key) {
777 let html = format!(
778 "<a href=\"{}\">{}</a>",
779 html_attr_escape(url),
780 self.parse_inlines(label)
781 );
782 return Some((html, consumed_label + consumed_ref));
783 }
784 }
785
786 let key = normalize_key(label);
787 if let Some(url) = self.defs.links.get(&key) {
788 let html = format!(
789 "<a href=\"{}\">{}</a>",
790 html_attr_escape(url),
791 self.parse_inlines(label)
792 );
793 return Some((html, consumed_label));
794 }
795
796 None
797 }
798
799 fn note_footnote(&mut self, id: String) -> usize {
800 if let Some(idx) = self.footnote_order.iter().position(|x| x == &id) {
801 idx + 1
802 } else {
803 self.footnote_order.push(id);
804 self.footnote_order.len()
805 }
806 }
807
808 fn render_footnotes(&mut self) -> String {
809 let mut out = String::new();
810 out.push_str("<section class=\"footnotes\">\n<ol>\n");
811
812 let footnote_ids = self.footnote_order.clone();
813 for id in footnote_ids {
814 let safe = footnote_id(&id);
815 let text = self.defs.footnotes.get(&id).cloned().unwrap_or_default();
816 out.push_str(&format!(
817 "<li id=\"fn-{safe}\">{} <a href=\"#fnref-{safe}\" class=\"footnote-backref\">↩</a></li>\n",
818 self.parse_inlines(text.trim())
819 ));
820 }
821
822 out.push_str("</ol>\n</section>\n");
823 out
824 }
825
    /// Returns `true` when line index `line` is listed in `defs.skip_lines`,
    /// i.e. it was consumed as a definition and must not be rendered.
    fn is_skipped(&self, line: usize) -> bool {
        self.defs.skip_lines.contains(&line)
    }
829}
830
/// Kind of list a list-item marker belongs to.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ListKind {
    /// Bullet list item (`-`, `*` or `+` marker).
    Unordered,
    /// Numbered list item; the payload is the number written in the marker.
    Ordered(usize),
}
836
/// Converts every `\r\n` pair and every lone `\r` in `input` to `\n`.
fn normalize_newlines(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '\r' {
            normalized.push(ch);
            continue;
        }
        // A `\r\n` pair collapses into a single newline.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }
    normalized
}
840
841fn collect_definitions(lines: &[&str], config: ParserConfig) -> DefinitionStore {
842 let mut defs = DefinitionStore::default();
843 let mut i = 0usize;
844
845 while i < lines.len() {
846 let line = lines[i].trim();
847
848 if let Some((id, url)) = parse_link_definition(line) {
849 defs.links.insert(normalize_key(id), url.to_string());
850 defs.skip_lines.insert(i);
851 i += 1;
852 continue;
853 }
854
855 if config.enable_footnotes {
856 if let Some((id, first_text)) = parse_footnote_definition(line) {
857 let mut text_parts = vec![first_text.to_string()];
858 defs.skip_lines.insert(i);
859 i += 1;
860
861 while i < lines.len() {
862 let next = lines[i];
863 if next.starts_with(" ") || next.starts_with('\t') {
864 text_parts.push(next.trim().to_string());
865 defs.skip_lines.insert(i);
866 i += 1;
867 } else {
868 break;
869 }
870 }
871
872 defs.footnotes
873 .insert(normalize_key(id), text_parts.join(" "));
874 continue;
875 }
876 }
877
878 i += 1;
879 }
880
881 defs
882}
883
/// Parses an ATX heading (`# Title`) from `line`.
///
/// Returns the heading level (1-6) and the heading text with surrounding
/// whitespace removed, or `None` when the line is not a heading.
///
/// Following CommonMark, the opening `#` run must be followed by a space or
/// tab, so `#hashtag` is ordinary text. A closing `#` run is removed only when
/// it is preceded by whitespace, so `# C#` keeps its trailing `#`. A heading
/// with nothing after the opening run is not recognized.
fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
    let trimmed = line.trim_start();
    let level = trimmed.bytes().take_while(|byte| *byte == b'#').count();
    if !(1..=6).contains(&level) {
        return None;
    }

    let after_marker = &trimmed[level..];
    if !after_marker.starts_with([' ', '\t']) {
        return None;
    }

    let content = after_marker.trim();
    if content.is_empty() {
        return None;
    }

    // Strip an optional closing sequence such as the trailing `##` in `## Title ##`.
    let without_closing = content.trim_end_matches('#');
    let text = if without_closing.is_empty() || without_closing.ends_with([' ', '\t']) {
        without_closing.trim_end()
    } else {
        content
    };
    Some((level, text))
}
903
904fn parse_setext_heading<'a>(lines: &'a [&str], pos: usize) -> Option<(usize, &'a str, usize)> {
905 if pos + 1 >= lines.len() {
906 return None;
907 }
908 if lines[pos].trim().is_empty() {
909 return None;
910 }
911 if !can_be_setext_content_line(lines[pos]) {
912 return None;
913 }
914
915 let underline = lines[pos + 1].trim();
916 if is_setext_underline(underline, '=') {
917 return Some((1, lines[pos], pos + 2));
918 }
919 if is_setext_underline(underline, '-') {
920 return Some((2, lines[pos], pos + 2));
921 }
922 None
923}
924
925fn can_be_setext_content_line(line: &str) -> bool {
926 !line.trim().is_empty()
927 && !is_thematic_break(line)
928 && parse_atx_heading(line).is_none()
929 && !is_fence_start(line)
930 && !is_indented_code_line(line)
931 && !is_blockquote_line(line)
932 && !is_html_line(line)
933 && parse_list_prefix(line).is_none()
934}
935
/// Returns `true` when `line`, ignoring surrounding whitespace, consists solely
/// of `marker` characters and is at least three bytes long.
fn is_setext_underline(line: &str, marker: char) -> bool {
    let candidate = line.trim();
    candidate.len() >= 3 && candidate.chars().all(|ch| ch == marker)
}
940
/// Returns `true` when `line` is a thematic break: ignoring all whitespace, it
/// is three or more of the same character out of `-`, `*` and `_`.
fn is_thematic_break(line: &str) -> bool {
    let mut symbols = line.chars().filter(|ch| !ch.is_whitespace());

    let Some(first) = symbols.next() else {
        return false;
    };
    if !matches!(first, '-' | '*' | '_') {
        return false;
    }

    let mut count = 1usize;
    for symbol in symbols {
        // Mixing markers (or any other character) disqualifies the line.
        if symbol != first {
            return false;
        }
        count += 1;
    }
    count >= 3
}
954
/// Returns `true` when `line`, after leading whitespace, opens a code fence
/// with three backticks or three tildes.
fn is_fence_start(line: &str) -> bool {
    let candidate = line.trim_start();
    ["```", "~~~"]
        .iter()
        .any(|fence| candidate.starts_with(*fence))
}
959
/// Returns `true` when `line` carries the indentation prefix of an indented
/// code block, as recognized by `strip_indented_code_prefix`.
fn is_indented_code_line(line: &str) -> bool {
    strip_indented_code_prefix(line).is_some()
}
963
/// Removes the indentation that marks an indented code line.
///
/// CommonMark requires four leading spaces or one leading tab. Lines with less
/// indentation are not code and yield `None`, so a line with one stray leading
/// space is no longer mistaken for a code block. Indentation beyond the prefix
/// is kept as part of the code.
fn strip_indented_code_prefix(line: &str) -> Option<&str> {
    line.strip_prefix("    ")
        .or_else(|| line.strip_prefix('\t'))
}
970
/// Returns `true` when the first non-whitespace character of `line` is `>`.
fn is_blockquote_line(line: &str) -> bool {
    line.trim_start().chars().next() == Some('>')
}
974
/// Removes leading whitespace, one `>` marker and at most one space after it.
///
/// A line without a `>` marker is returned with only its leading whitespace removed.
fn strip_blockquote_prefix(line: &str) -> &str {
    let unindented = line.trim_start();
    let after_marker = match unindented.strip_prefix('>') {
        Some(rest) => rest,
        None => unindented,
    };
    match after_marker.strip_prefix(' ') {
        Some(rest) => rest,
        None => after_marker,
    }
}
980
/// Returns `true` when the first non-whitespace character of `line` is `<`.
fn is_html_line(line: &str) -> bool {
    line.trim_start().chars().next() == Some('<')
}
984
985fn is_table_header(lines: &[&str], pos: usize) -> bool {
986 if pos + 1 >= lines.len() {
987 return false;
988 }
989 if !lines[pos].contains('|') {
990 return false;
991 }
992 is_table_separator(lines[pos + 1])
993}
994
995fn is_table_separator(line: &str) -> bool {
996 let trimmed = line.trim();
997 if !trimmed.contains('-') {
998 return false;
999 }
1000 let cells = split_table_row(trimmed);
1001 if cells.is_empty() {
1002 return false;
1003 }
1004 cells.into_iter().all(|cell| {
1005 let c = cell.trim();
1006 c.len() >= 3 && c.chars().all(|ch| ch == '-' || ch == ':')
1007 })
1008}
1009
/// Splits one table row into trimmed cell strings.
///
/// Leading and trailing pipes are removed before splitting. As in the GFM
/// tables extension, a pipe escaped with a backslash (`\|`) is cell content
/// rather than a delimiter and appears in the cell as a plain `|`; every other
/// backslash sequence is kept untouched for the inline parser.
///
/// The result always contains at least one (possibly empty) cell.
fn split_table_row(line: &str) -> Vec<String> {
    let mut inner = line.trim().trim_start_matches('|');
    // Remove trailing border pipes, but keep a final pipe that is escaped
    // (preceded by an odd number of backslashes).
    while let Some(head) = inner.strip_suffix('|') {
        let backslashes = head.bytes().rev().take_while(|b| *b == b'\\').count();
        if backslashes % 2 == 1 {
            break;
        }
        inner = head;
    }

    let mut cells = Vec::new();
    let mut current = String::new();
    let mut chars = inner.chars();
    while let Some(ch) = chars.next() {
        match ch {
            '\\' => match chars.next() {
                Some('|') => current.push('|'),
                Some(other) => {
                    current.push('\\');
                    current.push(other);
                }
                None => current.push('\\'),
            },
            '|' => {
                cells.push(current.trim().to_string());
                current.clear();
            }
            _ => current.push(ch),
        }
    }
    cells.push(current.trim().to_string());
    cells
}
1017
1018fn parse_list_prefix(line: &str) -> Option<(ListKind, &str)> {
1019 parse_list_prefix_with_indent(line).map(|(kind, rest, _)| (kind, rest))
1020}
1021
1022fn parse_list_prefix_with_indent(line: &str) -> Option<(ListKind, &str, usize)> {
1023 let indent = leading_indent(line);
1024 let trimmed = line.trim_start_matches([' ', '\t']);
1025 if trimmed.len() < 2 {
1026 return None;
1027 }
1028
1029 if (trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ "))
1030 && trimmed.len() > 2
1031 {
1032 return Some((ListKind::Unordered, &trimmed[2..], indent));
1033 }
1034
1035 let mut digits_end = 0usize;
1036 for (idx, ch) in trimmed.char_indices() {
1037 if ch.is_ascii_digit() {
1038 digits_end = idx + ch.len_utf8();
1039 } else {
1040 break;
1041 }
1042 }
1043
1044 if digits_end == 0 || digits_end + 2 > trimmed.len() {
1045 return None;
1046 }
1047
1048 let marker = trimmed.as_bytes()[digits_end] as char;
1049 if marker != '.' && marker != ')' {
1050 return None;
1051 }
1052 if trimmed.as_bytes()[digits_end + 1] != b' ' {
1053 return None;
1054 }
1055
1056 let start = trimmed[..digits_end].parse::<usize>().ok()?;
1057 Some((ListKind::Ordered(start), &trimmed[digits_end + 2..], indent))
1058}
1059
1060fn same_kind_value(current: ListKind, expected: ListKind) -> bool {
1061 matches!(
1062 (current, expected),
1063 (ListKind::Unordered, ListKind::Unordered) | (ListKind::Ordered(_), ListKind::Ordered(_))
1064 )
1065}
1066
/// Measures the indentation of `line` in columns: each leading space counts as
/// one column and each leading tab as four.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .map_while(|ch| match ch {
            ' ' => Some(1usize),
            '\t' => Some(4usize),
            _ => None,
        })
        .sum()
}
1078
1079fn dedent_list_continuation(line: &str, base_indent: usize) -> &str {
1080 if leading_indent(line) <= base_indent {
1081 return line.trim_start();
1082 }
1083 let mut removed_cols = 0usize;
1084 let mut byte_idx = 0usize;
1085 for (idx, ch) in line.char_indices() {
1086 match ch {
1087 ' ' => {
1088 removed_cols += 1;
1089 byte_idx = idx + 1;
1090 }
1091 '\t' => {
1092 removed_cols += 4;
1093 byte_idx = idx + 1;
1094 }
1095 _ => break,
1096 }
1097 if removed_cols >= base_indent + 2 {
1098 break;
1099 }
1100 }
1101 &line[byte_idx..]
1102}
1103
/// Returns the content of `html` without its `<p>…</p>` wrapper when `html`
/// starts with `<p>`, ends with `</p>` plus newline, and holds no further
/// paragraph inside; otherwise returns `None`.
fn strip_single_paragraph_wrapper(html: &str) -> Option<&str> {
    let inner = html.strip_prefix("<p>")?.strip_suffix("</p>\n")?;
    if inner.contains("\n<p>") {
        // More than one paragraph: the wrapper must stay.
        None
    } else {
        Some(inner)
    }
}
1113
/// Returns `true` when `line` closes a code fence: it starts with at least
/// `min_len` `marker` characters and has nothing but whitespace after them.
///
/// Leading whitespace is not skipped here; callers pass an already
/// left-trimmed line.
fn is_fence_closing_line(line: &str, marker: char, min_len: usize) -> bool {
    let candidate = line.trim_end();
    let run = candidate.chars().take_while(|ch| *ch == marker).count();
    run >= min_len && candidate[run..].trim().is_empty()
}
1122
1123fn parse_table_alignments(separator_line: &str) -> Vec<Option<&'static str>> {
1124 split_table_row(separator_line)
1125 .into_iter()
1126 .map(|cell| {
1127 let c = cell.trim();
1128 let starts = c.starts_with(':');
1129 let ends = c.ends_with(':');
1130 match (starts, ends) {
1131 (true, true) => Some("center"),
1132 (true, false) => Some("left"),
1133 (false, true) => Some("right"),
1134 (false, false) => None,
1135 }
1136 })
1137 .collect()
1138}
1139
/// Appends the opening tag of a table cell to `out`, adding an `align`
/// attribute when an alignment is given.
fn push_table_cell_open(out: &mut String, tag: &str, align: Option<&str>) {
    out.push('<');
    out.push_str(tag);
    if let Some(alignment) = align {
        out.push_str(" align=\"");
        out.push_str(alignment);
        out.push('"');
    }
    out.push('>');
}
1147
1148fn is_block_start(lines: &[&str], pos: usize, config: ParserConfig) -> bool {
1149 parse_setext_heading(lines, pos).is_some()
1150 || is_thematic_break(lines[pos])
1151 || parse_atx_heading(lines[pos]).is_some()
1152 || is_fence_start(lines[pos])
1153 || is_indented_code_line(lines[pos])
1154 || is_blockquote_line(lines[pos])
1155 || is_html_line(lines[pos])
1156 || parse_list_prefix(lines[pos]).is_some()
1157 || (config.enable_tables && is_table_header(lines, pos))
1158}
1159
/// Recognizes a task-list marker (`[ ]`, `[x]` or `[X]`) at the start of a list item.
///
/// Returns whether the box is checked and the item text after the marker. As
/// in the GFM task list extension, the marker must be followed by whitespace,
/// so an inline link such as `[x](url)` or text such as `[x]foo` is left
/// alone. A marker with nothing at all after it is not recognized.
fn parse_task_item(item: &str) -> Option<(bool, &str)> {
    let trimmed = item.trim_start();
    // Shortest accepted form is the three-byte marker plus one whitespace byte.
    if trimmed.len() < 4 {
        return None;
    }

    let (marker, after_marker) = trimmed.strip_prefix('[')?.split_once(']')?;
    let checked = match marker {
        "x" | "X" => true,
        " " => false,
        _ => return None,
    };
    if !after_marker.starts_with(char::is_whitespace) {
        return None;
    }
    Some((checked, after_marker.trim_start()))
}
1175
/// Parses a link reference definition of the form `[label]: destination`.
///
/// Returns the trimmed label and the destination. The destination is either
/// the text between `<` and `>` or the first whitespace-delimited token, so an
/// optional title after it (`[a]: /url "Title"`) is not mistaken for part of
/// the URL. Titles are dropped, as they are for inline links. Footnote
/// definitions (`[^id]: …`) and definitions with an empty label or destination
/// yield `None`.
///
/// The caller is expected to pass a line without leading whitespace.
fn parse_link_definition(line: &str) -> Option<(&str, &str)> {
    if !line.starts_with('[') || line.starts_with("[^") {
        return None;
    }
    let close = line.find("]:")?;
    let id = line[1..close].trim();
    let target = line[close + 2..].trim();

    let bracketed = target
        .strip_prefix('<')
        .and_then(|rest| rest.split_once('>'));
    let url = match bracketed {
        Some((inside, _title)) => inside,
        None => target.split_whitespace().next().unwrap_or(""),
    };

    if id.is_empty() || url.is_empty() {
        return None;
    }
    Some((id, url))
}
1188
/// Parses a footnote definition of the form `[^id]: text`.
///
/// Returns the trimmed id and the trimmed text on the same line (possibly
/// empty), or `None` when the line is not a footnote definition or the id is empty.
fn parse_footnote_definition(line: &str) -> Option<(&str, &str)> {
    let (raw_id, raw_text) = line.strip_prefix("[^")?.split_once("]:")?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some((id, raw_text.trim()))
    }
}
1201
/// Kind of line break detected at a newline inside inline text
/// (see `detect_hard_break`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HardBreak {
    /// Ordinary newline, no hard break.
    None,
    /// Hard break signalled by two or more spaces before the newline.
    Spaces,
    /// Hard break signalled by a backslash before the newline.
    Backslash,
}
1208
1209fn detect_hard_break(text: &str, newline_idx: usize) -> HardBreak {
1210 if newline_idx == 0 {
1211 return HardBreak::None;
1212 }
1213
1214 let bytes = text.as_bytes();
1215 let mut idx = newline_idx;
1216 let mut spaces = 0usize;
1217 while idx > 0 && bytes[idx - 1] == b' ' {
1218 spaces += 1;
1219 idx -= 1;
1220 }
1221
1222 if spaces >= 2 {
1223 return HardBreak::Spaces;
1224 }
1225 if idx > 0 && bytes[idx - 1] == b'\\' {
1226 return HardBreak::Backslash;
1227 }
1228 HardBreak::None
1229}
1230
/// Removes every trailing space character (`' '` only, not other whitespace)
/// from `out` in place.
fn trim_trailing_spaces(out: &mut String) {
    let kept = out.trim_end_matches(' ').len();
    out.truncate(kept);
}
1236
1237fn parse_inline_link_target(after: &str) -> Option<(String, usize)> {
1238 if !after.starts_with('(') {
1239 return None;
1240 }
1241 let bytes = after.as_bytes();
1242 let mut i = 1usize;
1243
1244 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
1245 i += 1;
1246 }
1247 if i >= bytes.len() {
1248 return None;
1249 }
1250
1251 let url_start = i;
1252 let url: String;
1253
1254 if bytes[i] == b'<' {
1255 i += 1;
1256 let start = i;
1257 while i < bytes.len() && bytes[i] != b'>' {
1258 if bytes[i] == b'\n' {
1259 return None;
1260 }
1261 i += 1;
1262 }
1263 if i >= bytes.len() {
1264 return None;
1265 }
1266 url = after[start..i].to_string();
1267 i += 1;
1268 } else {
1269 let mut depth = 0usize;
1270 while i < bytes.len() {
1271 let ch = bytes[i] as char;
1272 if ch == '\\' && i + 1 < bytes.len() {
1273 i += 2;
1274 continue;
1275 }
1276 if ch == '(' {
1277 depth += 1;
1278 i += 1;
1279 continue;
1280 }
1281 if ch == ')' {
1282 if depth == 0 {
1283 break;
1284 }
1285 depth -= 1;
1286 i += 1;
1287 continue;
1288 }
1289 if ch.is_ascii_whitespace() && depth == 0 {
1290 break;
1291 }
1292 i += 1;
1293 }
1294 if i <= url_start {
1295 return None;
1296 }
1297 url = after[url_start..i].to_string();
1298 }
1299
1300 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
1301 i += 1;
1302 }
1303
1304 if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'' || bytes[i] == b'(') {
1305 let quote = bytes[i];
1306 let closing = if quote == b'(' { b')' } else { quote };
1307 i += 1;
1308 while i < bytes.len() && bytes[i] != closing {
1309 if bytes[i] == b'\\' && i + 1 < bytes.len() {
1310 i += 2;
1311 } else {
1312 i += 1;
1313 }
1314 }
1315 if i >= bytes.len() {
1316 return None;
1317 }
1318 i += 1;
1319 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
1320 i += 1;
1321 }
1322 }
1323
1324 if i >= bytes.len() || bytes[i] != b')' {
1325 return None;
1326 }
1327
1328 Some((url, i + 1))
1329}
1330
1331fn parse_autolink_literal(text: &str) -> Option<(String, String, usize)> {
1332 if text.starts_with("https://") || text.starts_with("http://") {
1333 let link = parse_url_like_token(text)?;
1334 return Some((link.to_string(), link.to_string(), link.len()));
1335 }
1336 if text.starts_with("www.") {
1337 let link = parse_url_like_token(text)?;
1338 return Some((format!("http://{link}"), link.to_string(), link.len()));
1339 }
1340 if let Some((email, consumed)) = parse_email_literal(text) {
1341 return Some((format!("mailto:{email}"), email, consumed));
1342 }
1343 None
1344}
1345
/// Extracts the URL-like token at the start of `text`.
///
/// The token runs up to the first whitespace character or `<`, after which any
/// trailing `.`, `,`, `;`, `:`, `!` or `?` characters are dropped so sentence
/// punctuation is not swallowed by the link. Returns `None` when nothing
/// remains.
fn parse_url_like_token(text: &str) -> Option<&str> {
    let stop = text
        .find(|c: char| c.is_whitespace() || c == '<')
        .unwrap_or(text.len());
    let token = text[..stop]
        .trim_end_matches(|c: char| matches!(c, '.' | ',' | ';' | ':' | '!' | '?'));

    if token.is_empty() {
        None
    } else {
        Some(token)
    }
}
1372
1373fn parse_email_literal(text: &str) -> Option<(String, usize)> {
1374 let mut end = 0usize;
1375 let mut at_pos: Option<usize> = None;
1376
1377 for (idx, ch) in text.char_indices() {
1378 if ch.is_whitespace() || ch == '<' {
1379 break;
1380 }
1381 if ch == '@' {
1382 at_pos = Some(idx);
1383 }
1384 end = idx + ch.len_utf8();
1385 }
1386
1387 if end == 0 {
1388 return None;
1389 }
1390 let mut candidate_end = end;
1391 while candidate_end > 0 {
1392 let ch = text[..candidate_end].chars().next_back().unwrap_or('\0');
1393 if matches!(ch, '.' | ',' | ';' | ':' | '!' | '?') {
1394 candidate_end -= ch.len_utf8();
1395 } else {
1396 break;
1397 }
1398 }
1399 if candidate_end == 0 {
1400 return None;
1401 }
1402
1403 let candidate = &text[..candidate_end];
1404 let at = at_pos?;
1405 if at == 0 || at >= candidate.len() - 1 {
1406 return None;
1407 }
1408
1409 let local = &candidate[..at];
1410 let domain = &candidate[at + 1..];
1411 if !is_email_local(local) || !is_email_domain(domain) {
1412 return None;
1413 }
1414 Some((candidate.to_string(), candidate_end))
1415}
1416
/// Returns `true` when `local` is a non-empty string made up solely of ASCII
/// letters, ASCII digits and the punctuation characters
/// ``! # $ % & ' * + - / = ? ^ _ ` { | } ~ .``.
fn is_email_local(local: &str) -> bool {
    // Punctuation accepted in addition to ASCII alphanumerics.
    const SPECIALS: &str = "!#$%&'*+-/=?^_`{|}~.";

    if local.is_empty() {
        return false;
    }
    local
        .chars()
        .all(|ch| ch.is_ascii_alphanumeric() || SPECIALS.contains(ch))
}
1445
/// Returns `true` when `domain` contains at least one `.` and every
/// dot-separated label is non-empty, consists only of ASCII letters, ASCII
/// digits and `-`, and neither starts nor ends with `-`.
fn is_email_domain(domain: &str) -> bool {
    let label_ok = |label: &str| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-')
    };

    domain.contains('.') && domain.split('.').all(label_ok)
}
1463
1464fn parse_angle_autolink(text: &str) -> Option<(String, usize)> {
1465 if !text.starts_with('<') {
1466 return None;
1467 }
1468 let end = text.find('>')?;
1469 let inner = &text[1..end];
1470 if inner.starts_with("http://") || inner.starts_with("https://") {
1471 let esc = html_escape(inner);
1472 return Some((format!("<a href=\"{esc}\">{esc}</a>"), end + 1));
1473 }
1474 if inner.contains('@') && !inner.contains(' ') {
1475 let esc = html_escape(inner);
1476 return Some((format!("<a href=\"mailto:{esc}\">{esc}</a>"), end + 1));
1477 }
1478 None
1479}
1480
/// Recognises a raw inline HTML construct at the start of `text`.
///
/// Accepted forms, checked in this order:
/// * comments `<!-- … -->`,
/// * processing instructions `<? … ?>`,
/// * declarations `<! … >`,
/// * opening or closing tags whose name starts with an ASCII letter and
///   continues with ASCII letters, digits or `-`; the tag ends at the first
///   `>` and must not contain a newline.
///
/// Returns the matched slice and its byte length. Inputs such as `<5 and b>`
/// or `<-x>` are rejected because a tag name cannot start with a digit or a
/// hyphen, so ordinary text containing `<` is not mistaken for markup.
fn parse_inline_html(text: &str) -> Option<(&str, usize)> {
    let rest = text.strip_prefix('<')?;

    // Order matters: "<!--" has to be tried before the more general "<!".
    for (opener, closer) in [("<!--", "-->"), ("<?", "?>"), ("<!", ">")] {
        if text.starts_with(opener) {
            let stop = text.find(closer)? + closer.len();
            return Some((&text[..stop], stop));
        }
    }

    // Optional `/` for closing tags, then the tag name.
    let name = rest.strip_prefix('/').unwrap_or(rest);
    let name_bytes = name.as_bytes();
    if !name_bytes.first().map_or(false, u8::is_ascii_alphabetic) {
        return None;
    }
    let name_len = name_bytes
        .iter()
        .take_while(|b| b.is_ascii_alphanumeric() || **b == b'-')
        .count();

    // Scan the remainder of the tag: it must close before any line ending.
    let after_name = text.len() - name.len() + name_len;
    let offset = text.as_bytes()[after_name..]
        .iter()
        .position(|&b| b == b'>' || b == b'\n')?;
    let stop = after_name + offset;
    if text.as_bytes()[stop] == b'\n' {
        return None;
    }
    Some((&text[..=stop], stop + 1))
}
1534
/// Parses a backtick-delimited code span at the start of `text`.
///
/// The opening delimiter is the full run of backticks at the start. The span
/// is closed by the next run of backticks of exactly the same length; longer
/// or shorter runs in between belong to the content, so `` `a``b` `` yields
/// ``a``b``. Returns the raw content between the delimiters (no whitespace
/// stripping) and the total number of bytes consumed, or `None` when `text`
/// does not start with a backtick or no matching closing run exists.
fn parse_code_span(text: &str) -> Option<(&str, usize)> {
    let bytes = text.as_bytes();
    let ticks = bytes.iter().take_while(|&&b| b == b'`').count();
    if ticks == 0 {
        return None;
    }

    let mut i = ticks;
    while i < bytes.len() {
        if bytes[i] != b'`' {
            i += 1;
            continue;
        }
        // Measure the whole backtick run; only an exact-length run closes.
        let run_start = i;
        while i < bytes.len() && bytes[i] == b'`' {
            i += 1;
        }
        if i - run_start == ticks {
            return Some((&text[ticks..run_start], i));
        }
    }
    None
}
1545
/// Reads a backslash followed by one character from the start of `text`.
///
/// Returns that character and the number of bytes consumed (the backslash
/// plus the character's UTF-8 length). Yields `None` when `text` does not
/// start with a backslash or nothing follows it.
fn parse_escaped_char(text: &str) -> Option<(char, usize)> {
    let escaped = text.strip_prefix('\\')?.chars().next()?;
    Some((escaped, 1 + escaped.len_utf8()))
}
1555
/// Extracts the text inside a leading `[...]`, allowing balanced nested
/// brackets and backslash escapes.
///
/// Returns the label without its outer brackets and the number of bytes
/// consumed including the closing `]`. A backslash causes the following byte
/// to be ignored for bracket matching. Returns `None` when `text` does not
/// start with `[` or the bracket is never closed.
fn parse_bracketed_label(text: &str) -> Option<(&str, usize)> {
    if !text.starts_with('[') {
        return None;
    }

    let mut open_inner = 0usize;
    let mut scan = text.bytes().enumerate().skip(1);
    // `while let` rather than `for`: the escape arm advances the iterator too.
    while let Some((at, byte)) = scan.next() {
        match byte {
            b'\\' => {
                // Skip whatever byte follows the backslash, if any.
                scan.next();
            }
            b'[' => open_inner += 1,
            b']' if open_inner == 0 => return Some((&text[1..at], at + 1)),
            b']' => open_inner -= 1,
            _ => {}
        }
    }
    None
}
1590
/// Matches `marker … marker` at the start of `text`.
///
/// Returns the non-empty content between the opening marker and its next
/// occurrence, plus the total number of bytes consumed (content and both
/// markers). Returns `None` when `text` does not start with `marker`, is too
/// short to hold any content, has no closing marker, or the closing marker
/// follows the opening one immediately.
fn wrapped<'a>(text: &'a str, marker: &str) -> Option<(&'a str, usize)> {
    let tail = text.strip_prefix(marker)?;
    // There must be room for at least one content byte plus the closing marker.
    if tail.len() <= marker.len() {
        return None;
    }

    match tail.find(marker) {
        None | Some(0) => None,
        Some(end) => Some((&tail[..end], end + 2 * marker.len())),
    }
}
1605
/// Normalises a reference label so that equivalent labels compare equal.
///
/// Leading and trailing whitespace is removed, every internal run of
/// whitespace is collapsed to a single space, and the result is lower-cased
/// using Unicode rules. Labels that differ only in case (including non-ASCII
/// letters) or in the amount of internal whitespace therefore map to the same
/// key.
fn normalize_key(text: &str) -> String {
    text.split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}
1609
/// Builds an identifier from a footnote key by keeping ASCII letters, ASCII
/// digits, `-` and `_`, and replacing every other character with a single `-`.
fn footnote_id(key: &str) -> String {
    key.chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') {
                ch
            } else {
                '-'
            }
        })
        .collect()
}
1621
/// Appends `ch` to `out`, replacing the five HTML-significant characters with
/// entity references so the output cannot be interpreted as markup:
/// `&` → `&amp;`, `<` → `&lt;`, `>` → `&gt;`, `"` → `&quot;`, `'` → `&#39;`.
/// Every other character is appended unchanged.
fn push_escaped_char(out: &mut String, ch: char) {
    match ch {
        '&' => out.push_str("&amp;"),
        '<' => out.push_str("&lt;"),
        '>' => out.push_str("&gt;"),
        '"' => out.push_str("&quot;"),
        '\'' => out.push_str("&#39;"),
        _ => out.push(ch),
    }
}
1632
1633fn html_escape(text: &str) -> String {
1634 let mut out = String::with_capacity(text.len());
1635 for ch in text.chars() {
1636 push_escaped_char(&mut out, ch);
1637 }
1638 out
1639}
1640
/// Escapes `text` by delegating to `html_escape` without any additional
/// processing. The name suggests it is meant for attribute values; the
/// escaping applied is identical to that used for element content.
fn html_attr_escape(text: &str) -> String {
    html_escape(text)
}
1644
/// Behavioural tests for the rendering entry points (`parse`,
/// `parse_with_flavor`, `safe_parse`, `safe_parse_with_flavor`) and for
/// `with_chart_runtime_if_needed`. Each test renders a small Markdown snippet
/// and checks for specific fragments in the produced HTML.
#[cfg(test)]
mod tests {
    use super::{parse, parse_with_flavor, safe_parse, safe_parse_with_flavor, MarkdownFlavor};

    // `parse` is expected to render tables, i.e. presumably defaults to GFM.
    #[test]
    fn renders_table_in_gfm() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse(md);
        assert!(html.contains("<table>"));
        assert!(html.contains("<thead>"));
        assert!(html.contains("<tbody>"));
    }

    // Tables are a GFM extension and must stay plain text under CommonMark.
    #[test]
    fn does_not_render_table_in_commonmark() {
        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(!html.contains("<table>"));
    }

    // Same input under both flavors: only GFM produces `<del>`.
    #[test]
    fn renders_strikethrough_only_in_gfm() {
        let gfm = parse_with_flavor("~~done~~", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("~~done~~", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<del>done</del>"));
        assert!(!cm.contains("<del>done</del>"));
    }

    // Same input under both flavors: only GFM produces a checkbox.
    #[test]
    fn renders_task_list_only_in_gfm() {
        let gfm = parse_with_flavor("- [x] finish", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("- [x] finish", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("type=\"checkbox\""));
        assert!(!cm.contains("type=\"checkbox\""));
    }

    // Bare URLs become links only under GFM.
    #[test]
    fn renders_autolink_literal_only_in_gfm() {
        let gfm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::Gfm);
        let cm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::CommonMark);
        assert!(gfm.contains("<a href=\"https://example.com\">https://example.com</a>"));
        assert!(!cm.contains("<a href=\"https://example.com\">https://example.com</a>"));
    }

    // Footnote references and the footnote section appear only under GFM.
    #[test]
    fn renders_footnotes_only_in_gfm() {
        let md = "note[^1]\n\n[^1]: footnote";
        let gfm = parse_with_flavor(md, MarkdownFlavor::Gfm);
        let cm = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(gfm.contains("footnote-ref"));
        assert!(gfm.contains("footnotes"));
        assert!(!cm.contains("footnote-ref"));
    }

    // A shortcut reference `[Rust]` resolves against its definition line.
    #[test]
    fn renders_reference_links() {
        let md = "[Rust]\n\n[Rust]: https://www.rust-lang.org/";
        let html = parse(md);
        assert!(html.contains("<a href=\"https://www.rust-lang.org/\">Rust</a>"));
    }

    // `safe_parse` reports an error instead of rendering a script tag.
    #[test]
    fn blocks_script_in_safe_parse() {
        let md = "<script>alert(1)</script>";
        assert!(safe_parse(md).is_err());
    }

    // The safe entry point still honours the requested flavor.
    #[test]
    fn safe_parse_flavor_works() {
        let html = safe_parse_with_flavor("~~x~~", MarkdownFlavor::CommonMark).unwrap();
        assert!(!html.contains("<del>x</del>"));
    }

    // An ordered list that does not begin at 1 carries a `start` attribute.
    #[test]
    fn renders_ordered_list_with_start() {
        let html = parse("3. three\n4. four");
        assert!(html.contains("<ol start=\"3\">"));
        assert!(html.contains("<li>three</li>"));
    }

    // NOTE(review): the child item is indented by a single space here; confirm
    // that this is the intended indentation for a nested list.
    #[test]
    fn renders_nested_list() {
        let html = parse("- parent\n - child\n- next");
        assert!(html.matches("<ul>").count() >= 2);
        assert!(html.contains("child"));
    }

    // Balanced parentheses inside the destination must survive, and the title
    // must not leak into the href.
    #[test]
    fn parses_link_with_title_and_parentheses() {
        let html = parse("[x](https://example.com/a_(b) \"title\")");
        assert!(html.contains("href=\"https://example.com/a_(b)\""));
    }

    // `www.` hosts get an `http://` href; bare e-mail addresses get `mailto:`.
    #[test]
    fn renders_gfm_literal_www_and_email_autolinks() {
        let html = parse_with_flavor(
            "visit www.example.com or me@example.com",
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("href=\"http://www.example.com\""));
        assert!(html.contains("href=\"mailto:me@example.com\""));
    }

    // NOTE(review): the first input has only one space before the newline,
    // while `detect_hard_break` reports `Spaces` only for two or more; confirm
    // the literal was meant to contain two trailing spaces.
    #[test]
    fn renders_hard_line_breaks() {
        let html_spaces = parse("a \nb");
        let html_backslash = parse("a\\\nb");
        assert!(html_spaces.contains("a<br />\nb"));
        assert!(html_backslash.contains("a<br />\nb"));
    }

    // The non-safe entry point passes inline HTML through in both flavors.
    #[test]
    fn parse_preserves_inline_html_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<span>y</span>"));
        assert!(gfm.contains("<span>y</span>"));
    }

    // The non-safe entry point passes HTML blocks through in both flavors.
    #[test]
    fn parse_preserves_html_block_in_gfm_and_commonmark() {
        let cm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.contains("<div>"));
        assert!(cm.contains("</div>"));
        assert!(gfm.contains("<div>"));
        assert!(gfm.contains("</div>"));
    }

    // The safe entry point rejects any inline HTML, not only scripts.
    #[test]
    fn safe_parse_rejects_inline_html() {
        let cm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    // The safe entry point rejects HTML blocks in both flavors.
    #[test]
    fn safe_parse_rejects_html_block() {
        let cm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
        let gfm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
        assert!(cm.is_err());
        assert!(gfm.is_err());
    }

    // A `---` underline turns the preceding line into a level-2 heading.
    #[test]
    fn supports_setext_heading_and_blockquote() {
        let html = parse("Title\n---\n\n> quote");
        assert!(html.contains("<h2>Title</h2>"));
        assert!(html.contains("<blockquote>"));
    }

    // Colons in the delimiter row select the `align` attribute of each column.
    #[test]
    fn supports_table_alignment_in_gfm() {
        let md = "| a | b | c |\n| :-- | :-: | --: |\n| 1 | 2 | 3 |";
        let html = parse(md);
        assert!(html.contains("<th align=\"left\">a</th>"));
        assert!(html.contains("<th align=\"center\">b</th>"));
        assert!(html.contains("<th align=\"right\">c</th>"));
    }

    // Under GFM a `mermaid` fence becomes a `<pre class="mermaid">` block.
    #[test]
    fn renders_mermaid_chart_in_gfm() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::Gfm);
        assert!(html.contains("<pre class=\"mermaid\">flowchart TD\nA-->B</pre>"));
    }

    // Under CommonMark the same fence stays an ordinary code block.
    #[test]
    fn keeps_mermaid_as_code_in_commonmark() {
        let md = "```mermaid\nflowchart TD\nA-->B\n```";
        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
        assert!(html.contains("<pre><code class=\"language-mermaid\">flowchart TD\nA-->B</code></pre>"));
    }

    // Output containing a mermaid block gets the runtime script added for GFM.
    #[test]
    fn appends_mermaid_runtime_for_gfm_file_output() {
        let html = super::with_chart_runtime_if_needed(
            "<pre class=\"mermaid\">graph TD\nA-->B</pre>\n".to_string(),
            MarkdownFlavor::Gfm,
        );
        assert!(html.contains("mermaid.min.js"));
        assert!(html.contains("mermaid.initialize({ startOnLoad: true })"));
    }

    // No runtime script is added for CommonMark output.
    #[test]
    fn does_not_append_mermaid_runtime_for_commonmark() {
        let html = super::with_chart_runtime_if_needed(
            "<pre><code class=\"language-mermaid\">graph TD\nA-->B</code></pre>\n".to_string(),
            MarkdownFlavor::CommonMark,
        );
        assert!(!html.contains("mermaid.min.js"));
    }

    // Mixed case and stray whitespace inside the tag must not slip through.
    #[test]
    fn safe_parse_blocks_script_variants() {
        assert!(safe_parse("<script>alert(1)</script>").is_err());
        assert!(safe_parse("<ScRiPt src=x></ScRiPt>").is_err());
        assert!(safe_parse("< / script >").is_err());
        assert!(safe_parse("< script>").is_err());
    }

    // NOTE(review): the expected HTML wraps an `<img>` with alt text
    // "Telegram", but the input shown has an empty link label; the image
    // markup inside the brackets appears to be missing — confirm the literal.
    #[test]
    fn renders_link_wrapped_image_badge() {
        let md = "[](https://t.me/+Ka9i6CNwe71hMWQy)";
        let html = parse(md);
        assert!(html.contains(
            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white\" alt=\"Telegram\" /></a>"
        ));
    }

    // NOTE(review): the expected HTML contains a Discord image and a Telegram
    // badge, neither of which is present in the input shown — confirm the
    // literal. The input begins with U+2800 (braille pattern blank).
    #[test]
    fn renders_discord_and_telegram_badges_together() {
        let md = "⠀[](https://t.me/+Ka9i6CNwe71hMWQy)";
        let html = parse(md);
        assert!(html.contains("<img src=\"https://discord.gg/2xrMh7qX6m\" alt=\"Discord\" />"));
        assert!(html.contains(
            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white\" alt=\"Telegram\" /></a>"
        ));
    }
}