umark_lib/
lib.rs

1//! `umark-lib` is a lightweight Markdown-to-HTML parser implemented in Rust.
2//!
3//! It exposes two parsing modes:
4//! - regular parsing (`parse*`): keeps inline/raw HTML
5//! - safe parsing (`safe_parse*`): rejects script tags and any raw HTML
6//!
7//! # Flavor overview
8//! - `MarkdownFlavor::CommonMark`: core CommonMark-style behavior
9//! - `MarkdownFlavor::Gfm`: CommonMark + tables, task lists, strikethrough,
10//!   literal autolinks, footnotes, and Mermaid chart blocks
11//!
12//! # Quick example
13//! ```
14//! use umark_lib::parse;
15//!
16//! let html = parse("# Hello");
17//! assert!(html.contains("<h1>Hello</h1>"));
18//! ```
19//!
20//! # Safe parsing example
21//! ```
22//! use umark_lib::safe_parse;
23//!
24//! assert!(safe_parse("plain text").is_ok());
25//! assert!(safe_parse("x <span>y</span>").is_err());
26//! ```
27//!
28use std::collections::{HashMap, HashSet};
29use std::error::Error;
30use std::fmt;
31use std::fs;
32
/// Selects the Markdown dialect the parser should honor.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum MarkdownFlavor {
    /// Baseline CommonMark-style behavior with every extension switched off.
    CommonMark,
    /// GitHub Flavored Markdown: the CommonMark baseline plus tables, task
    /// lists, strikethrough, literal autolinks, footnotes, and Mermaid charts.
    Gfm,
}
41
/// Error produced by the `safe_parse*` entry points when the input contains a
/// `<script>` tag or any raw HTML.
#[derive(Debug)]
struct MarkdownSecurityError;

impl Error for MarkdownSecurityError {}

impl fmt::Display for MarkdownSecurityError {
    /// Writes the fixed, user-facing rejection message.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("raw html tag is not allowed in safe_parse")
    }
}
52
/// Placeholder comment emitted instead of raw HTML when `omit_raw_html` is set.
/// The safe parsing path treats its presence in the output as a rejection signal.
const RAW_HTML_OMITTED_MARKER: &str = "<!-- raw HTML omitted -->";

/// Script tags appended to file output that contains Mermaid chart blocks:
/// the first loads Mermaid 11 from jsDelivr, the second initializes it.
const MERMAID_BOOTSTRAP: &str = concat!(
    "<script src=\"https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.min.js\"></script>\n",
    "<script>if (typeof mermaid !== \"undefined\") { mermaid.initialize({ startOnLoad: true }); }</script>\n",
);
55
56#[derive(Debug, Clone, Copy)]
57struct ParserConfig {
58    omit_raw_html: bool,
59    enable_tables: bool,
60    enable_task_list: bool,
61    enable_strikethrough: bool,
62    enable_autolink_literals: bool,
63    enable_footnotes: bool,
64    enable_charts: bool,
65}
66
67impl ParserConfig {
68    fn from_flavor(flavor: MarkdownFlavor) -> Self {
69        match flavor {
70            MarkdownFlavor::CommonMark => Self {
71                omit_raw_html: false,
72                enable_tables: false,
73                enable_task_list: false,
74                enable_strikethrough: false,
75                enable_autolink_literals: false,
76                enable_footnotes: false,
77                enable_charts: false,
78            },
79            MarkdownFlavor::Gfm => Self {
80                omit_raw_html: false,
81                enable_tables: true,
82                enable_task_list: true,
83                enable_strikethrough: true,
84                enable_autolink_literals: true,
85                enable_footnotes: true,
86                enable_charts: true,
87            },
88        }
89    }
90
91    fn with_raw_html_omitted(mut self) -> Self {
92        self.omit_raw_html = true;
93        self
94    }
95}
96
/// Definitions gathered from the document before block parsing starts.
#[derive(Clone, Default)]
struct DefinitionStore {
    /// Link reference definitions: normalized label -> destination URL.
    links: HashMap<String, String>,
    /// Footnote definitions keyed by normalized footnote id.
    /// NOTE(review): the value is presumably the footnote's Markdown body — confirm.
    footnotes: HashMap<String, String>,
    /// Indices of source lines to skip during block parsing.
    /// NOTE(review): presumably the lines consumed by definitions — confirm
    /// against `collect_definitions`.
    skip_lines: HashSet<usize>,
}
103
104struct Parser<'a> {
105    lines: Vec<&'a str>,
106    defs: DefinitionStore,
107    footnote_order: Vec<String>,
108    config: ParserConfig,
109}
110
111/// Parse Markdown with the default flavor (`MarkdownFlavor::Gfm`) and return HTML.
112///
113/// # Examples
114/// ```
115/// use umark_lib::parse;
116///
117/// let html = parse("~~done~~");
118/// assert!(html.contains("<del>done</del>"));
119/// ```
120pub fn parse(input: &str) -> String {
121    parse_with_flavor(input, MarkdownFlavor::Gfm)
122}
123
124/// Parse Markdown with an explicit flavor and return HTML.
125///
126/// # Examples
127/// ```
128/// use umark_lib::{parse_with_flavor, MarkdownFlavor};
129///
130/// let gfm = parse_with_flavor("| a | b |\n|---|---|\n| 1 | 2 |", MarkdownFlavor::Gfm);
131/// let commonmark = parse_with_flavor("| a | b |\n|---|---|\n| 1 | 2 |", MarkdownFlavor::CommonMark);
132///
133/// assert!(gfm.contains("<table>"));
134/// assert!(!commonmark.contains("<table>"));
135/// ```
136pub fn parse_with_flavor(input: &str, flavor: MarkdownFlavor) -> String {
137    parse_internal(input, ParserConfig::from_flavor(flavor))
138}
139
140/// Parse Markdown safely with the default flavor (`MarkdownFlavor::Gfm`).
141///
142/// This rejects:
143/// - `<script ...>` tags (case-insensitive)
144/// - any raw HTML blocks or inline raw HTML tags
145///
146/// # Examples
147/// ```
148/// use umark_lib::safe_parse;
149///
150/// assert!(safe_parse("**safe** text").is_ok());
151/// assert!(safe_parse("<script>alert(1)</script>").is_err());
152/// ```
153pub fn safe_parse(input: &str) -> Result<String, Box<dyn Error>> {
154    safe_parse_with_flavor(input, MarkdownFlavor::Gfm)
155}
156
157/// Parse Markdown safely with an explicit flavor.
158///
159/// # Examples
160/// ```
161/// use umark_lib::{safe_parse_with_flavor, MarkdownFlavor};
162///
163/// let html = safe_parse_with_flavor("~~x~~", MarkdownFlavor::CommonMark).unwrap();
164/// assert!(!html.contains("<del>x</del>"));
165/// ```
166pub fn safe_parse_with_flavor(
167    input: &str,
168    flavor: MarkdownFlavor,
169) -> Result<String, Box<dyn Error>> {
170    reject_script_tag(input)?;
171    let rendered = parse_internal(
172        input,
173        ParserConfig::from_flavor(flavor).with_raw_html_omitted(),
174    );
175    if rendered.contains(RAW_HTML_OMITTED_MARKER) {
176        return Err(Box::new(MarkdownSecurityError));
177    }
178    Ok(rendered)
179}
180
181/// Parse Markdown from a file and write rendered HTML to another file using GFM.
182///
183/// In GFM mode, if Mermaid chart blocks are detected, a Mermaid runtime bootstrap
184/// script is appended so charts can render when opening the output file in a browser.
185///
186/// # Examples
187/// ```
188/// use std::time::{SystemTime, UNIX_EPOCH};
189/// use umark_lib::parse_from_file;
190///
191/// let suffix = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos();
192/// let mut input = std::env::temp_dir();
193/// let mut output = std::env::temp_dir();
194/// input.push(format!("umark_parse_input_{suffix}.md"));
195/// output.push(format!("umark_parse_output_{suffix}.html"));
196///
197/// std::fs::write(&input, "# Title").unwrap();
198/// parse_from_file(input.to_str().unwrap(), output.to_str().unwrap()).unwrap();
199///
200/// let html = std::fs::read_to_string(&output).unwrap();
201/// assert!(html.contains("<h1>Title</h1>"));
202///
203/// let _ = std::fs::remove_file(&input);
204/// let _ = std::fs::remove_file(&output);
205/// ```
206pub fn parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
207    parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
208}
209
210/// Parse Markdown from a file with an explicit flavor and write HTML to a file.
211///
212/// In GFM mode, Mermaid runtime bootstrap is appended only when Mermaid blocks are found.
213///
214/// # Examples
215/// ```
216/// use std::time::{SystemTime, UNIX_EPOCH};
217/// use umark_lib::{parse_from_file_with_flavor, MarkdownFlavor};
218///
219/// let suffix = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos();
220/// let mut input = std::env::temp_dir();
221/// let mut output = std::env::temp_dir();
222/// input.push(format!("umark_parse_flavor_input_{suffix}.md"));
223/// output.push(format!("umark_parse_flavor_output_{suffix}.html"));
224///
225/// std::fs::write(&input, "| a | b |\n|---|---|\n| 1 | 2 |").unwrap();
226/// parse_from_file_with_flavor(
227///     input.to_str().unwrap(),
228///     output.to_str().unwrap(),
229///     MarkdownFlavor::CommonMark,
230/// ).unwrap();
231///
232/// let html = std::fs::read_to_string(&output).unwrap();
233/// assert!(!html.contains("<table>"));
234///
235/// let _ = std::fs::remove_file(&input);
236/// let _ = std::fs::remove_file(&output);
237/// ```
238pub fn parse_from_file_with_flavor(
239    path: &str,
240    output_path: &str,
241    flavor: MarkdownFlavor,
242) -> Result<(), Box<dyn Error>> {
243    let content = fs::read_to_string(path)?;
244    let rendered = parse_with_flavor(&content, flavor);
245    let rendered = with_chart_runtime_if_needed(rendered, flavor);
246    fs::write(output_path, rendered)?;
247    Ok(())
248}
249
250/// Safely parse Markdown from a file and write output HTML with default GFM flavor.
251///
252/// This enforces the same safety rules as [`safe_parse`].
253///
254/// # Examples
255/// ```
256/// use std::time::{SystemTime, UNIX_EPOCH};
257/// use umark_lib::safe_parse_from_file;
258///
259/// let suffix = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos();
260/// let mut input = std::env::temp_dir();
261/// let mut output = std::env::temp_dir();
262/// input.push(format!("umark_safe_input_{suffix}.md"));
263/// output.push(format!("umark_safe_output_{suffix}.html"));
264///
265/// std::fs::write(&input, "safe text").unwrap();
266/// assert!(safe_parse_from_file(input.to_str().unwrap(), output.to_str().unwrap()).is_ok());
267///
268/// let _ = std::fs::remove_file(&input);
269/// let _ = std::fs::remove_file(&output);
270/// ```
271pub fn safe_parse_from_file(path: &str, output_path: &str) -> Result<(), Box<dyn Error>> {
272    safe_parse_from_file_with_flavor(path, output_path, MarkdownFlavor::Gfm)
273}
274
275/// Safely parse Markdown from a file with an explicit flavor and write HTML to a file.
276///
277/// # Examples
278/// ```
279/// use std::time::{SystemTime, UNIX_EPOCH};
280/// use umark_lib::{safe_parse_from_file_with_flavor, MarkdownFlavor};
281///
282/// let suffix = SystemTime::now().duration_since(UNIX_EPOCH).unwrap().as_nanos();
283/// let mut input = std::env::temp_dir();
284/// let mut output = std::env::temp_dir();
285/// input.push(format!("umark_safe_flavor_input_{suffix}.md"));
286/// output.push(format!("umark_safe_flavor_output_{suffix}.html"));
287///
288/// std::fs::write(&input, "<div>raw html</div>").unwrap();
289/// let result = safe_parse_from_file_with_flavor(
290///     input.to_str().unwrap(),
291///     output.to_str().unwrap(),
292///     MarkdownFlavor::Gfm,
293/// );
294/// assert!(result.is_err());
295///
296/// let _ = std::fs::remove_file(&input);
297/// let _ = std::fs::remove_file(&output);
298/// ```
299pub fn safe_parse_from_file_with_flavor(
300    path: &str,
301    output_path: &str,
302    flavor: MarkdownFlavor,
303) -> Result<(), Box<dyn Error>> {
304    let content = fs::read_to_string(path)?;
305    let rendered = safe_parse_with_flavor(&content, flavor)?;
306    fs::write(output_path, rendered)?;
307    Ok(())
308}
309
310fn parse_internal(input: &str, config: ParserConfig) -> String {
311    let normalized = normalize_newlines(input);
312    let lines: Vec<&str> = normalized.lines().collect();
313    let defs = collect_definitions(&lines, config);
314    let mut parser = Parser {
315        lines,
316        defs,
317        footnote_order: Vec::new(),
318        config,
319    };
320    parser.parse_blocks()
321}
322
323fn with_chart_runtime_if_needed(mut rendered: String, flavor: MarkdownFlavor) -> String {
324    if flavor == MarkdownFlavor::Gfm
325        && rendered.contains("<pre class=\"mermaid\">")
326        && !rendered.contains("mermaid.initialize(")
327    {
328        rendered.push('\n');
329        rendered.push_str(MERMAID_BOOTSTRAP);
330    }
331    rendered
332}
333
334fn reject_script_tag(input: &str) -> Result<(), Box<dyn Error>> {
335    if contains_script_tag(input) {
336        return Err(Box::new(MarkdownSecurityError));
337    }
338    Ok(())
339}
340
/// Report whether `input` contains something that looks like a `<script>` tag.
///
/// At every `<` the scan accepts optional ASCII whitespace, an optional `/`
/// (so closing tags count as well), more optional whitespace, and then the
/// word `script` in any ASCII letter case. The match is confirmed when that
/// word is followed by ASCII whitespace, `>`, `/`, or the end of the input.
///
/// All comparisons are made on bytes. Slicing the `str` at `name_start + 6`
/// would panic whenever that offset falls inside a multi-byte UTF-8
/// character (for example `"<aééé"`), and this function runs on untrusted
/// input.
fn contains_script_tag(input: &str) -> bool {
    const TAG: &[u8] = b"script";
    let bytes = input.as_bytes();

    // Advance past any run of ASCII whitespace starting at `at`.
    let skip_whitespace = |mut at: usize| -> usize {
        while matches!(bytes.get(at), Some(b) if b.is_ascii_whitespace()) {
            at += 1;
        }
        at
    };

    for (open, _) in bytes.iter().enumerate().filter(|(_, b)| **b == b'<') {
        let mut name_start = skip_whitespace(open + 1);
        if bytes.get(name_start) == Some(&b'/') {
            name_start = skip_whitespace(name_start + 1);
        }

        let name_end = name_start + TAG.len();
        let is_script = bytes
            .get(name_start..name_end)
            .map_or(false, |name| name.eq_ignore_ascii_case(TAG));
        if !is_script {
            continue;
        }

        // `<scripts>` and similar longer names are not script tags; an input
        // that ends right after the name is treated as one.
        match bytes.get(name_end).copied() {
            None | Some(b'>') | Some(b'/') => return true,
            Some(b) if b.is_ascii_whitespace() => return true,
            Some(_) => {}
        }
    }

    false
}
375
376impl<'a> Parser<'a> {
377    fn parse_blocks(&mut self) -> String {
378        let mut pos = 0usize;
379        let mut out = String::new();
380
381        while pos < self.lines.len() {
382            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
383                pos += 1;
384                continue;
385            }
386
387            if let Some((level, text, next)) = parse_setext_heading(&self.lines, pos) {
388                let heading_text = text.trim().to_string();
389                out.push_str(&format!(
390                    "<h{level}>{}</h{level}>\n",
391                    self.parse_inlines(&heading_text)
392                ));
393                pos = next;
394                continue;
395            }
396
397            if is_thematic_break(self.lines[pos]) {
398                out.push_str("<hr />\n");
399                pos += 1;
400                continue;
401            }
402
403            if let Some((level, text)) = parse_atx_heading(self.lines[pos]) {
404                out.push_str(&format!(
405                    "<h{level}>{}</h{level}>\n",
406                    self.parse_inlines(text.trim())
407                ));
408                pos += 1;
409                continue;
410            }
411
412            if is_fence_start(self.lines[pos]) {
413                let (html, next) = self.parse_fenced_code(pos);
414                out.push_str(&html);
415                pos = next;
416                continue;
417            }
418
419            if is_indented_code_line(self.lines[pos]) {
420                let (html, next) = self.parse_indented_code(pos);
421                out.push_str(&html);
422                pos = next;
423                continue;
424            }
425
426            if is_blockquote_line(self.lines[pos]) {
427                let (html, next) = self.parse_blockquote(pos);
428                out.push_str(&html);
429                pos = next;
430                continue;
431            }
432
433            if is_html_line(self.lines[pos]) {
434                let (html, next) = self.parse_html_block(pos);
435                out.push_str(&html);
436                pos = next;
437                continue;
438            }
439
440            if self.config.enable_tables && is_table_header(&self.lines, pos) {
441                let (html, next) = self.parse_table(pos);
442                out.push_str(&html);
443                pos = next;
444                continue;
445            }
446
447            if parse_list_prefix(self.lines[pos]).is_some() {
448                let (html, next) = self.parse_list(pos);
449                out.push_str(&html);
450                pos = next;
451                continue;
452            }
453
454            let (html, next) = self.parse_paragraph(pos);
455            out.push_str(&html);
456            pos = next;
457        }
458
459        if self.config.enable_footnotes && !self.footnote_order.is_empty() {
460            out.push_str(&self.render_footnotes());
461        }
462
463        out
464    }
465
466    fn parse_subdocument(&mut self, markdown: &str) -> String {
467        let normalized = normalize_newlines(markdown);
468        let lines: Vec<&str> = normalized.lines().collect();
469        let mut nested = Parser {
470            lines,
471            defs: self.defs.clone(),
472            footnote_order: Vec::new(),
473            config: self.config,
474        };
475        let html = nested.parse_blocks();
476        for id in nested.footnote_order {
477            self.note_footnote(id);
478        }
479        html
480    }
481
482    fn parse_fenced_code(&self, start: usize) -> (String, usize) {
483        let first = self.lines[start].trim_start();
484        let fence_char = first.chars().next().unwrap_or('`');
485        let fence_len = first.chars().take_while(|c| *c == fence_char).count();
486        let info = first[fence_len..].trim();
487        let mut pos = start + 1;
488        let mut code_lines = Vec::new();
489
490        while pos < self.lines.len() {
491            let line = self.lines[pos].trim_start();
492            if is_fence_closing_line(line, fence_char, fence_len) {
493                pos += 1;
494                break;
495            }
496            code_lines.push(self.lines[pos]);
497            pos += 1;
498        }
499
500        let code_raw = code_lines.join("\n");
501        let code = html_escape(&code_raw);
502        let lang = info.split_whitespace().next().unwrap_or("");
503        let is_mermaid = self.config.enable_charts && lang.eq_ignore_ascii_case("mermaid");
504
505        let html = if is_mermaid {
506            format!("<pre class=\"mermaid\">{}</pre>\n", code)
507        } else if info.is_empty() {
508            format!("<pre><code>{}</code></pre>\n", code)
509        } else {
510            format!(
511                "<pre><code class=\"language-{}\">{}</code></pre>\n",
512                html_attr_escape(lang),
513                code
514            )
515        };
516        (html, pos)
517    }
518
519    fn parse_indented_code(&self, start: usize) -> (String, usize) {
520        let mut pos = start;
521        let mut code_lines = Vec::new();
522
523        while pos < self.lines.len() {
524            let line = self.lines[pos];
525            if line.trim().is_empty() {
526                code_lines.push("");
527                pos += 1;
528                continue;
529            }
530
531            if let Some(stripped) = strip_indented_code_prefix(line) {
532                code_lines.push(stripped);
533                pos += 1;
534            } else {
535                break;
536            }
537        }
538
539        let code = html_escape(&code_lines.join("\n"));
540        (format!("<pre><code>{}</code></pre>\n", code), pos)
541    }
542
543    fn parse_blockquote(&mut self, start: usize) -> (String, usize) {
544        let mut pos = start;
545        let mut parts = Vec::new();
546
547        while pos < self.lines.len() {
548            let line = self.lines[pos];
549            if line.trim().is_empty() {
550                parts.push(String::new());
551                pos += 1;
552                continue;
553            }
554            if !is_blockquote_line(line) {
555                break;
556            }
557            parts.push(strip_blockquote_prefix(line).to_string());
558            pos += 1;
559        }
560
561        let body = parts.join("\n");
562        let inner = self.parse_subdocument(&body);
563        (format!("<blockquote>\n{}</blockquote>\n", inner), pos)
564    }
565
566    fn parse_html_block(&self, start: usize) -> (String, usize) {
567        if !self.config.omit_raw_html {
568            let mut pos = start;
569            while pos < self.lines.len() {
570                if self.lines[pos].trim().is_empty() {
571                    break;
572                }
573                pos += 1;
574            }
575            let raw = self.lines[start..pos].join("\n");
576            return (format!("{raw}\n"), pos);
577        }
578
579        let mut pos = start;
580        while pos < self.lines.len() {
581            if self.lines[pos].trim().is_empty() {
582                break;
583            }
584            pos += 1;
585        }
586        (format!("{RAW_HTML_OMITTED_MARKER}\n"), pos)
587    }
588
589    fn parse_table(&mut self, start: usize) -> (String, usize) {
590        let headers = split_table_row(self.lines[start]);
591        let aligns = parse_table_alignments(self.lines[start + 1]);
592        let mut pos = start + 2;
593        let mut rows: Vec<Vec<String>> = Vec::new();
594
595        while pos < self.lines.len() {
596            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
597                break;
598            }
599            if !self.lines[pos].contains('|') {
600                break;
601            }
602            rows.push(split_table_row(self.lines[pos]));
603            pos += 1;
604        }
605
606        let mut out = String::new();
607        out.push_str("<table>\n<thead>\n<tr>");
608        for (idx, cell) in headers.into_iter().enumerate() {
609            push_table_cell_open(&mut out, "th", aligns.get(idx).copied().flatten());
610            out.push_str(&self.parse_inlines(cell.trim()));
611            out.push_str("</th>");
612        }
613        out.push_str("</tr>\n</thead>\n<tbody>\n");
614
615        for row in rows {
616            out.push_str("<tr>");
617            for (idx, cell) in row.into_iter().enumerate() {
618                push_table_cell_open(&mut out, "td", aligns.get(idx).copied().flatten());
619                out.push_str(&self.parse_inlines(cell.trim()));
620                out.push_str("</td>");
621            }
622            out.push_str("</tr>\n");
623        }
624
625        out.push_str("</tbody>\n</table>\n");
626        (out, pos)
627    }
628
629    fn parse_list(&mut self, start: usize) -> (String, usize) {
630        let (first_kind, _, base_indent) = parse_list_prefix_with_indent(self.lines[start])
631            .unwrap_or((ListKind::Unordered, "", 0));
632        let mut pos = start;
633        let mut out = String::new();
634
635        match first_kind {
636            ListKind::Unordered => out.push_str("<ul>\n"),
637            ListKind::Ordered(start_num) => {
638                if start_num != 1 {
639                    out.push_str(&format!("<ol start=\"{start_num}\">\n"));
640                } else {
641                    out.push_str("<ol>\n");
642                }
643            }
644        }
645
646        while pos < self.lines.len() {
647            if self.is_skipped(pos) {
648                break;
649            }
650
651            let Some((kind, item_line, indent)) = parse_list_prefix_with_indent(self.lines[pos])
652            else {
653                break;
654            };
655            if indent != base_indent || !same_kind_value(kind, first_kind) {
656                break;
657            }
658
659            let mut item_parts = vec![item_line.to_string()];
660            pos += 1;
661            let mut loose = false;
662
663            while pos < self.lines.len() {
664                if self.is_skipped(pos) {
665                    break;
666                }
667
668                let line = self.lines[pos];
669                if line.trim().is_empty() {
670                    loose = true;
671                    item_parts.push(String::new());
672                    pos += 1;
673                    continue;
674                }
675
676                if let Some((next_kind, _, next_indent)) = parse_list_prefix_with_indent(line) {
677                    if next_indent == base_indent && same_kind_value(next_kind, first_kind) {
678                        break;
679                    }
680                    if next_indent <= base_indent && !same_kind_value(next_kind, first_kind) {
681                        break;
682                    }
683                }
684
685                if leading_indent(line) <= base_indent
686                    && is_block_start(&self.lines, pos, self.config)
687                {
688                    break;
689                }
690
691                item_parts.push(dedent_list_continuation(line, base_indent).to_string());
692                pos += 1;
693            }
694
695            out.push_str("<li>");
696
697            let mut checkbox: Option<bool> = None;
698            if self.config.enable_task_list && matches!(first_kind, ListKind::Unordered) {
699                if let Some((checked, rest)) = parse_task_item(&item_parts[0]) {
700                    checkbox = Some(checked);
701                    item_parts[0] = rest.to_string();
702                }
703            }
704
705            if let Some(checked) = checkbox {
706                if checked {
707                    out.push_str("<input type=\"checkbox\" checked=\"\" disabled=\"\" /> ");
708                } else {
709                    out.push_str("<input type=\"checkbox\" disabled=\"\" /> ");
710                }
711            }
712
713            let item_markdown = item_parts.join("\n");
714            let rendered = self.parse_subdocument(&item_markdown);
715            if !loose {
716                if let Some(stripped) = strip_single_paragraph_wrapper(&rendered) {
717                    out.push_str(stripped);
718                } else {
719                    out.push_str(&rendered);
720                }
721            } else {
722                out.push_str(&rendered);
723            }
724            out.push_str("</li>\n");
725        }
726
727        match first_kind {
728            ListKind::Unordered => out.push_str("</ul>\n"),
729            ListKind::Ordered(_) => out.push_str("</ol>\n"),
730        }
731
732        (out, pos)
733    }
734
735    fn parse_paragraph(&mut self, start: usize) -> (String, usize) {
736        let mut pos = start;
737        let mut parts = Vec::new();
738
739        while pos < self.lines.len() {
740            if self.is_skipped(pos) || self.lines[pos].trim().is_empty() {
741                break;
742            }
743            if pos != start && is_block_start(&self.lines, pos, self.config) {
744                break;
745            }
746            parts.push(self.lines[pos]);
747            pos += 1;
748        }
749
750        let text = parts.join("\n");
751        (format!("<p>{}</p>\n", self.parse_inlines(&text)), pos)
752    }
753
754    fn parse_inlines(&mut self, text: &str) -> String {
755        let mut out = String::new();
756        let mut i = 0usize;
757
758        while i < text.len() {
759            let rest = &text[i..];
760
761            if rest.starts_with("\\\n") {
762                out.push_str("<br />\n");
763                i += 2;
764                continue;
765            }
766
767            if rest.starts_with('\n') {
768                match detect_hard_break(text, i) {
769                    HardBreak::Spaces => {
770                        trim_trailing_spaces(&mut out);
771                        out.push_str("<br />\n");
772                    }
773                    HardBreak::Backslash => {
774                        if out.ends_with('\\') {
775                            out.pop();
776                        }
777                        out.push_str("<br />\n");
778                    }
779                    HardBreak::None => out.push('\n'),
780                }
781                i += 1;
782                continue;
783            }
784
785            if let Some((ch, consumed)) = parse_escaped_char(rest) {
786                push_escaped_char(&mut out, ch);
787                i += consumed;
788                continue;
789            }
790
791            if rest.starts_with('`') {
792                if let Some((content, consumed)) = parse_code_span(rest) {
793                    out.push_str("<code>");
794                    out.push_str(&html_escape(content));
795                    out.push_str("</code>");
796                    i += consumed;
797                    continue;
798                }
799            }
800
801            if self.config.enable_footnotes && rest.starts_with("[^") {
802                if let Some(end) = rest.find(']') {
803                    let raw_id = &rest[2..end];
804                    let key = normalize_key(raw_id);
805                    if self.defs.footnotes.contains_key(&key) {
806                        let index = self.note_footnote(key.clone());
807                        let safe = footnote_id(&key);
808                        out.push_str(&format!(
809                            "<sup class=\"footnote-ref\"><a href=\"#fn-{safe}\" id=\"fnref-{safe}\">{index}</a></sup>"
810                        ));
811                        i += end + 1;
812                        continue;
813                    }
814                }
815            }
816
817            if rest.starts_with("![") {
818                if let Some((html, consumed)) = self.parse_image(rest) {
819                    out.push_str(&html);
820                    i += consumed;
821                    continue;
822                }
823            }
824
825            if rest.starts_with('[') {
826                if let Some((html, consumed)) = self.parse_link_like(rest) {
827                    out.push_str(&html);
828                    i += consumed;
829                    continue;
830                }
831            }
832
833            if let Some((html, consumed)) = parse_angle_autolink(rest) {
834                out.push_str(&html);
835                i += consumed;
836                continue;
837            }
838
839            if let Some((raw, consumed)) = parse_inline_html(rest) {
840                if !self.config.omit_raw_html {
841                    out.push_str(raw);
842                } else {
843                    out.push_str(RAW_HTML_OMITTED_MARKER);
844                }
845                i += consumed;
846                continue;
847            }
848
849            if self.config.enable_autolink_literals {
850                if let Some((href, text_value, consumed)) = parse_autolink_literal(rest) {
851                    let href_escaped = html_escape(&href);
852                    let text_escaped = html_escape(&text_value);
853                    out.push_str(&format!("<a href=\"{href_escaped}\">{text_escaped}</a>"));
854                    i += consumed;
855                    continue;
856                }
857            }
858
859            if let Some((content, consumed)) = wrapped(rest, "**") {
860                out.push_str("<strong>");
861                out.push_str(&self.parse_inlines(content));
862                out.push_str("</strong>");
863                i += consumed;
864                continue;
865            }
866
867            if let Some((content, consumed)) = wrapped(rest, "__") {
868                out.push_str("<strong>");
869                out.push_str(&self.parse_inlines(content));
870                out.push_str("</strong>");
871                i += consumed;
872                continue;
873            }
874
875            if self.config.enable_strikethrough {
876                if let Some((content, consumed)) = wrapped(rest, "~~") {
877                    out.push_str("<del>");
878                    out.push_str(&self.parse_inlines(content));
879                    out.push_str("</del>");
880                    i += consumed;
881                    continue;
882                }
883            }
884
885            if let Some((content, consumed)) = wrapped(rest, "*") {
886                out.push_str("<em>");
887                out.push_str(&self.parse_inlines(content));
888                out.push_str("</em>");
889                i += consumed;
890                continue;
891            }
892
893            if let Some((content, consumed)) = wrapped(rest, "_") {
894                out.push_str("<em>");
895                out.push_str(&self.parse_inlines(content));
896                out.push_str("</em>");
897                i += consumed;
898                continue;
899            }
900
901            if let Some(ch) = rest.chars().next() {
902                push_escaped_char(&mut out, ch);
903                i += ch.len_utf8();
904            } else {
905                break;
906            }
907        }
908
909        out
910    }
911
912    fn parse_image(&mut self, rest: &str) -> Option<(String, usize)> {
913        let (alt, consumed_label) = parse_bracketed_label(&rest[1..])?;
914        let after = &rest[1 + consumed_label..];
915
916        let (url, consumed_after) = parse_inline_link_target(after)?;
917        let html = format!(
918            "<img src=\"{}\" alt=\"{}\" />",
919            html_attr_escape(&url),
920            html_attr_escape(alt)
921        );
922        Some((html, 1 + consumed_label + consumed_after))
923    }
924
925    fn parse_link_like(&mut self, rest: &str) -> Option<(String, usize)> {
926        let (label, consumed_label) = parse_bracketed_label(rest)?;
927        let after = &rest[consumed_label..];
928
929        if let Some((url, consumed_after)) = parse_inline_link_target(after) {
930            let html = format!(
931                "<a href=\"{}\">{}</a>",
932                html_attr_escape(&url),
933                self.parse_inlines(label)
934            );
935            return Some((html, consumed_label + consumed_after));
936        }
937
938        if after.starts_with('[') {
939            let (raw_ref, consumed_ref) = parse_bracketed_label(after)?;
940            let key = if raw_ref.trim().is_empty() {
941                normalize_key(label)
942            } else {
943                normalize_key(raw_ref)
944            };
945            if let Some(url) = self.defs.links.get(&key) {
946                let html = format!(
947                    "<a href=\"{}\">{}</a>",
948                    html_attr_escape(url),
949                    self.parse_inlines(label)
950                );
951                return Some((html, consumed_label + consumed_ref));
952            }
953        }
954
955        let key = normalize_key(label);
956        if let Some(url) = self.defs.links.get(&key) {
957            let html = format!(
958                "<a href=\"{}\">{}</a>",
959                html_attr_escape(url),
960                self.parse_inlines(label)
961            );
962            return Some((html, consumed_label));
963        }
964
965        None
966    }
967
968    fn note_footnote(&mut self, id: String) -> usize {
969        if let Some(idx) = self.footnote_order.iter().position(|x| x == &id) {
970            idx + 1
971        } else {
972            self.footnote_order.push(id);
973            self.footnote_order.len()
974        }
975    }
976
977    fn render_footnotes(&mut self) -> String {
978        let mut out = String::new();
979        out.push_str("<section class=\"footnotes\">\n<ol>\n");
980
981        let footnote_ids = self.footnote_order.clone();
982        for id in footnote_ids {
983            let safe = footnote_id(&id);
984            let text = self.defs.footnotes.get(&id).cloned().unwrap_or_default();
985            out.push_str(&format!(
986                "<li id=\"fn-{safe}\">{} <a href=\"#fnref-{safe}\" class=\"footnote-backref\">↩</a></li>\n",
987                self.parse_inlines(text.trim())
988            ));
989        }
990
991        out.push_str("</ol>\n</section>\n");
992        out
993    }
994
    /// Reports whether line index `line` is recorded in `self.defs.skip_lines`.
    fn is_skipped(&self, line: usize) -> bool {
        self.defs.skip_lines.contains(&line)
    }
998}
999
/// The kind of list a list-item marker introduces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ListKind {
    /// Bullet list item (`-`, `*` or `+` marker).
    Unordered,
    /// Numbered list item; carries the number parsed from the marker's digits.
    Ordered(usize),
}
1005
/// Converts every `\r\n` pair and every remaining lone `\r` into `\n`.
fn normalize_newlines(input: &str) -> String {
    let mut normalized = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();
    while let Some(current) = chars.next() {
        if current != '\r' {
            normalized.push(current);
            continue;
        }
        // Swallow the `\n` of a CRLF pair so it yields a single newline.
        if chars.peek() == Some(&'\n') {
            chars.next();
        }
        normalized.push('\n');
    }
    normalized
}
1009
1010fn collect_definitions(lines: &[&str], config: ParserConfig) -> DefinitionStore {
1011    let mut defs = DefinitionStore::default();
1012    let mut i = 0usize;
1013
1014    while i < lines.len() {
1015        let line = lines[i].trim();
1016
1017        if let Some((id, url)) = parse_link_definition(line) {
1018            defs.links.insert(normalize_key(id), url.to_string());
1019            defs.skip_lines.insert(i);
1020            i += 1;
1021            continue;
1022        }
1023
1024        if config.enable_footnotes {
1025            if let Some((id, first_text)) = parse_footnote_definition(line) {
1026                let mut text_parts = vec![first_text.to_string()];
1027                defs.skip_lines.insert(i);
1028                i += 1;
1029
1030                while i < lines.len() {
1031                    let next = lines[i];
1032                    if next.starts_with("    ") || next.starts_with('\t') {
1033                        text_parts.push(next.trim().to_string());
1034                        defs.skip_lines.insert(i);
1035                        i += 1;
1036                    } else {
1037                        break;
1038                    }
1039                }
1040
1041                defs.footnotes
1042                    .insert(normalize_key(id), text_parts.join(" "));
1043                continue;
1044            }
1045        }
1046
1047        i += 1;
1048    }
1049
1050    defs
1051}
1052
/// Parses an ATX heading (`# Title`, `## Title ##`, ...).
///
/// Returns the heading level (1-6) and the heading text, or `None` when the
/// line is not a heading. Following CommonMark:
/// - the opening `#` run must be followed by a space or tab, so `#hashtag`
///   is not a heading;
/// - a trailing `#` run is removed only when it is preceded by whitespace
///   (or makes up the whole content), so `# C#` keeps its text `C#`.
///
/// A line whose content after the opening run is empty yields `None`.
fn parse_atx_heading(line: &str) -> Option<(usize, &str)> {
    let trimmed = line.trim_start();
    let level = trimmed.bytes().take_while(|&b| b == b'#').count();
    if level == 0 || level > 6 {
        return None;
    }

    // `#` is ASCII, so the run length is also a valid byte offset.
    let after_marker = &trimmed[level..];
    if !after_marker.starts_with([' ', '\t']) {
        return None;
    }

    let content = after_marker.trim();
    if content.is_empty() {
        return None;
    }

    let without_closer = content.trim_end_matches('#');
    let text = if without_closer.is_empty() || without_closer.ends_with([' ', '\t']) {
        without_closer.trim_end()
    } else {
        // The trailing `#` characters are attached to a word: keep them.
        content
    };
    Some((level, text))
}
1072
1073fn parse_setext_heading<'a>(lines: &'a [&str], pos: usize) -> Option<(usize, &'a str, usize)> {
1074    if pos + 1 >= lines.len() {
1075        return None;
1076    }
1077    if lines[pos].trim().is_empty() {
1078        return None;
1079    }
1080    if !can_be_setext_content_line(lines[pos]) {
1081        return None;
1082    }
1083
1084    let underline = lines[pos + 1].trim();
1085    if is_setext_underline(underline, '=') {
1086        return Some((1, lines[pos], pos + 2));
1087    }
1088    if is_setext_underline(underline, '-') {
1089        return Some((2, lines[pos], pos + 2));
1090    }
1091    None
1092}
1093
1094fn can_be_setext_content_line(line: &str) -> bool {
1095    !line.trim().is_empty()
1096        && !is_thematic_break(line)
1097        && parse_atx_heading(line).is_none()
1098        && !is_fence_start(line)
1099        && !is_indented_code_line(line)
1100        && !is_blockquote_line(line)
1101        && !is_html_line(line)
1102        && parse_list_prefix(line).is_none()
1103}
1104
/// True when `line`, ignoring surrounding whitespace, is at least three bytes
/// long and consists solely of `marker` characters.
fn is_setext_underline(line: &str, marker: char) -> bool {
    let run = line.trim();
    if run.len() < 3 {
        return false;
    }
    run.chars().all(|symbol| symbol == marker)
}
1109
/// True when `line`, with all whitespace removed, is three or more copies of
/// one single marker character out of `-`, `*` and `_`.
fn is_thematic_break(line: &str) -> bool {
    let mut marks = line.chars().filter(|c| !c.is_whitespace());

    let lead = match marks.next() {
        Some(c @ ('-' | '*' | '_')) => c,
        _ => return false,
    };

    let mut total = 1usize;
    for mark in marks {
        if mark != lead {
            return false;
        }
        total += 1;
    }
    total >= 3
}
1123
/// True when `line`, after leading whitespace, begins with three backticks
/// or three tildes.
fn is_fence_start(line: &str) -> bool {
    let body = line.trim_start();
    ["```", "~~~"].iter().any(|fence| body.starts_with(fence))
}
1128
/// True when `line` carries an indented-code prefix, i.e. when
/// `strip_indented_code_prefix` finds something to strip.
fn is_indented_code_line(line: &str) -> bool {
    strip_indented_code_prefix(line).is_some()
}
1132
/// Removes one level of code indentation — exactly four leading spaces, or
/// otherwise one leading tab — and returns the remainder. `None` when the
/// line starts with neither.
fn strip_indented_code_prefix(line: &str) -> Option<&str> {
    line.strip_prefix("    ")
        .or_else(|| line.strip_prefix('\t'))
}
1139
/// True when the first non-whitespace character of `line` is `>`.
fn is_blockquote_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('>'))
}
1143
/// Drops leading whitespace, then one `>` marker and one space directly
/// after it, when present. A line without a marker is returned with only
/// its leading whitespace removed.
fn strip_blockquote_prefix(line: &str) -> &str {
    let body = line.trim_start();
    match body.strip_prefix('>') {
        Some(quoted) => quoted.strip_prefix(' ').unwrap_or(quoted),
        // `body` cannot start with a space here: it was just left-trimmed.
        None => body,
    }
}
1149
/// True when the first non-whitespace character of `line` is `<`.
fn is_html_line(line: &str) -> bool {
    matches!(line.trim_start().chars().next(), Some('<'))
}
1153
1154fn is_table_header(lines: &[&str], pos: usize) -> bool {
1155    if pos + 1 >= lines.len() {
1156        return false;
1157    }
1158    if !lines[pos].contains('|') {
1159        return false;
1160    }
1161    is_table_separator(lines[pos + 1])
1162}
1163
1164fn is_table_separator(line: &str) -> bool {
1165    let trimmed = line.trim();
1166    if !trimmed.contains('-') {
1167        return false;
1168    }
1169    let cells = split_table_row(trimmed);
1170    if cells.is_empty() {
1171        return false;
1172    }
1173    cells.into_iter().all(|cell| {
1174        let c = cell.trim();
1175        c.len() >= 3 && c.chars().all(|ch| ch == '-' || ch == ':')
1176    })
1177}
1178
/// Splits a table row into trimmed cell strings.
///
/// Surrounding whitespace and any leading/trailing `|` characters are removed
/// first; the remainder is split on every `|`.
fn split_table_row(line: &str) -> Vec<String> {
    let inner = line.trim().trim_matches('|');
    let mut cells = Vec::new();
    for piece in inner.split('|') {
        cells.push(piece.trim().to_owned());
    }
    cells
}
1186
1187fn parse_list_prefix(line: &str) -> Option<(ListKind, &str)> {
1188    parse_list_prefix_with_indent(line).map(|(kind, rest, _)| (kind, rest))
1189}
1190
1191fn parse_list_prefix_with_indent(line: &str) -> Option<(ListKind, &str, usize)> {
1192    let indent = leading_indent(line);
1193    let trimmed = line.trim_start_matches([' ', '\t']);
1194    if trimmed.len() < 2 {
1195        return None;
1196    }
1197
1198    if (trimmed.starts_with("- ") || trimmed.starts_with("* ") || trimmed.starts_with("+ "))
1199        && trimmed.len() > 2
1200    {
1201        return Some((ListKind::Unordered, &trimmed[2..], indent));
1202    }
1203
1204    let mut digits_end = 0usize;
1205    for (idx, ch) in trimmed.char_indices() {
1206        if ch.is_ascii_digit() {
1207            digits_end = idx + ch.len_utf8();
1208        } else {
1209            break;
1210        }
1211    }
1212
1213    if digits_end == 0 || digits_end + 2 > trimmed.len() {
1214        return None;
1215    }
1216
1217    let marker = trimmed.as_bytes()[digits_end] as char;
1218    if marker != '.' && marker != ')' {
1219        return None;
1220    }
1221    if trimmed.as_bytes()[digits_end + 1] != b' ' {
1222        return None;
1223    }
1224
1225    let start = trimmed[..digits_end].parse::<usize>().ok()?;
1226    Some((ListKind::Ordered(start), &trimmed[digits_end + 2..], indent))
1227}
1228
1229fn same_kind_value(current: ListKind, expected: ListKind) -> bool {
1230    matches!(
1231        (current, expected),
1232        (ListKind::Unordered, ListKind::Unordered) | (ListKind::Ordered(_), ListKind::Ordered(_))
1233    )
1234}
1235
/// Width of the leading run of spaces and tabs, counting a space as one
/// column and a tab as four.
fn leading_indent(line: &str) -> usize {
    line.bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .map(|byte| if byte == b'\t' { 4 } else { 1 })
        .sum()
}
1247
1248fn dedent_list_continuation(line: &str, base_indent: usize) -> &str {
1249    if leading_indent(line) <= base_indent {
1250        return line.trim_start();
1251    }
1252    let mut removed_cols = 0usize;
1253    let mut byte_idx = 0usize;
1254    for (idx, ch) in line.char_indices() {
1255        match ch {
1256            ' ' => {
1257                removed_cols += 1;
1258                byte_idx = idx + 1;
1259            }
1260            '\t' => {
1261                removed_cols += 4;
1262                byte_idx = idx + 1;
1263            }
1264            _ => break,
1265        }
1266        if removed_cols >= base_indent + 2 {
1267            break;
1268        }
1269    }
1270    &line[byte_idx..]
1271}
1272
/// If `html` is exactly one paragraph — it starts with `<p>`, ends with
/// `</p>\n`, and the part in between contains no `\n<p>` — returns that
/// inner part. Otherwise `None`.
fn strip_single_paragraph_wrapper(html: &str) -> Option<&str> {
    let inner = html.strip_prefix("<p>")?.strip_suffix("</p>\n")?;
    if inner.contains("\n<p>") {
        None
    } else {
        Some(inner)
    }
}
1282
/// True when `line` closes a fenced code block opened with `marker`.
///
/// A closing fence is an optional indent of at most three spaces, then a run
/// of at least `min_len` `marker` characters, then nothing but trailing
/// whitespace.
///
/// The run length is measured in characters but the remainder is located by
/// slicing off the matched prefix, so a multi-byte `marker` can never cause
/// an out-of-boundary slice.
fn is_fence_closing_line(line: &str, marker: char, min_len: usize) -> bool {
    let trimmed = line.trim_end();

    // CommonMark allows the closing fence to be indented by up to 3 spaces.
    let indent = trimmed.bytes().take_while(|&b| b == b' ').count();
    if indent > 3 {
        return false;
    }
    let body = &trimmed[indent..];

    let rest = body.trim_start_matches(marker);
    let run_len = (body.len() - rest.len()) / marker.len_utf8();
    run_len >= min_len && rest.is_empty()
}
1291
1292fn parse_table_alignments(separator_line: &str) -> Vec<Option<&'static str>> {
1293    split_table_row(separator_line)
1294        .into_iter()
1295        .map(|cell| {
1296            let c = cell.trim();
1297            let starts = c.starts_with(':');
1298            let ends = c.ends_with(':');
1299            match (starts, ends) {
1300                (true, true) => Some("center"),
1301                (true, false) => Some("left"),
1302                (false, true) => Some("right"),
1303                (false, false) => None,
1304            }
1305        })
1306        .collect()
1307}
1308
/// Appends the opening tag `<tag>` to `out`, adding an `align="..."`
/// attribute when an alignment is given.
fn push_table_cell_open(out: &mut String, tag: &str, align: Option<&str>) {
    out.push('<');
    out.push_str(tag);
    if let Some(side) = align {
        out.push_str(" align=\"");
        out.push_str(side);
        out.push('"');
    }
    out.push('>');
}
1316
1317fn is_block_start(lines: &[&str], pos: usize, config: ParserConfig) -> bool {
1318    parse_setext_heading(lines, pos).is_some()
1319        || is_thematic_break(lines[pos])
1320        || parse_atx_heading(lines[pos]).is_some()
1321        || is_fence_start(lines[pos])
1322        || is_indented_code_line(lines[pos])
1323        || is_blockquote_line(lines[pos])
1324        || is_html_line(lines[pos])
1325        || parse_list_prefix(lines[pos]).is_some()
1326        || (config.enable_tables && is_table_header(lines, pos))
1327}
1328
/// Parses a task-list checkbox at the start of a list item.
///
/// After leading whitespace the item must be at least four bytes long and
/// start with `[x]`, `[X]` (checked) or `[ ]` (unchecked). Returns the
/// checked state and the left-trimmed text after the closing bracket.
fn parse_task_item(item: &str) -> Option<(bool, &str)> {
    let body = item.trim_start();
    if body.len() < 4 {
        return None;
    }

    let (mark, tail) = body.strip_prefix('[')?.split_once(']')?;
    let checked = match mark {
        "x" | "X" => true,
        " " => false,
        _ => return None,
    };
    Some((checked, tail.trim_start()))
}
1344
/// Parses a link reference definition: `[label]: destination "optional title"`.
///
/// Returns the trimmed label and the link destination. The destination is
/// either the text between `<` and `>` (which may contain spaces) or the
/// first whitespace-delimited token after `]:`. Anything after it, such as a
/// quoted title, is not part of the URL and is dropped.
///
/// `None` when the line does not start with `[`, is a footnote definition
/// (`[^`), lacks `]:`, or has an empty label or destination.
fn parse_link_definition(line: &str) -> Option<(&str, &str)> {
    let body = line.strip_prefix('[')?;
    if body.starts_with('^') {
        return None;
    }

    let (label, remainder) = body.split_once("]:")?;
    let label = label.trim();
    let remainder = remainder.trim();

    let bracketed = remainder
        .strip_prefix('<')
        .and_then(|inner| inner.split_once('>'));
    let destination = match bracketed {
        Some((inner, _after)) => inner,
        // Bare form: the URL ends at the first whitespace.
        None => remainder.split_whitespace().next().unwrap_or(""),
    };

    if label.is_empty() || destination.is_empty() {
        return None;
    }
    Some((label, destination))
}
1357
/// Parses a footnote definition line: `[^id]: text`.
///
/// Returns the trimmed id and the trimmed text (which may be empty). `None`
/// when the line does not start with `[^`, lacks `]:`, or the id is empty.
fn parse_footnote_definition(line: &str) -> Option<(&str, &str)> {
    let (raw_id, raw_text) = line.strip_prefix("[^")?.split_once("]:")?;
    let id = raw_id.trim();
    if id.is_empty() {
        None
    } else {
        Some((id, raw_text.trim()))
    }
}
1370
/// How, if at all, a newline inside a paragraph is marked as a hard line break.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HardBreak {
    /// The newline is not a hard break.
    None,
    /// The newline is preceded by two or more spaces.
    Spaces,
    /// The newline is preceded by a backslash.
    Backslash,
}
1377
1378fn detect_hard_break(text: &str, newline_idx: usize) -> HardBreak {
1379    if newline_idx == 0 {
1380        return HardBreak::None;
1381    }
1382
1383    let bytes = text.as_bytes();
1384    let mut idx = newline_idx;
1385    let mut spaces = 0usize;
1386    while idx > 0 && bytes[idx - 1] == b' ' {
1387        spaces += 1;
1388        idx -= 1;
1389    }
1390
1391    if spaces >= 2 {
1392        return HardBreak::Spaces;
1393    }
1394    if idx > 0 && bytes[idx - 1] == b'\\' {
1395        return HardBreak::Backslash;
1396    }
1397    HardBreak::None
1398}
1399
/// Removes every trailing space character (and only spaces) from `out`.
fn trim_trailing_spaces(out: &mut String) {
    let kept = out.trim_end_matches(' ').len();
    out.truncate(kept);
}
1405
/// Parses the parenthesised target of an inline link or image:
/// `(url)`, `(<url>)`, optionally followed by a title in `"..."`, `'...'`
/// or `(...)`.
///
/// `after` must start at the opening `(`. Returns the URL exactly as written
/// (backslash escapes are kept, the title is skipped and not returned)
/// together with the number of bytes consumed, including the closing `)`.
/// Returns `None` when the input does not form a complete target.
fn parse_inline_link_target(after: &str) -> Option<(String, usize)> {
    if !after.starts_with('(') {
        return None;
    }
    let bytes = after.as_bytes();
    let mut i = 1usize;

    // Skip whitespace between `(` and the destination.
    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
        i += 1;
    }
    if i >= bytes.len() {
        return None;
    }

    let url_start = i;
    let url: String;

    if bytes[i] == b'<' {
        // Angle-bracket destination: everything up to `>` on the same line.
        i += 1;
        let start = i;
        while i < bytes.len() && bytes[i] != b'>' {
            if bytes[i] == b'\n' {
                return None;
            }
            i += 1;
        }
        if i >= bytes.len() {
            return None;
        }
        url = after[start..i].to_string();
        i += 1;
    } else {
        // Bare destination: ends at the first unnested `)` or at whitespace
        // outside nested parentheses; `depth` tracks the nesting.
        let mut depth = 0usize;
        while i < bytes.len() {
            let ch = bytes[i] as char;
            // A backslash escapes the following byte, so skip both.
            if ch == '\\' && i + 1 < bytes.len() {
                i += 2;
                continue;
            }
            if ch == '(' {
                depth += 1;
                i += 1;
                continue;
            }
            if ch == ')' {
                if depth == 0 {
                    break;
                }
                depth -= 1;
                i += 1;
                continue;
            }
            if ch.is_ascii_whitespace() && depth == 0 {
                break;
            }
            i += 1;
        }
        // An empty bare destination is rejected.
        if i <= url_start {
            return None;
        }
        url = after[url_start..i].to_string();
    }

    while i < bytes.len() && bytes[i].is_ascii_whitespace() {
        i += 1;
    }

    // Optional title: scanned past (honouring backslash escapes) but discarded.
    if i < bytes.len() && (bytes[i] == b'"' || bytes[i] == b'\'' || bytes[i] == b'(') {
        let quote = bytes[i];
        let closing = if quote == b'(' { b')' } else { quote };
        i += 1;
        while i < bytes.len() && bytes[i] != closing {
            if bytes[i] == b'\\' && i + 1 < bytes.len() {
                i += 2;
            } else {
                i += 1;
            }
        }
        // Unterminated title.
        if i >= bytes.len() {
            return None;
        }
        i += 1;
        while i < bytes.len() && bytes[i].is_ascii_whitespace() {
            i += 1;
        }
    }

    // The target must end with the closing parenthesis.
    if i >= bytes.len() || bytes[i] != b')' {
        return None;
    }

    Some((url, i + 1))
}
1499
1500fn parse_autolink_literal(text: &str) -> Option<(String, String, usize)> {
1501    if text.starts_with("https://") || text.starts_with("http://") {
1502        let link = parse_url_like_token(text)?;
1503        return Some((link.to_string(), link.to_string(), link.len()));
1504    }
1505    if text.starts_with("www.") {
1506        let link = parse_url_like_token(text)?;
1507        return Some((format!("http://{link}"), link.to_string(), link.len()));
1508    }
1509    if let Some((email, consumed)) = parse_email_literal(text) {
1510        return Some((format!("mailto:{email}"), email, consumed));
1511    }
1512    None
1513}
1514
/// Extracts a URL-like token from the start of `text`.
///
/// The token runs up to the first whitespace character or `<`; trailing
/// `.`, `,`, `;`, `:`, `!` and `?` are then removed. Returns `None` when
/// nothing is left.
fn parse_url_like_token(text: &str) -> Option<&str> {
    let stop = text
        .find(|c: char| c.is_whitespace() || c == '<')
        .unwrap_or(text.len());
    let token = text[..stop].trim_end_matches(['.', ',', ';', ':', '!', '?']);

    if token.is_empty() {
        None
    } else {
        Some(token)
    }
}
1541
1542fn parse_email_literal(text: &str) -> Option<(String, usize)> {
1543    let mut end = 0usize;
1544    let mut at_pos: Option<usize> = None;
1545
1546    for (idx, ch) in text.char_indices() {
1547        if ch.is_whitespace() || ch == '<' {
1548            break;
1549        }
1550        if ch == '@' {
1551            at_pos = Some(idx);
1552        }
1553        end = idx + ch.len_utf8();
1554    }
1555
1556    if end == 0 {
1557        return None;
1558    }
1559    let mut candidate_end = end;
1560    while candidate_end > 0 {
1561        let ch = text[..candidate_end].chars().next_back().unwrap_or('\0');
1562        if matches!(ch, '.' | ',' | ';' | ':' | '!' | '?') {
1563            candidate_end -= ch.len_utf8();
1564        } else {
1565            break;
1566        }
1567    }
1568    if candidate_end == 0 {
1569        return None;
1570    }
1571
1572    let candidate = &text[..candidate_end];
1573    let at = at_pos?;
1574    if at == 0 || at >= candidate.len() - 1 {
1575        return None;
1576    }
1577
1578    let local = &candidate[..at];
1579    let domain = &candidate[at + 1..];
1580    if !is_email_local(local) || !is_email_domain(domain) {
1581        return None;
1582    }
1583    Some((candidate.to_string(), candidate_end))
1584}
1585
/// True when `local` is non-empty and every character is an ASCII letter or
/// digit, or one of the punctuation characters accepted in the local part.
fn is_email_local(local: &str) -> bool {
    const ALLOWED_PUNCTUATION: &str = "!#$%&'*+-/=?^_`{|}~.";

    if local.is_empty() {
        return false;
    }
    local
        .chars()
        .all(|ch| ch.is_ascii_alphanumeric() || ALLOWED_PUNCTUATION.contains(ch))
}
1614
/// True when `domain` contains at least one dot and every dot-separated
/// label is non-empty, made of ASCII letters, digits and hyphens only, and
/// neither starts nor ends with a hyphen.
fn is_email_domain(domain: &str) -> bool {
    let label_ok = |label: &str| {
        !label.is_empty()
            && !label.starts_with('-')
            && !label.ends_with('-')
            && label
                .chars()
                .all(|ch| ch.is_ascii_alphanumeric() || ch == '-')
    };

    domain.contains('.') && domain.split('.').all(label_ok)
}
1632
1633fn parse_angle_autolink(text: &str) -> Option<(String, usize)> {
1634    if !text.starts_with('<') {
1635        return None;
1636    }
1637    let end = text.find('>')?;
1638    let inner = &text[1..end];
1639    if inner.starts_with("http://") || inner.starts_with("https://") {
1640        let esc = html_escape(inner);
1641        return Some((format!("<a href=\"{esc}\">{esc}</a>"), end + 1));
1642    }
1643    if inner.contains('@') && !inner.contains(' ') {
1644        let esc = html_escape(inner);
1645        return Some((format!("<a href=\"mailto:{esc}\">{esc}</a>"), end + 1));
1646    }
1647    None
1648}
1649
/// Recognizes a raw inline HTML construct at the start of `text`.
///
/// Accepted shapes: comments (`<!-- ... -->`), processing instructions
/// (`<? ... ?>`), declarations (`<! ... >`), and opening/closing tags whose
/// name consists of ASCII letters, digits or `-` and which close with `>`
/// before any newline. Returns the matched slice and its length in bytes.
fn parse_inline_html(text: &str) -> Option<(&str, usize)> {
    if !text.starts_with('<') {
        return None;
    }

    // Order matters: `<!--` must be tried before the shorter `<!`.
    let delimited = [("<!--", "-->"), ("<?", "?>"), ("<!", ">")];
    for &(open, close) in delimited.iter() {
        if text.starts_with(open) {
            let end = text.find(close)? + close.len();
            return Some((&text[..end], end));
        }
    }

    let bytes = text.as_bytes();
    if bytes.len() < 3 {
        return None;
    }

    let name_start = if bytes[1] == b'/' { 2 } else { 1 };
    let name_len = bytes[name_start..]
        .iter()
        .take_while(|byte| byte.is_ascii_alphanumeric() || **byte == b'-')
        .count();
    if name_len == 0 {
        return None;
    }

    let tail_start = name_start + name_len;
    let offset = bytes[tail_start..]
        .iter()
        .position(|&byte| byte == b'>' || byte == b'\n')?;
    if bytes[tail_start + offset] == b'\n' {
        return None;
    }

    let end = tail_start + offset + 1;
    Some((&text[..end], end))
}
1703
/// Parses a code span that starts at the beginning of `text`.
///
/// The span opens with a run of N backticks and closes at the next run of
/// exactly N backticks. Longer or shorter backtick runs in between belong to
/// the content, so `` `a``b` `` is one span containing ``a``b``.
///
/// Returns the raw content between the delimiters and the total number of
/// bytes consumed (both delimiters included), or `None` when `text` does not
/// start with a backtick or no matching closing run exists.
fn parse_code_span(text: &str) -> Option<(&str, usize)> {
    let ticks = text.bytes().take_while(|&b| b == b'`').count();
    if ticks == 0 {
        return None;
    }

    // The backtick is ASCII, so `ticks` is also a valid byte offset.
    let rest = &text[ticks..];
    let bytes = rest.as_bytes();
    let mut pos = 0usize;

    while pos < bytes.len() {
        if bytes[pos] != b'`' {
            pos += 1;
            continue;
        }
        // Measure the whole backtick run; only an exact-length run closes.
        let run_start = pos;
        while pos < bytes.len() && bytes[pos] == b'`' {
            pos += 1;
        }
        if pos - run_start == ticks {
            return Some((&rest[..run_start], ticks + run_start + ticks));
        }
    }
    None
}
1714
/// If `text` starts with a backslash followed by at least one character,
/// returns that character and the bytes consumed (backslash included).
fn parse_escaped_char(text: &str) -> Option<(char, usize)> {
    let escaped = text.strip_prefix('\\')?.chars().next()?;
    Some((escaped, 1 + escaped.len_utf8()))
}
1724
/// Extracts a `[...]` label from the start of `text`.
///
/// Nested bracket pairs are balanced and a backslash makes the following
/// byte literal. Returns the text between the outer brackets and the number
/// of bytes consumed (both brackets included); `None` when `text` does not
/// start with `[` or the label is never closed.
fn parse_bracketed_label(text: &str) -> Option<(&str, usize)> {
    let inner = text.strip_prefix('[')?;

    let mut nesting = 0usize;
    let mut escaped = false;
    for (pos, byte) in inner.bytes().enumerate() {
        if escaped {
            // This byte was escaped by the preceding backslash.
            escaped = false;
            continue;
        }
        match byte {
            b'\\' => escaped = true,
            b'[' => nesting += 1,
            // `pos` is relative to `inner`; +2 accounts for both brackets.
            b']' if nesting == 0 => return Some((&inner[..pos], pos + 2)),
            b']' => nesting -= 1,
            _ => {}
        }
    }

    None
}
1759
/// Matches `marker` + non-empty content + `marker` at the start of `text`.
///
/// The content ends at the first later occurrence of `marker`. Returns the
/// content and the total bytes consumed (both markers included); `None` when
/// `text` does not start with `marker`, the marker never recurs, or the
/// content would be empty.
fn wrapped<'a>(text: &'a str, marker: &str) -> Option<(&'a str, usize)> {
    let tail = text.strip_prefix(marker)?;
    match tail.find(marker) {
        Some(end) if end > 0 => Some((&tail[..end], end + 2 * marker.len())),
        _ => None,
    }
}
1774
/// Normalizes a link or footnote label so that definitions and references
/// can be matched against each other.
///
/// Following the CommonMark label-matching rules, the label is compared
/// case-insensitively and whitespace-insensitively: leading and trailing
/// whitespace is dropped, every internal whitespace run collapses to a
/// single space, and the result is lowercased. Unicode-aware lowercasing is
/// used as an approximation of Unicode case folding, so `[ÉCOLE]` matches
/// `[école]`.
fn normalize_key(text: &str) -> String {
    text.split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}
1778
/// Turns a footnote key into an id fragment: ASCII letters, digits, `-` and
/// `_` are kept, every other character becomes `-`.
fn footnote_id(key: &str) -> String {
    key.chars()
        .map(|ch| {
            if ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_') {
                ch
            } else {
                '-'
            }
        })
        .collect()
}
1790
/// Appends `ch` to `out`, replacing `&`, `<`, `>`, `"` and `'` with their
/// HTML entities.
fn push_escaped_char(out: &mut String, ch: char) {
    let entity = match ch {
        '&' => "&amp;",
        '<' => "&lt;",
        '>' => "&gt;",
        '"' => "&quot;",
        '\'' => "&#39;",
        plain => {
            out.push(plain);
            return;
        }
    };
    out.push_str(entity);
}
1801
1802fn html_escape(text: &str) -> String {
1803    let mut out = String::with_capacity(text.len());
1804    for ch in text.chars() {
1805        push_escaped_char(&mut out, ch);
1806    }
1807    out
1808}
1809
/// Escapes `text` for use inside an HTML attribute value.
///
/// Delegates to `html_escape`, so the result is identical to escaping the
/// same text for element content.
fn html_attr_escape(text: &str) -> String {
    html_escape(text)
}
1813
1814#[cfg(test)]
1815mod tests {
1816    use super::{parse, parse_with_flavor, safe_parse, safe_parse_with_flavor, MarkdownFlavor};
1817
1818    #[test]
1819    fn renders_table_in_gfm() {
1820        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
1821        let html = parse(md);
1822        assert!(html.contains("<table>"));
1823        assert!(html.contains("<thead>"));
1824        assert!(html.contains("<tbody>"));
1825    }
1826
1827    #[test]
1828    fn does_not_render_table_in_commonmark() {
1829        let md = "| a | b |\n|---|---|\n| 1 | 2 |";
1830        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
1831        assert!(!html.contains("<table>"));
1832    }
1833
1834    #[test]
1835    fn renders_strikethrough_only_in_gfm() {
1836        let gfm = parse_with_flavor("~~done~~", MarkdownFlavor::Gfm);
1837        let cm = parse_with_flavor("~~done~~", MarkdownFlavor::CommonMark);
1838        assert!(gfm.contains("<del>done</del>"));
1839        assert!(!cm.contains("<del>done</del>"));
1840    }
1841
1842    #[test]
1843    fn renders_task_list_only_in_gfm() {
1844        let gfm = parse_with_flavor("- [x] finish", MarkdownFlavor::Gfm);
1845        let cm = parse_with_flavor("- [x] finish", MarkdownFlavor::CommonMark);
1846        assert!(gfm.contains("type=\"checkbox\""));
1847        assert!(!cm.contains("type=\"checkbox\""));
1848    }
1849
1850    #[test]
1851    fn renders_autolink_literal_only_in_gfm() {
1852        let gfm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::Gfm);
1853        let cm = parse_with_flavor("visit https://example.com now", MarkdownFlavor::CommonMark);
1854        assert!(gfm.contains("<a href=\"https://example.com\">https://example.com</a>"));
1855        assert!(!cm.contains("<a href=\"https://example.com\">https://example.com</a>"));
1856    }
1857
1858    #[test]
1859    fn renders_footnotes_only_in_gfm() {
1860        let md = "note[^1]\n\n[^1]: footnote";
1861        let gfm = parse_with_flavor(md, MarkdownFlavor::Gfm);
1862        let cm = parse_with_flavor(md, MarkdownFlavor::CommonMark);
1863        assert!(gfm.contains("footnote-ref"));
1864        assert!(gfm.contains("footnotes"));
1865        assert!(!cm.contains("footnote-ref"));
1866    }
1867
1868    #[test]
1869    fn renders_reference_links() {
1870        let md = "[Rust]\n\n[Rust]: https://www.rust-lang.org/";
1871        let html = parse(md);
1872        assert!(html.contains("<a href=\"https://www.rust-lang.org/\">Rust</a>"));
1873    }
1874
1875    #[test]
1876    fn blocks_script_in_safe_parse() {
1877        let md = "<script>alert(1)</script>";
1878        assert!(safe_parse(md).is_err());
1879    }
1880
1881    #[test]
1882    fn safe_parse_flavor_works() {
1883        let html = safe_parse_with_flavor("~~x~~", MarkdownFlavor::CommonMark).unwrap();
1884        assert!(!html.contains("<del>x</del>"));
1885    }
1886
1887    #[test]
1888    fn renders_ordered_list_with_start() {
1889        let html = parse("3. three\n4. four");
1890        assert!(html.contains("<ol start=\"3\">"));
1891        assert!(html.contains("<li>three</li>"));
1892    }
1893
1894    #[test]
1895    fn renders_nested_list() {
1896        let html = parse("- parent\n  - child\n- next");
1897        assert!(html.matches("<ul>").count() >= 2);
1898        assert!(html.contains("child"));
1899    }
1900
1901    #[test]
1902    fn parses_link_with_title_and_parentheses() {
1903        let html = parse("[x](https://example.com/a_(b) \"title\")");
1904        assert!(html.contains("href=\"https://example.com/a_(b)\""));
1905    }
1906
1907    #[test]
1908    fn renders_gfm_literal_www_and_email_autolinks() {
1909        let html = parse_with_flavor(
1910            "visit www.example.com or me@example.com",
1911            MarkdownFlavor::Gfm,
1912        );
1913        assert!(html.contains("href=\"http://www.example.com\""));
1914        assert!(html.contains("href=\"mailto:me@example.com\""));
1915    }
1916
1917    #[test]
1918    fn renders_hard_line_breaks() {
1919        let html_spaces = parse("a  \nb");
1920        let html_backslash = parse("a\\\nb");
1921        assert!(html_spaces.contains("a<br />\nb"));
1922        assert!(html_backslash.contains("a<br />\nb"));
1923    }
1924
1925    #[test]
1926    fn parse_preserves_inline_html_in_gfm_and_commonmark() {
1927        let cm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
1928        let gfm = parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
1929        assert!(cm.contains("<span>y</span>"));
1930        assert!(gfm.contains("<span>y</span>"));
1931    }
1932
1933    #[test]
1934    fn parse_preserves_html_block_in_gfm_and_commonmark() {
1935        let cm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
1936        let gfm = parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
1937        assert!(cm.contains("<div>"));
1938        assert!(cm.contains("</div>"));
1939        assert!(gfm.contains("<div>"));
1940        assert!(gfm.contains("</div>"));
1941    }
1942
1943    #[test]
1944    fn safe_parse_rejects_inline_html() {
1945        let cm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::CommonMark);
1946        let gfm = safe_parse_with_flavor("x <span>y</span>", MarkdownFlavor::Gfm);
1947        assert!(cm.is_err());
1948        assert!(gfm.is_err());
1949    }
1950
1951    #[test]
1952    fn safe_parse_rejects_html_block() {
1953        let cm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::CommonMark);
1954        let gfm = safe_parse_with_flavor("<div>\ninside\n</div>", MarkdownFlavor::Gfm);
1955        assert!(cm.is_err());
1956        assert!(gfm.is_err());
1957    }
1958
1959    #[test]
1960    fn supports_setext_heading_and_blockquote() {
1961        let html = parse("Title\n---\n\n> quote");
1962        assert!(html.contains("<h2>Title</h2>"));
1963        assert!(html.contains("<blockquote>"));
1964    }
1965
1966    #[test]
1967    fn supports_table_alignment_in_gfm() {
1968        let md = "| a | b | c |\n| :-- | :-: | --: |\n| 1 | 2 | 3 |";
1969        let html = parse(md);
1970        assert!(html.contains("<th align=\"left\">a</th>"));
1971        assert!(html.contains("<th align=\"center\">b</th>"));
1972        assert!(html.contains("<th align=\"right\">c</th>"));
1973    }
1974
1975    #[test]
1976    fn renders_mermaid_chart_in_gfm() {
1977        let md = "```mermaid\nflowchart TD\nA-->B\n```";
1978        let html = parse_with_flavor(md, MarkdownFlavor::Gfm);
1979        assert!(html.contains("<pre class=\"mermaid\">flowchart TD\nA--&gt;B</pre>"));
1980    }
1981
1982    #[test]
1983    fn keeps_mermaid_as_code_in_commonmark() {
1984        let md = "```mermaid\nflowchart TD\nA-->B\n```";
1985        let html = parse_with_flavor(md, MarkdownFlavor::CommonMark);
1986        assert!(html
1987            .contains("<pre><code class=\"language-mermaid\">flowchart TD\nA--&gt;B</code></pre>"));
1988    }
1989
1990    #[test]
1991    fn appends_mermaid_runtime_for_gfm_file_output() {
1992        let html = super::with_chart_runtime_if_needed(
1993            "<pre class=\"mermaid\">graph TD\nA--&gt;B</pre>\n".to_string(),
1994            MarkdownFlavor::Gfm,
1995        );
1996        assert!(html.contains("mermaid.min.js"));
1997        assert!(html.contains("mermaid.initialize({ startOnLoad: true })"));
1998    }
1999
2000    #[test]
2001    fn does_not_append_mermaid_runtime_for_commonmark() {
2002        let html = super::with_chart_runtime_if_needed(
2003            "<pre><code class=\"language-mermaid\">graph TD\nA--&gt;B</code></pre>\n".to_string(),
2004            MarkdownFlavor::CommonMark,
2005        );
2006        assert!(!html.contains("mermaid.min.js"));
2007    }
2008
2009    #[test]
2010    fn safe_parse_blocks_script_variants() {
2011        assert!(safe_parse("<script>alert(1)</script>").is_err());
2012        assert!(safe_parse("<ScRiPt src=x></ScRiPt>").is_err());
2013        assert!(safe_parse("< / script >").is_err());
2014        assert!(safe_parse("<  script>").is_err());
2015    }
2016
2017    #[test]
2018    fn renders_link_wrapped_image_badge() {
2019        let md = "[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white)](https://t.me/+Ka9i6CNwe71hMWQy)";
2020        let html = parse(md);
2021        assert!(html.contains(
2022            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&amp;logoColor=white\" alt=\"Telegram\" /></a>"
2023        ));
2024    }
2025
2026    #[test]
2027    fn renders_discord_and_telegram_badges_together() {
2028        let md = "![Discord](https://discord.gg/2xrMh7qX6m)⠀[![Telegram](https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&logoColor=white)](https://t.me/+Ka9i6CNwe71hMWQy)";
2029        let html = parse(md);
2030        assert!(html.contains("<img src=\"https://discord.gg/2xrMh7qX6m\" alt=\"Discord\" />"));
2031        assert!(html.contains(
2032            "<a href=\"https://t.me/+Ka9i6CNwe71hMWQy\"><img src=\"https://img.shields.io/badge/Telegram-2CA5E0?logo=telegram&amp;logoColor=white\" alt=\"Telegram\" /></a>"
2033        ));
2034    }
2035}
umark_lib/lib.rs

umark_lib/
lib.rs