Skip to main content

mq_markdown/
markdown.rs

1#[cfg(feature = "html-to-markdown")]
2use crate::html_to_markdown;
3#[cfg(feature = "html-to-markdown")]
4use crate::html_to_markdown::ConversionOptions;
5use crate::node::{ColorTheme, Node, Position, RenderOptions, TableAlign, TableCell, render_values};
6use markdown::Constructs;
7use miette::miette;
8use std::{fmt, str::FromStr};
9
10#[derive(Debug, Clone)]
11pub struct Markdown {
12    pub nodes: Vec<Node>,
13    pub options: RenderOptions,
14}
15
16impl FromStr for Markdown {
17    type Err = miette::Error;
18
19    fn from_str(content: &str) -> Result<Self, Self::Err> {
20        Self::from_markdown_str(content)
21    }
22}
23
24impl fmt::Display for Markdown {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        write!(f, "{}", self.render_with_theme(&ColorTheme::PLAIN))
27    }
28}
29
30impl Markdown {
31    pub fn new(nodes: Vec<Node>) -> Self {
32        Self {
33            nodes,
34            options: RenderOptions::default(),
35        }
36    }
37
38    pub fn set_options(&mut self, options: RenderOptions) {
39        self.options = options;
40    }
41
42    /// Returns a colored string representation of the markdown using ANSI escape codes.
43    #[cfg(feature = "color")]
44    pub fn to_colored_string(&self) -> String {
45        self.render_with_theme(&ColorTheme::COLORED)
46    }
47
48    /// Returns a colored string representation using the given color theme.
49    #[cfg(feature = "color")]
50    pub fn to_colored_string_with_theme(&self, theme: &ColorTheme<'_>) -> String {
51        self.render_with_theme(theme)
52    }
53
54    fn render_with_theme(&self, theme: &ColorTheme<'_>) -> String {
55        let mut pre_position: Option<Position> = None;
56        let mut is_first = true;
57        let mut current_table_row: Option<usize> = None;
58        let mut in_table = false;
59
60        let mut buffer = String::with_capacity(self.nodes.len() * 50);
61
62        for (i, node) in self.nodes.iter().enumerate() {
63            if let Node::TableCell(TableCell { row, values, .. }) = node {
64                let value = render_values(values, &self.options, theme);
65
66                let is_new_row = current_table_row != Some(*row);
67
68                if is_new_row {
69                    if current_table_row.is_some() {
70                        buffer.push_str("|\n");
71                    } else if !in_table && let Some(pos) = node.position() {
72                        // Insert newlines before the first row of a table
73                        let new_line_count = pre_position
74                            .as_ref()
75                            .map(|p| pos.start.line.saturating_sub(p.end.line))
76                            .unwrap_or_else(|| if is_first { 0 } else { 1 })
77                            .min(2);
78                        for _ in 0..new_line_count {
79                            buffer.push('\n');
80                        }
81                    }
82                    current_table_row = Some(*row);
83                }
84
85                buffer.push('|');
86                buffer.push_str(&value);
87
88                let next_node = self.nodes.get(i + 1);
89                let next_is_different_row = next_node.is_none_or(
90                    |next| !matches!(next, Node::TableCell(TableCell { row: next_row, .. }) if *next_row == *row),
91                );
92
93                if next_is_different_row {
94                    buffer.push_str("|\n");
95                    current_table_row = None;
96                }
97
98                pre_position = node.position();
99                is_first = false;
100                in_table = true;
101                continue;
102            }
103
104            if let Node::TableAlign(TableAlign { align, .. }) = node {
105                use itertools::Itertools;
106                buffer.push('|');
107                buffer.push_str(&align.iter().map(|a| a.to_string()).join("|"));
108                buffer.push_str("|\n");
109                pre_position = node.position();
110                is_first = false;
111                in_table = true;
112                continue;
113            }
114
115            current_table_row = None;
116            in_table = false;
117
118            let value = node.render_with_theme(&self.options, theme);
119
120            if value.is_empty() || value == "\n" {
121                pre_position = None;
122                continue;
123            }
124
125            if let Some(pos) = node.position() {
126                let new_line_count = pre_position
127                    .as_ref()
128                    .map(|p| pos.start.line.saturating_sub(p.end.line))
129                    .unwrap_or_else(|| if is_first { 0 } else { 1 })
130                    .min(2);
131
132                pre_position = Some(pos.clone());
133
134                for _ in 0..new_line_count {
135                    buffer.push('\n');
136                }
137                buffer.push_str(&value);
138            } else {
139                if !is_first {
140                    buffer.push('\n');
141                }
142                pre_position = None;
143                buffer.push_str(&value);
144            }
145
146            if is_first {
147                is_first = false;
148            }
149        }
150
151        if buffer.is_empty() || buffer.ends_with('\n') {
152            buffer
153        } else {
154            buffer.push('\n');
155            buffer
156        }
157    }
158
159    pub fn from_mdx_str(content: &str) -> miette::Result<Self> {
160        let root = markdown::to_mdast(content, &markdown::ParseOptions::mdx()).map_err(|e| miette!(e.reason))?;
161        let nodes = Node::from_mdast_node(root);
162
163        Ok(Self {
164            nodes,
165            options: RenderOptions::default(),
166        })
167    }
168
169    pub fn to_html(&self) -> String {
170        markdown::to_html(self.to_string().as_str())
171    }
172
173    pub fn to_text(&self) -> String {
174        let mut result = String::with_capacity(self.nodes.len() * 20); // Reasonable estimate
175        for node in &self.nodes {
176            result.push_str(&node.value());
177            result.push('\n');
178        }
179        result
180    }
181
182    #[cfg(feature = "json")]
183    pub fn to_json(&self) -> miette::Result<String> {
184        let nodes = self
185            .nodes
186            .iter()
187            .filter(|node| !node.is_empty() && !node.is_empty_fragment())
188            .collect::<Vec<_>>();
189        serde_json::to_string_pretty(&nodes).map_err(|e| miette!("Failed to serialize to JSON: {}", e))
190    }
191
192    #[cfg(feature = "html-to-markdown")]
193    pub fn from_html_str(content: &str) -> miette::Result<Self> {
194        Self::from_html_str_with_options(content, ConversionOptions::default())
195    }
196
197    #[cfg(feature = "html-to-markdown")]
198    pub fn from_html_str_with_options(content: &str, options: ConversionOptions) -> miette::Result<Self> {
199        html_to_markdown::convert_html_to_markdown(content, options)
200            .map_err(|e| miette!(e))
201            .and_then(|md_string| Self::from_markdown_str(&md_string))
202    }
203
204    pub fn from_markdown_str(content: &str) -> miette::Result<Self> {
205        let root = markdown::to_mdast(
206            content,
207            &markdown::ParseOptions {
208                gfm_strikethrough_single_tilde: true,
209                math_text_single_dollar: true,
210                mdx_expression_parse: None,
211                mdx_esm_parse: None,
212                constructs: Constructs {
213                    attention: true,
214                    autolink: true,
215                    block_quote: true,
216                    character_escape: true,
217                    character_reference: true,
218                    code_indented: true,
219                    code_fenced: true,
220                    code_text: true,
221                    definition: true,
222                    frontmatter: true,
223                    gfm_autolink_literal: true,
224                    gfm_label_start_footnote: true,
225                    gfm_footnote_definition: true,
226                    gfm_strikethrough: true,
227                    gfm_table: true,
228                    gfm_task_list_item: true,
229                    hard_break_escape: true,
230                    hard_break_trailing: true,
231                    heading_atx: true,
232                    heading_setext: true,
233                    html_flow: true,
234                    html_text: true,
235                    label_start_image: true,
236                    label_start_link: true,
237                    label_end: true,
238                    list_item: true,
239                    math_flow: true,
240                    math_text: true,
241                    mdx_esm: false,
242                    mdx_expression_flow: false,
243                    mdx_expression_text: false,
244                    mdx_jsx_flow: false,
245                    mdx_jsx_text: false,
246                    thematic_break: true,
247                },
248            },
249        )
250        .map_err(|e| miette!(e.reason))?;
251        let nodes = Node::from_mdast_node(root);
252
253        Ok(Self {
254            nodes,
255            options: RenderOptions::default(),
256        })
257    }
258}
259
260pub fn to_html(s: &str) -> String {
261    markdown::to_html(s)
262}
263
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use crate::ListStyle;

    use super::*;

    /// Parses each input, then checks the number of nodes and the re-rendered text.
    #[rstest]
    #[case::header("# Title", 1, "# Title\n")]
    #[case::header_then_paragraph("# Title\nParagraph", 2, "# Title\nParagraph\n")]
    #[case::header_blank_line_paragraph("# Title\n\nParagraph", 2, "# Title\n\nParagraph\n")]
    #[case::list("- Item 1\n- Item 2", 2, "- Item 1\n- Item 2\n")]
    #[case::quote("> Quote\n>Second line", 1, "> Quote\n> Second line\n")]
    #[case::code("```rust\nlet x = 1;\n```", 1, "```rust\nlet x = 1;\n```\n")]
    #[case::toml("+++\n[test]\ntest = 1\n+++", 1, "+++\n[test]\ntest = 1\n+++\n")]
    #[case::code_inline("`inline`", 1, "`inline`\n")]
    #[case::math_inline("$math$", 1, "$math$\n")]
    #[case::math("$$\nmath\n$$", 1, "$$\nmath\n$$\n")]
    #[case::html("<div>test</div>", 1, "<div>test</div>\n")]
    #[case::footnote("[^a]: b", 1, "[^a]: b\n")]
    #[case::definition("[a]: b", 1, "[a]: b\n")]
    #[case::footnote_ref("[^a]: b\n\n[^a]", 2, "[^a]: b\n[^a]\n")]
    #[case::image("![a](b)", 1, "![a](b)\n")]
    #[case::image_with_title("![a](b \"c\")", 1, "![a](b \"c\")\n")]
    #[case::image_ref("[a]: b\n\n ![c][a]", 2, "[a]: b\n\n![c][a]\n")]
    #[case::yaml(
        "---\ntitle: Test\ndescription: YAML front matter\n---\n",
        1,
        "---\ntitle: Test\ndescription: YAML front matter\n---\n"
    )]
    #[case::link("[a](b)", 1, "[a](b)\n")]
    #[case::link_ref("[a]: b\n\n[c][a]", 2, "[a]: b\n\n[c][a]\n")]
    #[case::break_("a\\b", 1, "a\\b\n")]
    #[case::delete("~~a~~", 1, "~~a~~\n")]
    #[case::emphasis("*a*", 1, "*a*\n")]
    #[case::horizontal_rule("---", 1, "---\n")]
    #[case::table(
        "| Column1 | Column2 | Column3 |\n|:--------|:--------:|---------:|\n| Left    | Center  | Right   |\n",
        7,
        "|Column1|Column2|Column3|\n|:---|:---:|---:|\n|Left|Center|Right|\n"
    )]
    #[case::table_after_paragraph(
        "Paragraph\n\n| A | B |\n|---|---|\n| 1 | 2 |\n",
        6,
        "Paragraph\n\n|A|B|\n|---|---|\n|1|2|\n"
    )]
    #[case::table_after_heading(
        "# Title\n\n| A | B |\n|---|---|\n| 1 | 2 |\n",
        6,
        "# Title\n\n|A|B|\n|---|---|\n|1|2|\n"
    )]
    #[case::excessive_blank_lines("# Title\n\n\n\nParagraph", 2, "# Title\n\nParagraph\n")]
    #[case::three_blank_lines("Para 1\n\n\n\n\nPara 2", 2, "Para 1\n\nPara 2\n")]
    fn test_markdown_from_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = input.parse::<Markdown>().unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    /// Same round-trip check as above, but parsing the input as MDX.
    #[rstest]
    #[case::expression("{test}", 1, "{test}\n")]
    #[case::jsx_self_closing("<a />", 1, "<a />\n")]
    #[case::jsx_spread("<MyComponent {...props}/>", 1, "<MyComponent {...props} />\n")]
    #[case::jsx_inline("text<MyComponent {...props}/>text", 3, "text<MyComponent {...props} />text\n")]
    #[case::jsx_attributes(
        "<Chart color=\"#fcb32c\" year={year} />",
        1,
        "<Chart color=\"#fcb32c\" year={year} />\n"
    )]
    fn test_markdown_from_mdx_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_mdx_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    #[test]
    fn test_markdown_to_html() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let html = md.to_html();
        assert_eq!(html, "<h1>Hello</h1>\n");
    }

    #[test]
    fn test_markdown_to_text() {
        let md = "# Hello\n\nWorld".parse::<Markdown>().unwrap();
        let text = md.to_text();
        assert_eq!(text, "Hello\nWorld\n");
    }

    #[test]
    fn test_render_options() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();
        assert_eq!(md.options, RenderOptions::default());

        md.set_options(RenderOptions {
            list_style: ListStyle::Plus,
            ..RenderOptions::default()
        });
        assert_eq!(md.options.list_style, ListStyle::Plus);

        let pretty = md.to_string();
        assert!(pretty.contains("+ Item 1"));
    }

    #[test]
    fn test_display_simple() {
        let md = "# Header\nParagraph".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nParagraph\n");
    }

    #[test]
    fn test_display_with_empty_nodes() {
        let md = "# Header\nContent".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nContent\n");
    }

    #[test]
    fn test_display_with_newlines() {
        let md = "# Header\n\nParagraph 1\n\nParagraph 2".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\n\nParagraph 1\n\nParagraph 2\n");
    }

    #[test]
    fn test_display_format_lists() {
        let md = "- Item 1\n- Item 2\n- Item 3".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "- Item 1\n- Item 2\n- Item 3\n");
    }

    #[test]
    fn test_display_with_different_list_styles() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();

        // Only the list style matters here; struct-update syntax keeps this test compiling
        // when `RenderOptions` gains new fields.
        md.set_options(RenderOptions {
            list_style: ListStyle::Star,
            ..RenderOptions::default()
        });

        let formatted = md.to_string();
        assert!(formatted.contains("* Item 1"));
        assert!(formatted.contains("* Item 2"));
    }

    #[test]
    fn test_display_with_ordered_list() {
        let md = "1. Item 1\n2. Item 2\n\n3. Item 2".parse::<Markdown>().unwrap();
        let formatted = md.to_string();

        assert!(formatted.contains("1. Item 1"));
        assert!(formatted.contains("2. Item 2"));
        assert!(formatted.contains("3. Item 2"));
    }
}
419
#[cfg(test)]
#[cfg(feature = "color")]
mod color_tests {
    use rstest::rstest;

    use super::*;

    /// Checks the exact ANSI-decorated output of the default colored theme per node kind.
    #[rstest]
    #[case::heading("# Title", "\x1b[1m\x1b[36m# Title\x1b[0m\n")]
    #[case::emphasis("*italic*", "\x1b[3m\x1b[33m*italic*\x1b[0m\n")]
    #[case::strong("**bold**", "\x1b[1m**bold**\x1b[0m\n")]
    #[case::code_inline("`code`", "\x1b[32m`code`\x1b[0m\n")]
    #[case::code_block("```rust\nlet x = 1;\n```", "\x1b[32m```rust\nlet x = 1;\n```\x1b[0m\n")]
    #[case::link("[text](url)", "\x1b[4m\x1b[34m[text](url)\x1b[0m\n")]
    #[case::image("![alt](url)", "\x1b[35m![alt](url)\x1b[0m\n")]
    #[case::delete("~~deleted~~", "\x1b[31m\x1b[2m~~deleted~~\x1b[0m\n")]
    #[case::horizontal_rule("---", "\x1b[2m---\x1b[0m\n")]
    #[case::blockquote("> quote", "\x1b[2m> \x1b[0mquote\n")]
    #[case::math_inline("$x^2$", "\x1b[32m$x^2$\x1b[0m\n")]
    #[case::list("- item", "\x1b[33m-\x1b[0m item\n")]
    fn test_to_colored_string(#[case] input: &str, #[case] expected: &str) {
        let document: Markdown = input.parse().unwrap();
        let rendered = document.to_colored_string();
        assert_eq!(rendered, expected);
    }

    /// Colored output must contain at least one escape sequence and a reset sequence.
    #[test]
    fn test_colored_output_contains_ansi_codes() {
        let document: Markdown = "# Hello\n\n**bold** and *italic*".parse().unwrap();
        let rendered = document.to_colored_string();

        assert!(rendered.contains("\x1b["));
        assert!(rendered.contains("\x1b[0m"));
    }

    /// The `Display` output must not contain any escape sequence.
    #[test]
    fn test_plain_output_has_no_ansi_codes() {
        let document: Markdown = "# Hello\n\n**bold** and *italic*".parse().unwrap();
        let rendered = document.to_string();

        assert!(!rendered.contains("\x1b["));
    }

    #[test]
    fn test_parse_colors_overrides_specified_keys() {
        let parsed = ColorTheme::parse_colors("heading=1;31:code=34");
        assert_eq!(parsed.heading.0, "\x1b[1;31m");
        assert_eq!(parsed.heading.1, "\x1b[0m");
        assert_eq!(parsed.code.0, "\x1b[34m");
        assert_eq!(parsed.code.1, "\x1b[0m");
        // Keys that were not specified keep the default colored theme's value.
        assert_eq!(parsed.emphasis, ColorTheme::COLORED.emphasis);
    }

    #[test]
    fn test_parse_colors_ignores_invalid_entries() {
        let parsed = ColorTheme::parse_colors("heading=abc:code=32:=:badformat");
        // The invalid value "abc" is skipped, so the heading keeps its default.
        assert_eq!(parsed.heading, ColorTheme::COLORED.heading);
        // The valid value "32" is applied.
        assert_eq!(parsed.code.0, "\x1b[32m");
        assert_eq!(parsed.code.1, "\x1b[0m");
    }

    #[test]
    fn test_parse_colors_ignores_unknown_keys() {
        let parsed = ColorTheme::parse_colors("unknown=31:heading=33");
        assert_eq!(parsed.heading.0, "\x1b[33m");
        assert_eq!(parsed.heading.1, "\x1b[0m");
    }

    /// Every supported key is given a distinct code so a mix-up between keys is detected.
    #[test]
    fn test_parse_colors_all_keys() {
        let spec = "heading=1:code=2:code_inline=3:emphasis=4:strong=5:link=6:link_url=7:\
                    image=8:blockquote=9:delete=10:hr=11:html=12:frontmatter=13:list=14:\
                    table=15:math=16";
        let parsed = ColorTheme::parse_colors(spec);

        assert_eq!(parsed.heading.0, "\x1b[1m");
        assert_eq!(parsed.code.0, "\x1b[2m");
        assert_eq!(parsed.code_inline.0, "\x1b[3m");
        assert_eq!(parsed.emphasis.0, "\x1b[4m");
        assert_eq!(parsed.strong.0, "\x1b[5m");
        assert_eq!(parsed.link.0, "\x1b[6m");
        assert_eq!(parsed.link_url.0, "\x1b[7m");
        assert_eq!(parsed.image.0, "\x1b[8m");
        assert_eq!(parsed.blockquote_marker.0, "\x1b[9m");
        assert_eq!(parsed.delete.0, "\x1b[10m");
        assert_eq!(parsed.horizontal_rule.0, "\x1b[11m");
        assert_eq!(parsed.html.0, "\x1b[12m");
        assert_eq!(parsed.frontmatter.0, "\x1b[13m");
        assert_eq!(parsed.list_marker.0, "\x1b[14m");
        assert_eq!(parsed.table_separator.0, "\x1b[15m");
        assert_eq!(parsed.math.0, "\x1b[16m");
    }

    #[test]
    fn test_parse_colors_empty_string() {
        let parsed = ColorTheme::parse_colors("");
        assert_eq!(parsed.heading, ColorTheme::COLORED.heading);
    }

    #[test]
    fn test_colored_string_with_custom_theme() {
        let custom = ColorTheme::parse_colors("heading=1;31");
        let document: Markdown = "# Title".parse().unwrap();
        let rendered = document.to_colored_string_with_theme(&custom);
        assert_eq!(rendered, "\x1b[1;31m# Title\x1b[0m\n");
    }
}
529
#[cfg(test)]
#[cfg(feature = "json")]
mod json_tests {
    use super::*;

    #[test]
    fn test_to_json_simple() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();
        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"depth\": 1"));
        assert!(json.contains("\"values\":"));
    }

    #[test]
    fn test_to_json_complex() {
        let md = "# Header\n\n- Item 1\n- Item 2\n\n*Emphasis* and **Strong**"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"type\": \"List\""));
        assert!(json.contains("\"type\": \"Strong\""));
        assert!(json.contains("\"type\": \"Emphasis\""));
    }

    #[test]
    fn test_to_json_code_blocks() {
        let md = "```rust\nfn main() {\n    println!(\"Hello\");\n}\n```"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Code\""));
        assert!(json.contains("\"lang\": \"rust\""));
        assert!(json.contains("\"value\": \"fn main() {\\n    println!(\\\"Hello\\\");\\n}\""));
    }

    #[test]
    fn test_to_json_table() {
        let md = "| A | B |\n|---|---|\n| 1 | 2 |".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"TableCell\""));
    }
}

// HTML conversion does not depend on the `json` feature, so these tests live in their own
// module and run whenever `html-to-markdown` is enabled.
#[cfg(test)]
#[cfg(feature = "html-to-markdown")]
mod html_tests {
    use rstest::rstest;

    use super::*;

    /// Converts each HTML input, then checks the number of nodes and the rendered Markdown.
    #[rstest]
    #[case("<h1>Hello</h1>", 1, "# Hello\n")]
    #[case("<p>Paragraph</p>", 1, "Paragraph\n")]
    #[case("<ul><li>Item 1</li><li>Item 2</li></ul>", 2, "- Item 1\n- Item 2\n")]
    #[case("<ol><li>First</li><li>Second</li></ol>", 2, "1. First\n2. Second\n")]
    #[case("<blockquote>Quote</blockquote>", 1, "> Quote\n")]
    #[case("<code>inline</code>", 1, "`inline`\n")]
    #[case("<pre><code>block</code></pre>", 1, "```\nblock\n```\n")]
    #[case("<table><tr><td>A</td><td>B</td></tr></table>", 3, "|A|B|\n|---|---|\n")]
    fn test_markdown_from_html(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_html_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }
}
594}