// mq_markdown/markdown.rs

1#[cfg(feature = "html-to-markdown")]
2use crate::html_to_markdown;
3#[cfg(feature = "html-to-markdown")]
4use crate::html_to_markdown::ConversionOptions;
5use crate::node::{Node, Position, RenderOptions, TableAlign, TableCell};
6use markdown::Constructs;
7use miette::miette;
8use std::{fmt, str::FromStr};
9
10#[derive(Debug, Clone)]
11pub struct Markdown {
12    pub nodes: Vec<Node>,
13    pub options: RenderOptions,
14}
15
16impl FromStr for Markdown {
17    type Err = miette::Error;
18
19    fn from_str(content: &str) -> Result<Self, Self::Err> {
20        Self::from_markdown_str(content)
21    }
22}
23
24impl fmt::Display for Markdown {
25    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26        let mut pre_position: Option<Position> = None;
27        let mut is_first = true;
28        let mut current_table_row: Option<usize> = None;
29
30        // Pre-allocate buffer to reduce allocations
31        let mut buffer = String::with_capacity(self.nodes.len() * 50); // Reasonable estimate
32
33        for (i, node) in self.nodes.iter().enumerate() {
34            // Handle TableCell specially - group by row
35            if let Node::TableCell(TableCell { row, values, .. }) = node {
36                let value = values
37                    .iter()
38                    .map(|v| v.to_string_with(&self.options))
39                    .collect::<String>();
40
41                // Check if this is a new row
42                let is_new_row = current_table_row != Some(*row);
43
44                if is_new_row {
45                    // End previous row if exists
46                    if current_table_row.is_some() {
47                        buffer.push_str("|\n");
48                    }
49                    current_table_row = Some(*row);
50                }
51
52                // Output cell: |content
53                buffer.push('|');
54                buffer.push_str(&value);
55
56                // Check if this is the last cell in the row
57                let next_node = self.nodes.get(i + 1);
58                let next_is_different_row = next_node.is_none_or(
59                    |next| !matches!(next, Node::TableCell(TableCell { row: next_row, .. }) if *next_row == *row),
60                );
61
62                if next_is_different_row {
63                    buffer.push_str("|\n");
64                    current_table_row = None;
65                }
66
67                pre_position = node.position();
68                is_first = false;
69                continue;
70            }
71
72            // Handle TableAlign specially - always add newline after
73            if let Node::TableAlign(TableAlign { align, .. }) = node {
74                use itertools::Itertools;
75                buffer.push('|');
76                buffer.push_str(&align.iter().map(|a| a.to_string()).join("|"));
77                buffer.push_str("|\n");
78                pre_position = node.position();
79                is_first = false;
80                continue;
81            }
82
83            // Reset table row tracking for non-TableCell nodes
84            current_table_row = None;
85
86            let value = node.to_string_with(&self.options);
87
88            if value.is_empty() || value == "\n" {
89                pre_position = None;
90                continue;
91            }
92
93            if let Some(pos) = node.position() {
94                let new_line_count = pre_position
95                    .as_ref()
96                    .map(|p| pos.start.line.saturating_sub(p.end.line))
97                    .unwrap_or_else(|| if is_first { 0 } else { 1 });
98
99                pre_position = Some(pos.clone());
100
101                // Write newlines directly to buffer instead of creating temp string
102                for _ in 0..new_line_count {
103                    buffer.push('\n');
104                }
105                buffer.push_str(&value);
106            } else {
107                pre_position = None;
108                buffer.push_str(&value);
109                buffer.push('\n');
110            }
111
112            if is_first {
113                is_first = false;
114            }
115        }
116
117        // Write final result to formatter
118        if buffer.is_empty() || buffer.ends_with('\n') {
119            write!(f, "{}", buffer)
120        } else {
121            writeln!(f, "{}", buffer)
122        }
123    }
124}
125
126impl Markdown {
127    pub fn new(nodes: Vec<Node>) -> Self {
128        Self {
129            nodes,
130            options: RenderOptions::default(),
131        }
132    }
133
134    pub fn set_options(&mut self, options: RenderOptions) {
135        self.options = options;
136    }
137
138    pub fn from_mdx_str(content: &str) -> miette::Result<Self> {
139        let root = markdown::to_mdast(content, &markdown::ParseOptions::mdx()).map_err(|e| miette!(e.reason))?;
140        let nodes = Node::from_mdast_node(root);
141
142        Ok(Self {
143            nodes,
144            options: RenderOptions::default(),
145        })
146    }
147
148    pub fn to_html(&self) -> String {
149        markdown::to_html(self.to_string().as_str())
150    }
151
152    pub fn to_text(&self) -> String {
153        let mut result = String::with_capacity(self.nodes.len() * 20); // Reasonable estimate
154        for node in &self.nodes {
155            result.push_str(&node.value());
156            result.push('\n');
157        }
158        result
159    }
160
161    #[cfg(feature = "json")]
162    pub fn to_json(&self) -> miette::Result<String> {
163        let nodes = self
164            .nodes
165            .iter()
166            .filter(|node| !node.is_empty() && !node.is_empty_fragment())
167            .collect::<Vec<_>>();
168        serde_json::to_string_pretty(&nodes).map_err(|e| miette!("Failed to serialize to JSON: {}", e))
169    }
170
171    #[cfg(feature = "html-to-markdown")]
172    pub fn from_html_str(content: &str) -> miette::Result<Self> {
173        Self::from_html_str_with_options(content, ConversionOptions::default())
174    }
175
176    #[cfg(feature = "html-to-markdown")]
177    pub fn from_html_str_with_options(content: &str, options: ConversionOptions) -> miette::Result<Self> {
178        html_to_markdown::convert_html_to_markdown(content, options)
179            .map_err(|e| miette!(e))
180            .and_then(|md_string| Self::from_markdown_str(&md_string))
181    }
182
183    pub fn from_markdown_str(content: &str) -> miette::Result<Self> {
184        let root = markdown::to_mdast(
185            content,
186            &markdown::ParseOptions {
187                gfm_strikethrough_single_tilde: true,
188                math_text_single_dollar: true,
189                mdx_expression_parse: None,
190                mdx_esm_parse: None,
191                constructs: Constructs {
192                    attention: true,
193                    autolink: true,
194                    block_quote: true,
195                    character_escape: true,
196                    character_reference: true,
197                    code_indented: true,
198                    code_fenced: true,
199                    code_text: true,
200                    definition: true,
201                    frontmatter: true,
202                    gfm_autolink_literal: true,
203                    gfm_label_start_footnote: true,
204                    gfm_footnote_definition: true,
205                    gfm_strikethrough: true,
206                    gfm_table: true,
207                    gfm_task_list_item: true,
208                    hard_break_escape: true,
209                    hard_break_trailing: true,
210                    heading_atx: true,
211                    heading_setext: true,
212                    html_flow: true,
213                    html_text: true,
214                    label_start_image: true,
215                    label_start_link: true,
216                    label_end: true,
217                    list_item: true,
218                    math_flow: true,
219                    math_text: true,
220                    mdx_esm: false,
221                    mdx_expression_flow: false,
222                    mdx_expression_text: false,
223                    mdx_jsx_flow: false,
224                    mdx_jsx_text: false,
225                    thematic_break: true,
226                },
227            },
228        )
229        .map_err(|e| miette!(e.reason))?;
230        let nodes = Node::from_mdast_node(root);
231
232        Ok(Self {
233            nodes,
234            options: RenderOptions::default(),
235        })
236    }
237}
238
239pub fn to_html(s: &str) -> String {
240    markdown::to_html(s)
241}
242
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use crate::{ListStyle, TitleSurroundStyle, UrlSurroundStyle};

    use super::*;

    /// Parses each input, then checks the number of nodes and the rendered text.
    ///
    /// Every case carries a distinct label so a failure identifies its input.
    #[rstest]
    #[case::header_only("# Title", 1, "# Title\n")]
    #[case::header_then_paragraph("# Title\nParagraph", 2, "# Title\nParagraph\n")]
    #[case::header_blank_line_paragraph("# Title\n\nParagraph", 2, "# Title\n\nParagraph\n")]
    #[case::list("- Item 1\n- Item 2", 2, "- Item 1\n- Item 2\n")]
    #[case::quote("> Quote\n>Second line", 1, "> Quote\n> Second line\n")]
    #[case::code("```rust\nlet x = 1;\n```", 1, "```rust\nlet x = 1;\n```\n")]
    #[case::toml("+++\n[test]\ntest = 1\n+++", 1, "+++\n[test]\ntest = 1\n+++\n")]
    #[case::code_inline("`inline`", 1, "`inline`\n")]
    #[case::math_inline("$math$", 1, "$math$\n")]
    #[case::math("$$\nmath\n$$", 1, "$$\nmath\n$$\n")]
    #[case::html("<div>test</div>", 1, "<div>test</div>\n")]
    #[case::footnote("[^a]: b", 1, "[^a]: b\n")]
    #[case::definition("[a]: b", 1, "[a]: b\n")]
    #[case::footnote_ref("[^a]: b\n\n[^a]", 2, "[^a]: b\n[^a]\n")]
    #[case::image("![a](b)", 1, "![a](b)\n")]
    #[case::image_with_title("![a](b \"c\")", 1, "![a](b \"c\")\n")]
    #[case::image_ref("[a]: b\n\n ![c][a]", 2, "[a]: b\n\n![c][a]\n")]
    #[case::yaml(
        "---\ntitle: Test\ndescription: YAML front matter\n---\n",
        1,
        "---\ntitle: Test\ndescription: YAML front matter\n---\n"
    )]
    #[case::link("[a](b)", 1, "[a](b)\n")]
    #[case::link_ref("[a]: b\n\n[c][a]", 2, "[a]: b\n\n[c][a]\n")]
    #[case::break_("a\\b", 1, "a\\b\n")]
    #[case::delete("~~a~~", 1, "~~a~~\n")]
    #[case::emphasis("*a*", 1, "*a*\n")]
    #[case::horizontal_rule("---", 1, "---\n")]
    #[case::table(
        "| Column1 | Column2 | Column3 |\n|:--------|:--------:|---------:|\n| Left    | Center  | Right   |\n",
        7,
        "|Column1|Column2|Column3|\n|:---|:---:|---:|\n|Left|Center|Right|\n"
    )]
    fn test_markdown_from_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = input.parse::<Markdown>().unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    /// Same checks as `test_markdown_from_str`, but for input parsed as MDX.
    #[rstest]
    #[case::expression("{test}", 1, "{test}\n")]
    #[case::jsx_self_closing("<a />", 1, "<a />\n")]
    #[case::jsx_spread_attribute("<MyComponent {...props}/>", 1, "<MyComponent {...props} />\n")]
    #[case::jsx_inline_in_text("text<MyComponent {...props}/>text", 3, "text<MyComponent {...props} />text\n")]
    #[case::jsx_attributes(
        "<Chart color=\"#fcb32c\" year={year} />",
        1,
        "<Chart color=\"#fcb32c\" year={year} />\n"
    )]
    fn test_markdown_from_mdx_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_mdx_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    #[test]
    fn test_markdown_to_html() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let html = md.to_html();
        assert_eq!(html, "<h1>Hello</h1>\n");
    }

    #[test]
    fn test_markdown_to_text() {
        let md = "# Hello\n\nWorld".parse::<Markdown>().unwrap();
        let text = md.to_text();
        assert_eq!(text, "Hello\nWorld\n");
    }

    /// A freshly parsed document uses the default options, and options set
    /// afterwards are used for rendering.
    #[test]
    fn test_render_options() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();
        assert_eq!(md.options, RenderOptions::default());

        md.set_options(RenderOptions {
            list_style: ListStyle::Plus,
            ..RenderOptions::default()
        });
        assert_eq!(md.options.list_style, ListStyle::Plus);

        let pretty = md.to_string();
        assert!(pretty.contains("+ Item 1"));
    }

    #[test]
    fn test_display_simple() {
        let md = "# Header\nParagraph".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nParagraph\n");
    }

    #[test]
    fn test_display_with_empty_nodes() {
        let md = "# Header\nContent".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nContent\n");
    }

    /// Blank lines between blocks are kept when rendering.
    #[test]
    fn test_display_with_newlines() {
        let md = "# Header\n\nParagraph 1\n\nParagraph 2".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\n\nParagraph 1\n\nParagraph 2\n");
    }

    #[test]
    fn test_display_format_lists() {
        let md = "- Item 1\n- Item 2\n- Item 3".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "- Item 1\n- Item 2\n- Item 3\n");
    }

    #[test]
    fn test_display_with_different_list_styles() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();

        md.set_options(RenderOptions {
            list_style: ListStyle::Star,
            link_title_style: TitleSurroundStyle::default(),
            link_url_style: UrlSurroundStyle::default(),
        });

        let formatted = md.to_string();
        assert!(formatted.contains("* Item 1"));
        assert!(formatted.contains("* Item 2"));
    }

    #[test]
    fn test_display_with_ordered_list() {
        let md = "1. Item 1\n2. Item 2\n\n3. Item 2".parse::<Markdown>().unwrap();
        let formatted = md.to_string();

        assert!(formatted.contains("1. Item 1"));
        assert!(formatted.contains("2. Item 2"));
        assert!(formatted.contains("3. Item 2"));
    }
}
386
/// Tests for `Markdown::to_json`; compiled only with the `json` feature.
#[cfg(all(test, feature = "json"))]
mod json_tests {
    use super::*;

    #[test]
    fn test_to_json_simple() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();
        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"depth\": 1"));
        assert!(json.contains("\"values\":"));
    }

    #[test]
    fn test_to_json_complex() {
        let md = "# Header\n\n- Item 1\n- Item 2\n\n*Emphasis* and **Strong**"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"type\": \"List\""));
        assert!(json.contains("\"type\": \"Strong\""));
        assert!(json.contains("\"type\": \"Emphasis\""));
    }

    #[test]
    fn test_to_json_code_blocks() {
        let md = "```rust\nfn main() {\n    println!(\"Hello\");\n}\n```"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Code\""));
        assert!(json.contains("\"lang\": \"rust\""));
        assert!(json.contains("\"value\": \"fn main() {\\n    println!(\\\"Hello\\\");\\n}\""));
    }

    #[test]
    fn test_to_json_table() {
        let md = "| A | B |\n|---|---|\n| 1 | 2 |".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"TableCell\""));
    }
}

/// Tests for HTML input; compiled only with the `html-to-markdown` feature.
///
/// These live in their own module so they do not additionally depend on the
/// `json` feature, which they never use.
#[cfg(all(test, feature = "html-to-markdown"))]
mod html_tests {
    use rstest::rstest;

    use super::*;

    #[rstest]
    #[case("<h1>Hello</h1>", 1, "# Hello\n")]
    #[case("<p>Paragraph</p>", 1, "Paragraph\n")]
    #[case("<ul><li>Item 1</li><li>Item 2</li></ul>", 2, "- Item 1\n- Item 2\n")]
    #[case("<ol><li>First</li><li>Second</li></ol>", 2, "1. First\n2. Second\n")]
    #[case("<blockquote>Quote</blockquote>", 1, "> Quote\n")]
    #[case("<code>inline</code>", 1, "`inline`\n")]
    #[case("<pre><code>block</code></pre>", 1, "```\nblock\n```\n")]
    #[case("<table><tr><td>A</td><td>B</td></tr></table>", 3, "|A|B|\n|---|---|\n")]
    fn test_markdown_from_html(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_html_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }
}
451}