mq_markdown/
markdown.rs

1#[cfg(feature = "html-to-markdown")]
2use crate::html_to_markdown;
3#[cfg(feature = "html-to-markdown")]
4use crate::html_to_markdown::ConversionOptions;
5use markdown::Constructs;
6use miette::miette;
7use std::{fmt, str::FromStr};
8
9use crate::node::{Node, Position, RenderOptions};
10
11#[derive(Debug, Clone)]
12pub struct Markdown {
13    pub nodes: Vec<Node>,
14    pub options: RenderOptions,
15}
16
17impl FromStr for Markdown {
18    type Err = miette::Error;
19
20    fn from_str(content: &str) -> Result<Self, Self::Err> {
21        Self::from_markdown_str(content)
22    }
23}
24
25impl fmt::Display for Markdown {
26    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27        let mut pre_position: Option<Position> = None;
28        let mut is_first = true;
29
30        // Pre-allocate buffer to reduce allocations
31        let mut buffer = String::with_capacity(self.nodes.len() * 50); // Reasonable estimate
32
33        for node in &self.nodes {
34            let value = node.to_string_with(&self.options);
35
36            if value.is_empty() || value == "\n" {
37                pre_position = None;
38                continue;
39            }
40
41            if let Some(pos) = node.position() {
42                let new_line_count = pre_position
43                    .as_ref()
44                    .map(|p| pos.start.line - p.end.line)
45                    .unwrap_or_else(|| if is_first { 0 } else { 1 });
46
47                pre_position = Some(pos.clone());
48
49                // Write newlines directly to buffer instead of creating temp string
50                for _ in 0..new_line_count {
51                    buffer.push('\n');
52                }
53                buffer.push_str(&value);
54            } else {
55                pre_position = None;
56                buffer.push_str(&value);
57                buffer.push('\n');
58            }
59
60            if is_first {
61                is_first = false;
62            }
63        }
64
65        // Write final result to formatter
66        if buffer.is_empty() || buffer.ends_with('\n') {
67            write!(f, "{}", buffer)
68        } else {
69            writeln!(f, "{}", buffer)
70        }
71    }
72}
73
74impl Markdown {
75    pub fn new(nodes: Vec<Node>) -> Self {
76        Self {
77            nodes,
78            options: RenderOptions::default(),
79        }
80    }
81
82    pub fn set_options(&mut self, options: RenderOptions) {
83        self.options = options;
84    }
85
86    pub fn from_mdx_str(content: &str) -> miette::Result<Self> {
87        let root = markdown::to_mdast(content, &markdown::ParseOptions::mdx()).map_err(|e| miette!(e.reason))?;
88        let nodes = Node::from_mdast_node(root);
89
90        Ok(Self {
91            nodes,
92            options: RenderOptions::default(),
93        })
94    }
95
96    pub fn to_html(&self) -> String {
97        markdown::to_html(self.to_string().as_str())
98    }
99
100    pub fn to_text(&self) -> String {
101        let mut result = String::with_capacity(self.nodes.len() * 20); // Reasonable estimate
102        for node in &self.nodes {
103            result.push_str(&node.value());
104            result.push('\n');
105        }
106        result
107    }
108
109    #[cfg(feature = "json")]
110    pub fn to_json(&self) -> miette::Result<String> {
111        let nodes = self
112            .nodes
113            .iter()
114            .filter(|node| !node.is_empty() && !node.is_empty_fragment())
115            .collect::<Vec<_>>();
116        serde_json::to_string_pretty(&nodes).map_err(|e| miette!("Failed to serialize to JSON: {}", e))
117    }
118
119    #[cfg(feature = "html-to-markdown")]
120    pub fn from_html_str(content: &str) -> miette::Result<Self> {
121        Self::from_html_str_with_options(content, ConversionOptions::default())
122    }
123
124    #[cfg(feature = "html-to-markdown")]
125    pub fn from_html_str_with_options(content: &str, options: ConversionOptions) -> miette::Result<Self> {
126        html_to_markdown::convert_html_to_markdown(content, options)
127            .map_err(|e| miette!(e))
128            .and_then(|md_string| Self::from_markdown_str(&md_string))
129    }
130
131    pub fn from_markdown_str(content: &str) -> miette::Result<Self> {
132        let root = markdown::to_mdast(
133            content,
134            &markdown::ParseOptions {
135                gfm_strikethrough_single_tilde: true,
136                math_text_single_dollar: true,
137                mdx_expression_parse: None,
138                mdx_esm_parse: None,
139                constructs: Constructs {
140                    attention: true,
141                    autolink: true,
142                    block_quote: true,
143                    character_escape: true,
144                    character_reference: true,
145                    code_indented: true,
146                    code_fenced: true,
147                    code_text: true,
148                    definition: true,
149                    frontmatter: true,
150                    gfm_autolink_literal: true,
151                    gfm_label_start_footnote: true,
152                    gfm_footnote_definition: true,
153                    gfm_strikethrough: true,
154                    gfm_table: true,
155                    gfm_task_list_item: true,
156                    hard_break_escape: true,
157                    hard_break_trailing: true,
158                    heading_atx: true,
159                    heading_setext: true,
160                    html_flow: true,
161                    html_text: true,
162                    label_start_image: true,
163                    label_start_link: true,
164                    label_end: true,
165                    list_item: true,
166                    math_flow: true,
167                    math_text: true,
168                    mdx_esm: false,
169                    mdx_expression_flow: false,
170                    mdx_expression_text: false,
171                    mdx_jsx_flow: false,
172                    mdx_jsx_text: false,
173                    thematic_break: true,
174                },
175            },
176        )
177        .map_err(|e| miette!(e.reason))?;
178        let nodes = Node::from_mdast_node(root);
179
180        Ok(Self {
181            nodes,
182            options: RenderOptions::default(),
183        })
184    }
185}
186
187pub fn to_html(s: &str) -> String {
188    markdown::to_html(s)
189}
190
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use crate::{ListStyle, TitleSurroundStyle, UrlSurroundStyle};

    use super::*;

    /// Round-trips Markdown input through parsing and rendering, checking both
    /// the number of top-level nodes and the rendered text.
    ///
    /// Every case carries a unique label so a failure can be identified from
    /// the generated test name alone.
    #[rstest]
    #[case::header("# Title", 1, "# Title\n")]
    #[case::header_then_paragraph("# Title\nParagraph", 2, "# Title\nParagraph\n")]
    #[case::header_blank_line_paragraph("# Title\n\nParagraph", 2, "# Title\n\nParagraph\n")]
    #[case::list("- Item 1\n- Item 2", 2, "- Item 1\n- Item 2\n")]
    #[case::quote("> Quote\n>Second line", 1, "> Quote\n> Second line\n")]
    #[case::code("```rust\nlet x = 1;\n```", 1, "```rust\nlet x = 1;\n```\n")]
    #[case::toml("+++\n[test]\ntest = 1\n+++", 1, "+++\n[test]\ntest = 1\n+++\n")]
    #[case::code_inline("`inline`", 1, "`inline`\n")]
    #[case::math_inline("$math$", 1, "$math$\n")]
    #[case::math("$$\nmath\n$$", 1, "$$\nmath\n$$\n")]
    #[case::html("<div>test</div>", 1, "<div>test</div>\n")]
    #[case::footnote("[^a]: b", 1, "[^a]: b\n")]
    #[case::definition("[a]: b", 1, "[a]: b\n")]
    #[case::footnote_ref("[^a]: b\n\n[^a]", 2, "[^a]: b\n[^a]\n")]
    #[case::image("![a](b)", 1, "![a](b)\n")]
    #[case::image_with_title("![a](b \"c\")", 1, "![a](b \"c\")\n")]
    #[case::image_ref("[a]: b\n\n ![c][a]", 2, "[a]: b\n\n![c][a]\n")]
    #[case::yaml(
        "---\ntitle: Test\ndescription: YAML front matter\n---\n",
        1,
        "---\ntitle: Test\ndescription: YAML front matter\n---\n"
    )]
    #[case::link("[a](b)", 1, "[a](b)\n")]
    #[case::link_ref("[a]: b\n\n[c][a]", 2, "[a]: b\n\n[c][a]\n")]
    #[case::break_("a\\b", 1, "a\\b\n")]
    #[case::delete("~~a~~", 1, "~~a~~\n")]
    #[case::emphasis("*a*", 1, "*a*\n")]
    #[case::horizontal_rule("---", 1, "---\n")]
    #[case::table(
        "| Column1 | Column2 | Column3 |\n|:--------|:--------:|---------:|\n| Left    | Center  | Right   |\n",
        7,
        "|Column1|Column2|Column3|\n|:---|:---:|---:|\n|Left|Center|Right|\n"
    )]
    fn test_markdown_from_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = input.parse::<Markdown>().unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    /// Same round-trip check as above, but parsing with the MDX options.
    #[rstest]
    #[case::mdx_expression("{test}", 1, "{test}\n")]
    #[case::mdx_jsx_self_closing("<a />", 1, "<a />\n")]
    #[case::mdx_jsx_spread_props("<MyComponent {...props}/>", 1, "<MyComponent {...props} />\n")]
    #[case::mdx_jsx_inline("text<MyComponent {...props}/>text", 3, "text<MyComponent {...props} />text\n")]
    #[case::mdx_jsx_attributes(
        "<Chart color=\"#fcb32c\" year={year} />",
        1,
        "<Chart color=\"#fcb32c\" year={year} />\n"
    )]
    fn test_markdown_from_mdx_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_mdx_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    #[test]
    fn test_markdown_to_html() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let html = md.to_html();
        assert_eq!(html, "<h1>Hello</h1>\n");
    }

    #[test]
    fn test_markdown_to_text() {
        let md = "# Hello\n\nWorld".parse::<Markdown>().unwrap();
        let text = md.to_text();
        assert_eq!(text, "Hello\nWorld\n");
    }

    #[test]
    fn test_render_options() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();
        assert_eq!(md.options, RenderOptions::default());

        md.set_options(RenderOptions {
            list_style: ListStyle::Plus,
            ..RenderOptions::default()
        });
        assert_eq!(md.options.list_style, ListStyle::Plus);

        let pretty = md.to_string();
        assert!(pretty.contains("+ Item 1"));
    }

    #[test]
    fn test_display_simple() {
        let md = "# Header\nParagraph".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nParagraph\n");
    }

    // NOTE(review): despite its name, this input contains no empty nodes; it
    // currently exercises the same path as `test_display_simple`.
    #[test]
    fn test_display_with_empty_nodes() {
        let md = "# Header\nContent".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nContent\n");
    }

    #[test]
    fn test_display_with_newlines() {
        let md = "# Header\n\nParagraph 1\n\nParagraph 2".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\n\nParagraph 1\n\nParagraph 2\n");
    }

    #[test]
    fn test_display_format_lists() {
        let md = "- Item 1\n- Item 2\n- Item 3".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "- Item 1\n- Item 2\n- Item 3\n");
    }

    #[test]
    fn test_display_with_different_list_styles() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();

        md.set_options(RenderOptions {
            list_style: ListStyle::Star,
            link_title_style: TitleSurroundStyle::default(),
            link_url_style: UrlSurroundStyle::default(),
        });

        let formatted = md.to_string();
        assert!(formatted.contains("* Item 1"));
        assert!(formatted.contains("* Item 2"));
    }

    #[test]
    fn test_display_with_ordered_list() {
        let md = "1. Item 1\n2. Item 2\n\n3. Item 2".parse::<Markdown>().unwrap();
        let formatted = md.to_string();

        assert!(formatted.contains("1. Item 1"));
        assert!(formatted.contains("2. Item 2"));
        assert!(formatted.contains("3. Item 2"));
    }
}
334
/// Tests for JSON serialization; compiled only with the `json` feature.
#[cfg(test)]
#[cfg(feature = "json")]
mod json_tests {
    use super::*;

    #[test]
    fn test_to_json_simple() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();
        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"depth\": 1"));
        assert!(json.contains("\"values\":"));
    }

    #[test]
    fn test_to_json_complex() {
        let md = "# Header\n\n- Item 1\n- Item 2\n\n*Emphasis* and **Strong**"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"type\": \"List\""));
        assert!(json.contains("\"type\": \"Strong\""));
        assert!(json.contains("\"type\": \"Emphasis\""));
    }

    #[test]
    fn test_to_json_code_blocks() {
        let md = "```rust\nfn main() {\n    println!(\"Hello\");\n}\n```"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Code\""));
        assert!(json.contains("\"lang\": \"rust\""));
        assert!(json.contains("\"value\": \"fn main() {\\n    println!(\\\"Hello\\\");\\n}\""));
    }

    #[test]
    fn test_to_json_table() {
        let md = "| A | B |\n|---|---|\n| 1 | 2 |".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"TableCell\""));
    }
}

/// Tests for HTML-to-Markdown conversion.
///
/// Kept in a module gated on `html-to-markdown` alone so these tests run
/// whenever that feature is enabled, independently of the `json` feature.
#[cfg(test)]
#[cfg(feature = "html-to-markdown")]
mod html_tests {
    use rstest::rstest;

    use super::*;

    #[rstest]
    #[case("<h1>Hello</h1>", 1, "# Hello\n")]
    #[case("<p>Paragraph</p>", 1, "Paragraph\n")]
    #[case("<ul><li>Item 1</li><li>Item 2</li></ul>", 2, "- Item 1\n- Item 2\n")]
    #[case("<ol><li>First</li><li>Second</li></ol>", 2, "1. First\n2. Second\n")]
    #[case("<blockquote>Quote</blockquote>", 1, "> Quote\n")]
    #[case("<code>inline</code>", 1, "`inline`\n")]
    #[case("<pre><code>block</code></pre>", 1, "```\nblock\n```\n")]
    #[case("<table><tr><td>A</td><td>B</td></tr></table>", 3, "|A|B|\n|---|---|\n")]
    fn test_markdown_from_html(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_html_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }
}
399}