1#[cfg(feature = "html-to-markdown")]
2use crate::html_to_markdown;
3#[cfg(feature = "html-to-markdown")]
4use crate::html_to_markdown::ConversionOptions;
5use markdown::Constructs;
6use miette::miette;
7use std::{fmt, str::FromStr};
8
9use crate::node::{Node, Position, RenderOptions};
10
/// A Markdown document held as a list of nodes together with the options
/// used when the document is rendered back to text.
#[derive(Debug, Clone)]
pub struct Markdown {
    /// The document's nodes, in rendering order.
    pub nodes: Vec<Node>,
    /// Options handed to every node when the document is formatted.
    pub options: RenderOptions,
}
16
/// Enables `"...".parse::<Markdown>()` by delegating to
/// [`Markdown::from_markdown_str`].
impl FromStr for Markdown {
    type Err = miette::Error;

    /// Parses `content` as Markdown; this is exactly
    /// [`Markdown::from_markdown_str`].
    fn from_str(content: &str) -> Result<Self, Self::Err> {
        Self::from_markdown_str(content)
    }
}
24
25impl fmt::Display for Markdown {
26 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
27 let mut pre_position: Option<Position> = None;
28 let mut is_first = true;
29
30 let mut buffer = String::with_capacity(self.nodes.len() * 50); for node in &self.nodes {
34 let value = node.to_string_with(&self.options);
35
36 if value.is_empty() || value == "\n" {
37 pre_position = None;
38 continue;
39 }
40
41 if let Some(pos) = node.position() {
42 let new_line_count = pre_position
43 .as_ref()
44 .map(|p| pos.start.line - p.end.line)
45 .unwrap_or_else(|| if is_first { 0 } else { 1 });
46
47 pre_position = Some(pos.clone());
48
49 for _ in 0..new_line_count {
51 buffer.push('\n');
52 }
53 buffer.push_str(&value);
54 } else {
55 pre_position = None;
56 buffer.push_str(&value);
57 buffer.push('\n');
58 }
59
60 if is_first {
61 is_first = false;
62 }
63 }
64
65 if buffer.is_empty() || buffer.ends_with('\n') {
67 write!(f, "{}", buffer)
68 } else {
69 writeln!(f, "{}", buffer)
70 }
71 }
72}
73
74impl Markdown {
75 pub fn new(nodes: Vec<Node>) -> Self {
76 Self {
77 nodes,
78 options: RenderOptions::default(),
79 }
80 }
81
82 pub fn set_options(&mut self, options: RenderOptions) {
83 self.options = options;
84 }
85
86 pub fn from_mdx_str(content: &str) -> miette::Result<Self> {
87 let root = markdown::to_mdast(content, &markdown::ParseOptions::mdx()).map_err(|e| miette!(e.reason))?;
88 let nodes = Node::from_mdast_node(root);
89
90 Ok(Self {
91 nodes,
92 options: RenderOptions::default(),
93 })
94 }
95
96 pub fn to_html(&self) -> String {
97 markdown::to_html(self.to_string().as_str())
98 }
99
100 pub fn to_text(&self) -> String {
101 let mut result = String::with_capacity(self.nodes.len() * 20); for node in &self.nodes {
103 result.push_str(&node.value());
104 result.push('\n');
105 }
106 result
107 }
108
109 #[cfg(feature = "json")]
110 pub fn to_json(&self) -> miette::Result<String> {
111 let nodes = self
112 .nodes
113 .iter()
114 .filter(|node| !node.is_empty() && !node.is_empty_fragment())
115 .collect::<Vec<_>>();
116 serde_json::to_string_pretty(&nodes).map_err(|e| miette!("Failed to serialize to JSON: {}", e))
117 }
118
119 #[cfg(feature = "html-to-markdown")]
120 pub fn from_html_str(content: &str) -> miette::Result<Self> {
121 Self::from_html_str_with_options(content, ConversionOptions::default())
122 }
123
124 #[cfg(feature = "html-to-markdown")]
125 pub fn from_html_str_with_options(content: &str, options: ConversionOptions) -> miette::Result<Self> {
126 html_to_markdown::convert_html_to_markdown(content, options)
127 .map_err(|e| miette!(e))
128 .and_then(|md_string| Self::from_markdown_str(&md_string))
129 }
130
131 pub fn from_markdown_str(content: &str) -> miette::Result<Self> {
132 let root = markdown::to_mdast(
133 content,
134 &markdown::ParseOptions {
135 gfm_strikethrough_single_tilde: true,
136 math_text_single_dollar: true,
137 mdx_expression_parse: None,
138 mdx_esm_parse: None,
139 constructs: Constructs {
140 attention: true,
141 autolink: true,
142 block_quote: true,
143 character_escape: true,
144 character_reference: true,
145 code_indented: true,
146 code_fenced: true,
147 code_text: true,
148 definition: true,
149 frontmatter: true,
150 gfm_autolink_literal: true,
151 gfm_label_start_footnote: true,
152 gfm_footnote_definition: true,
153 gfm_strikethrough: true,
154 gfm_table: true,
155 gfm_task_list_item: true,
156 hard_break_escape: true,
157 hard_break_trailing: true,
158 heading_atx: true,
159 heading_setext: true,
160 html_flow: true,
161 html_text: true,
162 label_start_image: true,
163 label_start_link: true,
164 label_end: true,
165 list_item: true,
166 math_flow: true,
167 math_text: true,
168 mdx_esm: false,
169 mdx_expression_flow: false,
170 mdx_expression_text: false,
171 mdx_jsx_flow: false,
172 mdx_jsx_text: false,
173 thematic_break: true,
174 },
175 },
176 )
177 .map_err(|e| miette!(e.reason))?;
178 let nodes = Node::from_mdast_node(root);
179
180 Ok(Self {
181 nodes,
182 options: RenderOptions::default(),
183 })
184 }
185}
186
/// Converts the Markdown source `s` to HTML by calling `markdown::to_html`
/// directly, without building a [`Markdown`] document first.
pub fn to_html(s: &str) -> String {
    markdown::to_html(s)
}
190
#[cfg(test)]
mod tests {
    use rstest::rstest;

    use crate::{ListStyle, TitleSurroundStyle, UrlSurroundStyle};

    use super::*;

    /// Parses each input, checks how many nodes it produced and checks the
    /// text the document renders back to.
    #[rstest]
    #[case::header("# Title", 1, "# Title\n")]
    #[case::header("# Title\nParagraph", 2, "# Title\nParagraph\n")]
    #[case::header("# Title\n\nParagraph", 2, "# Title\n\nParagraph\n")]
    #[case::list("- Item 1\n- Item 2", 2, "- Item 1\n- Item 2\n")]
    #[case::quote("> Quote\n>Second line", 1, "> Quote\n> Second line\n")]
    #[case::code("```rust\nlet x = 1;\n```", 1, "```rust\nlet x = 1;\n```\n")]
    #[case::toml("+++\n[test]\ntest = 1\n+++", 1, "+++\n[test]\ntest = 1\n+++\n")]
    #[case::code_inline("`inline`", 1, "`inline`\n")]
    #[case::math_inline("$math$", 1, "$math$\n")]
    #[case::math("$$\nmath\n$$", 1, "$$\nmath\n$$\n")]
    #[case::html("<div>test</div>", 1, "<div>test</div>\n")]
    #[case::footnote("[^a]: b", 1, "[^a]: b\n")]
    #[case::definition("[a]: b", 1, "[a]: b\n")]
    #[case::footnote_ref("[^a]: b\n\n[^a]", 2, "[^a]: b\n[^a]\n")]
    // Inline images, with and without a title, are expected to round-trip
    // unchanged (an empty input cannot yield a node or a lone newline).
    #[case::image("![a](b)", 1, "![a](b)\n")]
    #[case::image_with_title("![a](b \"c\")", 1, "![a](b \"c\")\n")]
    #[case::image_ref("[a]: b\n\n ![c][a]", 2, "[a]: b\n\n![c][a]\n")]
    #[case::yaml(
        "---\ntitle: Test\ndescription: YAML front matter\n---\n",
        1,
        "---\ntitle: Test\ndescription: YAML front matter\n---\n"
    )]
    #[case::link("[a](b)", 1, "[a](b)\n")]
    #[case::link_ref("[a]: b\n\n[c][a]", 2, "[a]: b\n\n[c][a]\n")]
    #[case::break_("a\\b", 1, "a\\b\n")]
    #[case::delete("~~a~~", 1, "~~a~~\n")]
    #[case::emphasis("*a*", 1, "*a*\n")]
    #[case::horizontal_rule("---", 1, "---\n")]
    #[case::table(
        "| Column1 | Column2 | Column3 |\n|:--------|:--------:|---------:|\n| Left | Center | Right |\n",
        7,
        "|Column1|Column2|Column3|\n|:---|:---:|---:|\n|Left|Center|Right|\n"
    )]
    fn test_markdown_from_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = input.parse::<Markdown>().unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    /// Same checks as above, but for input parsed with the MDX options.
    #[rstest]
    #[case::mdx("{test}", 1, "{test}\n")]
    #[case::mdx("<a />", 1, "<a />\n")]
    #[case::mdx("<MyComponent {...props}/>", 1, "<MyComponent {...props} />\n")]
    #[case::mdx("text<MyComponent {...props}/>text", 3, "text<MyComponent {...props} />text\n")]
    #[case::mdx(
        "<Chart color=\"#fcb32c\" year={year} />",
        1,
        "<Chart color=\"#fcb32c\" year={year} />\n"
    )]
    fn test_markdown_from_mdx_str(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_mdx_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }

    /// A heading converts to the matching HTML element.
    #[test]
    fn test_markdown_to_html() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let html = md.to_html();
        assert_eq!(html, "<h1>Hello</h1>\n");
    }

    /// Plain-text output has one line per node and no Markdown markup.
    #[test]
    fn test_markdown_to_text() {
        let md = "# Hello\n\nWorld".parse::<Markdown>().unwrap();
        let text = md.to_text();
        assert_eq!(text, "Hello\nWorld\n");
    }

    /// `set_options` replaces the defaults and affects rendering.
    #[test]
    fn test_render_options() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();
        assert_eq!(md.options, RenderOptions::default());

        md.set_options(RenderOptions {
            list_style: ListStyle::Plus,
            ..RenderOptions::default()
        });
        assert_eq!(md.options.list_style, ListStyle::Plus);

        let pretty = md.to_string();
        assert!(pretty.contains("+ Item 1"));
    }

    /// Adjacent lines stay adjacent and a trailing newline is added.
    #[test]
    fn test_display_simple() {
        let md = "# Header\nParagraph".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nParagraph\n");
    }

    #[test]
    fn test_display_with_empty_nodes() {
        let md = "# Header\nContent".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\nContent\n");
    }

    /// Blank lines between blocks are preserved.
    #[test]
    fn test_display_with_newlines() {
        let md = "# Header\n\nParagraph 1\n\nParagraph 2".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "# Header\n\nParagraph 1\n\nParagraph 2\n");
    }

    #[test]
    fn test_display_format_lists() {
        let md = "- Item 1\n- Item 2\n- Item 3".parse::<Markdown>().unwrap();
        assert_eq!(md.to_string(), "- Item 1\n- Item 2\n- Item 3\n");
    }

    /// The configured list style replaces the marker used in the input.
    #[test]
    fn test_display_with_different_list_styles() {
        let mut md = "- Item 1\n- Item 2".parse::<Markdown>().unwrap();

        md.set_options(RenderOptions {
            list_style: ListStyle::Star,
            link_title_style: TitleSurroundStyle::default(),
            link_url_style: UrlSurroundStyle::default(),
        });

        let formatted = md.to_string();
        assert!(formatted.contains("* Item 1"));
        assert!(formatted.contains("* Item 2"));
    }

    /// Ordered list numbering survives a round trip.
    #[test]
    fn test_display_with_ordered_list() {
        let md = "1. Item 1\n2. Item 2\n\n3. Item 2".parse::<Markdown>().unwrap();
        let formatted = md.to_string();

        assert!(formatted.contains("1. Item 1"));
        assert!(formatted.contains("2. Item 2"));
        assert!(formatted.contains("3. Item 2"));
    }
}
334
/// Tests for `Markdown::to_json`; only built when the `json` feature is on.
#[cfg(test)]
#[cfg(feature = "json")]
mod json_tests {
    use super::*;

    /// A heading serializes with its type, depth and child values.
    #[test]
    fn test_to_json_simple() {
        let md = "# Hello".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();
        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"depth\": 1"));
        assert!(json.contains("\"values\":"));
    }

    /// Block and inline node types all appear in the serialized output.
    #[test]
    fn test_to_json_complex() {
        let md = "# Header\n\n- Item 1\n- Item 2\n\n*Emphasis* and **Strong**"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Heading\""));
        assert!(json.contains("\"type\": \"List\""));
        assert!(json.contains("\"type\": \"Strong\""));
        assert!(json.contains("\"type\": \"Emphasis\""));
    }

    /// A fenced code block keeps its language and its escaped body.
    #[test]
    fn test_to_json_code_blocks() {
        let md = "```rust\nfn main() {\n println!(\"Hello\");\n}\n```"
            .parse::<Markdown>()
            .unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"Code\""));
        assert!(json.contains("\"lang\": \"rust\""));
        assert!(json.contains("\"value\": \"fn main() {\\n println!(\\\"Hello\\\");\\n}\""));
    }

    #[test]
    fn test_to_json_table() {
        let md = "| A | B |\n|---|---|\n| 1 | 2 |".parse::<Markdown>().unwrap();
        let json = md.to_json().unwrap();

        assert!(json.contains("\"type\": \"TableCell\""));
    }
}

/// Tests for the HTML import path.
///
/// Kept in a module gated only on `html-to-markdown` so these tests still run
/// when the unrelated `json` feature is disabled.
#[cfg(test)]
#[cfg(feature = "html-to-markdown")]
mod html_tests {
    use rstest::rstest;

    use super::*;

    /// Converts each HTML snippet, then checks the node count and the
    /// Markdown text the document renders to.
    #[rstest]
    #[case("<h1>Hello</h1>", 1, "# Hello\n")]
    #[case("<p>Paragraph</p>", 1, "Paragraph\n")]
    #[case("<ul><li>Item 1</li><li>Item 2</li></ul>", 2, "- Item 1\n- Item 2\n")]
    #[case("<ol><li>First</li><li>Second</li></ol>", 2, "1. First\n2. Second\n")]
    #[case("<blockquote>Quote</blockquote>", 1, "> Quote\n")]
    #[case("<code>inline</code>", 1, "`inline`\n")]
    #[case("<pre><code>block</code></pre>", 1, "```\nblock\n```\n")]
    #[case("<table><tr><td>A</td><td>B</td></tr></table>", 3, "|A|B|\n|---|---|\n")]
    fn test_markdown_from_html(#[case] input: &str, #[case] expected_nodes: usize, #[case] expected_output: &str) {
        let md = Markdown::from_html_str(input).unwrap();
        assert_eq!(md.nodes.len(), expected_nodes);
        assert_eq!(md.to_string(), expected_output);
    }
}