1use crate::document::{DoclingDocument, Node, Table};
4
/// Selects how pictures are represented in the Markdown output.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub enum ImageMode {
    /// Emit only the `<!-- image -->` comment marker. This is the default.
    #[default]
    Placeholder,
    /// Inline the picture using its data URI.
    Embedded,
    /// Point at a file path under the artifacts directory; the picture bytes
    /// are handed back to the caller together with the Markdown.
    Referenced,
}
17
18struct Ctx {
20 strict: bool,
21 compact_tables: bool,
23 images: ImageMode,
24 artifacts_dir: String,
25 artifacts: Vec<(String, Vec<u8>)>,
27 pic_index: usize,
28}
29
30pub fn to_markdown(doc: &DoclingDocument, strict: bool) -> String {
36 to_markdown_images(doc, strict, ImageMode::Placeholder, "artifacts").0
37}
38
39pub fn to_markdown_images(
43 doc: &DoclingDocument,
44 strict: bool,
45 images: ImageMode,
46 artifacts_dir: &str,
47) -> (String, Vec<(String, Vec<u8>)>) {
48 let mut ctx = Ctx {
49 strict,
50 compact_tables: doc.compact_tables,
51 images,
52 artifacts_dir: artifacts_dir.to_string(),
53 artifacts: Vec::new(),
54 pic_index: 0,
55 };
56 let mut blocks: Vec<String> = Vec::new();
57 render(&doc.nodes, &mut blocks, &mut ctx);
58 let mut body = blocks.join("\n\n");
59 if strict && !doc.links.is_empty() {
63 body = apply_links(&body, &doc.links);
64 }
65 let md = if body.is_empty() {
66 String::new()
67 } else {
68 format!("{body}\n")
69 };
70 (md, ctx.artifacts)
71}
72
/// Rewrites link anchors found in `body` as Markdown links.
///
/// Links are consumed in order: each `(anchor, href)` pair is searched for
/// only after the end of the previous replacement, so repeated anchors map
/// to successive occurrences. Pairs whose anchor is empty or cannot be found
/// are skipped.
///
/// The anchor is looked up both in its HTML-escaped form (`&` → `&amp;`,
/// `<` → `&lt;`, `>` → `&gt;`) and verbatim, so it matches whether or not the
/// body text has been escaped. Whichever form occurs first wins; on a tie the
/// escaped (longer) form is preferred. The matched text is kept unchanged as
/// the link label.
fn apply_links(body: &str, links: &[(String, String)]) -> String {
    let mut out = body.to_string();
    // Byte offset just past the previous replacement; always a char boundary.
    let mut cursor = 0usize;

    for (anchor, href) in links {
        if anchor.is_empty() {
            continue;
        }
        // `&` must be escaped first so the entities added below stay intact.
        let escaped = anchor
            .replace('&', "&amp;")
            .replace('<', "&lt;")
            .replace('>', "&gt;");

        let hit = {
            let tail = &out[cursor..];
            // `min_by_key` returns the first minimum, so listing the escaped
            // needle first makes it win when both start at the same offset.
            [escaped.as_str(), anchor.as_str()]
                .iter()
                .filter_map(|needle| {
                    tail.find(*needle)
                        .map(|rel| (cursor + rel, needle.len()))
                })
                .min_by_key(|&(at, _)| at)
        };

        if let Some((at, len)) = hit {
            let replacement = format!("[{}]({href})", &out[at..at + len]);
            out.replace_range(at..at + len, &replacement);
            cursor = at + replacement.len();
        }
    }
    out
}
101
/// Applies the strict-mode clean-ups to a piece of inline text.
///
/// With `strict` off the text is returned unchanged. With it on, escaped
/// underscores are unescaped and stray spaces before `, . ; ) ]` or after
/// `( [` are removed. The substitutions run one after another in table order.
fn strict_text(text: &str, strict: bool) -> String {
    const FIXUPS: [(&str, &str); 8] = [
        ("\\_", "_"),
        (" ,", ","),
        (" .", "."),
        (" ;", ";"),
        (" )", ")"),
        ("( ", "("),
        (" ]", "]"),
        ("[ ", "["),
    ];

    if strict {
        FIXUPS
            .iter()
            .fold(text.to_string(), |acc, &(from, to)| acc.replace(from, to))
    } else {
        text.to_string()
    }
}
122
123fn render(nodes: &[Node], blocks: &mut Vec<String>, ctx: &mut Ctx) {
124 let mut i = 0;
125 while i < nodes.len() {
126 match &nodes[i] {
127 Node::ListItem { .. } => {
128 let start = i;
129 while matches!(nodes.get(i), Some(Node::ListItem { .. })) {
130 i += 1;
131 }
132 render_list_run(&nodes[start..i], blocks, ctx.strict);
133 }
134 other => {
135 render_one(other, blocks, ctx);
136 i += 1;
137 }
138 }
139 }
140}
141
142fn render_list_run(items: &[Node], blocks: &mut Vec<String>, strict: bool) {
148 let mut lines: Vec<String> = Vec::new();
149 let mut prev: Vec<Option<(bool, u64)>> = Vec::new();
152
153 for item in items {
154 let Node::ListItem {
155 ordered,
156 number,
157 first_in_list,
158 text,
159 level,
160 } = item
161 else {
162 continue;
163 };
164 let level = *level as usize;
165
166 prev.truncate(level + 1);
168 while prev.len() <= level {
169 prev.push(None);
170 }
171
172 if let Some((prev_ordered, prev_number)) = prev[level] {
176 let new_list = *first_in_list
177 || prev_ordered != *ordered
178 || (*ordered && *number != prev_number + 1);
179 if new_list {
180 lines.push(String::new());
181 }
182 }
183
184 let indent = " ".repeat(level);
185 let marker = if *ordered {
186 format!("{number}.")
187 } else {
188 "-".to_string()
189 };
190 lines.push(format!("{indent}{marker} {}", strict_text(text, strict)));
191 prev[level] = Some((*ordered, *number));
192 }
193
194 blocks.push(lines.join("\n"));
195}
196
197fn render_one(node: &Node, blocks: &mut Vec<String>, ctx: &mut Ctx) {
198 match node {
199 Node::Heading { level, text } => {
200 let hashes = "#".repeat((*level).clamp(1, 6) as usize);
201 blocks.push(format!("{hashes} {}", strict_text(text, ctx.strict)));
202 }
203 Node::Paragraph { text } => blocks.push(strict_text(text, ctx.strict)),
204 Node::Code { language, text } => {
205 let lang = match language {
207 Some(l) if ctx.strict => l.as_str(),
208 _ => "",
209 };
210 blocks.push(format!("```{lang}\n{text}\n```"));
211 }
212 Node::Table(table) => {
213 let rendered = render_table(table, ctx.compact_tables);
214 if !rendered.is_empty() {
215 blocks.push(rendered);
216 }
217 }
218 Node::Picture { caption, image } => {
219 if let Some(cap) = caption {
220 if !cap.is_empty() {
221 blocks.push(cap.clone());
222 }
223 }
224 blocks.push(picture_marker(image.as_ref(), ctx));
225 }
226 Node::Group { children, .. } => render(children, blocks, ctx),
227 Node::ListItem { .. } => unreachable!("list items are rendered in runs"),
229 }
230}
231
232fn picture_marker(image: Option<&crate::PictureImage>, ctx: &mut Ctx) -> String {
235 match (ctx.images, image) {
236 (ImageMode::Embedded, Some(img)) => format!("", img.data_uri()),
237 (ImageMode::Referenced, Some(img)) => {
238 let path = format!(
239 "{}/image_{:06}.{}",
240 ctx.artifacts_dir,
241 ctx.pic_index,
242 ext_for(&img.mimetype)
243 );
244 ctx.pic_index += 1;
245 ctx.artifacts.push((path.clone(), img.data.clone()));
246 format!("")
247 }
248 _ => "<!-- image -->".to_string(),
250 }
251}
252
/// Maps an image media type to a file extension (without the dot).
///
/// Matching is ASCII case-insensitive and ignores any `;`-separated
/// parameters and surrounding whitespace, since media type names are
/// case-insensitive. Unrecognised types fall back to `"png"`.
///
/// The result is a `'static` string and does not borrow from `mimetype`.
fn ext_for(mimetype: &str) -> &'static str {
    const KNOWN: [(&str, &str); 7] = [
        ("image/jpeg", "jpg"),
        ("image/jpg", "jpg"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tif"),
        ("image/svg+xml", "svg"),
    ];

    // Keep only the `type/subtype` part, e.g. drop `; charset=...`.
    let essence = mimetype.split(';').next().unwrap_or("").trim();
    KNOWN
        .iter()
        .find(|(name, _)| name.eq_ignore_ascii_case(essence))
        .map_or("png", |&(_, ext)| ext)
}
263
264fn render_table(table: &Table, compact: bool) -> String {
278 if table.rows.is_empty() {
279 return String::new();
280 }
281 let num_cols = table.rows.iter().map(Vec::len).max().unwrap_or(0);
282 if num_cols == 0 {
283 return String::new();
284 }
285
286 let grid: Vec<Vec<String>> = table
289 .rows
290 .iter()
291 .enumerate()
292 .map(|(r, row)| {
293 (0..num_cols)
294 .map(|c| {
295 let cell = escape_cell(row.get(c).map(String::as_str).unwrap_or(""));
296 if r == 0 {
297 cell
298 } else {
299 cell.trim().to_string()
300 }
301 })
302 .collect()
303 })
304 .collect();
305
306 if compact {
307 let render_row = |r: usize| -> String { format!("| {} |", grid[r].join(" | ")) };
309 let mut lines = Vec::with_capacity(grid.len() + 1);
310 lines.push(render_row(0));
311 let sep: Vec<&str> = (0..num_cols).map(|_| "-").collect();
312 lines.push(format!("| {} |", sep.join(" | ")));
313 for r in 1..grid.len() {
314 lines.push(render_row(r));
315 }
316 return lines.join("\n");
317 }
318
319 let dw = |s: &str| s.chars().count();
321 let data_rows = 1..grid.len();
322
323 let right: Vec<bool> = (0..num_cols)
325 .map(|c| {
326 !data_rows.is_empty()
327 && data_rows.clone().all(|r| {
328 let t = grid[r][c].trim();
329 !t.is_empty() && t.parse::<f64>().is_ok()
330 })
331 })
332 .collect();
333
334 let width: Vec<usize> = (0..num_cols)
336 .map(|c| {
337 let mut w = dw(&grid[0][c]) + 2;
338 for r in data_rows.clone() {
339 w = w.max(dw(&grid[r][c]));
340 }
341 w
342 })
343 .collect();
344
345 let fmt_cell = |s: &str, c: usize| -> String {
346 let pad = " ".repeat(width[c].saturating_sub(dw(s)));
347 let body = if right[c] {
348 format!("{pad}{s}")
349 } else {
350 format!("{s}{pad}")
351 };
352 format!(" {body} ")
353 };
354 let render_row = |r: usize| -> String {
355 let cells: Vec<String> = (0..num_cols).map(|c| fmt_cell(&grid[r][c], c)).collect();
356 format!("|{}|", cells.join("|"))
357 };
358
359 let mut lines = Vec::with_capacity(grid.len() + 1);
360 lines.push(render_row(0));
361 let sep: Vec<String> = (0..num_cols).map(|c| "-".repeat(width[c] + 2)).collect();
362 lines.push(format!("|{}|", sep.join("|")));
363 for r in data_rows {
364 lines.push(render_row(r));
365 }
366 lines.join("\n")
367}
368
/// Makes a cell's text safe to place inside a Markdown table row.
///
/// Line breaks (`\r\n`, `\n`, `\r`) become a single space so the cell stays
/// on one line, and `|` is backslash-escaped so it is not read as a column
/// delimiter.
fn escape_cell(s: &str) -> String {
    // Handle `\r\n` first so it collapses to one space rather than two.
    s.replace("\r\n", " ")
        .replace('\n', " ")
        .replace('\r', " ")
        .replace('|', "\\|")
}
374
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings, paragraphs and a flat bullet list each form their own block.
    #[test]
    fn renders_headings_paragraphs_and_lists() {
        let mut doc = DoclingDocument::new("demo");
        doc.add_heading(1, "Title");
        doc.add_paragraph("Hello world.");
        doc.push(Node::ListItem {
            ordered: false,
            number: 1,
            first_in_list: true,
            text: "first".into(),
            level: 0,
        });
        doc.push(Node::ListItem {
            ordered: false,
            number: 2,
            first_in_list: false,
            text: "second".into(),
            level: 0,
        });
        let md = doc.export_to_markdown();
        assert_eq!(md, "# Title\n\nHello world.\n\n- first\n- second\n");
    }

    /// Recovered links are only written out in strict mode.
    #[test]
    fn strict_renders_recovered_links_legacy_does_not() {
        let mut doc = DoclingDocument::new("cv");
        doc.add_paragraph("Find me on LinkedIn or GitHub.");
        doc.links = vec![
            ("LinkedIn".into(), "https://www.linkedin.com/in/x/".into()),
            ("GitHub".into(), "https://github.com/x/".into()),
        ];
        assert_eq!(doc.export_to_markdown(), "Find me on LinkedIn or GitHub.\n");
        assert_eq!(
            doc.export_to_markdown_with(true),
            "Find me on [LinkedIn](https://www.linkedin.com/in/x/) or [GitHub](https://github.com/x/).\n"
        );
    }

    /// Repeated anchors are matched to successive occurrences, in link order.
    #[test]
    fn strict_links_match_escaped_anchor_and_consume_in_order() {
        let mut doc = DoclingDocument::new("d");
        doc.add_paragraph("AI & ML here, and issues here, then issues there.");
        doc.links = vec![
            ("AI & ML".into(), "https://a/".into()),
            ("issues".into(), "https://first/".into()),
            ("issues".into(), "https://second/".into()),
        ];
        assert_eq!(
            doc.export_to_markdown_with(true),
            "[AI & ML](https://a/) here, and [issues](https://first/) here, then [issues](https://second/) there.\n"
        );
    }

    /// Compact tables use single-space padding and a one-dash separator.
    #[test]
    fn renders_compact_table() {
        let mut doc = DoclingDocument::new("t");
        doc.compact_tables = true;
        doc.push(Node::Table(Table {
            rows: vec![vec!["a".into(), "b".into()], vec!["1".into(), "2".into()]],
        }));
        let md = doc.export_to_markdown();
        assert_eq!(md, "| a | b |\n| - | - |\n| 1 | 2 |\n");
    }

    /// The default layout pads each column to header length + 2 and
    /// right-aligns columns whose data cells are all numeric, header included.
    #[test]
    fn renders_padded_github_table_by_default() {
        let mut doc = DoclingDocument::new("t");
        doc.push(Node::Table(Table {
            rows: vec![vec!["a".into(), "b".into()], vec!["1".into(), "2".into()]],
        }));
        let md = doc.export_to_markdown();
        assert_eq!(md, "|   a |   b |\n|-----|-----|\n|   1 |   2 |\n");
    }

    /// Strict mode unescapes `\_`; legacy mode leaves it alone.
    #[test]
    fn strict_unescapes_inline_underscores_legacy_keeps_them() {
        let mut doc = DoclingDocument::new("t");
        doc.add_heading(1, "a\\_b");
        doc.add_paragraph("x\\_y");
        doc.push(Node::ListItem {
            ordered: false,
            number: 1,
            first_in_list: true,
            text: "i\\_j".into(),
            level: 0,
        });
        assert_eq!(doc.export_to_markdown(), "# a\\_b\n\nx\\_y\n\n- i\\_j\n");
        assert_eq!(doc.export_to_markdown_with(true), "# a_b\n\nx_y\n\n- i_j\n");
    }

    /// Strict mode removes stray spaces around punctuation and brackets.
    #[test]
    fn strict_tightens_punctuation_spacing_legacy_keeps_it() {
        let mut doc = DoclingDocument::new("t");
        doc.add_paragraph("see [ 37 , 36 ] and ( x ) .");
        assert_eq!(doc.export_to_markdown(), "see [ 37 , 36 ] and ( x ) .\n");
        assert_eq!(doc.export_to_markdown_with(true), "see [37, 36] and (x).\n");
    }
}