1use crate::document::{DoclingDocument, Node, Table};
4
/// How pictures are represented in the Markdown output.
///
/// A picture that carries no image data is rendered as the `<!-- image -->`
/// placeholder whichever mode is selected.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ImageMode {
    /// Emit only the `<!-- image -->` placeholder comment. This is the default.
    #[default]
    Placeholder,
    /// Inline the picture into the document through its data URI.
    Embedded,
    /// Collect the picture bytes as an artifact and refer to it by file path.
    Referenced,
}
17
18struct Ctx {
20 strict: bool,
21 images: ImageMode,
22 artifacts_dir: String,
23 artifacts: Vec<(String, Vec<u8>)>,
25 pic_index: usize,
26}
27
28pub fn to_markdown(doc: &DoclingDocument, strict: bool) -> String {
34 to_markdown_images(doc, strict, ImageMode::Placeholder, "artifacts").0
35}
36
37pub fn to_markdown_images(
41 doc: &DoclingDocument,
42 strict: bool,
43 images: ImageMode,
44 artifacts_dir: &str,
45) -> (String, Vec<(String, Vec<u8>)>) {
46 let mut ctx = Ctx {
47 strict,
48 images,
49 artifacts_dir: artifacts_dir.to_string(),
50 artifacts: Vec::new(),
51 pic_index: 0,
52 };
53 let mut blocks: Vec<String> = Vec::new();
54 render(&doc.nodes, &mut blocks, &mut ctx);
55 let body = blocks.join("\n\n");
56 let md = if body.is_empty() {
57 String::new()
58 } else {
59 format!("{body}\n")
60 };
61 (md, ctx.artifacts)
62}
63
/// Returns `text` prepared for output.
///
/// In strict mode every escaped underscore (`\_`) is replaced by a plain
/// `_`; otherwise the text is copied unchanged.
fn strict_text(text: &str, strict: bool) -> String {
    if !strict {
        return text.to_owned();
    }
    text.replace("\\_", "_")
}
75
76fn render(nodes: &[Node], blocks: &mut Vec<String>, ctx: &mut Ctx) {
77 let mut i = 0;
78 while i < nodes.len() {
79 match &nodes[i] {
80 Node::ListItem { .. } => {
81 let start = i;
82 while matches!(nodes.get(i), Some(Node::ListItem { .. })) {
83 i += 1;
84 }
85 render_list_run(&nodes[start..i], blocks, ctx.strict);
86 }
87 other => {
88 render_one(other, blocks, ctx);
89 i += 1;
90 }
91 }
92 }
93}
94
95fn render_list_run(items: &[Node], blocks: &mut Vec<String>, strict: bool) {
101 let mut lines: Vec<String> = Vec::new();
102 let mut prev: Vec<Option<(bool, u64)>> = Vec::new();
105
106 for item in items {
107 let Node::ListItem {
108 ordered,
109 number,
110 first_in_list,
111 text,
112 level,
113 } = item
114 else {
115 continue;
116 };
117 let level = *level as usize;
118
119 prev.truncate(level + 1);
121 while prev.len() <= level {
122 prev.push(None);
123 }
124
125 if let Some((prev_ordered, prev_number)) = prev[level] {
129 let new_list = *first_in_list
130 || prev_ordered != *ordered
131 || (*ordered && *number != prev_number + 1);
132 if new_list {
133 lines.push(String::new());
134 }
135 }
136
137 let indent = " ".repeat(level);
138 let marker = if *ordered {
139 format!("{number}.")
140 } else {
141 "-".to_string()
142 };
143 lines.push(format!("{indent}{marker} {}", strict_text(text, strict)));
144 prev[level] = Some((*ordered, *number));
145 }
146
147 blocks.push(lines.join("\n"));
148}
149
150fn render_one(node: &Node, blocks: &mut Vec<String>, ctx: &mut Ctx) {
151 match node {
152 Node::Heading { level, text } => {
153 let hashes = "#".repeat((*level).clamp(1, 6) as usize);
154 blocks.push(format!("{hashes} {}", strict_text(text, ctx.strict)));
155 }
156 Node::Paragraph { text } => blocks.push(strict_text(text, ctx.strict)),
157 Node::Code { language, text } => {
158 let lang = match language {
160 Some(l) if ctx.strict => l.as_str(),
161 _ => "",
162 };
163 blocks.push(format!("```{lang}\n{text}\n```"));
164 }
165 Node::Table(table) => {
166 let rendered = render_table(table);
167 if !rendered.is_empty() {
168 blocks.push(rendered);
169 }
170 }
171 Node::Picture { caption, image } => {
172 if let Some(cap) = caption {
173 if !cap.is_empty() {
174 blocks.push(cap.clone());
175 }
176 }
177 blocks.push(picture_marker(image.as_ref(), ctx));
178 }
179 Node::Group { children, .. } => render(children, blocks, ctx),
180 Node::ListItem { .. } => unreachable!("list items are rendered in runs"),
182 }
183}
184
185fn picture_marker(image: Option<&crate::PictureImage>, ctx: &mut Ctx) -> String {
188 match (ctx.images, image) {
189 (ImageMode::Embedded, Some(img)) => format!("", img.data_uri()),
190 (ImageMode::Referenced, Some(img)) => {
191 let path = format!(
192 "{}/image_{:06}.{}",
193 ctx.artifacts_dir,
194 ctx.pic_index,
195 ext_for(&img.mimetype)
196 );
197 ctx.pic_index += 1;
198 ctx.artifacts.push((path.clone(), img.data.clone()));
199 format!("")
200 }
201 _ => "<!-- image -->".to_string(),
203 }
204}
205
/// Maps an image MIME type to the file extension used for its artifact.
///
/// Matching is exact; any MIME type not listed in the table (including
/// `image/png` itself) yields `png`.
fn ext_for(mimetype: &str) -> &str {
    static KNOWN: [(&str, &str); 5] = [
        ("image/jpeg", "jpg"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tif"),
    ];

    KNOWN
        .iter()
        .find(|entry| entry.0 == mimetype)
        .map_or("png", |entry| entry.1)
}
216
217fn render_table(table: &Table) -> String {
226 if table.rows.is_empty() {
227 return String::new();
228 }
229 let num_cols = table.rows.iter().map(Vec::len).max().unwrap_or(0);
230 if num_cols == 0 {
231 return String::new();
232 }
233
234 let grid: Vec<Vec<String>> = table
237 .rows
238 .iter()
239 .enumerate()
240 .map(|(r, row)| {
241 (0..num_cols)
242 .map(|c| {
243 let cell = escape_cell(row.get(c).map(String::as_str).unwrap_or(""));
244 if r == 0 {
245 cell
246 } else {
247 cell.trim().to_string()
248 }
249 })
250 .collect()
251 })
252 .collect();
253
254 let dw = |s: &str| s.chars().count();
256 let data_rows = 1..grid.len();
257
258 let right: Vec<bool> = (0..num_cols)
260 .map(|c| {
261 !data_rows.is_empty()
262 && data_rows.clone().all(|r| {
263 let t = grid[r][c].trim();
264 !t.is_empty() && t.parse::<f64>().is_ok()
265 })
266 })
267 .collect();
268
269 let width: Vec<usize> = (0..num_cols)
271 .map(|c| {
272 let mut w = dw(&grid[0][c]) + 2;
273 for r in data_rows.clone() {
274 w = w.max(dw(&grid[r][c]));
275 }
276 w
277 })
278 .collect();
279
280 let fmt_cell = |s: &str, c: usize| -> String {
281 let pad = " ".repeat(width[c].saturating_sub(dw(s)));
282 let body = if right[c] {
283 format!("{pad}{s}")
284 } else {
285 format!("{s}{pad}")
286 };
287 format!(" {body} ")
288 };
289 let render_row = |r: usize| -> String {
290 let cells: Vec<String> = (0..num_cols).map(|c| fmt_cell(&grid[r][c], c)).collect();
291 format!("|{}|", cells.join("|"))
292 };
293
294 let mut lines = Vec::with_capacity(grid.len() + 1);
295 lines.push(render_row(0));
296 let sep: Vec<String> = (0..num_cols).map(|c| "-".repeat(width[c] + 2)).collect();
297 lines.push(format!("|{}|", sep.join("|")));
298 for r in data_rows {
299 lines.push(render_row(r));
300 }
301 lines.join("\n")
302}
303
/// Makes `s` safe to place inside a single pipe-table cell.
///
/// Newlines are replaced by spaces, because a table row must stay on one
/// line, and every `|` is backslash-escaped so it is read as cell content
/// instead of a column delimiter.
fn escape_cell(s: &str) -> String {
    s.replace('\n', " ").replace('|', "\\|")
}
309
#[cfg(test)]
mod tests {
    use super::*;

    /// Headings, paragraphs and a simple unordered list are separated by
    /// blank lines and the document ends with one newline.
    #[test]
    fn renders_headings_paragraphs_and_lists() {
        let mut doc = DoclingDocument::new("demo");
        doc.add_heading(1, "Title");
        doc.add_paragraph("Hello world.");
        doc.push(Node::ListItem {
            ordered: false,
            number: 1,
            first_in_list: true,
            text: "first".into(),
            level: 0,
        });
        doc.push(Node::ListItem {
            ordered: false,
            number: 2,
            first_in_list: false,
            text: "second".into(),
            level: 0,
        });
        let md = doc.export_to_markdown();
        assert_eq!(md, "# Title\n\nHello world.\n\n- first\n- second\n");
    }

    /// Each column is two characters wider than its one-character header and
    /// both columns hold only numbers, so every cell is right-aligned in a
    /// three-character field (five dashes in the separator).
    #[test]
    fn renders_github_table() {
        let mut doc = DoclingDocument::new("t");
        doc.push(Node::Table(Table {
            rows: vec![vec!["a".into(), "b".into()], vec!["1".into(), "2".into()]],
        }));
        let md = doc.export_to_markdown();
        assert_eq!(md, "|   a |   b |\n|-----|-----|\n|   1 |   2 |\n");
    }

    /// Escaped underscores survive the default export and are unescaped when
    /// strict mode is requested.
    #[test]
    fn strict_unescapes_inline_underscores_legacy_keeps_them() {
        let mut doc = DoclingDocument::new("t");
        doc.add_heading(1, "a\\_b");
        doc.add_paragraph("x\\_y");
        doc.push(Node::ListItem {
            ordered: false,
            number: 1,
            first_in_list: true,
            text: "i\\_j".into(),
            level: 0,
        });
        assert_eq!(doc.export_to_markdown(), "# a\\_b\n\nx\\_y\n\n- i\\_j\n");
        assert_eq!(doc.export_to_markdown_with(true), "# a_b\n\nx_y\n\n- i_j\n");
    }
}
366}