1use crate::document::{DoclingDocument, Node, Table};
4
/// Selects how a picture node is represented in the Markdown output.
///
/// Whatever the mode, a picture that carries no image data is rendered as the
/// `<!-- image -->` placeholder comment.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum ImageMode {
    /// Emit the `<!-- image -->` placeholder comment for every picture.
    /// This is the default mode.
    #[default]
    Placeholder,
    /// Render the picture from the image's `data_uri()`, so no separate file
    /// is produced.
    Embedded,
    /// Assign the picture a numbered file path under the artifacts directory
    /// and hand the image bytes back to the caller as an artifact.
    Referenced,
}
17
/// Mutable state threaded through a single Markdown rendering pass.
struct Ctx {
    /// When `true`, fenced code blocks keep their language tag; otherwise the
    /// fence is emitted without one.
    strict: bool,
    /// How picture nodes are rendered.
    images: ImageMode,
    /// Directory prefix used when building paths for referenced images.
    artifacts_dir: String,
    /// `(path, bytes)` pairs collected for every referenced image.
    artifacts: Vec<(String, Vec<u8>)>,
    /// Running counter used to number referenced image files; it advances
    /// once per referenced image.
    pic_index: usize,
}
27
28pub fn to_markdown(doc: &DoclingDocument, strict: bool) -> String {
34 to_markdown_images(doc, strict, ImageMode::Placeholder, "artifacts").0
35}
36
37pub fn to_markdown_images(
41 doc: &DoclingDocument,
42 strict: bool,
43 images: ImageMode,
44 artifacts_dir: &str,
45) -> (String, Vec<(String, Vec<u8>)>) {
46 let mut ctx = Ctx {
47 strict,
48 images,
49 artifacts_dir: artifacts_dir.to_string(),
50 artifacts: Vec::new(),
51 pic_index: 0,
52 };
53 let mut blocks: Vec<String> = Vec::new();
54 render(&doc.nodes, &mut blocks, &mut ctx);
55 let body = blocks.join("\n\n");
56 let md = if body.is_empty() {
57 String::new()
58 } else {
59 format!("{body}\n")
60 };
61 (md, ctx.artifacts)
62}
63
64fn render(nodes: &[Node], blocks: &mut Vec<String>, ctx: &mut Ctx) {
65 let mut i = 0;
66 while i < nodes.len() {
67 match &nodes[i] {
68 Node::ListItem { .. } => {
69 let start = i;
70 while matches!(nodes.get(i), Some(Node::ListItem { .. })) {
71 i += 1;
72 }
73 render_list_run(&nodes[start..i], blocks);
74 }
75 other => {
76 render_one(other, blocks, ctx);
77 i += 1;
78 }
79 }
80 }
81}
82
83fn render_list_run(items: &[Node], blocks: &mut Vec<String>) {
89 let mut lines: Vec<String> = Vec::new();
90 let mut prev: Vec<Option<(bool, u64)>> = Vec::new();
93
94 for item in items {
95 let Node::ListItem {
96 ordered,
97 number,
98 first_in_list,
99 text,
100 level,
101 } = item
102 else {
103 continue;
104 };
105 let level = *level as usize;
106
107 prev.truncate(level + 1);
109 while prev.len() <= level {
110 prev.push(None);
111 }
112
113 if let Some((prev_ordered, prev_number)) = prev[level] {
117 let new_list = *first_in_list
118 || prev_ordered != *ordered
119 || (*ordered && *number != prev_number + 1);
120 if new_list {
121 lines.push(String::new());
122 }
123 }
124
125 let indent = " ".repeat(level);
126 let marker = if *ordered {
127 format!("{number}.")
128 } else {
129 "-".to_string()
130 };
131 lines.push(format!("{indent}{marker} {text}"));
132 prev[level] = Some((*ordered, *number));
133 }
134
135 blocks.push(lines.join("\n"));
136}
137
138fn render_one(node: &Node, blocks: &mut Vec<String>, ctx: &mut Ctx) {
139 match node {
140 Node::Heading { level, text } => {
141 let hashes = "#".repeat((*level).clamp(1, 6) as usize);
142 blocks.push(format!("{hashes} {text}"));
143 }
144 Node::Paragraph { text } => blocks.push(text.clone()),
145 Node::Code { language, text } => {
146 let lang = match language {
148 Some(l) if ctx.strict => l.as_str(),
149 _ => "",
150 };
151 blocks.push(format!("```{lang}\n{text}\n```"));
152 }
153 Node::Table(table) => {
154 let rendered = render_table(table);
155 if !rendered.is_empty() {
156 blocks.push(rendered);
157 }
158 }
159 Node::Picture { caption, image } => {
160 if let Some(cap) = caption {
161 if !cap.is_empty() {
162 blocks.push(cap.clone());
163 }
164 }
165 blocks.push(picture_marker(image.as_ref(), ctx));
166 }
167 Node::Group { children, .. } => render(children, blocks, ctx),
168 Node::ListItem { .. } => unreachable!("list items are rendered in runs"),
170 }
171}
172
173fn picture_marker(image: Option<&crate::PictureImage>, ctx: &mut Ctx) -> String {
176 match (ctx.images, image) {
177 (ImageMode::Embedded, Some(img)) => format!("", img.data_uri()),
178 (ImageMode::Referenced, Some(img)) => {
179 let path = format!(
180 "{}/image_{:06}.{}",
181 ctx.artifacts_dir,
182 ctx.pic_index,
183 ext_for(&img.mimetype)
184 );
185 ctx.pic_index += 1;
186 ctx.artifacts.push((path.clone(), img.data.clone()));
187 format!("")
188 }
189 _ => "<!-- image -->".to_string(),
191 }
192}
193
/// Maps an image MIME type to the file extension used for referenced images.
///
/// The comparison is exact (case-sensitive). Any MIME type that is not in
/// the table, including `image/png` itself, maps to `"png"`.
fn ext_for(mimetype: &str) -> &str {
    const KNOWN: &[(&str, &str)] = &[
        ("image/jpeg", "jpg"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tif"),
    ];

    KNOWN
        .iter()
        .find(|(mime, _)| *mime == mimetype)
        .map_or("png", |(_, ext)| *ext)
}
204
205fn render_table(table: &Table) -> String {
214 if table.rows.is_empty() {
215 return String::new();
216 }
217 let num_cols = table.rows.iter().map(Vec::len).max().unwrap_or(0);
218 if num_cols == 0 {
219 return String::new();
220 }
221
222 let grid: Vec<Vec<String>> = table
225 .rows
226 .iter()
227 .enumerate()
228 .map(|(r, row)| {
229 (0..num_cols)
230 .map(|c| {
231 let cell = escape_cell(row.get(c).map(String::as_str).unwrap_or(""));
232 if r == 0 {
233 cell
234 } else {
235 cell.trim().to_string()
236 }
237 })
238 .collect()
239 })
240 .collect();
241
242 let dw = |s: &str| s.chars().count();
244 let data_rows = 1..grid.len();
245
246 let right: Vec<bool> = (0..num_cols)
248 .map(|c| {
249 !data_rows.is_empty()
250 && data_rows.clone().all(|r| {
251 let t = grid[r][c].trim();
252 !t.is_empty() && t.parse::<f64>().is_ok()
253 })
254 })
255 .collect();
256
257 let width: Vec<usize> = (0..num_cols)
259 .map(|c| {
260 let mut w = dw(&grid[0][c]) + 2;
261 for r in data_rows.clone() {
262 w = w.max(dw(&grid[r][c]));
263 }
264 w
265 })
266 .collect();
267
268 let fmt_cell = |s: &str, c: usize| -> String {
269 let pad = " ".repeat(width[c].saturating_sub(dw(s)));
270 let body = if right[c] {
271 format!("{pad}{s}")
272 } else {
273 format!("{s}{pad}")
274 };
275 format!(" {body} ")
276 };
277 let render_row = |r: usize| -> String {
278 let cells: Vec<String> = (0..num_cols).map(|c| fmt_cell(&grid[r][c], c)).collect();
279 format!("|{}|", cells.join("|"))
280 };
281
282 let mut lines = Vec::with_capacity(grid.len() + 1);
283 lines.push(render_row(0));
284 let sep: Vec<String> = (0..num_cols).map(|c| "-".repeat(width[c] + 2)).collect();
285 lines.push(format!("|{}|", sep.join("|")));
286 for r in data_rows {
287 lines.push(render_row(r));
288 }
289 lines.join("\n")
290}
291
/// Makes `s` safe to place inside a single Markdown table cell.
///
/// Newlines are replaced with a space because a table row must stay on one
/// line. Pipes are escaped as `\|` so they are not read as cell delimiters.
fn escape_cell(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        match ch {
            '\n' => escaped.push(' '),
            '|' => escaped.push_str("\\|"),
            other => escaped.push(other),
        }
    }
    escaped
}
297
#[cfg(test)]
mod tests {
    use super::*;

    /// A heading, a paragraph and a two-item unordered list are separated by
    /// blank lines, and the document ends with a single newline.
    #[test]
    fn renders_headings_paragraphs_and_lists() {
        let mut doc = DoclingDocument::new("demo");
        doc.add_heading(1, "Title");
        doc.add_paragraph("Hello world.");
        doc.push(Node::ListItem {
            ordered: false,
            number: 1,
            first_in_list: true,
            text: "first".into(),
            level: 0,
        });
        doc.push(Node::ListItem {
            ordered: false,
            number: 2,
            first_in_list: false,
            text: "second".into(),
            level: 0,
        });
        let md = doc.export_to_markdown();
        assert_eq!(md, "# Title\n\nHello world.\n\n- first\n- second\n");
    }

    /// Columns are padded to the header length plus two, and all-numeric
    /// columns are right-aligned. Every cell is therefore five characters
    /// wide, matching the five-dash separator.
    #[test]
    fn renders_github_table() {
        let mut doc = DoclingDocument::new("t");
        doc.push(Node::Table(Table {
            rows: vec![vec!["a".into(), "b".into()], vec!["1".into(), "2".into()]],
        }));
        let md = doc.export_to_markdown();
        assert_eq!(md, "|   a |   b |\n|-----|-----|\n|   1 |   2 |\n");
    }
}