Skip to main content

oxidize_pdf/pipeline/
export.rs

1use crate::pipeline::Element;
2
/// Configuration for element-aware markdown export.
///
/// `ExportConfig::default()` leaves every option switched off. The `Default`
/// implementation is derived: the only field is a `bool`, whose default value
/// is `false`, so a hand-written impl would merely repeat that.
#[derive(Debug, Clone, Default)]
pub struct ExportConfig {
    /// Include Header and Footer elements in output (default: false).
    pub include_headers_footers: bool,
}
17
18/// Exports a slice of [`Element`]s to Markdown format.
19#[derive(Debug, Clone, Default)]
20pub struct ElementMarkdownExporter {
21    pub config: ExportConfig,
22}
23
24impl ElementMarkdownExporter {
25    pub fn new(config: ExportConfig) -> Self {
26        Self { config }
27    }
28
29    /// Export elements to a Markdown string.
30    pub fn export(&self, elements: &[Element]) -> String {
31        if elements.is_empty() {
32            return String::new();
33        }
34        let mut parts: Vec<String> = Vec::new();
35        for element in elements {
36            if let Some(md) = self.element_to_markdown(element) {
37                parts.push(md);
38            }
39        }
40        parts.join("\n\n")
41    }
42
43    fn element_to_markdown(&self, element: &Element) -> Option<String> {
44        match element {
45            Element::Title(d) => Some(format!("# {}", d.text.trim())),
46            Element::Paragraph(d) => Some(d.text.trim().to_string()),
47            Element::ListItem(d) => Some(format!("- {}", d.text.trim())),
48            Element::KeyValue(kv) => Some(format!("**{}**: {}", kv.key.trim(), kv.value.trim())),
49            Element::CodeBlock(d) => Some(format!("```\n{}\n```", d.text.trim())),
50            Element::Image(img) => {
51                let alt = img.alt_text.as_deref().unwrap_or("");
52                Some(format!("![{}]()", alt))
53            }
54            Element::Table(t) => Some(table_to_markdown(&t.rows)),
55            Element::Header(_) | Element::Footer(_) => {
56                if self.config.include_headers_footers {
57                    Some(element.display_text())
58                } else {
59                    None
60                }
61            }
62        }
63    }
64}
65
/// Renders `rows` as a Markdown pipe table.
///
/// The first row is used as the header and is followed by a `---` delimiter
/// row with one entry per header cell; every remaining row becomes a body
/// row. An empty `rows` slice yields an empty string.
///
/// Cell text is sanitised so it cannot break the table layout: a literal `|`
/// is written as `\|`, and line breaks (`\n`, `\r`, `\r\n`) are replaced by a
/// single space because a table row must stay on one line.
fn table_to_markdown(rows: &[Vec<String>]) -> String {
    /// Makes one cell safe to embed between column delimiters.
    fn escape_cell(cell: &str) -> String {
        let mut escaped = String::with_capacity(cell.len());
        let mut chars = cell.chars().peekable();
        while let Some(ch) = chars.next() {
            match ch {
                '|' => escaped.push_str("\\|"),
                '\r' => {
                    // Treat CRLF as one line break, not two.
                    if chars.peek() == Some(&'\n') {
                        chars.next();
                    }
                    escaped.push(' ');
                }
                '\n' => escaped.push(' '),
                other => escaped.push(other),
            }
        }
        escaped
    }

    /// Formats one table line from its cells.
    fn render_row(cells: &[String]) -> String {
        let escaped: Vec<String> = cells.iter().map(|cell| escape_cell(cell)).collect();
        format!("| {} |", escaped.join(" | "))
    }

    let (header, body) = match rows.split_first() {
        Some(parts) => parts,
        None => return String::new(),
    };

    // Header line + delimiter line + one line per body row.
    let mut lines = Vec::with_capacity(rows.len() + 1);
    lines.push(render_row(header));
    let delimiter = vec!["---"; header.len()];
    lines.push(format!("| {} |", delimiter.join(" | ")));
    lines.extend(body.iter().map(|row| render_row(row)));
    lines.join("\n")
}