Skip to main content

lex_babel/formats/pdf/
mod.rs

1//! PDF export built on top of the HTML serializer + headless Chrome.
2//!
3//! The implementation renders Lex documents to HTML using the existing HTML
4//! format, injects page-size specific CSS, then shells out to a Chrome/Chromium
5//! binary running in headless mode to print the page to PDF.
6
7use crate::error::FormatError;
8use crate::format::{Format, SerializedDocument};
9use crate::formats::html::HtmlFormat;
10use lex_core::lex::ast::Document;
11use std::collections::HashMap;
12use std::env;
13use std::fs;
14use std::io::Write;
15use std::path::PathBuf;
16use std::process::Command;
17use tempfile::tempdir;
18use url::Url;
19use which::which;
20
21/// Format implementation that shells out to Chrome/Chromium to generate PDFs.
22#[derive(Default)]
23pub struct PdfFormat {
24    html: HtmlFormat,
25}
26
27impl PdfFormat {
28    pub fn new() -> Self {
29        Self {
30            html: HtmlFormat::default(),
31        }
32    }
33}
34
35impl Format for PdfFormat {
36    fn name(&self) -> &str {
37        "pdf"
38    }
39
40    fn description(&self) -> &str {
41        "HTML-based PDF export via headless Chrome"
42    }
43
44    fn file_extensions(&self) -> &[&str] {
45        &["pdf"]
46    }
47
48    fn supports_serialization(&self) -> bool {
49        true
50    }
51
52    fn serialize(&self, _doc: &Document) -> Result<String, FormatError> {
53        Err(FormatError::NotSupported(
54            "PDF serialization produces binary output".to_string(),
55        ))
56    }
57
58    fn serialize_with_options(
59        &self,
60        doc: &Document,
61        options: &HashMap<String, String>,
62    ) -> Result<SerializedDocument, FormatError> {
63        let profile = PdfSizeProfile::from_options(options)?;
64        let html = self.html.serialize(doc)?;
65        let final_html = inject_page_css(&html, profile.print_css());
66        let pdf_bytes = render_html_to_pdf(&final_html, profile)?;
67        Ok(SerializedDocument::Binary(pdf_bytes))
68    }
69}
70
/// Page-size presets available for PDF export.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum PdfSizeProfile {
    /// Preset used unless mobile sizing is requested (210mm x 297mm pages).
    LexEd,
    /// Phone-sized preset (90mm x 160mm pages).
    Mobile,
}
76
77impl PdfSizeProfile {
78    fn from_options(options: &HashMap<String, String>) -> Result<Self, FormatError> {
79        let mobile = parse_bool_flag(options, "size-mobile", false)?;
80        let lexed = parse_bool_flag(options, "size-lexed", !mobile)?;
81
82        if mobile && lexed {
83            return Err(FormatError::SerializationError(
84                "Cannot enable both lexed and mobile PDF sizing at once".to_string(),
85            ));
86        }
87
88        if mobile {
89            Ok(PdfSizeProfile::Mobile)
90        } else {
91            Ok(PdfSizeProfile::LexEd)
92        }
93    }
94
95    fn print_css(&self) -> &'static str {
96        match self {
97            PdfSizeProfile::LexEd =>
98                "@page { size: 210mm 297mm; margin: 18mm; }\nbody { margin: 0; }\n",
99            PdfSizeProfile::Mobile =>
100                "@page { size: 90mm 160mm; margin: 5mm; }\nbody { margin: 0; }\n.lex-document { max-width: calc(90mm - 10mm); }\n",
101        }
102    }
103
104    fn viewport(&self) -> (u32, u32) {
105        match self {
106            PdfSizeProfile::LexEd => (1280, 960),
107            PdfSizeProfile::Mobile => (450, 900),
108        }
109    }
110}
111
112fn parse_bool_flag(
113    options: &HashMap<String, String>,
114    key: &str,
115    default: bool,
116) -> Result<bool, FormatError> {
117    if let Some(value) = options.get(key) {
118        if value.is_empty() {
119            return Ok(true);
120        }
121        match value.to_lowercase().as_str() {
122            "true" | "1" | "yes" | "y" => Ok(true),
123            "false" | "0" | "no" | "n" => Ok(false),
124            other => Err(FormatError::SerializationError(format!(
125                "Invalid boolean value '{other}' for --extra-{key}"
126            ))),
127        }
128    } else {
129        Ok(default)
130    }
131}
132
/// Returns `html` with `css` embedded in a `<style data-lex-pdf>` element.
///
/// The element is inserted immediately before the first closing `</head>`
/// tag, matched ASCII case-insensitively so `</HEAD>` is found too. When the
/// markup has no closing head tag the element is prepended to the document.
fn inject_page_css(html: &str, css: &str) -> String {
    const HEAD_CLOSE: &[u8] = b"</head>";

    let style_tag = format!("<style data-lex-pdf>\n{css}\n</style>");

    // Scanning bytes avoids allocating a lowercased copy of the document.
    // A match always starts on the ASCII byte `<`, so the offset is a valid
    // `str` boundary.
    let insert_at = html
        .as_bytes()
        .windows(HEAD_CLOSE.len())
        .position(|window| window.eq_ignore_ascii_case(HEAD_CLOSE));

    match insert_at {
        Some(idx) => {
            let (before, after) = html.split_at(idx);
            let mut output = String::with_capacity(html.len() + style_tag.len());
            output.push_str(before);
            output.push_str(&style_tag);
            output.push_str(after);
            output
        }
        None => format!("{style_tag}{html}"),
    }
}
145
146fn render_html_to_pdf(html: &str, profile: PdfSizeProfile) -> Result<Vec<u8>, FormatError> {
147    let chrome = resolve_chrome_binary()?;
148    let temp_dir =
149        tempdir().map_err(|e| FormatError::SerializationError(format!("Temp dir error: {e}")))?;
150    let html_path = temp_dir.path().join("lex-export.html");
151    let mut html_file =
152        fs::File::create(&html_path).map_err(|e| FormatError::SerializationError(e.to_string()))?;
153    html_file
154        .write_all(html.as_bytes())
155        .map_err(|e| FormatError::SerializationError(e.to_string()))?;
156
157    let pdf_path = temp_dir.path().join("lex-export.pdf");
158    let file_url = Url::from_file_path(&html_path).map_err(|_| {
159        FormatError::SerializationError(
160            "Failed to construct file:// URL for HTML input".to_string(),
161        )
162    })?;
163
164    let pdf_arg = format!("--print-to-pdf={}", pdf_path.display());
165    let window_arg = {
166        let (w, h) = profile.viewport();
167        format!("--window-size={w},{h}")
168    };
169
170    let status = Command::new(&chrome)
171        .arg("--headless=new")
172        .arg("--disable-gpu")
173        .arg("--no-sandbox")
174        .arg("--disable-dev-shm-usage")
175        .arg("--no-pdf-header-footer")
176        .arg(pdf_arg)
177        .arg(window_arg)
178        .arg(file_url.as_str())
179        .status()
180        .map_err(|e| {
181            FormatError::SerializationError(format!(
182                "Failed to launch Chrome ({}): {}",
183                chrome.display(),
184                e
185            ))
186        })?;
187
188    if !status.success() {
189        return Err(FormatError::SerializationError(format!(
190            "Chrome exited with status {status}"
191        )));
192    }
193
194    fs::read(&pdf_path).map_err(|e| FormatError::SerializationError(e.to_string()))
195}
196
197fn resolve_chrome_binary() -> Result<PathBuf, FormatError> {
198    if let Some(path) = env::var_os("LEX_CHROME_BIN") {
199        if !path.is_empty() {
200            return Ok(PathBuf::from(path));
201        }
202    }
203
204    for var in ["GOOGLE_CHROME_BIN", "CHROME_BIN"] {
205        if let Some(path) = env::var_os(var) {
206            if !path.is_empty() {
207                return Ok(PathBuf::from(path));
208            }
209        }
210    }
211
212    for candidate in [
213        "google-chrome",
214        "google-chrome-stable",
215        "chromium",
216        "chromium-browser",
217        "chrome",
218        "msedge",
219    ] {
220        if let Ok(path) = which(candidate) {
221            return Ok(path);
222        }
223    }
224
225    #[cfg(target_os = "macos")]
226    {
227        let default_path = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome";
228        let candidate = PathBuf::from(default_path);
229        if candidate.exists() {
230            return Ok(candidate);
231        }
232    }
233
234    #[cfg(target_os = "windows")]
235    {
236        let candidates = [
237            r"C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe",
238            r"C:\\Program Files (x86)\\Google\\Chrome\\Application\\chrome.exe",
239        ];
240        for candidate in candidates {
241            let path = PathBuf::from(candidate);
242            if path.exists() {
243                return Ok(path);
244            }
245        }
246    }
247
248    #[cfg(target_os = "linux")]
249    {
250        let candidates = [
251            "/usr/bin/google-chrome",
252            "/usr/bin/google-chrome-stable",
253            "/usr/bin/chromium-browser",
254            "/usr/bin/chromium",
255        ];
256        for candidate in candidates {
257            let path = PathBuf::from(candidate);
258            if path.exists() {
259                return Ok(path);
260            }
261        }
262    }
263
264    Err(FormatError::SerializationError(
265        "Unable to locate a Chrome/Chromium binary. Set LEX_CHROME_BIN to override the detection."
266            .to_string(),
267    ))
268}