latex_to_html/
math_svg.rs

1use crate::ast::*;
2use indoc::{formatdoc, writedoc};
3use itertools::Itertools;
4use rayon::prelude::*;
5use sha2::{Digest, Sha256};
6use std::collections::HashSet;
7use std::fmt::{self, Display, Formatter};
8use std::fs::{self, File, OpenOptions};
9use std::io;
10use std::io::Write as IoWrite;
11use std::path::Path;
12use std::process::{self, Command};
13use std::sync::mpsc::channel;
14use tempdir::TempDir;
15
16fn write_latex(out: &mut impl io::Write, preamble: &[&str], latex: &str) -> Result<(), io::Error> {
17    let preamble = preamble
18        .iter()
19        .copied()
20        .format_with("\n", |line, f| f(&format_args!("{}", line)));
21    // TODO: Get rid of mathtools here?
22    writedoc! {out, r#"
23        \documentclass{{minimal}}
24        {preamble}
25        \usepackage{{mathtools}}
26        \mathtoolsset{{showonlyrefs}}
27        \begin{{document}}
28        {latex}
29        \end{{document}}
30    "#}
31}
32
/// Errors that can occur while converting LaTeX source into an SVG image.
#[derive(Debug)]
pub enum LatexToSvgError {
    /// An I/O operation (temporary files, spawning a tool, reading the result) failed.
    Io(io::Error),
    /// `pdflatex` ran but exited unsuccessfully; carries its captured output.
    PdfLatex(process::Output),
    /// `pdfcrop` ran but exited unsuccessfully; carries its captured output.
    PdfCrop(process::Output),
    /// `pdf2svg` ran but exited unsuccessfully; carries its captured output.
    Pdf2Svg(process::Output),
    /// The generated SVG could not be parsed or did not have the expected structure.
    BadSvg,
}
41
42impl From<io::Error> for LatexToSvgError {
43    fn from(err: io::Error) -> LatexToSvgError {
44        LatexToSvgError::Io(err)
45    }
46}
47
/// Runs `pdflatex` in non-stop mode on `tex_file_path` and returns the captured process output.
///
/// When the path has a non-empty parent directory, `pdflatex` is started inside that directory
/// and is given only the file name. This keeps relative paths working: the child process resolves
/// its argument against its *own* working directory, so passing `dir/doc.tex` after changing into
/// `dir` would make it look for `dir/dir/doc.tex`. A bare file name such as `doc.tex` has an empty
/// parent; in that case the working directory is left untouched, because changing into `""`
/// makes spawning the process fail.
///
/// A non-zero exit status of `pdflatex` is *not* an error here; callers inspect
/// `Output::status` themselves.
///
/// # Errors
///
/// Returns the I/O error raised when the process cannot be spawned or its output collected.
pub fn pdf_latex(tex_file_path: &Path) -> Result<process::Output, io::Error> {
    let mut cmd = Command::new("pdflatex");
    cmd.arg("-interaction=nonstopmode");

    // `Path::parent` yields `Some("")` for a single relative component; treat that as "no parent".
    let parent = tex_file_path
        .parent()
        .filter(|dir| !dir.as_os_str().is_empty());

    match (parent, tex_file_path.file_name()) {
        (Some(dir), Some(file_name)) => {
            cmd.current_dir(dir);
            cmd.arg(file_name);
        }
        // No usable directory or file name component: hand the path over unchanged and keep the
        // current working directory.
        _ => {
            cmd.arg(tex_file_path);
        }
    }

    cmd.output()
}
58
/// Outcome of test-compiling a preamble.
pub enum PreambleDiagnosis<'a> {
    /// The preamble compiled successfully; carries the output of that `pdflatex` run.
    Ok(process::Output),
    /// Compilation failed; carries the output of a failing `pdflatex` run together with the
    /// sub-slice of preamble lines the failure was narrowed down to.
    OffendingLines(process::Output, &'a [&'a str]),
}
63
64pub fn dummy_pdf_latex(preamble: &[&str]) -> Result<process::Output, io::Error> {
65    let dummy_content = "$123$";
66
67    let tmp_dir = TempDir::new("latex-to-html")?;
68    let tex_file_path = tmp_dir.path().join("doc.tex");
69    let mut tex_file = File::create(&tex_file_path)?;
70    write_latex(&mut tex_file, preamble, dummy_content)?;
71    pdf_latex(&tex_file_path)
72}
73
/// Returns `true` if the given lines contain as many `{` as `}` characters in total.
///
/// Only the counts are compared; nesting order, escaped braces and comments are not considered.
pub fn has_even_curly_braces(preamble_part: &[&str]) -> bool {
    let (opening, closing) = preamble_part
        .iter()
        .flat_map(|line| line.chars())
        .fold((0usize, 0usize), |(opening, closing), ch| match ch {
            '{' => (opening + 1, closing),
            '}' => (opening, closing + 1),
            _ => (opening, closing),
        });

    opening == closing
}
93
94pub fn split_preamble(preamble_part: &[&str]) -> Option<usize> {
95    if preamble_part.len() < 2 {
96        return None;
97    }
98
99    let mut split_index = preamble_part.len() / 2;
100
101    // If the split resulted in an unmatched curly braces in the lower part, reduce the split index
102    // until curly braces are matched.
103    while split_index > 0 && !has_even_curly_braces(&preamble_part[0..split_index]) {
104        split_index -= 1;
105    }
106
107    // If we couldn't find a split with matching curly braces, try again but this time increase the
108    // split index.
109    if split_index == 0 {
110        split_index = preamble_part.len() / 2;
111        while split_index < preamble_part.len()
112            && !has_even_curly_braces(&preamble_part[0..split_index])
113        {
114            split_index += 1;
115        }
116        if split_index == preamble_part.len() {
117            return None;
118        }
119    }
120
121    Some(split_index)
122}
123
124pub fn diagnose_preamble<'a>(preamble: &'a [&'a str]) -> Result<PreambleDiagnosis<'a>, io::Error> {
125    let output = dummy_pdf_latex(preamble)?;
126    if output.status.success() {
127        return Ok(PreambleDiagnosis::Ok(output));
128    }
129
130    let mut known_good = 0;
131    let mut known_bad = preamble.len();
132    let mut bad_output = output;
133
134    while let Some(split_index) = split_preamble(&preamble[known_good..known_bad]) {
135        let split_index = split_index + known_good;
136        let output = dummy_pdf_latex(&preamble[0..split_index])?;
137        if output.status.success() {
138            known_good = split_index;
139        } else {
140            bad_output = output;
141            known_bad = split_index;
142        }
143    }
144
145    Ok(PreambleDiagnosis::OffendingLines(
146        bad_output,
147        &preamble[known_good..known_bad],
148    ))
149}
150
151pub fn latex_to_svg(preamble: &[&str], latex: &str) -> Result<String, LatexToSvgError> {
152    let tmp_dir = TempDir::new("latex-to-html")?;
153
154    let tex_file_path = tmp_dir.path().join("doc.tex");
155    let pdf_file_path = tmp_dir.path().join("doc.pdf");
156    let pdf_crop_file_path = tmp_dir.path().join("doc-crop.pdf");
157    let svg_file_path = tmp_dir.path().join("doc.svg");
158
159    let mut tex_file = File::create(&tex_file_path).map_err(LatexToSvgError::Io)?;
160    write_latex(&mut tex_file, preamble, latex)?;
161
162    let pdf_latex_output = pdf_latex(&tex_file_path)?;
163    if !pdf_latex_output.status.success() {
164        return Err(LatexToSvgError::PdfLatex(pdf_latex_output));
165    }
166
167    let mut pdf_crop_cmd = Command::new("pdfcrop");
168    pdf_crop_cmd.current_dir(tmp_dir.path());
169    pdf_crop_cmd.arg(&pdf_file_path);
170    pdf_crop_cmd.arg(&pdf_crop_file_path);
171    let pdf_crop_output = pdf_crop_cmd.output()?;
172    if !pdf_crop_output.status.success() {
173        return Err(LatexToSvgError::PdfCrop(pdf_crop_output));
174    }
175
176    let mut pdf2svg_cmd = Command::new("pdf2svg");
177    pdf2svg_cmd.current_dir(tmp_dir.path());
178    pdf2svg_cmd.arg(&pdf_crop_file_path);
179    pdf2svg_cmd.arg(&svg_file_path);
180    let pdf2svg_output = pdf2svg_cmd.output()?;
181    if !pdf2svg_output.status.success() {
182        return Err(LatexToSvgError::Pdf2Svg(pdf2svg_output));
183    }
184
185    let svg = std::fs::read_to_string(&svg_file_path)?;
186    Ok(svg)
187}
188
/// Geometry of a rendered math SVG, measured in `em`.
pub struct SvgInfo {
    /// Width of the image in em.
    pub width_em: f64,
    /// Height of the image in em.
    pub height_em: f64,
    /// Vertical position of the text baseline in em, if one was determined.
    pub baseline_em: Option<f64>,
}
194
195// Converts the dimensions of the svg from pt to em. Returns (width, height) in em.
196pub fn svg_dimensions_to_em(svg: &mut minidom::Element) -> Result<(f64, f64), LatexToSvgError> {
197    let bad_svg = || LatexToSvgError::BadSvg;
198
199    let width_attr = svg.attr("width").ok_or(bad_svg())?;
200    let width_pt: f64 = width_attr
201        .strip_suffix("pt")
202        .ok_or(bad_svg())?
203        .parse()
204        .map_err(|_| bad_svg())?;
205    let width_em = width_pt / 10.0;
206
207    let height_attr = svg.attr("height").ok_or(bad_svg())?;
208    let height_pt: f64 = height_attr
209        .strip_suffix("pt")
210        .ok_or(bad_svg())?
211        .parse()
212        .map_err(|_| bad_svg())?;
213    let height_em = height_pt / 10.0;
214
215    svg.set_attr("width", format!("{width_em}em"));
216    svg.set_attr("height", format!("{height_em}em"));
217
218    Ok((width_em, height_em))
219}
220
221// Removes the baseline point from the svg. Returns the y coordinate of the center of the point,
222// i.e. the y-coordinate that corresponds to the baseline.
223pub fn remove_baseline_point(svg_el: &mut minidom::Element) -> Result<f64, LatexToSvgError> {
224    let bad_svg = || LatexToSvgError::BadSvg;
225
226    let g_el: &mut minidom::element::Element = svg_el
227        .get_child_mut("g", minidom::NSChoice::Any)
228        .ok_or(bad_svg())?;
229    if g_el.attr("id") != Some("surface1") {
230        return Err(LatexToSvgError::BadSvg);
231    }
232
233    let path_el = g_el
234        .remove_child("path", minidom::NSChoice::Any)
235        .ok_or(bad_svg())?;
236    let transform_attr = path_el.attr("transform").ok_or(bad_svg())?;
237
238    let y_substr_begin = 1 + transform_attr.rfind(",").ok_or(bad_svg())?;
239    let y_substr_end = transform_attr.rfind(")").ok_or(bad_svg())?;
240    let y_str = &transform_attr[y_substr_begin..y_substr_end];
241
242    let y: f64 = y_str.parse().map_err(|_| bad_svg())?;
243
244    let baseline_em = (y + 0.5) / 10.0;
245    Ok(baseline_em)
246}
247
248pub fn math_to_svg(
249    preamble: &[&str],
250    math: &Math,
251) -> Result<(minidom::Element, SvgInfo), LatexToSvgError> {
252    use Math::*;
253    let latex = match math {
254        Inline(content) => {
255            formatdoc! {r#"
256                    $\makebox[0pt][l]{{\rule{{1pt}}{{1pt}}}}{content}$
257                "#}
258        }
259        Display { source, .. } | Mathpar { source, .. } => source.to_string(),
260    };
261
262    let svg = latex_to_svg(preamble, &latex)?;
263    let bad_svg = || LatexToSvgError::BadSvg;
264    let mut svg_el: minidom::Element = svg.parse().map_err(|_| bad_svg())?;
265    let (width_em, height_em) = svg_dimensions_to_em(&mut svg_el)?;
266
267    let baseline_em = match math {
268        Inline(_) => Some(remove_baseline_point(&mut svg_el)?),
269        Display { .. } | Mathpar { .. } => None,
270    };
271
272    Ok((
273        svg_el,
274        SvgInfo {
275            width_em,
276            height_em,
277            baseline_em,
278        },
279    ))
280}
281
/// A 32-byte digest identifying a math node together with the preamble it is compiled with.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct MathDigest(pub [u8; 32]);
284
285impl Display for MathDigest {
286    fn fmt(&self, out: &mut Formatter) -> fmt::Result {
287        write!(out, "{}", hex::encode(self.0))?;
288        Ok(())
289    }
290}
291
292pub fn hash_math(preamble: &[&str], math: &Math) -> MathDigest {
293    let mut hasher = Sha256::new();
294
295    for line in preamble {
296        hasher.update(line.as_bytes());
297    }
298
299    use Math::*;
300    match math {
301        Inline(source) => {
302            hasher.update(&[0]);
303            hasher.update(source);
304        }
305        Display { source, label: _ } => {
306            hasher.update(&[1]);
307            hasher.update(source);
308        }
309        Mathpar { source, label: _ } => {
310            hasher.update(&[2]);
311            hasher.update(source);
312        }
313    }
314
315    MathDigest(hasher.finalize().as_slice().try_into().unwrap())
316}
317
/// Name of the sub-directory (relative to the output directory) that holds the math SVG files.
pub const SVG_OUT_DIR: &str = "img-math";
319
320pub fn emit_math_svg_files<'a, 'b>(
321    out_dir: &'a Path,
322    preamble: &'b [&'b str],
323    math: &[&'b Math<'b>],
324) -> Result<(), (&'b Math<'b>, LatexToSvgError)> {
325    let out_dir = out_dir.join(SVG_OUT_DIR);
326    fs::create_dir_all(&out_dir).unwrap();
327
328    // Collect all math nodes that need to be compiled. There may be duplicate new math nodes;
329    // these need to be compiled only once. We detect duplicates by saving digests in a hash set.
330    let mut old_math_digests: HashSet<MathDigest> = HashSet::new();
331    let new_math: Vec<&'b Math<'b>> = math
332        .iter()
333        .copied()
334        .filter(|math| {
335            let digest = hash_math(preamble, &math);
336            let svg_path = out_dir.join(&format!("{digest}.svg"));
337            let is_new = !old_math_digests.contains(&digest) && !svg_path.exists();
338            old_math_digests.insert(digest);
339            is_new
340        })
341        .collect();
342
343    // Compile math nodes to svgs in parallel. We write to temporary files first and rename later
344    // to ensure consistency: We don't want files containing only partial contents, and we want to
345    // have the geometry information in the css file if the svg file exists.
346    let (compiled_math_sender, compiled_math_receiver) = channel::<(&'b Math<'b>, SvgInfo)>();
347    let compile_math_result: Result<(), (&'b Math<'b>, LatexToSvgError)> =
348        new_math.par_iter().copied().try_for_each_with(
349            compiled_math_sender,
350            |compiled_math_sender, math: &'b Math<'b>| {
351                let digest = hash_math(preamble, &math);
352                let svg_path_tmp = out_dir.join(&format!("{digest}.svg.tmp"));
353
354                let (svg, svg_info) = math_to_svg(preamble, math).map_err(|err| (math, err))?;
355                fs::write(&svg_path_tmp, &String::from(&svg)).unwrap();
356                compiled_math_sender.send((math, svg_info)).unwrap();
357                Ok(())
358            },
359        );
360    let compiled_math: Vec<(&'b Math<'b>, SvgInfo)> = compiled_math_receiver.iter().collect();
361
362    // Open the css file containing geometry information about the svgs. We append if it already
363    // exists and create otherwise.
364    let geometry_path = out_dir.join("geometry.css");
365    let mut geometry_file = OpenOptions::new()
366        .write(true)
367        .append(true)
368        .create(true)
369        .open(geometry_path)
370        .unwrap();
371
372    // Write geometry info for new math svgs to the css file.
373    for (math, svg_info) in compiled_math.iter() {
374        let SvgInfo {
375            width_em,
376            height_em,
377            baseline_em,
378        } = svg_info;
379
380        let top_em = match baseline_em {
381            None => 0.0,
382            Some(baseline_em) => height_em - baseline_em,
383        };
384
385        let digest = hash_math(preamble, &math);
386        writedoc! {geometry_file, r#"
387            img[src$="{digest}.svg"] {{
388                width: {width_em}em;
389                height: {height_em}em;
390                top: {top_em}em;
391            }}
392        "#}
393        .unwrap();
394    }
395    geometry_file.sync_data().unwrap();
396
397    // Rename temporary svg files.
398    for (math, _) in compiled_math.iter() {
399        let digest = hash_math(preamble, &math);
400        let svg_path = out_dir.join(&format!("{digest}.svg"));
401        let svg_path_tmp = out_dir.join(&format!("{digest}.svg.tmp"));
402
403        fs::rename(svg_path_tmp, svg_path).unwrap();
404    }
405
406    compile_math_result
407}