Skip to main content

tectonic/
driver.rs

1// Copyright 2018-2022 the Tectonic Project
2// Licensed under the MIT License.
3
4//! The high-level Tectonic document processing interface.
5//!
6//! The main struct in this module is [`ProcessingSession`], which knows how to
7//! run (and re-run if necessary) the various engines in the right order. Such a
8//! session can be created with a [`ProcessingSessionBuilder`], which you might
9//! obtain from a [`tectonic_docmodel::document::Document`] using the
10//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re
11//! using the Tectonic document model. You can set one up manually if not.
12//!
13//! For an example of how to use this module, see `src/bin/tectonic/main.rs`,
14//! which contains tectonic's main CLI program.
15
16use byte_unit::{Byte, UnitType};
17use quick_xml::{events::Event, NsReader};
18use std::{
19    collections::{HashMap, HashSet},
20    fs::File,
21    io::{Cursor, Read, Write},
22    path::{Path, PathBuf},
23    process::Command,
24    rc::Rc,
25    result::Result as StdResult,
26    str::FromStr,
27    time::{Duration, SystemTime},
28};
29use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError};
30use tectonic_bundles::Bundle;
31use tectonic_engine_spx2html::AssetSpecification;
32use tectonic_io_base::{
33    digest::DigestData,
34    filesystem::{FilesystemIo, FilesystemPrimaryInputIo},
35    stdstreams::{BufferedPrimaryIo, GenuineStdoutIo},
36    InputHandle, IoProvider, OpenResult, OutputHandle,
37};
38use which::which;
39
40use crate::{
41    ctry, errmsg,
42    errors::{ChainErrCompatExt, ErrorKind, Result},
43    io::{
44        format_cache::FormatCache,
45        memory::{MemoryFileCollection, MemoryIo},
46        InputOrigin,
47    },
48    status::StatusBackend,
49    tt_error, tt_note, tt_warning,
50    unstable_opts::UnstableOptions,
51    BibtexEngine, Spx2HtmlEngine, TexEngine, TexOutcome, XdvipdfmxEngine,
52};
53
/// How the underlying engines have touched a given file over the course of a
/// session. `ReadThenWritten` and `WrittenThenRead` are terminal states: once
/// a file reaches either of them, its pattern is not updated any further.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AccessPattern {
    /// The file has only ever been read.
    Read,

    /// The file has only ever been written, which suggests that it is a
    /// final output of the processing session.
    Written,

    /// The file was read first and written afterwards: a "circular" access
    /// pattern. The outputs of repeated engine passes will change if the
    /// contents of this file change, or if the file did not yet exist when
    /// the first pass ran.
    ReadThenWritten,

    /// The file was written first and read afterwards: a "temporary" access
    /// pattern. Such a file is probably a scratch buffer that the user does
    /// not care about.
    WrittenThenRead,
}
77
78/// A summary of the I/O that happened on a file. We record its access
79/// pattern; where it came from, if it was used as an input; the cryptographic
80/// digest of the file when it was last read; and the cryptographic digest of
81/// the file as it was last written.
82#[derive(Clone, Debug, Eq, PartialEq)]
83struct FileSummary {
84    access_pattern: AccessPattern,
85
86    /// If this file was read, where did it come from?
87    pub input_origin: InputOrigin,
88
89    /// If this file was read, this is the digest of its contents at the time it was *first* read.
90    /// The "first" is significant for files that were read and then written (for example, `.aux`
91    /// files).
92    ///
93    /// There's some chance that this will be `None` even if the file was read. Tectonic makes an
94    /// effort to compute the digest as the data is being read from the file, but this can fail if
95    /// tex decides to seek in the file as it is being written.
96    pub read_digest: Option<DigestData>,
97
98    /// If this file was written, this is the digest of its contents at the time it was last
99    /// written.
100    pub write_digest: Option<DigestData>,
101
102    got_written_to_disk: bool,
103}
104
105impl FileSummary {
106    fn new(access_pattern: AccessPattern, input_origin: InputOrigin) -> FileSummary {
107        FileSummary {
108            access_pattern,
109            input_origin,
110            read_digest: None,
111            write_digest: None,
112            got_written_to_disk: false,
113        }
114    }
115}
116
/// The kinds of output file that tectonic is able to generate. PDF is the
/// default.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// An auxiliary `.aux` file.
    Aux,
    /// An `.html` file.
    Html,
    /// An extended DVI (XDV) file.
    Xdv,
    /// A `.pdf` file.
    #[default]
    Pdf,
    /// A `.fmt` file, used to initialize the TeX engine.
    Format,
}
132
133impl FromStr for OutputFormat {
134    type Err = &'static str;
135
136    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
137        match a_str {
138            "aux" => Ok(OutputFormat::Aux),
139            "html" => Ok(OutputFormat::Html),
140            "xdv" => Ok(OutputFormat::Xdv),
141            "pdf" => Ok(OutputFormat::Pdf),
142            "fmt" => Ok(OutputFormat::Format),
143            _ => Err("unsupported or unknown format"),
144        }
145    }
146}
147
/// The kinds of "pass" that a [`ProcessingSession`] can be asked to perform.
/// [`ProcessingSession::run`] has more details.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum PassSetting {
    /// The standard pass: TeX and BibTeX are run repeatedly until no further
    /// runs are needed.
    #[default]
    Default,
    /// Run the TeX engine exactly once and nothing else.
    Tex,
    /// Same as the default pass, except that BibTeX is run once up front,
    /// before anything else happens.
    BibtexFirst,
}
160
161impl FromStr for PassSetting {
162    type Err = &'static str;
163
164    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
165        match a_str {
166            "default" => Ok(PassSetting::Default),
167            "bibtex_first" => Ok(PassSetting::BibtexFirst),
168            "tex" => Ok(PassSetting::Tex),
169            _ => Err("unsupported or unknown pass setting"),
170        }
171    }
172}
173
/// Where the "primary input" of a session is taken from. Standard input is
/// the default.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum PrimaryInputMode {
    /// The standard input of the current process.
    #[default]
    Stdin,

    /// A file at the given filesystem path.
    Path(PathBuf),

    /// A buffer of bytes held in memory.
    Buffer(Vec<u8>),
}
187
/// Where the output files of a session end up.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum OutputDestination {
    /// The "sensible" default: outputs go next to the input file, or into
    /// the current working directory when the input has no path of its own
    /// (standard input, for instance).
    #[default]
    Default,

    /// Outputs go into this specific directory.
    Path(PathBuf),

    /// Nothing is written to disk. Whoever runs the engine is expected to
    /// retrieve the output files from the memory layer of the I/O stack.
    Nowhere,
}
204
/// The subset of the driver state that is captured when running a C/C++ engine.
///
/// The main purpose of this type is to implement the [`DriverHooks`] trait,
/// which is defined by the `tectonic_bridge_core` crate and defines the
/// interface that the C/C++ processing engines can use to access the outside
/// world. While these engines are running, they hold a mutable reference to
/// these data, so it is helpful to separate them out into a sub-structure of
/// the larger [`ProcessingSession`] type.
///
/// Due to the needs of the C/C++ engines, this means that [`BridgeState`] must
/// hold the fully-prepared I/O stack information as well as the "event"
/// information that helps the driver implement the rerun logic.
///
/// The order in which the providers below are consulted is fixed by the
/// `bridgestate_ioprovider_cascade!` macro, not by the order of the fields.
struct BridgeState {
    /// I/O for the primary input source. This is boxed since it can come
    /// from different sources: maybe a file, maybe an in-memory buffer, etc.
    primary_input: Box<dyn IoProvider>,

    /// I/O for the main backing bundle. This is boxed since there are several
    /// different bundle implementations that might be used at runtime.
    bundle: Box<dyn Bundle>,

    /// Memory buffering for files written during processing.
    mem: MemoryIo,

    /// The main filesystem backing for input files in the project.
    filesystem: FilesystemIo,

    /// Extra paths we search through for files.
    extra_search_paths: Vec<FilesystemIo>,

    /// Additional filesystem backing used if "shell escape" functionality is
    /// activated. If None, we take that to mean that shell-escape is
    /// disallowed. We have to use a persistent filesystem directory for this
    /// since some packages perform a whole series of shell-escape operations
    /// that assume continuity from one to the next.
    shell_escape_work: Option<FilesystemIo>,

    /// I/O for saving any generated format files.
    format_cache: FormatCache,

    /// Possible redirection of "standard output" writes to actual standard
    /// output.
    genuine_stdout: Option<GenuineStdoutIo>,

    // BEGIN AWARE REPORTS PATCH
    /// When set, output files stream directly to the filesystem instead of
    /// buffering in `mem`, giving an O(1) memory footprint independent of
    /// document size (the XDV and PDF of a large report otherwise sit in
    /// RAM in their entirety). Tried before all other providers for output
    /// opens; read-back of written files is served by `filesystem`, which
    /// shares the same root.
    ///
    /// NOTE(review): the provider cascade skips `filesystem` while in format
    /// mode, so read-back through `filesystem` is presumably unavailable
    /// there, and code that enumerates `mem` will not see these outputs --
    /// confirm that both are acceptable.
    disk_outputs: Option<FilesystemIo>,
    // END AWARE REPORTS PATCH

    /// A possible alternative "primary input" when generating format files. If
    /// Some(), we're in format-file generation mode; in most cases this is
    /// None.
    format_primary: Option<BufferedPrimaryIo>,

    /// The I/O events that occurred while processing, keyed by file name.
    /// Standard output is tracked under the empty-string key.
    events: HashMap<String, FileSummary>,
}
267
268impl BridgeState {
269    /// Tell the IoProvider implementation of the bridge state to enter "format
270    /// mode", in which the "primary input" is fixed, based on the requested
271    /// format file name, and filesystem I/O is bypassed.
272    fn enter_format_mode(&mut self, format_file_name: &str) {
273        self.format_primary = Some(BufferedPrimaryIo::from_text(format!(
274            "\\input {format_file_name}"
275        )));
276    }
277
278    /// Leave "format mode".
279    fn leave_format_mode(&mut self) {
280        self.format_primary = None;
281    }
282
283    /// Invoke an external tool as a pass in the processing pipeline.
284    fn external_tool_pass(
285        &mut self,
286        tool: &ExternalToolPass,
287        status: &mut dyn StatusBackend,
288    ) -> Result<()> {
289        status.note_highlighted("Running external tool ", &tool.argv[0], " ...");
290
291        // Process the command arguments. Filenames appearing in the arguments
292        // are treated as "requirements" that will be placed in the tool's
293        // working directory.
294
295        let mut cmd = Command::new(&tool.argv[0]);
296        let mut read_files = tool.extra_requires.clone();
297
298        {
299            let mem_files = &*self.mem.files.borrow();
300
301            for arg in &tool.argv[1..] {
302                cmd.arg(arg);
303
304                if mem_files.contains_key(arg) {
305                    read_files.insert(arg.to_owned());
306                }
307            }
308        }
309
310        // Now that we're validated, write those files to disk so that the tool
311        // can actually use them.
312
313        let tempdir = ctry!(
314            tempfile::Builder::new().tempdir();
315            "can't create temporary directory for external tool"
316        );
317
318        {
319            for name in &read_files {
320                // If a relative parent is found in the file to open, this fn
321                // does not properly handle that. Thus, throw an error.
322                if name.contains("../") {
323                    return Err(errmsg!(
324                        "relative parent paths are not supported for the \
325                        external tool. Got path `{}`.",
326                        name
327                    ));
328                }
329
330                let mut ih = ctry!(
331                    self.input_open_name(name, status).must_exist();
332                    "can't open path `{}`", name
333                );
334
335                // If the input path is absolute, we don't need to create a
336                // version in the tempdir, and in fact the current
337                // implementation below will blow away the input file. However,
338                // we do want to try to open the input so that it gets
339                // registered with the I/O tracking system.
340
341                let path = Path::new(name);
342                if path.is_absolute() {
343                    continue;
344                }
345
346                let tool_path = tempdir.path().join(name);
347                let tool_parent = tool_path.parent().unwrap();
348
349                if tool_parent != tempdir.path() {
350                    ctry!(
351                        std::fs::create_dir_all(tool_parent);
352                        "failed to create sub directory `{}`", tool_parent.display()
353                    );
354                }
355                let mut f = ctry!(
356                    File::create(&tool_path);
357                    "failed to create file `{}`", tool_path.display()
358                );
359                ctry!(
360                    std::io::copy(&mut ih, &mut f);
361                    "failed to write file `{}`", tool_path.display()
362                );
363            }
364        }
365
366        // Now we can actually run the command.
367
368        let output = cmd.current_dir(tempdir.path()).output()?;
369
370        if let Some(0) = output.status.code() {
371        } else {
372            tt_error!(
373                status,
374                "the external tool exited with an error code; its stdout was:\n"
375            );
376            status.dump_error_logs(&output.stdout[..]);
377            tt_error!(status, "its stderr was:\n");
378            status.dump_error_logs(&output.stderr[..]);
379
380            return if let Some(n) = output.status.code() {
381                Err(errmsg!("the external tool exited with error code {}", n))
382            } else {
383                Err(errmsg!("the external tool was terminated by a signal"))
384            };
385        }
386
387        // Search for any files that the tool created, and import them into the
388        // memory layer.
389
390        for entry in std::fs::read_dir(tempdir.path())? {
391            let entry = entry?;
392
393            if !entry.file_type()?.is_file() {
394                continue;
395            }
396
397            if let Some(basename) = entry.file_name().to_str() {
398                if !self.mem.files.borrow().contains_key(basename) {
399                    let path = entry.path();
400                    let mut data = Vec::new();
401
402                    let mut f = ctry!(
403                        File::open(&path);
404                        "failed to open tool-created file `{}`", path.display()
405                    );
406                    ctry!(
407                        f.read_to_end(&mut data);
408                        "failed to read tool-created file `{}`", path.display()
409                    );
410
411                    self.mem.create_entry(basename, data);
412                    self.events.insert(
413                        basename.to_owned(),
414                        FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
415                    );
416                }
417            }
418        }
419
420        // Mark the input files as having been read, and we're done.
421
422        for name in &read_files {
423            let summ = self.events.get_mut(name).unwrap();
424            summ.access_pattern = match summ.access_pattern {
425                AccessPattern::Written => AccessPattern::WrittenThenRead,
426                c => c, // identity mapping makes sense for remaining options
427            };
428        }
429
430        Ok(())
431    }
432
433    // Get the names of all intermediate files which are generated from
434    // previous passes.
435    fn get_intermediate_file_names(&self) -> Vec<String> {
436        // Currently, we only consider files in memory as intermediate files.
437        return self.mem.files.borrow().keys().cloned().collect();
438    }
439}
440
/// Call an `IoProvider`-style method on `$provider`. If the outcome is
/// anything other than `OpenResult::NotAvailable`, return it from the
/// enclosing function or closure; otherwise fall through so that the next
/// provider can be tried.
macro_rules! bridgestate_ioprovider_try {
    ($provider:expr, $($inner:tt)+) => {
        match $provider.$($inner)+ {
            OpenResult::NotAvailable => {}
            outcome => return outcome,
        }
    }
}
450
/// Try an `IoProvider`-style method call against each provider held by
/// `$self` (a `BridgeState`) in priority order, returning the first result
/// that is not `OpenResult::NotAvailable`.
///
/// The order is: genuine stdout (if set); the format-mode primary input if
/// in format mode, otherwise the regular primary input; the memory layer;
/// then -- only when not in format mode -- the main filesystem, the
/// shell-escape work directory (if set), and the extra search paths; then
/// the bundle; and finally the format cache.
///
/// Every path through the expansion ends in a `return`, so the macro must be
/// invoked as the tail of a function or closure that returns `OpenResult`.
macro_rules! bridgestate_ioprovider_cascade {
    ($self:ident, $($inner:tt)+) => {
        if let Some(ref mut p) = $self.genuine_stdout {
            bridgestate_ioprovider_try!(p, $($inner)+);
        }

        // See enter_format_mode above. If creating a format file, disable local
        // filesystem I/O.
        let use_fs = if let Some(ref mut p) = $self.format_primary {
            bridgestate_ioprovider_try!(p, $($inner)+);
            false
        } else {
            bridgestate_ioprovider_try!($self.primary_input, $($inner)+);
            true
        };

        bridgestate_ioprovider_try!($self.mem, $($inner)+);

        if use_fs {
            bridgestate_ioprovider_try!($self.filesystem, $($inner)+);

            // With this ordering, we are preventing files created by
            // shell-escape commands from overwriting/replacing source files.
            // This seems very much like the behavior we want, unless there are
            // some freaky shell-escape uses that depend on this behavior.
            if let Some(ref mut p) = $self.shell_escape_work {
                bridgestate_ioprovider_try!(p, $($inner)+);
            }

            // Extra search paths. This has higher priority than bundles but lower than current
            // working dir to support the use case of overriding broken bundles (see issue #816).
            for fsio in $self.extra_search_paths.iter_mut() {
                bridgestate_ioprovider_try!(fsio, $($inner)+);
            }
        }

        bridgestate_ioprovider_try!($self.bundle.as_ioprovider_mut(), $($inner)+);
        bridgestate_ioprovider_try!($self.format_cache, $($inner)+);

        // Nothing could satisfy the request.
        return OpenResult::NotAvailable;
    }
}
493
494impl IoProvider for BridgeState {
495    fn output_open_name(&mut self, name: &str) -> OpenResult<OutputHandle> {
496        let r = (|| {
497            // BEGIN AWARE REPORTS PATCH
498            if let Some(ref mut p) = self.disk_outputs {
499                bridgestate_ioprovider_try!(p, output_open_name(name));
500            }
501            // END AWARE REPORTS PATCH
502            bridgestate_ioprovider_cascade!(self, output_open_name(name));
503        })();
504
505        if let OpenResult::Ok(_) = r {
506            if let Some(summ) = self.events.get_mut(name) {
507                summ.access_pattern = match summ.access_pattern {
508                    AccessPattern::Read => AccessPattern::ReadThenWritten,
509                    c => c, // identity mapping makes sense for remaining options
510                };
511            } else {
512                self.events.insert(
513                    name.to_owned(),
514                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
515                );
516            }
517        }
518
519        r
520    }
521
522    fn output_open_stdout(&mut self) -> OpenResult<OutputHandle> {
523        let r = (|| {
524            bridgestate_ioprovider_cascade!(self, output_open_stdout());
525        })();
526
527        // Life is easier if we track stdout in the same way that we do other
528        // output files.
529
530        if let OpenResult::Ok(_) = r {
531            if let Some(summ) = self.events.get_mut("") {
532                summ.access_pattern = match summ.access_pattern {
533                    AccessPattern::Read => AccessPattern::ReadThenWritten,
534                    c => c, // identity mapping makes sense for remaining options
535                };
536            } else {
537                self.events.insert(
538                    String::from(""),
539                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
540                );
541            }
542        }
543
544        r
545    }
546
547    fn input_open_name(
548        &mut self,
549        name: &str,
550        status: &mut dyn StatusBackend,
551    ) -> OpenResult<InputHandle> {
552        match self.input_open_name_with_abspath(name, status) {
553            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
554            OpenResult::Err(e) => OpenResult::Err(e),
555            OpenResult::NotAvailable => OpenResult::NotAvailable,
556        }
557    }
558
559    fn input_open_name_with_abspath(
560        &mut self,
561        name: &str,
562        status: &mut dyn StatusBackend,
563    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
564        let r = (|| {
565            bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status));
566        })();
567
568        match r {
569            OpenResult::Ok((ref ih, ref _path)) => {
570                if let Some(summ) = self.events.get_mut(name) {
571                    summ.access_pattern = match summ.access_pattern {
572                        AccessPattern::Written => AccessPattern::WrittenThenRead,
573                        c => c, // identity mapping makes sense for remaining options
574                    };
575                } else {
576                    self.events.insert(
577                        name.to_owned(),
578                        FileSummary::new(AccessPattern::Read, ih.origin()),
579                    );
580                }
581            }
582
583            OpenResult::NotAvailable => {
584                // For the purposes of file access pattern tracking, an attempt to
585                // open a nonexistent file counts as a read of a zero-size file. I
586                // don't see how such a file could have previously been written, but
587                // let's use the full update logic just in case.
588
589                if let Some(summ) = self.events.get_mut(name) {
590                    summ.access_pattern = match summ.access_pattern {
591                        AccessPattern::Written => AccessPattern::WrittenThenRead,
592                        c => c, // identity mapping makes sense for remaining options
593                    };
594                } else {
595                    // Unlike other cases, here we need to fill in the read_digest. `None`
596                    // is not an appropriate value since, if the file is written and then
597                    // read again later, the `None` will be overwritten; but what matters
598                    // is the contents of the file the very first time it was read.
599                    let mut fs = FileSummary::new(AccessPattern::Read, InputOrigin::NotInput);
600                    fs.read_digest = Some(DigestData::of_nothing());
601                    self.events.insert(name.to_owned(), fs);
602                }
603            }
604
605            OpenResult::Err(_) => {}
606        }
607
608        r
609    }
610
611    fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
612        match self.input_open_primary_with_abspath(status) {
613            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
614            OpenResult::Err(e) => OpenResult::Err(e),
615            OpenResult::NotAvailable => OpenResult::NotAvailable,
616        }
617    }
618
    /// Open the primary input through the provider cascade, returning the
    /// handle together with whatever path the provider reports.
    ///
    /// Unlike the other input-opening methods of this impl, this one does
    /// not record anything in `events`.
    fn input_open_primary_with_abspath(
        &mut self,
        status: &mut dyn StatusBackend,
    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
        // The macro expansion returns from this function on every path.
        bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status));
    }
625
626    fn input_open_format(
627        &mut self,
628        name: &str,
629        status: &mut dyn StatusBackend,
630    ) -> OpenResult<InputHandle> {
631        let r = (|| {
632            bridgestate_ioprovider_cascade!(self, input_open_format(name, status));
633        })();
634
635        if let OpenResult::Ok(ref ih) = r {
636            if let Some(summ) = self.events.get_mut(name) {
637                summ.access_pattern = match summ.access_pattern {
638                    AccessPattern::Written => AccessPattern::WrittenThenRead,
639                    c => c, // identity mapping makes sense for remaining options
640                };
641            } else {
642                self.events.insert(
643                    name.to_owned(),
644                    FileSummary::new(AccessPattern::Read, ih.origin()),
645                );
646            }
647        }
648
649        r
650    }
651}
652
653impl DriverHooks for BridgeState {
    /// Expose this state as the engine's I/O provider. `BridgeState`
    /// implements `IoProvider` itself, so it simply returns itself.
    fn io(&mut self) -> &mut dyn IoProvider {
        self
    }
657
658    fn event_output_closed(&mut self, name: String, digest: DigestData) {
659        let summ = self
660            .events
661            .get_mut(&name)
662            .expect("closing file that wasn't opened?");
663        summ.write_digest = Some(digest);
664    }
665
666    fn event_input_closed(
667        &mut self,
668        name: String,
669        digest: Option<DigestData>,
670        _status: &mut dyn StatusBackend,
671    ) {
672        let summ = self
673            .events
674            .get_mut(&name)
675            .expect("closing file that wasn't opened?");
676
677        // It's what was in the file the *first* time that it was read that
678        // matters, so don't replace the read digest if it's already got one.
679
680        if summ.read_digest.is_none() {
681            summ.read_digest = digest;
682        }
683    }
684
685    fn sysrq_shell_escape(
686        &mut self,
687        command: &str,
688        status: &mut dyn StatusBackend,
689    ) -> StdResult<(), SystemRequestError> {
690        #[cfg(unix)]
691        const SHELL: &[&str] = &["sh", "-c"];
692
693        #[cfg(windows)]
694        const SHELL: &[&str] = &["cmd.exe", "/c"];
695
696        // Write any TeX-created files in the memory cache to the shell-escape
697        // working directory, since the shell-escape program may need to use
698        // them. (This is the case for `minted`.) We basically just hope that
699        // nothing will want to access the actual TeX source, which will live in
700        // a different directory.
701        //
702        // This is suboptimally slow since we'll be rewriting the same files
703        // repeatedly for repeated shell-escape invocations, but I don't feel
704        // like optimizing that I/O right now. Shell-escape is a gnarly hack
705        // anyway!
706
707        if let Some(work) = self.shell_escape_work.as_ref() {
708            for (name, file) in &*self.mem.files.borrow() {
709                // If it's in the `mem` backend, it's of interest here ...
710                // unless it's stdout.
711                if name == self.mem.stdout_key() {
712                    continue;
713                }
714
715                let real_path = work.root().join(name);
716                if let Some(prefix) = real_path.parent() {
717                    std::fs::create_dir_all(prefix).map_err(|e| {
718                        tt_error!(status, "failed to create sub directory `{}`", prefix.display(); e.into());
719                        SystemRequestError::Failed
720                    })?;
721                }
722                let mut f = File::create(&real_path).map_err(|e| {
723                    tt_error!(status, "failed to create file `{}`", real_path.display(); e.into());
724                    SystemRequestError::Failed
725                })?;
726                f.write_all(&file.data).map_err(|e| {
727                    tt_error!(status, "failed to write file `{}`", real_path.display(); e.into());
728                    SystemRequestError::Failed
729                })?;
730            }
731
732            // Now we can actually run the command.
733
734            tt_note!(status, "running shell command: `{}`", command);
735
736            match Command::new(SHELL[0])
737                .args(&SHELL[1..])
738                .arg(command)
739                .current_dir(work.root())
740                .status()
741            {
742                Ok(s) => match s.code() {
743                    Some(0) => Ok(()),
744                    Some(n) => {
745                        tt_warning!(status, "command exited with error code {}", n);
746                        Err(SystemRequestError::Failed)
747                    }
748                    None => {
749                        tt_warning!(status, "command was terminated by signal");
750                        Err(SystemRequestError::Failed)
751                    }
752                },
753                Err(err) => {
754                    tt_warning!(status, "failed to run command"; err.into());
755                    Err(SystemRequestError::Failed)
756                }
757            }
758
759            // That's it! We shouldn't clean up here, because there might be
760            // multiple shell-escapes that build up in sequence, and any new
761            // files created by the shell-escape command will be picked up by
762            // the filesystem I/O.
763        } else {
764            // No shell-escape work directory. This "shouldn't happen" but means
765            // that shell-escape is supposed to be disabled anyway!
766            tt_error!(
767                status,
768                "the engine requested a shell-escape invocation but it's currently disabled"
769            );
770            Err(SystemRequestError::NotAllowed)
771        }
772    }
773}
774
/// The ways in which shell-escape functionality can be handled.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum ShellEscapeMode {
    /// The "default" behavior: shell-escape is off unless the unstable
    /// options turn it on, in which case it is permitted through a temporary
    /// directory.
    #[default]
    Defaulted,

    /// Shell-escape is off, no matter what the unstable options say.
    Disabled,

    /// Shell-escape is on and uses a temporary work directory that the
    /// processing session manages itself and deletes once processing is
    /// complete.
    TempDir,

    /// Shell-escape is on and uses a work directory that is managed by
    /// someone else. The processing session will not delete it.
    ExternallyManagedDir(PathBuf),
}
796
/// A custom extra pass that invokes an external tool.
///
/// This is bad for reproducibility but comes in handy.
#[derive(Debug)]
struct ExternalToolPass {
    /// The command line to run: the program name followed by its arguments.
    /// Arguments that match the name of a file in the memory layer are
    /// copied into the tool's working directory before it runs.
    argv: Vec<String>,

    /// Names of additional files to copy into the tool's working directory,
    /// whether or not they appear in `argv`.
    extra_requires: HashSet<String>,
}
805
/// A builder-style interface for creating a [`ProcessingSession`].
///
/// This uses standard builder patterns. The `Default` implementation defaults
/// to restrictive security settings that disable all known-insecure features
/// that could be abused by untrusted inputs. Use
/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the
/// option to enable potentially-insecure features such as shell-escape.
#[derive(Default)]
pub struct ProcessingSessionBuilder {
    /// Security settings; supplied through `new_with_security` or left at
    /// their default.
    security: SecuritySettings,
    /// Source of the primary input; defaults to standard input.
    primary_input: PrimaryInputMode,
    tex_input_name: Option<String>,
    /// Where output files land; defaults to `OutputDestination::Default`.
    output_dest: OutputDestination,
    filesystem_root: Option<PathBuf>,
    format_name: Option<String>,
    format_cache_path: Option<PathBuf>,
    /// Kind of output to produce; defaults to PDF.
    output_format: OutputFormat,
    makefile_output_path: Option<PathBuf>,
    hidden_input_paths: HashSet<PathBuf>,
    /// Which kind of pass to run; defaults to `PassSetting::Default`.
    pass: PassSetting,
    reruns: Option<usize>,
    print_stdout: bool,
    bundle: Option<Box<dyn Bundle>>,
    keep_intermediates: bool,
    // NOTE(review): presumably selects streaming outputs to disk instead of
    // buffering them in memory (see `BridgeState::disk_outputs`) -- confirm
    // against the code that builds the session.
    outputs_to_filesystem: bool,
    keep_logs: bool,
    synctex: bool,
    build_date: Option<SystemTime>,
    unstables: UnstableOptions,
    /// How shell-escape is handled; defaults to `ShellEscapeMode::Defaulted`.
    shell_escape_mode: ShellEscapeMode,
    html_assets_spec_path: Option<String>,
    html_precomputed_assets: Option<AssetSpecification>,
    html_do_not_emit_files: bool,
    html_do_not_emit_assets: bool,
}
841
842impl ProcessingSessionBuilder {
843    /// Create a new builder with customized security settings.
844    pub fn new_with_security(security: SecuritySettings) -> Self {
845        ProcessingSessionBuilder {
846            security,
847            ..Default::default()
848        }
849    }
850
851    /// Sets the path to the primary input file.
852    ///
853    /// If a primary input path is not specified, we will default to reading it from stdin.
854    pub fn primary_input_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
855        self.primary_input = PrimaryInputMode::Path(p.as_ref().to_owned());
856        self
857    }
858
859    /// Sets the primary input to be a caller-specified buffer.
860    ///
861    /// If neither this nor a primary input path is specified, we will default
862    /// to reading the primary input from stdin.
863    pub fn primary_input_buffer(&mut self, buf: &[u8]) -> &mut Self {
864        self.primary_input = PrimaryInputMode::Buffer(buf.to_owned());
865        self
866    }
867
868    /// Sets the name of the main input file.
869    ///
870    /// This value will be used to infer the names of the output files; for example, if
871    /// `tex_input_name` is set to `"texput.tex"` then the pdf output file will be `"texput.pdf"`.
872    /// As such, this parameter is mandatory, even if the real input is coming from stdin (if it is
873    /// not provided, [`ProcessingSessionBuilder::create`] will panic).
874    pub fn tex_input_name(&mut self, s: &str) -> &mut Self {
875        self.tex_input_name = Some(s.to_owned());
876        self
877    }
878
879    /// Set the directory that serves as the root for finding files on disk.
880    ///
881    /// If unspecified, and there is a primary input file, the directory
882    /// containing that file will serve as the filesystem root. Otherwise, it is
883    /// set to the current directory.
884    pub fn filesystem_root<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
885        self.filesystem_root = Some(p.as_ref().to_owned());
886        self
887    }
888
889    /// A path to the directory where output files should be created.
890    ///
891    /// This will default to the directory containing `primary_input_path`, or
892    /// the current working directory if the primary input is coming from
893    /// stdin.
894    pub fn output_dir<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
895        self.output_dest = OutputDestination::Path(p.as_ref().to_owned());
896        self
897    }
898
899    /// Indicate that output files should not be written to disk.
900    ///
901    /// By default, output files will be written to the directory containing
902    /// `primary_input_path`, or the current working directory if the primary
903    /// input is coming from stdin.
904    pub fn do_not_write_output_files(&mut self) -> &mut Self {
905        self.output_dest = OutputDestination::Nowhere;
906        self
907    }
908
909    /// The name of the `.fmt` file used to initialize the TeX engine.
910    ///
911    /// This file does not necessarily have to exist already; it will be created
912    /// if it doesn't. This parameter is mandatory (if it is not provided,
913    /// [`ProcessingSessionBuilder::create`] will panic).
914    pub fn format_name(&mut self, p: &str) -> &mut Self {
915        self.format_name = Some(p.to_owned());
916        self
917    }
918
919    /// Sets the path to the format file cache.
920    ///
921    /// This is used to, well, cache format files, which are generated as
922    /// needed from the backing bundle. Defaults to the same directory as the
923    /// input file, or PWD if the input is a non-file (such as standard
924    /// input).
925    pub fn format_cache_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
926        self.format_cache_path = Some(p.as_ref().to_owned());
927        self
928    }
929
930    /// The type of output to create.
931    pub fn output_format(&mut self, f: OutputFormat) -> &mut Self {
932        self.output_format = f;
933        self
934    }
935
936    /// If set, a makefile will be written out at the given path.
937    pub fn makefile_output_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
938        self.makefile_output_path = Some(p.as_ref().to_owned());
939        self
940    }
941
942    /// Which kind of pass should the `ProcessingSession` run? Defaults to `PassSetting::Default`
943    /// (duh).
944    pub fn pass(&mut self, p: PassSetting) -> &mut Self {
945        self.pass = p;
946        self
947    }
948
949    /// If set, and if the pass is set to `PassSetting::Default`, the TeX engine will be re-run
950    /// *exactly* this many times.
951    ///
952    /// If `reruns` is unset, we will auto-detect how many times the TeX engine needs to be re-run.
953    pub fn reruns(&mut self, r: usize) -> &mut Self {
954        self.reruns = Some(r);
955        self
956    }
957
958    /// If set to `true`, stdout from the TeX engine will be forwarded to actual stdout. (By
959    /// default, it will be suppressed.)
960    pub fn print_stdout(&mut self, p: bool) -> &mut Self {
961        self.print_stdout = p;
962        self
963    }
964
965    /// Marks a path as hidden, meaning that the TeX engine will pretend that it doesn't exist in
966    /// the filesystem.
967    pub fn hide<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
968        self.hidden_input_paths.insert(p.as_ref().to_owned());
969        self
970    }
971
972    /// Sets the bundle, which the various engines will use for finding style files, font files,
973    /// etc.
974    pub fn bundle(&mut self, b: Box<dyn Bundle>) -> &mut Self {
975        self.bundle = Some(b);
976        self
977    }
978
979    /// If set to `true`, various intermediate files will be written out to the filesystem.
980    pub fn keep_intermediates(&mut self, k: bool) -> &mut Self {
981        self.keep_intermediates = k;
982        self
983    }
984
985    // BEGIN AWARE REPORTS PATCH
986    /// If set to `true`, output files (XDV, PDF, logs, aux files) are
987    /// streamed directly to the filesystem root as the engines write them,
988    /// instead of being buffered in memory until the session ends. This
989    /// keeps the session's memory footprint independent of the document
990    /// size. Files land in the filesystem root regardless of
991    /// `keep_intermediates`.
992    pub fn outputs_to_filesystem(&mut self, k: bool) -> &mut Self {
993        self.outputs_to_filesystem = k;
994        self
995    }
996    // END AWARE REPORTS PATCH
997
998    /// If set to `true`, '.log' and '.blg' files will be written out to the filesystem.
999    pub fn keep_logs(&mut self, k: bool) -> &mut Self {
1000        self.keep_logs = k;
1001        self
1002    }
1003
1004    /// If set to `true`, tex files will be compiled using synctex information.
1005    pub fn synctex(&mut self, s: bool) -> &mut Self {
1006        self.synctex = s;
1007        self
1008    }
1009
1010    /// Sets the date and time of the processing session.
1011    /// See `TexEngine::build_date` for mor information.
1012    pub fn build_date(&mut self, date: SystemTime) -> &mut Self {
1013        self.build_date = Some(date);
1014        self
1015    }
1016
1017    /// Configures the date and time of the processing session from the environment:
1018    /// If `SOURCE_DATE_EPOCH` is set, it's used as the build date.
1019    /// If `force_deterministic` is set, we fall back to UNIX_EPOCH.
1020    /// Otherwise, we use the current system time.
1021    pub fn build_date_from_env(&mut self, force_deterministic: bool) -> &mut Self {
1022        let build_date_str = std::env::var("SOURCE_DATE_EPOCH").ok();
1023        let build_date = match (force_deterministic, build_date_str) {
1024            (_, Some(s)) => {
1025                let epoch = s
1026                    .parse::<u64>()
1027                    .expect("invalid SOURCE_DATE_EPOCH (not a number)");
1028
1029                SystemTime::UNIX_EPOCH
1030                    .checked_add(Duration::from_secs(epoch))
1031                    .expect("time overflow")
1032            }
1033            (true, None) => SystemTime::UNIX_EPOCH,
1034            (false, None) => SystemTime::now(),
1035        };
1036        self.build_date(build_date)
1037    }
1038
1039    /// Loads unstable options into the processing session
1040    pub fn unstables(&mut self, opts: UnstableOptions) -> &mut Self {
1041        self.unstables = opts;
1042        self
1043    }
1044
1045    /// Enable "shell escape" commands in the engines, and use the specified
1046    /// directory for shell-escape work. The caller is responsible for the
1047    /// creation and/or destruction of this directory. The default is to
1048    /// disable shell-escape unless the [`UnstableOptions`] say otherwise,
1049    /// in which case a driver-managed temporary directory will be used.
1050    pub fn shell_escape_with_work_dir<P: AsRef<Path>>(&mut self, path: P) -> &mut Self {
1051        if self.security.allow_shell_escape() {
1052            self.shell_escape_mode =
1053                ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned());
1054        }
1055        self
1056    }
1057
1058    /// Forcibly enable shell-escape mode with a temporary directory, overriding
1059    /// any [`UnstableOptions`] settings. The default is to disable shell-escape
1060    /// unless the [`UnstableOptions`] say otherwise, in which case a
1061    /// driver-managed temporary directory will be used.
1062    pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self {
1063        if self.security.allow_shell_escape() {
1064            self.shell_escape_mode = ShellEscapeMode::TempDir;
1065        }
1066        self
1067    }
1068
1069    /// Forcibly disable shell-escape mode, overriding any [`UnstableOptions`]
1070    /// settings. The default is to disable shell-escape unless the
1071    /// [`UnstableOptions`] say otherwise, in which case a driver-managed
1072    /// temporary directory will be used.
1073    pub fn shell_escape_disabled(&mut self) -> &mut Self {
1074        self.shell_escape_mode = ShellEscapeMode::Disabled;
1075        self
1076    }
1077
1078    /// When using HTML mode, emit an asset specification file instead of actual
1079    /// asset files.
1080    ///
1081    /// "Assets" are files like fonts and images that accompany the HTML output
1082    /// generated during processing. By default, these are emitted during
1083    /// processing. If this method is called, the assets will *not* be created.
1084    /// Instead, an "asset specification" file will be emitted to the given
1085    /// output path. This specification file contains the information needed to
1086    /// generate the assets upon a later invocation. Asset specification files
1087    /// can be merged, allowing the results of multiple separate TeX
1088    /// compilations to be synthesized into one HTML output tree.
1089    ///
1090    /// If the build does not use HTML mode, this setting has no effect.
1091    pub fn html_assets_spec_path<S: ToString>(&mut self, path: S) -> &mut Self {
1092        self.html_assets_spec_path = Some(path.to_string());
1093        self
1094    }
1095
1096    /// In HTML mode, use a precomputed asset specification.
1097    ///
1098    /// "Assets" are files like fonts and images that accompany the HTML output
1099    /// generated during processing. By default, the engine gathers these during
1100    /// processing and emits them at the end. After this method is used,
1101    /// however, it will generate HTML outputs assuming the information given in
1102    /// the asset specification given here. If the input calls for new assets or
1103    /// different options inconsistent with the specification, processing will
1104    /// abort with an error.
1105    ///
1106    /// The purpose of this mode is to allow for a unified set of assets to be
1107    /// created from multiple independent runs of the SPX-to-HTML stage. First,
1108    /// the different inputs should be processed independently, and their
1109    /// individual assets should saved. These should then be merged. Then the
1110    /// inputs should be reprocessed, all using the merged asset specification.
1111    /// In one — but only one — of these sessions, the assets should actually be
1112    /// emitted.
1113    pub fn html_precomputed_assets(&mut self, assets: AssetSpecification) -> &mut Self {
1114        self.html_precomputed_assets = Some(assets);
1115        self
1116    }
1117
1118    /// Set whether templated outputs should be created during HTML processing.
1119    ///
1120    /// This mode can be useful if you want to analyze what *would* be created
1121    /// during HTML processing without actually creating the files.
1122    pub fn html_emit_files(&mut self, do_emit: bool) -> &mut Self {
1123        self.html_do_not_emit_files = !do_emit;
1124        self
1125    }
1126
1127    /// Set whether supporting asset files should be created during HTML
1128    /// processing.
1129    ///
1130    /// This mode can be useful if you want to analyze what *would* be created
1131    /// during HTML processing without actually creating the files. If you call
1132    /// [`Self::html_assets_spec_path`], this setting will ignored, and no
1133    /// assets will be emitted to disk.
1134    pub fn html_emit_assets(&mut self, do_emit: bool) -> &mut Self {
1135        self.html_do_not_emit_assets = !do_emit;
1136        self
1137    }
1138
1139    /// Creates a `ProcessingSession`.
1140    pub fn create(self, status: &mut dyn StatusBackend) -> Result<ProcessingSession> {
1141        // First, work on the "bridge state", which gathers the subset of our
1142        // state that has to be held in a mutable reference while running the
1143        // C/C++ engines:
1144
1145        let mut bundle = self.bundle.expect("a bundle must be specified");
1146
1147        let mut filesystem_root = self.filesystem_root.unwrap_or_default();
1148
1149        let (pio, primary_input_path, default_output_path) = match self.primary_input {
1150            PrimaryInputMode::Path(p) => {
1151                // Set the filesystem root (that's the directory we'll search
1152                // for files in) to be the same directory as the main input
1153                // file.
1154                let parent = match p.parent() {
1155                    Some(parent) => parent.to_owned(),
1156                    None => {
1157                        return Err(errmsg!(
1158                            "can't figure out a parent directory for input path \"{}\"",
1159                            p.display()
1160                        ));
1161                    }
1162                };
1163
1164                filesystem_root.clone_from(&parent);
1165                let pio: Box<dyn IoProvider> = Box::new(FilesystemPrimaryInputIo::new(&p));
1166                (pio, Some(p), parent)
1167            }
1168
1169            PrimaryInputMode::Stdin => {
1170                // If the main input file is stdin, we don't set a filesystem
1171                // root, which means we'll default to the current working
1172                // directory.
1173                //
1174                // Note that, due to the expected need to rerun the engine
1175                // multiple times, we'll need to buffer stdin in its entirety,
1176                // so we might as well do that now.
1177                let pio = ctry!(BufferedPrimaryIo::from_stdin(); "error reading standard input");
1178                let pio: Box<dyn IoProvider> = Box::new(pio);
1179                (pio, None, "".into())
1180            }
1181
1182            PrimaryInputMode::Buffer(buf) => {
1183                // Same behavior as with stdin.
1184                let pio: Box<dyn IoProvider> = Box::new(BufferedPrimaryIo::from_buffer(buf));
1185                (pio, None, "".into())
1186            }
1187        };
1188
1189        let format_cache_path = self
1190            .format_cache_path
1191            .unwrap_or_else(|| filesystem_root.clone());
1192        let format_cache = FormatCache::new(bundle.get_digest()?, format_cache_path);
1193
1194        let genuine_stdout = if self.print_stdout {
1195            Some(GenuineStdoutIo::new())
1196        } else {
1197            None
1198        };
1199
1200        // move this out of self to get around borrow checker issues
1201        let hidden_input_paths = self.hidden_input_paths;
1202
1203        let extra_search_paths = if self.security.allow_extra_search_paths() {
1204            self.unstables
1205                .extra_search_paths
1206                .iter()
1207                .map(|p| FilesystemIo::new(p, false, false, hidden_input_paths.clone()))
1208                .collect()
1209        } else {
1210            if !self.unstables.extra_search_paths.is_empty() {
1211                tt_warning!(status, "Extra search path(s) ignored due to security");
1212            }
1213            Vec::new()
1214        };
1215
1216        let filesystem = FilesystemIo::new(&filesystem_root, false, true, hidden_input_paths);
1217
1218        let mem = MemoryIo::new(true);
1219
1220        // BEGIN AWARE REPORTS PATCH
1221        let disk_outputs = if self.outputs_to_filesystem {
1222            Some(FilesystemIo::new(
1223                &filesystem_root,
1224                true,
1225                false,
1226                HashSet::new(),
1227            ))
1228        } else {
1229            None
1230        };
1231        // END AWARE REPORTS PATCH
1232
1233        let bs = BridgeState {
1234            primary_input: pio,
1235            mem,
1236            filesystem,
1237            extra_search_paths,
1238            shell_escape_work: None,
1239            format_cache,
1240            bundle,
1241            genuine_stdout,
1242            format_primary: None,
1243            events: HashMap::new(),
1244            disk_outputs,
1245        };
1246
1247        // Now we can do the rest.
1248
1249        let output_path = match self.output_dest {
1250            OutputDestination::Default => Some(default_output_path),
1251            OutputDestination::Path(p) => Some(p),
1252            OutputDestination::Nowhere => None,
1253        };
1254
1255        let tex_input_name = self
1256            .tex_input_name
1257            .expect("tex_input_name must be specified");
1258        let mut aux_path = PathBuf::from(tex_input_name.clone());
1259        aux_path.set_extension("aux");
1260        let mut xdv_path = aux_path.clone();
1261        xdv_path.set_extension(if self.output_format == OutputFormat::Html {
1262            "spx"
1263        } else {
1264            "xdv"
1265        });
1266        let mut pdf_path = aux_path.clone();
1267        pdf_path.set_extension("pdf");
1268
1269        let shell_escape_mode = if !self.security.allow_shell_escape() {
1270            ShellEscapeMode::Disabled
1271        } else {
1272            match self.shell_escape_mode {
1273                ShellEscapeMode::Defaulted => {
1274                    if let Some(ref cwd) = self.unstables.shell_escape_cwd {
1275                        ShellEscapeMode::ExternallyManagedDir(cwd.into())
1276                    } else if self.unstables.shell_escape {
1277                        ShellEscapeMode::TempDir
1278                    } else {
1279                        ShellEscapeMode::Disabled
1280                    }
1281                }
1282
1283                other => other,
1284            }
1285        };
1286
1287        Ok(ProcessingSession {
1288            security: self.security,
1289            bs,
1290            pass: self.pass,
1291            primary_input_path,
1292            primary_input_tex_path: tex_input_name,
1293            format_name: self.format_name.unwrap(),
1294            tex_aux_path: aux_path.display().to_string(),
1295            tex_xdv_path: xdv_path.display().to_string(),
1296            tex_pdf_path: pdf_path.display().to_string(),
1297            output_format: self.output_format,
1298            makefile_output_path: self.makefile_output_path,
1299            output_path,
1300            tex_rerun_specification: self.reruns,
1301            keep_intermediates: self.keep_intermediates,
1302            keep_logs: self.keep_logs,
1303            synctex_enabled: self.synctex,
1304            build_date: self.build_date.unwrap_or(SystemTime::UNIX_EPOCH),
1305            unstables: self.unstables,
1306            shell_escape_mode,
1307            html_assets_spec_path: self.html_assets_spec_path,
1308            html_precomputed_assets: self.html_precomputed_assets,
1309            html_emit_files: !self.html_do_not_emit_files,
1310            html_emit_assets: !self.html_do_not_emit_assets,
1311        })
1312    }
1313}
1314
/// The reason why another engine pass is needed.
#[derive(Debug, Clone)]
enum RerunReason {
    // NOTE(review): presumably "biber was run" -- this variant is constructed
    // outside the visible part of the file; confirm there.
    Biber,
    // NOTE(review): presumably "bibtex was run" -- this variant is constructed
    // outside the visible part of the file; confirm there.
    Bibtex,
    /// A file that the engine read and then rewrote came out different from
    /// what was read. The payload is the name of that file.
    FileChange(String),
}
1321
/// The ProcessingSession struct runs the whole show when we're actually
/// processing a file. It understands, for example, the need to re-run the TeX
/// engine if the `.aux` file changed.
pub struct ProcessingSession {
    /// Security settings, carried over from the builder.
    security: SecuritySettings,

    /// The subset of the session state that can be mutated while the C/C++
    /// engines are running. Importantly, this includes the full I/O stack.
    bs: BridgeState,

    /// If our primary input is an actual file on disk, this is its path.
    primary_input_path: Option<PathBuf>,

    /// This is the name of the input that we tell TeX. It is the basename of
    /// the UTF8-ified version of `primary_input_path`; or something anodyne
    /// if the latter is None. (Name, "texput.tex").
    primary_input_tex_path: String,

    /// This is the name of the format file to use. TeX has to open it by name
    /// internally, so it has to be String compatible.
    format_name: String,

    /// These are the paths of the various output files as TeX knows them --
    /// just `primary_input_tex_path` with the extension changed.
    tex_aux_path: String,
    /// Has the extension `spx` in HTML mode and `xdv` otherwise.
    tex_xdv_path: String,
    /// `primary_input_tex_path` with the extension `pdf`.
    tex_pdf_path: String,

    /// If we're writing out Makefile rules, this is where they go. The TeX
    /// engine doesn't know about this path at all.
    makefile_output_path: Option<PathBuf>,

    /// This is the path that the processed file will be saved at. It defaults
    /// to the path of `primary_input_path` or `.` if STDIN is used. If set to
    /// None, the output files will not be saved to disk — in which case, the
    /// caller should access the memory layer of the `io` field to gain access
    /// to the output files.
    output_path: Option<PathBuf>,

    /// Which kind of pass to run.
    pass: PassSetting,
    /// The type of output to create.
    output_format: OutputFormat,
    /// The number of reruns requested through the builder, if any.
    tex_rerun_specification: Option<usize>,
    /// Whether intermediate files should be kept.
    keep_intermediates: bool,
    /// Whether log files should be kept.
    keep_logs: bool,
    /// Whether synctex was requested through the builder.
    synctex_enabled: bool,

    /// See `TexEngine::with_date` and `XdvipdfmxEngine::with_date`.
    build_date: SystemTime,

    /// Unstable options, carried over from the builder.
    unstables: UnstableOptions,

    /// How to handle shell-escape. The `Defaulted` option will never
    /// be used here.
    shell_escape_mode: ShellEscapeMode,

    /// In HTML mode, where to emit an asset specification file, if requested.
    html_assets_spec_path: Option<String>,
    /// In HTML mode, the precomputed asset specification to use, if any.
    html_precomputed_assets: Option<AssetSpecification>,
    /// Whether templated HTML outputs should be created.
    html_emit_files: bool,
    /// Whether HTML asset files should be created.
    html_emit_assets: bool,
}
1383
// NOTE(review): presumably the cap on the number of TeX passes when the rerun
// count is auto-detected -- the use site is outside this view; confirm there.
const DEFAULT_MAX_TEX_PASSES: usize = 6;
// File name suffixes, including the leading dot.
// NOTE(review): presumably files with these suffixes are always treated as
// intermediates when deciding what to write out -- confirm at the use site.
const ALWAYS_INTERMEDIATE_EXTENSIONS: &[&str] = &[
    ".snm", ".toc", // generated by Beamer
];
1388
1389impl ProcessingSession {
1390    /// Assess whether we need to rerun an engine. This is the case if there
1391    /// was a file that the engine read and then rewrote, and the rewritten
1392    /// version is different than the version that it read in.
1393    fn is_rerun_needed(&self, status: &mut dyn StatusBackend) -> Option<RerunReason> {
1394        // TODO: we should probably wire up diagnostics since I expect this
1395        // stuff could get finicky and we're going to want to be able to
1396        // figure out why rerun detection is breaking.
1397
1398        for (name, info) in &self.bs.events {
1399            if info.access_pattern == AccessPattern::ReadThenWritten {
1400                let file_changed = match (&info.read_digest, &info.write_digest) {
1401                    (Some(d1), Some(d2)) => d1 != d2,
1402                    (&None, &Some(_)) => true,
1403                    (_, _) => {
1404                        // Other cases shouldn't happen.
1405                        tt_warning!(
1406                            status,
1407                            "internal consistency problem when checking if {} changed",
1408                            name
1409                        );
1410                        true
1411                    }
1412                };
1413
1414                if file_changed {
1415                    return Some(RerunReason::FileChange(name.clone()));
1416                }
1417            }
1418        }
1419
1420        None
1421    }
1422
1423    #[allow(dead_code)]
1424    fn _dump_access_info(&self, status: &mut dyn StatusBackend) {
1425        for (name, info) in &self.bs.events {
1426            if info.access_pattern != AccessPattern::Read {
1427                let r = match info.read_digest {
1428                    Some(ref d) => d.to_string(),
1429                    None => "-".into(),
1430                };
1431                let w = match info.write_digest {
1432                    Some(ref d) => d.to_string(),
1433                    None => "-".into(),
1434                };
1435                tt_note!(
1436                    status,
1437                    "ACCESS: {} {:?} {:?} {:?}",
1438                    name,
1439                    info.access_pattern,
1440                    r,
1441                    w
1442                );
1443            }
1444        }
1445    }
1446
1447    /// Runs the session, generating the desired outputs.
1448    ///
1449    /// What this does depends on which [`PassSetting`] you asked for. The most common choice is
1450    /// `PassSetting::Default`, in which case this method does the following:
1451    ///
1452    /// - if a `.fmt` file does not yet exist, generate one and cache it
1453    /// - run the TeX engine once
1454    /// - run BibTeX, if it seems to be required
1455    /// - repeat the last two steps as often as needed
1456    /// - write the output files to disk, including a Makefile if it was requested.
1457    pub fn run(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1458        // Pre-invocation setup that requires cleanup even if the processing errors out.
1459
1460        let (shell_escape_work, clean_up_shell_escape) = match self.shell_escape_mode {
1461            ShellEscapeMode::Disabled => (None, false),
1462
1463            ShellEscapeMode::ExternallyManagedDir(ref p) => (
1464                Some(FilesystemIo::new(p, false, false, HashSet::new())),
1465                false,
1466            ),
1467
1468            ShellEscapeMode::TempDir => {
1469                let tempdir = ctry!(tempfile::Builder::new().tempdir(); "can't create temporary directory for shell-escape work");
1470                (
1471                    Some(FilesystemIo::new(
1472                        &tempdir.keep(),
1473                        false,
1474                        false,
1475                        HashSet::new(),
1476                    )),
1477                    true,
1478                )
1479            }
1480
1481            ShellEscapeMode::Defaulted => unreachable!(),
1482        };
1483
1484        self.bs.shell_escape_work = shell_escape_work;
1485
1486        // Go-time!
1487        let result = self.run_inner(status);
1488
1489        // Do that cleanup.
1490
1491        if clean_up_shell_escape {
1492            let shell_escape_work = self.bs.shell_escape_work.take().unwrap();
1493            let shell_escape_err = std::fs::remove_dir_all(shell_escape_work.root());
1494
1495            if let Err(e) = shell_escape_err {
1496                tt_warning!(status, "an error occurred while cleaning up the \
1497                    shell-escape temporary directory `{}`", shell_escape_work.root().display(); e.into());
1498            }
1499        }
1500
1501        // Propagate the actual result.
1502        result
1503    }
1504
1505    /// The bulk of the `run` implementation. We need to wrap it to manage the
1506    /// lifecycle of resources like the shell-escape temporary directory, if
1507    /// needed.
1508    fn run_inner(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1509        // Do we need to generate the format file?
1510
1511        let generate_format = if self.output_format == OutputFormat::Format {
1512            false
1513        } else {
1514            match self.bs.input_open_format(&self.format_name, status) {
1515                OpenResult::Ok(_) => false,
1516                OpenResult::NotAvailable => true,
1517                OpenResult::Err(e) => {
1518                    return Err(e)
1519                        .chain_err(|| format!("could not open format file {}", self.format_name));
1520                }
1521            }
1522        };
1523
1524        if generate_format {
1525            tt_note!(status, "generating format \"{}\"", self.format_name);
1526            self.make_format_pass(status)?;
1527        }
1528
1529        // Do the meat of the work.
1530
1531        let result = match self.pass {
1532            PassSetting::Tex => match self.tex_pass(None, status) {
1533                Ok(Some(warnings)) => {
1534                    tt_warning!(status, "{}", warnings);
1535                    Ok(0)
1536                }
1537                Ok(None) => Ok(0),
1538                Err(e) => Err(e),
1539            },
1540            PassSetting::Default => self.default_pass(false, status),
1541            PassSetting::BibtexFirst => self.default_pass(true, status),
1542        };
1543
1544        if let Err(e) = result {
1545            self.write_files(None, status, true)?;
1546            return Err(e);
1547        };
1548
1549        // Write output files and the first line of our Makefile output.
1550
1551        let mut mf_dest_maybe = match self.makefile_output_path {
1552            Some(ref p) => {
1553                if self.output_path.is_none() {
1554                    tt_warning!(
1555                        status,
1556                        "requested to generate Makefile rules, but no files written to disk!"
1557                    );
1558                    None
1559                } else {
1560                    Some(File::create(p)?)
1561                }
1562            }
1563
1564            None => None,
1565        };
1566
1567        let n_skipped_intermediates = self.write_files(mf_dest_maybe.as_mut(), status, false)?;
1568
1569        if n_skipped_intermediates > 0 {
1570            status.note_highlighted(
1571                "Skipped writing ",
1572                &format!("{n_skipped_intermediates}"),
1573                " intermediate files (use --keep-intermediates to keep them)",
1574            );
1575        }
1576
1577        // Finish Makefile rules, maybe.
1578
1579        if let Some(ref mut mf_dest) = mf_dest_maybe {
1580            ctry!(write!(mf_dest, ": "); "couldn't write to Makefile-rules file");
1581
1582            if let Some(ref pip) = self.primary_input_path {
1583                let opip = ctry!(pip.to_str(); "Makefile-rules file path must be Unicode-able");
1584                ctry!(mf_dest.write_all(opip.as_bytes()); "couldn't write to Makefile-rules file");
1585            }
1586
1587            // The check above ensures that this is never None.
1588            let root = self.output_path.as_ref().unwrap();
1589
1590            for (name, info) in &self.bs.events {
1591                if info.input_origin != InputOrigin::Filesystem {
1592                    continue;
1593                }
1594
1595                if info.got_written_to_disk {
1596                    // If the file originally came from the filesystem, and it
1597                    // was written as well as read, and we actually wrote it
1598                    // to disk, there's a circular dependency that's
1599                    // inappropriate to express in a Makefile. If it was
1600                    // "written" by the engine but we didn't actually write
1601                    // those modifications to disk, we're OK. If there's a
1602                    // two-stage compilation involving the .aux file, the
1603                    // latter case is what arises unless --keep-intermediates
1604                    // is specified.
1605                    tt_warning!(status, "omitting circular Makefile dependency for {}", name);
1606                    continue;
1607                }
1608
1609                ctry!(write!(mf_dest, " \\\n  {}", root.join(name).display()); "couldn't write to Makefile-rules file");
1610            }
1611
1612            ctry!(writeln!(mf_dest, ""); "couldn't write to Makefile-rules file");
1613        }
1614
1615        // All done.
1616
1617        Ok(())
1618    }
1619
1620    fn write_files(
1621        &mut self,
1622        mut mf_dest_maybe: Option<&mut File>,
1623        status: &mut dyn StatusBackend,
1624        only_logs: bool,
1625    ) -> Result<u32> {
1626        let root = match self.output_path {
1627            Some(ref p) => p,
1628
1629            None => {
1630                // We were told not to write anything!
1631                return Ok(0);
1632            }
1633        };
1634
1635        let mut n_skipped_intermediates = 0;
1636
1637        for (name, file) in &*self.bs.mem.files.borrow() {
1638            if name == self.bs.mem.stdout_key() {
1639                continue;
1640            }
1641
1642            let sname = name;
1643            let summ = self.bs.events.get_mut(name).unwrap();
1644
1645            if !only_logs && (self.output_format == OutputFormat::Aux) {
1646                // In this mode we're only writing the .aux file. I initially
1647                // wanted to be clever-ish and output all auxiliary-type
1648                // files, but doing so ended up causing non-obvious problems
1649                // for my use case, which involves using Ninja to manage
1650                // dependencies.
1651                if !sname.ends_with(".aux") {
1652                    continue;
1653                }
1654            } else if !self.keep_intermediates
1655                && (summ.access_pattern != AccessPattern::Written
1656                    || ALWAYS_INTERMEDIATE_EXTENSIONS
1657                        .iter()
1658                        .any(|ext| sname.ends_with(ext)))
1659            {
1660                n_skipped_intermediates += 1;
1661                continue;
1662            }
1663
1664            let is_logfile = sname.ends_with(".log") || sname.ends_with(".blg");
1665
1666            if is_logfile && !self.keep_logs {
1667                continue;
1668            }
1669
1670            if !is_logfile && only_logs {
1671                continue;
1672            }
1673
1674            if file.data.is_empty() {
1675                status.note_highlighted(
1676                    "Not writing ",
1677                    &format!("`{sname}`"),
1678                    ": it would be empty.",
1679                );
1680                continue;
1681            }
1682
1683            let real_path = root.join(name);
1684            let byte_len = Byte::from_u128(file.data.len() as u128).unwrap();
1685            status.note_highlighted(
1686                "Writing ",
1687                &format!("`{}`", real_path.display()),
1688                &format!(" ({})", byte_len.get_appropriate_unit(UnitType::Binary)),
1689            );
1690
1691            if let Some(parent) = real_path.parent() {
1692                std::fs::create_dir_all(parent)?;
1693            }
1694
1695            let mut f = File::create(&real_path)?;
1696            f.write_all(&file.data)?;
1697            summ.got_written_to_disk = true;
1698
1699            if let Some(ref mut mf_dest) = mf_dest_maybe {
1700                // Maybe it'd be better to have this just be a warning? But if
1701                // the program is supposed to write the file, you don't want
1702                // it exiting with error code zero if it couldn't do that
1703                // successfully.
1704                //
1705                // Not quite sure why, but I can't pull out the target path
1706                // here. I think 'self' is borrow inside the loop?
1707                ctry!(write!(mf_dest, "{} ", real_path.display()); "couldn't write to Makefile-rules file");
1708            }
1709        }
1710
1711        Ok(n_skipped_intermediates)
1712    }
1713
1714    /// The "default" pass really runs a bunch of sub-passes. It is a "Do What
1715    /// I Mean" operation.
1716    fn default_pass(&mut self, bibtex_first: bool, status: &mut dyn StatusBackend) -> Result<i32> {
1717        // If `bibtex_first` is true, we start by running bibtex, and run
1718        // proceed with the standard rerun logic. Otherwise, we run TeX,
1719        // auto-detect whether we need to run bibtex, possibly run it, and
1720        // then go ahead.
1721
1722        let mut warnings = None;
1723        let mut rerun_result = if bibtex_first {
1724            self.bibtex_pass(status)?;
1725            Some(RerunReason::Bibtex)
1726        } else {
1727            warnings = self.tex_pass(None, status)?;
1728            let maybe_biber = self.check_biber_requirement(status)?;
1729
1730            if let Some(biber) = maybe_biber {
1731                self.bs.external_tool_pass(&biber, status)?;
1732                Some(RerunReason::Biber)
1733            } else if self.is_bibtex_needed() {
1734                self.bibtex_pass(status)?;
1735                Some(RerunReason::Bibtex)
1736            } else {
1737                self.is_rerun_needed(status)
1738            }
1739        };
1740
1741        // Now we enter the main rerun loop.
1742
1743        let (pass_count, reruns_fixed) = match self.tex_rerun_specification {
1744            Some(n) => (n, true),
1745            None => (DEFAULT_MAX_TEX_PASSES, false),
1746        };
1747
1748        for i in 0..pass_count {
1749            let rerun_explanation = if reruns_fixed {
1750                "I was told to".to_owned()
1751            } else {
1752                match rerun_result {
1753                    Some(RerunReason::Biber) => "biber was run".to_owned(),
1754                    Some(RerunReason::Bibtex) => "bibtex was run".to_owned(),
1755                    Some(RerunReason::FileChange(ref s)) => format!("\"{s}\" changed"),
1756                    None => break,
1757                }
1758            };
1759
1760            // We're restarting the engine afresh, so clear the read inputs.
1761            // We do *not* clear the entire HashMap since we want to remember,
1762            // e.g., that bibtex wrote out the .bbl file, since that way we
1763            // can later know that it's OK to delete. I am not super confident
1764            // that the access_pattern data can just be left as-is when we do
1765            // this, but, uh, so far it seems to work.
1766            for summ in self.bs.events.values_mut() {
1767                summ.read_digest = None;
1768            }
1769
1770            warnings = self.tex_pass(Some(&rerun_explanation), status)?;
1771
1772            if !reruns_fixed {
1773                rerun_result = self.is_rerun_needed(status);
1774
1775                if rerun_result.is_some() && i == DEFAULT_MAX_TEX_PASSES - 1 {
1776                    tt_warning!(
1777                        status,
1778                        "TeX rerun seems needed, but stopping at {} passes",
1779                        DEFAULT_MAX_TEX_PASSES
1780                    );
1781                    break;
1782                }
1783            }
1784        }
1785
1786        // The last tex pass generated warnings.
1787        if let Some(warnings) = warnings {
1788            tt_warning!(status, "{}", warnings);
1789        }
1790
1791        // And finally, xdvipdfmx or spx2html. Maybe.
1792
1793        if let OutputFormat::Pdf = self.output_format {
1794            self.xdvipdfmx_pass(status)?;
1795        } else if let OutputFormat::Html = self.output_format {
1796            self.spx2html_pass(status)?;
1797        }
1798
1799        Ok(0)
1800    }
1801
1802    fn is_bibtex_needed(&self) -> bool {
1803        const BIBDATA: &[u8] = b"\\bibdata";
1804
1805        self.bs
1806            .mem
1807            .files
1808            .borrow()
1809            .get(&self.tex_aux_path)
1810            .map(|file| {
1811                // We used to use aho-corasick crate here, but it was removed to reduce the code
1812                // size.
1813                file.data.windows(BIBDATA.len()).any(|s| s == BIBDATA)
1814            })
1815            .unwrap_or(false)
1816    }
1817
1818    /// Use the TeX engine to generate a format file.
1819    #[allow(clippy::manual_split_once)] // requires Rust 1.52 (note that we don't actually define our MSRV)
1820    fn make_format_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1821        // PathBuf.file_stem() doesn't do what we want since it only strips
1822        // one extension. As of 1.17, the compiler needs a type annotation for
1823        // some reason, which is why we use the `r` variable.
1824        let r: Result<&str> = self.format_name.split('.').next().ok_or_else(|| {
1825            ErrorKind::Msg(format!(
1826                "incomprehensible format file name \"{}\"",
1827                self.format_name
1828            ))
1829            .into()
1830        });
1831        let stem = r?;
1832
1833        let result = {
1834            self.bs
1835                .enter_format_mode(&format!("tectonic-format-{stem}.tex"));
1836            let mut launcher =
1837                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1838            let r = TexEngine::default()
1839                .halt_on_error_mode(true)
1840                .initex_mode(true)
1841                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1842                .process(&mut launcher, "UNUSED.fmt", "texput");
1843            self.bs.leave_format_mode();
1844            r
1845        };
1846
1847        match result {
1848            Ok(TexOutcome::Spotless) => {}
1849            Ok(TexOutcome::Warnings) => {
1850                tt_warning!(status, "warnings were issued by the TeX engine; use --print and/or --keep-logs for details.");
1851            }
1852            Ok(TexOutcome::Errors) => {
1853                tt_error!(status, "errors were issued by the TeX engine; use --print and/or --keep-logs for details.");
1854                return Err(ErrorKind::Msg("unhandled TeX engine error".to_owned()).into());
1855            }
1856            Err(e) => {
1857                return Err(e.into());
1858            }
1859        }
1860
1861        // Now we can write the format file to its special location. In
1862        // principle we could stream the format file directly to the staging
1863        // area as we ran the TeX engine, but we don't bother.
1864
1865        for (name, file) in &*self.bs.mem.files.borrow() {
1866            if name == self.bs.mem.stdout_key() {
1867                continue;
1868            }
1869
1870            let sname = name;
1871
1872            if !sname.ends_with(".fmt") {
1873                continue;
1874            }
1875
1876            // Note that we intentionally pass 'stem', not 'name'.
1877            ctry!(self.bs.format_cache.write_format(stem, &file.data, status); "cannot write format file {}", sname);
1878        }
1879
1880        // All done. Clear the memory layer since this was a special preparatory step.
1881        self.bs.mem.files.borrow_mut().clear();
1882
1883        Ok(0)
1884    }
1885
1886    /// Run one pass of the TeX engine.
1887    fn tex_pass(
1888        &mut self,
1889        rerun_explanation: Option<&str>,
1890        status: &mut dyn StatusBackend,
1891    ) -> Result<Option<&'static str>> {
1892        let result = {
1893            if let Some(s) = rerun_explanation {
1894                status.note_highlighted("Rerunning ", "TeX", &format!(" because {s} ..."));
1895            } else {
1896                status.note_highlighted("Running ", "TeX", " ...");
1897            }
1898
1899            let mut launcher =
1900                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1901
1902            // In deterministic mode, we stub a few aspects of the environment.
1903            // They default to a "realistic" view, but we override them with static values:
1904            if self.unstables.deterministic_mode {
1905                launcher.with_expose_absolute_paths(false);
1906                launcher.with_mtime_override(Some(
1907                    self.build_date
1908                        .duration_since(SystemTime::UNIX_EPOCH)
1909                        .map(|x| x.as_secs() as i64)
1910                        .expect("invalid build date in deterministic mode"),
1911                ));
1912            }
1913
1914            TexEngine::default()
1915                .halt_on_error_mode(!self.unstables.continue_on_errors)
1916                .initex_mode(self.output_format == OutputFormat::Format)
1917                .synctex(self.synctex_enabled)
1918                .semantic_pagination(self.output_format == OutputFormat::Html)
1919                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1920                .build_date(self.build_date)
1921                .process(
1922                    &mut launcher,
1923                    &self.format_name,
1924                    &self.primary_input_tex_path,
1925                )
1926        };
1927
1928        let warnings = match result {
1929            Ok(TexOutcome::Spotless) => None,
1930            Ok(TexOutcome::Warnings) =>
1931                    Some("warnings were issued by the TeX engine; use --print and/or --keep-logs for details."),
1932            Ok(TexOutcome::Errors) =>
1933                    Some("errors were issued by the TeX engine, but were ignored; \
1934                         use --print and/or --keep-logs for details."),
1935            Err(e) =>
1936                return Err(e.into()),
1937        };
1938
1939        if !self.bs.mem.files.borrow().contains_key(&self.tex_xdv_path) {
1940            // TeX did not produce the expected output file
1941            tt_warning!(
1942                status,
1943                "did not produce \"{}\"; this may mean that your document is empty",
1944                self.tex_xdv_path
1945            )
1946        }
1947
1948        Ok(warnings)
1949    }
1950
1951    // Run Bibtex process for one .aux file.
1952    fn bibtex_pass_for_one_aux_file(
1953        &mut self,
1954        status: &mut dyn StatusBackend,
1955        aux_file: &String,
1956    ) -> Result<i32> {
1957        let result = {
1958            status.note_highlighted("Running ", "BibTeX", &format!(" on {aux_file} ..."));
1959            let mut launcher =
1960                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1961            let mut engine = BibtexEngine::new();
1962            engine.process(&mut launcher, aux_file, &self.unstables)
1963        };
1964
1965        match result {
1966            Ok(TexOutcome::Spotless) => {}
1967            Ok(TexOutcome::Warnings) => {
1968                tt_note!(
1969                    status,
1970                    "warnings were issued by BibTeX; use --print and/or --keep-logs for details."
1971                );
1972            }
1973            Ok(TexOutcome::Errors) => {
1974                tt_warning!(
1975                    status,
1976                    "errors were issued by BibTeX, but were ignored; \
1977                     use --print and/or --keep-logs for details."
1978                );
1979            }
1980            Err(e) => {
1981                return Err(e.chain_err(|| ErrorKind::EngineError("BibTeX")));
1982            }
1983        }
1984
1985        Ok(0)
1986    }
1987
1988    fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1989        let mut aux_files = vec![self.tex_aux_path.clone()];
1990
1991        // find other .aux files generated by tex_pass
1992        for f in self.bs.get_intermediate_file_names() {
1993            if f.ends_with(".aux") && f != self.tex_aux_path {
1994                aux_files.push(f);
1995            }
1996        }
1997
1998        for f in aux_files {
1999            let _r = self.bibtex_pass_for_one_aux_file(status, &f)?;
2000        }
2001
2002        Ok(0)
2003    }
2004
2005    fn xdvipdfmx_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2006        {
2007            status.note_highlighted("Running ", "xdvipdfmx", " ...");
2008
2009            let mut launcher =
2010                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
2011            let mut engine = XdvipdfmxEngine::default();
2012
2013            engine.build_date(self.build_date);
2014
2015            if let Some(ref ps) = self.unstables.paper_size {
2016                engine.paper_spec(ps.clone());
2017            }
2018
2019            engine.process(&mut launcher, &self.tex_xdv_path, &self.tex_pdf_path)?;
2020        }
2021
2022        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2023        Ok(0)
2024    }
2025
2026    fn spx2html_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2027        {
2028            let mut engine = Spx2HtmlEngine::default();
2029
2030            match (self.html_emit_files, self.output_path.as_ref()) {
2031                (true, Some(p)) => engine.output_base(p),
2032                (false, _) => engine.do_not_emit_files(),
2033                (true, None) => return Err(errmsg!("HTML output must be saved directly to disk")),
2034            };
2035
2036            if let Some(p) = self.html_assets_spec_path.as_ref() {
2037                engine.assets_spec_path(p);
2038            } else if !self.html_emit_assets {
2039                engine.do_not_emit_assets();
2040            }
2041
2042            if let Some(a) = self.html_precomputed_assets.as_ref() {
2043                engine.precomputed_assets(a.clone());
2044            }
2045
2046            status.note_highlighted("Running ", "spx2html", " ...");
2047            engine.process_to_filesystem(&mut self.bs, status, &self.tex_xdv_path)?;
2048        }
2049
2050        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2051        Ok(0)
2052    }
2053
2054    /// Get what was printed to standard output, if anything.
2055    pub fn get_stdout_content(&self) -> Vec<u8> {
2056        self.bs
2057            .mem
2058            .files
2059            .borrow()
2060            .get(self.bs.mem.stdout_key())
2061            .map(|mfi| mfi.data.clone())
2062            .unwrap_or_default()
2063    }
2064
2065    /// Consume this session and return the current set of files in memory.
2066    ///
2067    /// This convenience function tries to help with the annoyances of getting
2068    /// access to the in-memory file data after the engine has been run.
2069    pub fn into_file_data(self) -> MemoryFileCollection {
2070        Rc::try_unwrap(self.bs.mem.files)
2071            .expect("multiple strong refs to MemoryIo files")
2072            .into_inner()
2073    }
2074
2075    /// See if we need to run `biber`, and parse the `.run.xml` file from the
2076    /// `loqreq` package to figure out what files `biber` needs. This
2077    /// functionality should probably become more generic, but I don't have a
2078    /// great sense as to how widely-used `logreq` is.
2079    fn check_biber_requirement(
2080        &self,
2081        status: &mut dyn StatusBackend,
2082    ) -> Result<Option<ExternalToolPass>> {
2083        // Is there a `.run.xml` file?
2084
2085        let mut run_xml_path = PathBuf::from(&self.primary_input_tex_path);
2086        run_xml_path.set_extension("run.xml");
2087        let run_xml_path = run_xml_path.display().to_string();
2088
2089        let mem_files = &*self.bs.mem.files.borrow();
2090        let run_xml_entry = match mem_files.get(&run_xml_path) {
2091            Some(e) => e,
2092            None => return Ok(None),
2093        };
2094
2095        // Yes, there is. Set up to potentially run biber. For testing support,
2096        // we let the rig specify a custom executable to use, which lets us
2097        // exercise different pieces of the external-tool behavior.
2098
2099        let s = (
2100            crate::config::is_config_test_mode_activated(),
2101            std::env::var("TECTONIC_TEST_FAKE_BIBER"),
2102        );
2103
2104        let mut argv = match s {
2105            (true, Ok(text)) if !text.trim().is_empty() => {
2106                text.split_whitespace().map(|x| x.to_owned()).collect()
2107            }
2108            // when `TECTONIC_TEST_FAKE_BIBER` is empty, proceed to discover
2109            // the biber binary as follows.
2110            _ => vec!["biber".to_owned()],
2111        };
2112
2113        // Moreover, we allow an override of the biber executable, to cope with
2114        // possible version mismatch of the bundled biblatex package, as filed
2115        // in issue #893. Since PR #1103, the `tectonic-biber` override can
2116        // also be invoked with `tectonic -X biber`.
2117        let find_by = |binary_name: &str| -> Option<String> {
2118            if let Ok(pathbuf) = which(binary_name) {
2119                if let Some(biber_path) = pathbuf.to_str() {
2120                    return Some(biber_path.to_owned());
2121                }
2122            }
2123            None
2124        };
2125
2126        let mut use_tectonic_biber_override = false;
2127        for binary_name in ["./tectonic-biber", "tectonic-biber"] {
2128            if let Some(biber_path) = find_by(binary_name) {
2129                argv = vec![biber_path];
2130                use_tectonic_biber_override = true;
2131                break;
2132            }
2133        }
2134
2135        let mut extra_requires = HashSet::new();
2136
2137        // Do a sketchy XML parse to see if there's info about a biber
2138        // invocation.
2139
2140        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
2141        enum State {
2142            /// Searching for the biber section
2143            Searching,
2144
2145            /// In a <binary> element. Will its value be "biber"??!?
2146            InBinaryName,
2147
2148            /// In the <cmdline> part of the biber section.
2149            InBiberCmdline,
2150
2151            /// About to read an argument to the biber command.
2152            InBiberArgument,
2153
2154            /// Reading through the post-cmdline part of the biber section.
2155            InBiberRemainder,
2156
2157            /// In a "requirement" section like <input> or <requires> that contains
2158            /// filenames we should provide
2159            InBiberRequirementSection,
2160
2161            /// In a <file> requirement
2162            InBiberFileRequirement,
2163        }
2164
2165        let curs = Cursor::new(&run_xml_entry.data[..]);
2166        let mut reader = NsReader::from_reader(curs);
2167        let mut buf = Vec::new();
2168        let mut state = State::Searching;
2169
2170        loop {
2171            let event = ctry!(
2172                reader.read_event_into(&mut buf);
2173                "error parsing run.xml file"
2174            );
2175
2176            if let Event::Eof = event {
2177                break;
2178            }
2179
2180            match (state, event) {
2181                (State::Searching, Event::Start(ref e)) => {
2182                    let name = reader
2183                        .decoder()
2184                        .decode(e.local_name().into_inner())
2185                        .map_err(quick_xml::Error::from)?;
2186
2187                    if name == "binary" {
2188                        state = State::InBinaryName;
2189                    }
2190                }
2191
2192                (State::InBinaryName, Event::Text(ref e)) => {
2193                    let text = e.unescape()?;
2194
2195                    state = if &text == "biber" {
2196                        State::InBiberCmdline
2197                    } else {
2198                        State::Searching
2199                    };
2200                }
2201
2202                (State::InBinaryName, _) => {
2203                    state = State::Searching;
2204                }
2205
2206                (State::InBiberCmdline, Event::Start(ref e)) => {
2207                    let name = reader
2208                        .decoder()
2209                        .decode(e.local_name().into_inner())
2210                        .map_err(quick_xml::Error::from)?;
2211
2212                    // Note that the "infile" might be `foo` without the `.bcf`
2213                    // extension, so we can't use it for file-finding.
2214                    state = match &*name {
2215                        "infile" | "outfile" | "option" => State::InBiberArgument,
2216                        _ => State::InBiberRemainder,
2217                    }
2218                }
2219
2220                (State::InBiberCmdline, Event::End(ref e)) => {
2221                    let name = reader
2222                        .decoder()
2223                        .decode(e.local_name().into_inner())
2224                        .map_err(quick_xml::Error::from)?;
2225
2226                    if name == "cmdline" {
2227                        state = State::InBiberRemainder;
2228                    }
2229                }
2230
2231                (State::InBiberArgument, Event::Text(ref e)) => {
2232                    argv.push(e.unescape()?.to_string());
2233                    state = State::InBiberCmdline;
2234                }
2235
2236                (State::InBiberRemainder, Event::Start(ref e)) => {
2237                    let name = reader
2238                        .decoder()
2239                        .decode(e.local_name().into_inner())
2240                        .map_err(quick_xml::Error::from)?;
2241
2242                    state = match &*name {
2243                        "input" | "requires" => State::InBiberRequirementSection,
2244                        _ => State::InBiberRemainder,
2245                    }
2246                }
2247
2248                (State::InBiberRemainder, Event::End(ref e)) => {
2249                    let name = reader
2250                        .decoder()
2251                        .decode(e.local_name().into_inner())
2252                        .map_err(quick_xml::Error::from)?;
2253
2254                    if name == "external" {
2255                        break;
2256                    }
2257                }
2258
2259                (State::InBiberRequirementSection, Event::Start(ref e)) => {
2260                    let name = reader
2261                        .decoder()
2262                        .decode(e.local_name().into_inner())
2263                        .map_err(quick_xml::Error::from)?;
2264
2265                    state = match &*name {
2266                        "file" => State::InBiberFileRequirement,
2267                        _ => State::InBiberRemainder,
2268                    }
2269                }
2270
2271                (State::InBiberRequirementSection, Event::End(ref e)) => {
2272                    let name = reader
2273                        .decoder()
2274                        .decode(e.local_name().into_inner())
2275                        .map_err(quick_xml::Error::from)?;
2276
2277                    if name == "input" || name == "requires" {
2278                        state = State::InBiberRemainder;
2279                    }
2280                }
2281
2282                (State::InBiberFileRequirement, Event::Text(ref e)) => {
2283                    extra_requires.insert(e.unescape()?.to_string());
2284                    state = State::InBiberRequirementSection;
2285                }
2286
2287                (State::InBiberFileRequirement, _) => {
2288                    state = State::InBiberRequirementSection;
2289                }
2290
2291                _ => {}
2292            }
2293        }
2294
2295        // All done!
2296
2297        Ok(if state == State::Searching {
2298            // No biber invocation, in the end.
2299            None
2300        } else {
2301            if use_tectonic_biber_override {
2302                tt_note!(status, "using `tectonic-biber`, found at {}", argv[0]);
2303            }
2304            Some(ExternalToolPass {
2305                argv,
2306                extra_requires,
2307            })
2308        })
2309    }
2310}