Skip to main content

tectonic/
driver.rs

1// Copyright 2018-2022 the Tectonic Project
2// Licensed under the MIT License.
3
4//! The high-level Tectonic document processing interface.
5//!
6//! The main struct in this module is [`ProcessingSession`], which knows how to
7//! run (and re-run if necessary) the various engines in the right order. Such a
8//! session can be created with a [`ProcessingSessionBuilder`], which you might
9//! obtain from a [`tectonic_docmodel::document::Document`] using the
10//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re
11//! using the Tectonic document model. You can set one up manually if not.
12//!
13//! For an example of how to use this module, see `src/bin/tectonic/main.rs`,
14//! which contains tectonic's main CLI program.
15
16use byte_unit::{Byte, UnitType};
17use quick_xml::{events::Event, NsReader};
18use std::{
19    collections::{HashMap, HashSet},
20    fs::File,
21    io::{Cursor, Read, Write},
22    path::{Path, PathBuf},
23    process::Command,
24    rc::Rc,
25    result::Result as StdResult,
26    str::FromStr,
27    time::{Duration, SystemTime},
28};
29use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError};
30use tectonic_bundles::Bundle;
31use tectonic_engine_spx2html::AssetSpecification;
32use tectonic_io_base::{
33    digest::DigestData,
34    filesystem::{FilesystemIo, FilesystemPrimaryInputIo},
35    stdstreams::{BufferedPrimaryIo, GenuineStdoutIo},
36    InputHandle, IoProvider, OpenResult, OutputHandle,
37};
38use which::which;
39
40use crate::{
41    ctry, errmsg,
42    errors::{ChainErrCompatExt, ErrorKind, Result},
43    io::{
44        format_cache::FormatCache,
45        memory::{MemoryFileCollection, MemoryIo},
46        InputOrigin,
47    },
48    status::StatusBackend,
49    tt_error, tt_note, tt_warning,
50    unstable_opts::UnstableOptions,
51    BibtexEngine, Spx2HtmlEngine, TexEngine, TexOutcome, XdvipdfmxEngine,
52};
53
/// Different patterns with which files may have been accessed by the
/// underlying engines. Once a file is marked as ReadThenWritten or
/// WrittenThenRead, its pattern does not evolve further.
///
/// The transitions are applied by the `IoProvider` implementation for
/// `BridgeState`: successfully opening a `Read` file for output promotes it
/// to `ReadThenWritten`, and opening a `Written` file for input promotes it
/// to `WrittenThenRead`. Every other combination leaves the pattern as-is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum AccessPattern {
    /// This file is only ever read.
    Read,

    /// This file is only ever written. This suggests that it is
    /// a final output of the processing session.
    Written,

    /// This file is read, then written. We call this a "circular" access
    /// pattern. Multiple passes of an engine will result in outputs that
    /// change if this file's contents change, or if the file did not exist at
    /// the time of the first pass.
    ReadThenWritten,

    /// This file is written, then read. We call this a "temporary" access
    /// pattern. This file is likely a temporary buffer that is not of
    /// interest to the user.
    WrittenThenRead,
}
77
/// A summary of the I/O that happened on a file. We record its access
/// pattern; where it came from, if it was used as an input; the cryptographic
/// digest of the file when it was first read; and the cryptographic digest of
/// the file as it was last written.
#[derive(Clone, Debug, Eq, PartialEq)]
struct FileSummary {
    /// How the engines have accessed this file so far.
    access_pattern: AccessPattern,

    /// If this file was read, where did it come from?
    pub input_origin: InputOrigin,

    /// If this file was read, this is the digest of its contents at the time it was *first* read.
    /// The "first" is significant for files that were read and then written (for example, `.aux`
    /// files).
    ///
    /// There's some chance that this will be `None` even if the file was read. Tectonic makes an
    /// effort to compute the digest as the data is being read from the file, but this can fail if
    /// tex decides to seek in the file as it is being read.
    pub read_digest: Option<DigestData>,

    /// If this file was written, this is the digest of its contents at the time it was last
    /// written.
    pub write_digest: Option<DigestData>,

    /// Starts out `false`. Nothing in the visible part of this file updates
    /// it; presumably it is set once the file has been emitted to disk
    /// (NOTE(review): confirm against the output-writing code).
    got_written_to_disk: bool,
}
104
105impl FileSummary {
106    fn new(access_pattern: AccessPattern, input_origin: InputOrigin) -> FileSummary {
107        FileSummary {
108            access_pattern,
109            input_origin,
110            read_digest: None,
111            write_digest: None,
112            got_written_to_disk: false,
113        }
114    }
115}
116
/// The kinds of output file that tectonic knows how to produce.
///
/// The default is [`OutputFormat::Pdf`]. A value can be parsed from one of
/// the short lowercase names `aux`, `html`, `xdv`, `pdf` or `fmt`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// A '.aux' file.
    Aux,
    /// A '.html' file.
    Html,
    /// An extended DVI file.
    Xdv,
    /// A '.pdf' file.
    #[default]
    Pdf,
    /// A '.fmt' file, for initializing the TeX engine.
    Format,
}

impl FromStr for OutputFormat {
    type Err = &'static str;

    /// Parse an output format from its short name.
    ///
    /// The match is exact and case-sensitive; anything else yields a static
    /// error message.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        let format = match a_str {
            "aux" => Self::Aux,
            "html" => Self::Html,
            "xdv" => Self::Xdv,
            "pdf" => Self::Pdf,
            "fmt" => Self::Format,
            _ => return Err("unsupported or unknown format"),
        };

        Ok(format)
    }
}
147
/// The kinds of "pass" that [`ProcessingSession`] knows how to run. See
/// [`ProcessingSession::run`] for more details.
///
/// The default is [`PassSetting::Default`]. A value can be parsed from one of
/// the names `default`, `tex` or `bibtex_first`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum PassSetting {
    /// The default pass, which repeatedly runs TeX and BibTeX until it doesn't need to any more.
    #[default]
    Default,
    /// Just run the TeX engine once.
    Tex,
    /// Like the default pass, but runs BibTeX once first, before doing anything else.
    BibtexFirst,
}

impl FromStr for PassSetting {
    type Err = &'static str;

    /// Parse a pass setting from its name.
    ///
    /// The match is exact and case-sensitive; anything else yields a static
    /// error message.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        let setting = match a_str {
            "default" => Self::Default,
            "tex" => Self::Tex,
            "bibtex_first" => Self::BibtexFirst,
            _ => return Err("unsupported or unknown pass setting"),
        };

        Ok(setting)
    }
}
173
/// Different places from which the "primary input" might originate.
///
/// The default is `Stdin`; the builder's `primary_input_path` and
/// `primary_input_buffer` methods select the other two variants.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum PrimaryInputMode {
    /// This process's standard input.
    #[default]
    Stdin,

    /// A path on the filesystem.
    Path(PathBuf),

    /// An in-memory buffer. The bytes are owned by this value.
    Buffer(Vec<u8>),
}
187
/// Different places where the output files might land.
///
/// The default variant is `Default`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum OutputDestination {
    /// The "sensible" default. Files will land in the same directory as the
    /// input file, or the current working directory if the input is something
    /// without a path (such as standard input).
    #[default]
    Default,

    /// Files should land in this particular directory.
    Path(PathBuf),

    /// Files will not be written to disk. The code running the engine should
    /// examine the memory layer of the I/O stack to obtain the output files.
    Nowhere,
}
204
/// The subset of the driver state that is captured when running a C/C++ engine.
///
/// The main purpose of this type is to implement the [`DriverHooks`] trait,
/// which is defined by the `tectonic_bridge_core` crate and defines the
/// interface that the C/C++ processing engines can use to access the outside
/// world. While these engines are running, they hold a mutable reference to
/// these data, so it is helpful to separate them out into a sub-structure of
/// the larger [`ProcessingSession`] type.
///
/// Due to the needs of the C/C++ engines, this means that [`BridgeState`] must
/// hold the fully-prepared I/O stack information as well as the "event"
/// information that helps the driver implement the rerun logic.
struct BridgeState {
    /// I/O for the primary input source. This is boxed since it can come
    /// from different sources: maybe a file, maybe an in-memory buffer, etc.
    primary_input: Box<dyn IoProvider>,

    /// I/O for the main backing bundle. This is boxed since there are several
    /// different bundle implementations that might be used at runtime.
    bundle: Box<dyn Bundle>,

    /// Memory buffering for files written during processing.
    mem: MemoryIo,

    /// The main filesystem backing for input files in the project.
    filesystem: FilesystemIo,

    /// Extra paths we search through for files. These are consulted in
    /// order, after the main filesystem and before the bundle.
    extra_search_paths: Vec<FilesystemIo>,

    /// Additional filesystem backing used if "shell escape" functionality is
    /// activated. If None, we take that to mean that shell-escape is
    /// disallowed. We have to use a persistent filesystem directory for this
    /// since some packages perform a whole series of shell-escape operations
    /// that assume continuity from one to the next.
    shell_escape_work: Option<FilesystemIo>,

    /// I/O for saving any generated format files.
    format_cache: FormatCache,

    /// Possible redirection of "standard output" writes to actual standard
    /// output.
    genuine_stdout: Option<GenuineStdoutIo>,

    /// A possible alternative "primary input" when generating format files. If
    /// Some(), we're in format-file generation mode; in most cases this is
    /// None.
    format_primary: Option<BufferedPrimaryIo>,

    /// The I/O events that occurred while processing, keyed by file name.
    /// Standard output is tracked under the empty-string key.
    events: HashMap<String, FileSummary>,
}
257
258impl BridgeState {
259    /// Tell the IoProvider implementation of the bridge state to enter "format
260    /// mode", in which the "primary input" is fixed, based on the requested
261    /// format file name, and filesystem I/O is bypassed.
262    fn enter_format_mode(&mut self, format_file_name: &str) {
263        self.format_primary = Some(BufferedPrimaryIo::from_text(format!(
264            "\\input {format_file_name}"
265        )));
266    }
267
268    /// Leave "format mode".
269    fn leave_format_mode(&mut self) {
270        self.format_primary = None;
271    }
272
273    /// Invoke an external tool as a pass in the processing pipeline.
274    fn external_tool_pass(
275        &mut self,
276        tool: &ExternalToolPass,
277        status: &mut dyn StatusBackend,
278    ) -> Result<()> {
279        status.note_highlighted("Running external tool ", &tool.argv[0], " ...");
280
281        // Process the command arguments. Filenames appearing in the arguments
282        // are treated as "requirements" that will be placed in the tool's
283        // working directory.
284
285        let mut cmd = Command::new(&tool.argv[0]);
286        let mut read_files = tool.extra_requires.clone();
287
288        {
289            let mem_files = &*self.mem.files.borrow();
290
291            for arg in &tool.argv[1..] {
292                cmd.arg(arg);
293
294                if mem_files.contains_key(arg) {
295                    read_files.insert(arg.to_owned());
296                }
297            }
298        }
299
300        // Now that we're validated, write those files to disk so that the tool
301        // can actually use them.
302
303        let tempdir = ctry!(
304            tempfile::Builder::new().tempdir();
305            "can't create temporary directory for external tool"
306        );
307
308        {
309            for name in &read_files {
310                // If a relative parent is found in the file to open, this fn
311                // does not properly handle that. Thus, throw an error.
312                if name.contains("../") {
313                    return Err(errmsg!(
314                        "relative parent paths are not supported for the \
315                        external tool. Got path `{}`.",
316                        name
317                    ));
318                }
319
320                let mut ih = ctry!(
321                    self.input_open_name(name, status).must_exist();
322                    "can't open path `{}`", name
323                );
324
325                // If the input path is absolute, we don't need to create a
326                // version in the tempdir, and in fact the current
327                // implementation below will blow away the input file. However,
328                // we do want to try to open the input so that it gets
329                // registered with the I/O tracking system.
330
331                let path = Path::new(name);
332                if path.is_absolute() {
333                    continue;
334                }
335
336                let tool_path = tempdir.path().join(name);
337                let tool_parent = tool_path.parent().unwrap();
338
339                if tool_parent != tempdir.path() {
340                    ctry!(
341                        std::fs::create_dir_all(tool_parent);
342                        "failed to create sub directory `{}`", tool_parent.display()
343                    );
344                }
345                let mut f = ctry!(
346                    File::create(&tool_path);
347                    "failed to create file `{}`", tool_path.display()
348                );
349                ctry!(
350                    std::io::copy(&mut ih, &mut f);
351                    "failed to write file `{}`", tool_path.display()
352                );
353            }
354        }
355
356        // Now we can actually run the command.
357
358        let output = cmd.current_dir(tempdir.path()).output()?;
359
360        if let Some(0) = output.status.code() {
361        } else {
362            tt_error!(
363                status,
364                "the external tool exited with an error code; its stdout was:\n"
365            );
366            status.dump_error_logs(&output.stdout[..]);
367            tt_error!(status, "its stderr was:\n");
368            status.dump_error_logs(&output.stderr[..]);
369
370            return if let Some(n) = output.status.code() {
371                Err(errmsg!("the external tool exited with error code {}", n))
372            } else {
373                Err(errmsg!("the external tool was terminated by a signal"))
374            };
375        }
376
377        // Search for any files that the tool created, and import them into the
378        // memory layer.
379
380        for entry in std::fs::read_dir(tempdir.path())? {
381            let entry = entry?;
382
383            if !entry.file_type()?.is_file() {
384                continue;
385            }
386
387            if let Some(basename) = entry.file_name().to_str() {
388                if !self.mem.files.borrow().contains_key(basename) {
389                    let path = entry.path();
390                    let mut data = Vec::new();
391
392                    let mut f = ctry!(
393                        File::open(&path);
394                        "failed to open tool-created file `{}`", path.display()
395                    );
396                    ctry!(
397                        f.read_to_end(&mut data);
398                        "failed to read tool-created file `{}`", path.display()
399                    );
400
401                    self.mem.create_entry(basename, data);
402                    self.events.insert(
403                        basename.to_owned(),
404                        FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
405                    );
406                }
407            }
408        }
409
410        // Mark the input files as having been read, and we're done.
411
412        for name in &read_files {
413            let summ = self.events.get_mut(name).unwrap();
414            summ.access_pattern = match summ.access_pattern {
415                AccessPattern::Written => AccessPattern::WrittenThenRead,
416                c => c, // identity mapping makes sense for remaining options
417            };
418        }
419
420        Ok(())
421    }
422
423    // Get the names of all intermediate files which are generated from
424    // previous passes.
425    fn get_intermediate_file_names(&self) -> Vec<String> {
426        // Currently, we only consider files in memory as intermediate files.
427        return self.mem.files.borrow().keys().cloned().collect();
428    }
429}
430
// Call an `IoProvider` method on a single provider and return early from the
// enclosing function or closure unless the provider reports
// `OpenResult::NotAvailable`.
//
// `$provider` is the provider expression and `$($inner)+` is the method call
// (name plus arguments) to apply to it. Because the expansion contains a bare
// `return`, this macro can only be used where the enclosing function or
// closure returns the method's `OpenResult` type.
macro_rules! bridgestate_ioprovider_try {
    ($provider:expr, $($inner:tt)+) => {
        let r = $provider.$($inner)+;
        match r {
            // This provider doesn't have the file: fall through so that the
            // next provider in the cascade gets a chance.
            OpenResult::NotAvailable => {},
            // Success or a hard error: either way the search is over.
            _ => return r,
        };
    }
}
440
// Try an `IoProvider` method on each layer of the bridge state's I/O stack in
// priority order, returning the first result that is not
// `OpenResult::NotAvailable`.
//
// The order is: genuine stdout (if enabled); the format-mode stub input or
// else the regular primary input; the memory layer; then, unless in format
// mode, the main filesystem, the shell-escape work directory and the extra
// search paths; and finally the bundle and the format cache. If nothing
// matches, the expansion returns `OpenResult::NotAvailable`.
//
// `$self` must be the `BridgeState` and `$($inner)+` the method call to
// forward. The expansion always ends in a `return`, so callers that need to
// post-process the result wrap the invocation in an immediately-called
// closure.
macro_rules! bridgestate_ioprovider_cascade {
    ($self:ident, $($inner:tt)+) => {
        if let Some(ref mut p) = $self.genuine_stdout {
            bridgestate_ioprovider_try!(p, $($inner)+);
        }

        // See enter_format_mode above. If creating a format file, disable local
        // filesystem I/O.
        let use_fs = if let Some(ref mut p) = $self.format_primary {
            bridgestate_ioprovider_try!(p, $($inner)+);
            false
        } else {
            bridgestate_ioprovider_try!($self.primary_input, $($inner)+);
            true
        };

        bridgestate_ioprovider_try!($self.mem, $($inner)+);

        if use_fs {
            bridgestate_ioprovider_try!($self.filesystem, $($inner)+);

            // With this ordering, we are preventing files created by
            // shell-escape commands from overwriting/replacing source files.
            // This seems very much like the behavior we want, unless there are
            // some freaky shell-escape uses that depend on this behavior.
            if let Some(ref mut p) = $self.shell_escape_work {
                bridgestate_ioprovider_try!(p, $($inner)+);
            }

            // Extra search paths. This has higher priority than bundles but lower than current
            // working dir to support the use case of overriding broken bundles (see issue #816).
            for fsio in $self.extra_search_paths.iter_mut() {
                bridgestate_ioprovider_try!(fsio, $($inner)+);
            }
        }

        // The bundle and the format cache are consulted even in format mode.
        bridgestate_ioprovider_try!($self.bundle.as_ioprovider_mut(), $($inner)+);
        bridgestate_ioprovider_try!($self.format_cache, $($inner)+);

        return OpenResult::NotAvailable;
    }
}
483
484impl IoProvider for BridgeState {
485    fn output_open_name(&mut self, name: &str) -> OpenResult<OutputHandle> {
486        let r = (|| {
487            bridgestate_ioprovider_cascade!(self, output_open_name(name));
488        })();
489
490        if let OpenResult::Ok(_) = r {
491            if let Some(summ) = self.events.get_mut(name) {
492                summ.access_pattern = match summ.access_pattern {
493                    AccessPattern::Read => AccessPattern::ReadThenWritten,
494                    c => c, // identity mapping makes sense for remaining options
495                };
496            } else {
497                self.events.insert(
498                    name.to_owned(),
499                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
500                );
501            }
502        }
503
504        r
505    }
506
507    fn output_open_stdout(&mut self) -> OpenResult<OutputHandle> {
508        let r = (|| {
509            bridgestate_ioprovider_cascade!(self, output_open_stdout());
510        })();
511
512        // Life is easier if we track stdout in the same way that we do other
513        // output files.
514
515        if let OpenResult::Ok(_) = r {
516            if let Some(summ) = self.events.get_mut("") {
517                summ.access_pattern = match summ.access_pattern {
518                    AccessPattern::Read => AccessPattern::ReadThenWritten,
519                    c => c, // identity mapping makes sense for remaining options
520                };
521            } else {
522                self.events.insert(
523                    String::from(""),
524                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
525                );
526            }
527        }
528
529        r
530    }
531
532    fn input_open_name(
533        &mut self,
534        name: &str,
535        status: &mut dyn StatusBackend,
536    ) -> OpenResult<InputHandle> {
537        match self.input_open_name_with_abspath(name, status) {
538            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
539            OpenResult::Err(e) => OpenResult::Err(e),
540            OpenResult::NotAvailable => OpenResult::NotAvailable,
541        }
542    }
543
544    fn input_open_name_with_abspath(
545        &mut self,
546        name: &str,
547        status: &mut dyn StatusBackend,
548    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
549        let r = (|| {
550            bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status));
551        })();
552
553        match r {
554            OpenResult::Ok((ref ih, ref _path)) => {
555                if let Some(summ) = self.events.get_mut(name) {
556                    summ.access_pattern = match summ.access_pattern {
557                        AccessPattern::Written => AccessPattern::WrittenThenRead,
558                        c => c, // identity mapping makes sense for remaining options
559                    };
560                } else {
561                    self.events.insert(
562                        name.to_owned(),
563                        FileSummary::new(AccessPattern::Read, ih.origin()),
564                    );
565                }
566            }
567
568            OpenResult::NotAvailable => {
569                // For the purposes of file access pattern tracking, an attempt to
570                // open a nonexistent file counts as a read of a zero-size file. I
571                // don't see how such a file could have previously been written, but
572                // let's use the full update logic just in case.
573
574                if let Some(summ) = self.events.get_mut(name) {
575                    summ.access_pattern = match summ.access_pattern {
576                        AccessPattern::Written => AccessPattern::WrittenThenRead,
577                        c => c, // identity mapping makes sense for remaining options
578                    };
579                } else {
580                    // Unlike other cases, here we need to fill in the read_digest. `None`
581                    // is not an appropriate value since, if the file is written and then
582                    // read again later, the `None` will be overwritten; but what matters
583                    // is the contents of the file the very first time it was read.
584                    let mut fs = FileSummary::new(AccessPattern::Read, InputOrigin::NotInput);
585                    fs.read_digest = Some(DigestData::of_nothing());
586                    self.events.insert(name.to_owned(), fs);
587                }
588            }
589
590            OpenResult::Err(_) => {}
591        }
592
593        r
594    }
595
596    fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
597        match self.input_open_primary_with_abspath(status) {
598            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
599            OpenResult::Err(e) => OpenResult::Err(e),
600            OpenResult::NotAvailable => OpenResult::NotAvailable,
601        }
602    }
603
    /// Open the primary input through the provider cascade, also reporting
    /// its absolute path when the provider that answers knows one.
    ///
    /// Unlike the named-file methods, this one records nothing in the event
    /// log: the cascade's result is returned directly.
    fn input_open_primary_with_abspath(
        &mut self,
        status: &mut dyn StatusBackend,
    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
        // The macro expansion ends in a `return`, so no closure is needed here.
        bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status));
    }
610
611    fn input_open_format(
612        &mut self,
613        name: &str,
614        status: &mut dyn StatusBackend,
615    ) -> OpenResult<InputHandle> {
616        let r = (|| {
617            bridgestate_ioprovider_cascade!(self, input_open_format(name, status));
618        })();
619
620        if let OpenResult::Ok(ref ih) = r {
621            if let Some(summ) = self.events.get_mut(name) {
622                summ.access_pattern = match summ.access_pattern {
623                    AccessPattern::Written => AccessPattern::WrittenThenRead,
624                    c => c, // identity mapping makes sense for remaining options
625                };
626            } else {
627                self.events.insert(
628                    name.to_owned(),
629                    FileSummary::new(AccessPattern::Read, ih.origin()),
630                );
631            }
632        }
633
634        r
635    }
636}
637
638impl DriverHooks for BridgeState {
    /// Return the I/O provider that the engines should use. `BridgeState`
    /// implements [`IoProvider`] itself, so this is simply `self`.
    fn io(&mut self) -> &mut dyn IoProvider {
        self
    }
642
643    fn event_output_closed(&mut self, name: String, digest: DigestData) {
644        let summ = self
645            .events
646            .get_mut(&name)
647            .expect("closing file that wasn't opened?");
648        summ.write_digest = Some(digest);
649    }
650
651    fn event_input_closed(
652        &mut self,
653        name: String,
654        digest: Option<DigestData>,
655        _status: &mut dyn StatusBackend,
656    ) {
657        let summ = self
658            .events
659            .get_mut(&name)
660            .expect("closing file that wasn't opened?");
661
662        // It's what was in the file the *first* time that it was read that
663        // matters, so don't replace the read digest if it's already got one.
664
665        if summ.read_digest.is_none() {
666            summ.read_digest = digest;
667        }
668    }
669
670    fn sysrq_shell_escape(
671        &mut self,
672        command: &str,
673        status: &mut dyn StatusBackend,
674    ) -> StdResult<(), SystemRequestError> {
675        #[cfg(unix)]
676        const SHELL: &[&str] = &["sh", "-c"];
677
678        #[cfg(windows)]
679        const SHELL: &[&str] = &["cmd.exe", "/c"];
680
681        // Write any TeX-created files in the memory cache to the shell-escape
682        // working directory, since the shell-escape program may need to use
683        // them. (This is the case for `minted`.) We basically just hope that
684        // nothing will want to access the actual TeX source, which will live in
685        // a different directory.
686        //
687        // This is suboptimally slow since we'll be rewriting the same files
688        // repeatedly for repeated shell-escape invocations, but I don't feel
689        // like optimizing that I/O right now. Shell-escape is a gnarly hack
690        // anyway!
691
692        if let Some(work) = self.shell_escape_work.as_ref() {
693            for (name, file) in &*self.mem.files.borrow() {
694                // If it's in the `mem` backend, it's of interest here ...
695                // unless it's stdout.
696                if name == self.mem.stdout_key() {
697                    continue;
698                }
699
700                let real_path = work.root().join(name);
701                if let Some(prefix) = real_path.parent() {
702                    std::fs::create_dir_all(prefix).map_err(|e| {
703                        tt_error!(status, "failed to create sub directory `{}`", prefix.display(); e.into());
704                        SystemRequestError::Failed
705                    })?;
706                }
707                let mut f = File::create(&real_path).map_err(|e| {
708                    tt_error!(status, "failed to create file `{}`", real_path.display(); e.into());
709                    SystemRequestError::Failed
710                })?;
711                f.write_all(&file.data).map_err(|e| {
712                    tt_error!(status, "failed to write file `{}`", real_path.display(); e.into());
713                    SystemRequestError::Failed
714                })?;
715            }
716
717            // Now we can actually run the command.
718
719            tt_note!(status, "running shell command: `{}`", command);
720
721            match Command::new(SHELL[0])
722                .args(&SHELL[1..])
723                .arg(command)
724                .current_dir(work.root())
725                .status()
726            {
727                Ok(s) => match s.code() {
728                    Some(0) => Ok(()),
729                    Some(n) => {
730                        tt_warning!(status, "command exited with error code {}", n);
731                        Err(SystemRequestError::Failed)
732                    }
733                    None => {
734                        tt_warning!(status, "command was terminated by signal");
735                        Err(SystemRequestError::Failed)
736                    }
737                },
738                Err(err) => {
739                    tt_warning!(status, "failed to run command"; err.into());
740                    Err(SystemRequestError::Failed)
741                }
742            }
743
744            // That's it! We shouldn't clean up here, because there might be
745            // multiple shell-escapes that build up in sequence, and any new
746            // files created by the shell-escape command will be picked up by
747            // the filesystem I/O.
748        } else {
749            // No shell-escape work directory. This "shouldn't happen" but means
750            // that shell-escape is supposed to be disabled anyway!
751            tt_error!(
752                status,
753                "the engine requested a shell-escape invocation but it's currently disabled"
754            );
755            Err(SystemRequestError::NotAllowed)
756        }
757    }
758}
759
/// Possible modes for handling shell-escape functionality.
///
/// The default variant is `Defaulted`.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum ShellEscapeMode {
    /// "Default" mode: shell-escape is disabled, unless it's been turned on in
    /// the unstable options, in which case it will be allowed through a
    /// temporary directory.
    #[default]
    Defaulted,

    /// Shell-escape is disabled, overriding any unstable-option setting.
    Disabled,

    /// Shell-escape is enabled, using a temporary work directory managed by the
    /// processing session. The work directory will be deleted after processing
    /// completes.
    TempDir,

    /// Shell-escape is enabled, using some other work directory that is managed
    /// externally. The processing session won't delete this directory. The
    /// payload is the path of that directory.
    ExternallyManagedDir(PathBuf),
}
781
/// A custom extra pass that invokes an external tool.
///
/// This is bad for reproducibility but comes in handy.
#[derive(Debug)]
struct ExternalToolPass {
    /// The command line: the first element is the program to run and the
    /// remaining elements are passed to it as arguments. Arguments that name
    /// files in the memory layer are also staged into the tool's working
    /// directory.
    argv: Vec<String>,

    /// Names of additional files that must be staged into the tool's working
    /// directory even though they do not appear in `argv`.
    extra_requires: HashSet<String>,
}
790
/// A builder-style interface for creating a [`ProcessingSession`].
///
/// This uses standard builder patterns. The `Default` implementation defaults
/// to restrictive security settings that disable all known-insecure features
/// that could be abused by untrusted inputs. Use
/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the
/// option to enable potentially-insecure features such as shell-escape.
#[derive(Default)]
pub struct ProcessingSessionBuilder {
    // Security settings; only customizable through `new_with_security`.
    security: SecuritySettings,
    // Where the primary input comes from. Defaults to standard input; set by
    // `primary_input_path` or `primary_input_buffer`.
    primary_input: PrimaryInputMode,
    tex_input_name: Option<String>,
    output_dest: OutputDestination,
    filesystem_root: Option<PathBuf>,
    format_name: Option<String>,
    format_cache_path: Option<PathBuf>,
    output_format: OutputFormat,
    makefile_output_path: Option<PathBuf>,
    hidden_input_paths: HashSet<PathBuf>,
    pass: PassSetting,
    reruns: Option<usize>,
    print_stdout: bool,
    bundle: Option<Box<dyn Bundle>>,
    keep_intermediates: bool,
    keep_logs: bool,
    synctex: bool,
    build_date: Option<SystemTime>,
    unstables: UnstableOptions,
    shell_escape_mode: ShellEscapeMode,
    html_assets_spec_path: Option<String>,
    html_precomputed_assets: Option<AssetSpecification>,
    html_do_not_emit_files: bool,
    html_do_not_emit_assets: bool,
}
825
826impl ProcessingSessionBuilder {
827    /// Create a new builder with customized security settings.
828    pub fn new_with_security(security: SecuritySettings) -> Self {
829        ProcessingSessionBuilder {
830            security,
831            ..Default::default()
832        }
833    }
834
835    /// Sets the path to the primary input file.
836    ///
837    /// If a primary input path is not specified, we will default to reading it from stdin.
838    pub fn primary_input_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
839        self.primary_input = PrimaryInputMode::Path(p.as_ref().to_owned());
840        self
841    }
842
843    /// Sets the primary input to be a caller-specified buffer.
844    ///
845    /// If neither this nor a primary input path is specified, we will default
846    /// to reading the primary input from stdin.
847    pub fn primary_input_buffer(&mut self, buf: &[u8]) -> &mut Self {
848        self.primary_input = PrimaryInputMode::Buffer(buf.to_owned());
849        self
850    }
851
852    /// Sets the name of the main input file.
853    ///
854    /// This value will be used to infer the names of the output files; for example, if
855    /// `tex_input_name` is set to `"texput.tex"` then the pdf output file will be `"texput.pdf"`.
856    /// As such, this parameter is mandatory, even if the real input is coming from stdin (if it is
857    /// not provided, [`ProcessingSessionBuilder::create`] will panic).
858    pub fn tex_input_name(&mut self, s: &str) -> &mut Self {
859        self.tex_input_name = Some(s.to_owned());
860        self
861    }
862
863    /// Set the directory that serves as the root for finding files on disk.
864    ///
865    /// If unspecified, and there is a primary input file, the directory
866    /// containing that file will serve as the filesystem root. Otherwise, it is
867    /// set to the current directory.
868    pub fn filesystem_root<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
869        self.filesystem_root = Some(p.as_ref().to_owned());
870        self
871    }
872
873    /// A path to the directory where output files should be created.
874    ///
875    /// This will default to the directory containing `primary_input_path`, or
876    /// the current working directory if the primary input is coming from
877    /// stdin.
878    pub fn output_dir<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
879        self.output_dest = OutputDestination::Path(p.as_ref().to_owned());
880        self
881    }
882
883    /// Indicate that output files should not be written to disk.
884    ///
885    /// By default, output files will be written to the directory containing
886    /// `primary_input_path`, or the current working directory if the primary
887    /// input is coming from stdin.
888    pub fn do_not_write_output_files(&mut self) -> &mut Self {
889        self.output_dest = OutputDestination::Nowhere;
890        self
891    }
892
893    /// The name of the `.fmt` file used to initialize the TeX engine.
894    ///
895    /// This file does not necessarily have to exist already; it will be created
896    /// if it doesn't. This parameter is mandatory (if it is not provided,
897    /// [`ProcessingSessionBuilder::create`] will panic).
898    pub fn format_name(&mut self, p: &str) -> &mut Self {
899        self.format_name = Some(p.to_owned());
900        self
901    }
902
903    /// Sets the path to the format file cache.
904    ///
905    /// This is used to, well, cache format files, which are generated as
906    /// needed from the backing bundle. Defaults to the same directory as the
907    /// input file, or PWD if the input is a non-file (such as standard
908    /// input).
909    pub fn format_cache_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
910        self.format_cache_path = Some(p.as_ref().to_owned());
911        self
912    }
913
914    /// The type of output to create.
915    pub fn output_format(&mut self, f: OutputFormat) -> &mut Self {
916        self.output_format = f;
917        self
918    }
919
920    /// If set, a makefile will be written out at the given path.
921    pub fn makefile_output_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
922        self.makefile_output_path = Some(p.as_ref().to_owned());
923        self
924    }
925
926    /// Which kind of pass should the `ProcessingSession` run? Defaults to `PassSetting::Default`
927    /// (duh).
928    pub fn pass(&mut self, p: PassSetting) -> &mut Self {
929        self.pass = p;
930        self
931    }
932
933    /// If set, and if the pass is set to `PassSetting::Default`, the TeX engine will be re-run
934    /// *exactly* this many times.
935    ///
936    /// If `reruns` is unset, we will auto-detect how many times the TeX engine needs to be re-run.
937    pub fn reruns(&mut self, r: usize) -> &mut Self {
938        self.reruns = Some(r);
939        self
940    }
941
942    /// If set to `true`, stdout from the TeX engine will be forwarded to actual stdout. (By
943    /// default, it will be suppressed.)
944    pub fn print_stdout(&mut self, p: bool) -> &mut Self {
945        self.print_stdout = p;
946        self
947    }
948
949    /// Marks a path as hidden, meaning that the TeX engine will pretend that it doesn't exist in
950    /// the filesystem.
951    pub fn hide<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
952        self.hidden_input_paths.insert(p.as_ref().to_owned());
953        self
954    }
955
956    /// Sets the bundle, which the various engines will use for finding style files, font files,
957    /// etc.
958    pub fn bundle(&mut self, b: Box<dyn Bundle>) -> &mut Self {
959        self.bundle = Some(b);
960        self
961    }
962
963    /// If set to `true`, various intermediate files will be written out to the filesystem.
964    pub fn keep_intermediates(&mut self, k: bool) -> &mut Self {
965        self.keep_intermediates = k;
966        self
967    }
968
969    /// If set to `true`, '.log' and '.blg' files will be written out to the filesystem.
970    pub fn keep_logs(&mut self, k: bool) -> &mut Self {
971        self.keep_logs = k;
972        self
973    }
974
975    /// If set to `true`, tex files will be compiled using synctex information.
976    pub fn synctex(&mut self, s: bool) -> &mut Self {
977        self.synctex = s;
978        self
979    }
980
981    /// Sets the date and time of the processing session.
982    /// See `TexEngine::build_date` for mor information.
983    pub fn build_date(&mut self, date: SystemTime) -> &mut Self {
984        self.build_date = Some(date);
985        self
986    }
987
988    /// Configures the date and time of the processing session from the environment:
989    /// If `SOURCE_DATE_EPOCH` is set, it's used as the build date.
990    /// If `force_deterministic` is set, we fall back to UNIX_EPOCH.
991    /// Otherwise, we use the current system time.
992    pub fn build_date_from_env(&mut self, force_deterministic: bool) -> &mut Self {
993        let build_date_str = std::env::var("SOURCE_DATE_EPOCH").ok();
994        let build_date = match (force_deterministic, build_date_str) {
995            (_, Some(s)) => {
996                let epoch = s
997                    .parse::<u64>()
998                    .expect("invalid SOURCE_DATE_EPOCH (not a number)");
999
1000                SystemTime::UNIX_EPOCH
1001                    .checked_add(Duration::from_secs(epoch))
1002                    .expect("time overflow")
1003            }
1004            (true, None) => SystemTime::UNIX_EPOCH,
1005            (false, None) => SystemTime::now(),
1006        };
1007        self.build_date(build_date)
1008    }
1009
1010    /// Loads unstable options into the processing session
1011    pub fn unstables(&mut self, opts: UnstableOptions) -> &mut Self {
1012        self.unstables = opts;
1013        self
1014    }
1015
1016    /// Enable "shell escape" commands in the engines, and use the specified
1017    /// directory for shell-escape work. The caller is responsible for the
1018    /// creation and/or destruction of this directory. The default is to
1019    /// disable shell-escape unless the [`UnstableOptions`] say otherwise,
1020    /// in which case a driver-managed temporary directory will be used.
1021    pub fn shell_escape_with_work_dir<P: AsRef<Path>>(&mut self, path: P) -> &mut Self {
1022        if self.security.allow_shell_escape() {
1023            self.shell_escape_mode =
1024                ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned());
1025        }
1026        self
1027    }
1028
1029    /// Forcibly enable shell-escape mode with a temporary directory, overriding
1030    /// any [`UnstableOptions`] settings. The default is to disable shell-escape
1031    /// unless the [`UnstableOptions`] say otherwise, in which case a
1032    /// driver-managed temporary directory will be used.
1033    pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self {
1034        if self.security.allow_shell_escape() {
1035            self.shell_escape_mode = ShellEscapeMode::TempDir;
1036        }
1037        self
1038    }
1039
1040    /// Forcibly disable shell-escape mode, overriding any [`UnstableOptions`]
1041    /// settings. The default is to disable shell-escape unless the
1042    /// [`UnstableOptions`] say otherwise, in which case a driver-managed
1043    /// temporary directory will be used.
1044    pub fn shell_escape_disabled(&mut self) -> &mut Self {
1045        self.shell_escape_mode = ShellEscapeMode::Disabled;
1046        self
1047    }
1048
1049    /// When using HTML mode, emit an asset specification file instead of actual
1050    /// asset files.
1051    ///
1052    /// "Assets" are files like fonts and images that accompany the HTML output
1053    /// generated during processing. By default, these are emitted during
1054    /// processing. If this method is called, the assets will *not* be created.
1055    /// Instead, an "asset specification" file will be emitted to the given
1056    /// output path. This specification file contains the information needed to
1057    /// generate the assets upon a later invocation. Asset specification files
1058    /// can be merged, allowing the results of multiple separate TeX
1059    /// compilations to be synthesized into one HTML output tree.
1060    ///
1061    /// If the build does not use HTML mode, this setting has no effect.
1062    pub fn html_assets_spec_path<S: ToString>(&mut self, path: S) -> &mut Self {
1063        self.html_assets_spec_path = Some(path.to_string());
1064        self
1065    }
1066
1067    /// In HTML mode, use a precomputed asset specification.
1068    ///
1069    /// "Assets" are files like fonts and images that accompany the HTML output
1070    /// generated during processing. By default, the engine gathers these during
1071    /// processing and emits them at the end. After this method is used,
1072    /// however, it will generate HTML outputs assuming the information given in
1073    /// the asset specification given here. If the input calls for new assets or
1074    /// different options inconsistent with the specification, processing will
1075    /// abort with an error.
1076    ///
1077    /// The purpose of this mode is to allow for a unified set of assets to be
1078    /// created from multiple independent runs of the SPX-to-HTML stage. First,
1079    /// the different inputs should be processed independently, and their
1080    /// individual assets should saved. These should then be merged. Then the
1081    /// inputs should be reprocessed, all using the merged asset specification.
1082    /// In one — but only one — of these sessions, the assets should actually be
1083    /// emitted.
1084    pub fn html_precomputed_assets(&mut self, assets: AssetSpecification) -> &mut Self {
1085        self.html_precomputed_assets = Some(assets);
1086        self
1087    }
1088
1089    /// Set whether templated outputs should be created during HTML processing.
1090    ///
1091    /// This mode can be useful if you want to analyze what *would* be created
1092    /// during HTML processing without actually creating the files.
1093    pub fn html_emit_files(&mut self, do_emit: bool) -> &mut Self {
1094        self.html_do_not_emit_files = !do_emit;
1095        self
1096    }
1097
1098    /// Set whether supporting asset files should be created during HTML
1099    /// processing.
1100    ///
1101    /// This mode can be useful if you want to analyze what *would* be created
1102    /// during HTML processing without actually creating the files. If you call
1103    /// [`Self::html_assets_spec_path`], this setting will ignored, and no
1104    /// assets will be emitted to disk.
1105    pub fn html_emit_assets(&mut self, do_emit: bool) -> &mut Self {
1106        self.html_do_not_emit_assets = !do_emit;
1107        self
1108    }
1109
1110    /// Creates a `ProcessingSession`.
1111    pub fn create(self, status: &mut dyn StatusBackend) -> Result<ProcessingSession> {
1112        // First, work on the "bridge state", which gathers the subset of our
1113        // state that has to be held in a mutable reference while running the
1114        // C/C++ engines:
1115
1116        let mut bundle = self.bundle.expect("a bundle must be specified");
1117
1118        let mut filesystem_root = self.filesystem_root.unwrap_or_default();
1119
1120        let (pio, primary_input_path, default_output_path) = match self.primary_input {
1121            PrimaryInputMode::Path(p) => {
1122                // Set the filesystem root (that's the directory we'll search
1123                // for files in) to be the same directory as the main input
1124                // file.
1125                let parent = match p.parent() {
1126                    Some(parent) => parent.to_owned(),
1127                    None => {
1128                        return Err(errmsg!(
1129                            "can't figure out a parent directory for input path \"{}\"",
1130                            p.display()
1131                        ));
1132                    }
1133                };
1134
1135                filesystem_root.clone_from(&parent);
1136                let pio: Box<dyn IoProvider> = Box::new(FilesystemPrimaryInputIo::new(&p));
1137                (pio, Some(p), parent)
1138            }
1139
1140            PrimaryInputMode::Stdin => {
1141                // If the main input file is stdin, we don't set a filesystem
1142                // root, which means we'll default to the current working
1143                // directory.
1144                //
1145                // Note that, due to the expected need to rerun the engine
1146                // multiple times, we'll need to buffer stdin in its entirety,
1147                // so we might as well do that now.
1148                let pio = ctry!(BufferedPrimaryIo::from_stdin(); "error reading standard input");
1149                let pio: Box<dyn IoProvider> = Box::new(pio);
1150                (pio, None, "".into())
1151            }
1152
1153            PrimaryInputMode::Buffer(buf) => {
1154                // Same behavior as with stdin.
1155                let pio: Box<dyn IoProvider> = Box::new(BufferedPrimaryIo::from_buffer(buf));
1156                (pio, None, "".into())
1157            }
1158        };
1159
1160        let format_cache_path = self
1161            .format_cache_path
1162            .unwrap_or_else(|| filesystem_root.clone());
1163        let format_cache = FormatCache::new(bundle.get_digest()?, format_cache_path);
1164
1165        let genuine_stdout = if self.print_stdout {
1166            Some(GenuineStdoutIo::new())
1167        } else {
1168            None
1169        };
1170
1171        // move this out of self to get around borrow checker issues
1172        let hidden_input_paths = self.hidden_input_paths;
1173
1174        let extra_search_paths = if self.security.allow_extra_search_paths() {
1175            self.unstables
1176                .extra_search_paths
1177                .iter()
1178                .map(|p| FilesystemIo::new(p, false, false, hidden_input_paths.clone()))
1179                .collect()
1180        } else {
1181            if !self.unstables.extra_search_paths.is_empty() {
1182                tt_warning!(status, "Extra search path(s) ignored due to security");
1183            }
1184            Vec::new()
1185        };
1186
1187        let filesystem = FilesystemIo::new(&filesystem_root, false, true, hidden_input_paths);
1188
1189        let mem = MemoryIo::new(true);
1190
1191        let bs = BridgeState {
1192            primary_input: pio,
1193            mem,
1194            filesystem,
1195            extra_search_paths,
1196            shell_escape_work: None,
1197            format_cache,
1198            bundle,
1199            genuine_stdout,
1200            format_primary: None,
1201            events: HashMap::new(),
1202        };
1203
1204        // Now we can do the rest.
1205
1206        let output_path = match self.output_dest {
1207            OutputDestination::Default => Some(default_output_path),
1208            OutputDestination::Path(p) => Some(p),
1209            OutputDestination::Nowhere => None,
1210        };
1211
1212        let tex_input_name = self
1213            .tex_input_name
1214            .expect("tex_input_name must be specified");
1215        let mut aux_path = PathBuf::from(tex_input_name.clone());
1216        aux_path.set_extension("aux");
1217        let mut xdv_path = aux_path.clone();
1218        xdv_path.set_extension(if self.output_format == OutputFormat::Html {
1219            "spx"
1220        } else {
1221            "xdv"
1222        });
1223        let mut pdf_path = aux_path.clone();
1224        pdf_path.set_extension("pdf");
1225
1226        let shell_escape_mode = if !self.security.allow_shell_escape() {
1227            ShellEscapeMode::Disabled
1228        } else {
1229            match self.shell_escape_mode {
1230                ShellEscapeMode::Defaulted => {
1231                    if let Some(ref cwd) = self.unstables.shell_escape_cwd {
1232                        ShellEscapeMode::ExternallyManagedDir(cwd.into())
1233                    } else if self.unstables.shell_escape {
1234                        ShellEscapeMode::TempDir
1235                    } else {
1236                        ShellEscapeMode::Disabled
1237                    }
1238                }
1239
1240                other => other,
1241            }
1242        };
1243
1244        Ok(ProcessingSession {
1245            security: self.security,
1246            bs,
1247            pass: self.pass,
1248            primary_input_path,
1249            primary_input_tex_path: tex_input_name,
1250            format_name: self.format_name.unwrap(),
1251            tex_aux_path: aux_path.display().to_string(),
1252            tex_xdv_path: xdv_path.display().to_string(),
1253            tex_pdf_path: pdf_path.display().to_string(),
1254            output_format: self.output_format,
1255            makefile_output_path: self.makefile_output_path,
1256            output_path,
1257            tex_rerun_specification: self.reruns,
1258            keep_intermediates: self.keep_intermediates,
1259            keep_logs: self.keep_logs,
1260            synctex_enabled: self.synctex,
1261            build_date: self.build_date.unwrap_or(SystemTime::UNIX_EPOCH),
1262            unstables: self.unstables,
1263            shell_escape_mode,
1264            html_assets_spec_path: self.html_assets_spec_path,
1265            html_precomputed_assets: self.html_precomputed_assets,
1266            html_emit_files: !self.html_do_not_emit_files,
1267            html_emit_assets: !self.html_do_not_emit_assets,
1268        })
1269    }
1270}
1271
/// Why an engine needs to be run again.
#[derive(Debug, Clone)]
enum RerunReason {
    // NOTE(review): presumably a rerun triggered by running biber — the code
    // producing this variant is not visible here; confirm.
    Biber,
    // NOTE(review): presumably a rerun triggered by running BibTeX — the code
    // producing this variant is not visible here; confirm.
    Bibtex,
    /// A file that the engine read and then rewrote ended up with different
    /// contents; the payload is the name of that file.
    FileChange(String),
}
1278
/// The ProcessingSession struct runs the whole show when we're actually
/// processing a file. It understands, for example, the need to re-run the TeX
/// engine if the `.aux` file changed.
pub struct ProcessingSession {
    /// Security settings.
    security: SecuritySettings,

    /// The subset of the session state that can be mutated while the C/C++
    /// engines are running. Importantly, this includes the full I/O stack.
    bs: BridgeState,

    /// If our primary input is an actual file on disk, this is its path.
    primary_input_path: Option<PathBuf>,

    /// This is the name of the input that we tell TeX. It is the basename of
    /// the UTF8-ified version of `primary_input_path`; or something anodyne
    /// if the latter is None. (Namely, "texput.tex".)
    primary_input_tex_path: String,

    /// This is the name of the format file to use. TeX has to open it by name
    /// internally, so it has to be String compatible.
    format_name: String,

    /// These are the paths of the various output files as TeX knows them --
    /// just `primary_input_tex_path` with the extension changed.
    tex_aux_path: String,
    /// Like `tex_aux_path`, but with the `spx` extension in HTML mode and the
    /// `xdv` extension otherwise.
    tex_xdv_path: String,
    /// Like `tex_aux_path`, but with the `pdf` extension.
    tex_pdf_path: String,

    /// If we're writing out Makefile rules, this is where they go. The TeX
    /// engine doesn't know about this path at all.
    makefile_output_path: Option<PathBuf>,

    /// This is the path that the processed file will be saved at. It defaults
    /// to the path of `primary_input_path` or `.` if STDIN is used. If set to
    /// None, the output files will not be saved to disk — in which case, the
    /// caller should access the memory layer of the `bs` field to gain access
    /// to the output files.
    output_path: Option<PathBuf>,

    /// Which kind of pass `run` performs.
    pass: PassSetting,
    /// The type of output to create.
    output_format: OutputFormat,
    /// If set, the exact number of TeX reruns that was requested on the
    /// builder.
    tex_rerun_specification: Option<usize>,
    /// Whether intermediate files are written out to the filesystem.
    keep_intermediates: bool,
    /// Whether `.log` and `.blg` files are written out to the filesystem.
    keep_logs: bool,
    /// Whether to compile using synctex information.
    synctex_enabled: bool,

    /// See `TexEngine::with_date` and `XdvipdfmxEngine::with_date`.
    build_date: SystemTime,

    /// Unstable options loaded through the builder.
    unstables: UnstableOptions,

    /// How to handle shell-escape. The `Defaulted` option will never
    /// be used here.
    shell_escape_mode: ShellEscapeMode,

    /// In HTML mode, where to emit an asset specification file instead of
    /// the actual assets.
    html_assets_spec_path: Option<String>,
    /// In HTML mode, a precomputed asset specification to use.
    html_precomputed_assets: Option<AssetSpecification>,
    /// Whether templated outputs are created during HTML processing.
    html_emit_files: bool,
    /// Whether supporting asset files are created during HTML processing.
    html_emit_assets: bool,
}
1340
// NOTE(review): presumably the cap on the number of TeX passes when the
// number of reruns is auto-detected — the use site is not visible here;
// confirm.
const DEFAULT_MAX_TEX_PASSES: usize = 6;
// NOTE(review): presumably file-name suffixes that are always treated as
// intermediate files when deciding what to write to disk — the use site is
// not visible here; confirm.
const ALWAYS_INTERMEDIATE_EXTENSIONS: &[&str] = &[
    ".snm", ".toc", // generated by Beamer
];
1345
1346impl ProcessingSession {
1347    /// Assess whether we need to rerun an engine. This is the case if there
1348    /// was a file that the engine read and then rewrote, and the rewritten
1349    /// version is different than the version that it read in.
1350    fn is_rerun_needed(&self, status: &mut dyn StatusBackend) -> Option<RerunReason> {
1351        // TODO: we should probably wire up diagnostics since I expect this
1352        // stuff could get finicky and we're going to want to be able to
1353        // figure out why rerun detection is breaking.
1354
1355        for (name, info) in &self.bs.events {
1356            if info.access_pattern == AccessPattern::ReadThenWritten {
1357                let file_changed = match (&info.read_digest, &info.write_digest) {
1358                    (Some(d1), Some(d2)) => d1 != d2,
1359                    (&None, &Some(_)) => true,
1360                    (_, _) => {
1361                        // Other cases shouldn't happen.
1362                        tt_warning!(
1363                            status,
1364                            "internal consistency problem when checking if {} changed",
1365                            name
1366                        );
1367                        true
1368                    }
1369                };
1370
1371                if file_changed {
1372                    return Some(RerunReason::FileChange(name.clone()));
1373                }
1374            }
1375        }
1376
1377        None
1378    }
1379
1380    #[allow(dead_code)]
1381    fn _dump_access_info(&self, status: &mut dyn StatusBackend) {
1382        for (name, info) in &self.bs.events {
1383            if info.access_pattern != AccessPattern::Read {
1384                let r = match info.read_digest {
1385                    Some(ref d) => d.to_string(),
1386                    None => "-".into(),
1387                };
1388                let w = match info.write_digest {
1389                    Some(ref d) => d.to_string(),
1390                    None => "-".into(),
1391                };
1392                tt_note!(
1393                    status,
1394                    "ACCESS: {} {:?} {:?} {:?}",
1395                    name,
1396                    info.access_pattern,
1397                    r,
1398                    w
1399                );
1400            }
1401        }
1402    }
1403
1404    /// Runs the session, generating the desired outputs.
1405    ///
1406    /// What this does depends on which [`PassSetting`] you asked for. The most common choice is
1407    /// `PassSetting::Default`, in which case this method does the following:
1408    ///
1409    /// - if a `.fmt` file does not yet exist, generate one and cache it
1410    /// - run the TeX engine once
1411    /// - run BibTeX, if it seems to be required
1412    /// - repeat the last two steps as often as needed
1413    /// - write the output files to disk, including a Makefile if it was requested.
1414    pub fn run(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1415        // Pre-invocation setup that requires cleanup even if the processing errors out.
1416
1417        let (shell_escape_work, clean_up_shell_escape) = match self.shell_escape_mode {
1418            ShellEscapeMode::Disabled => (None, false),
1419
1420            ShellEscapeMode::ExternallyManagedDir(ref p) => (
1421                Some(FilesystemIo::new(p, false, false, HashSet::new())),
1422                false,
1423            ),
1424
1425            ShellEscapeMode::TempDir => {
1426                let tempdir = ctry!(tempfile::Builder::new().tempdir(); "can't create temporary directory for shell-escape work");
1427                (
1428                    Some(FilesystemIo::new(
1429                        &tempdir.keep(),
1430                        false,
1431                        false,
1432                        HashSet::new(),
1433                    )),
1434                    true,
1435                )
1436            }
1437
1438            ShellEscapeMode::Defaulted => unreachable!(),
1439        };
1440
1441        self.bs.shell_escape_work = shell_escape_work;
1442
1443        // Go-time!
1444        let result = self.run_inner(status);
1445
1446        // Do that cleanup.
1447
1448        if clean_up_shell_escape {
1449            let shell_escape_work = self.bs.shell_escape_work.take().unwrap();
1450            let shell_escape_err = std::fs::remove_dir_all(shell_escape_work.root());
1451
1452            if let Err(e) = shell_escape_err {
1453                tt_warning!(status, "an error occurred while cleaning up the \
1454                    shell-escape temporary directory `{}`", shell_escape_work.root().display(); e.into());
1455            }
1456        }
1457
1458        // Propagate the actual result.
1459        result
1460    }
1461
1462    /// The bulk of the `run` implementation. We need to wrap it to manage the
1463    /// lifecycle of resources like the shell-escape temporary directory, if
1464    /// needed.
1465    fn run_inner(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1466        // Do we need to generate the format file?
1467
1468        let generate_format = if self.output_format == OutputFormat::Format {
1469            false
1470        } else {
1471            match self.bs.input_open_format(&self.format_name, status) {
1472                OpenResult::Ok(_) => false,
1473                OpenResult::NotAvailable => true,
1474                OpenResult::Err(e) => {
1475                    return Err(e)
1476                        .chain_err(|| format!("could not open format file {}", self.format_name));
1477                }
1478            }
1479        };
1480
1481        if generate_format {
1482            tt_note!(status, "generating format \"{}\"", self.format_name);
1483            self.make_format_pass(status)?;
1484        }
1485
1486        // Do the meat of the work.
1487
1488        let result = match self.pass {
1489            PassSetting::Tex => match self.tex_pass(None, status) {
1490                Ok(Some(warnings)) => {
1491                    tt_warning!(status, "{}", warnings);
1492                    Ok(0)
1493                }
1494                Ok(None) => Ok(0),
1495                Err(e) => Err(e),
1496            },
1497            PassSetting::Default => self.default_pass(false, status),
1498            PassSetting::BibtexFirst => self.default_pass(true, status),
1499        };
1500
1501        if let Err(e) = result {
1502            self.write_files(None, status, true)?;
1503            return Err(e);
1504        };
1505
1506        // Write output files and the first line of our Makefile output.
1507
1508        let mut mf_dest_maybe = match self.makefile_output_path {
1509            Some(ref p) => {
1510                if self.output_path.is_none() {
1511                    tt_warning!(
1512                        status,
1513                        "requested to generate Makefile rules, but no files written to disk!"
1514                    );
1515                    None
1516                } else {
1517                    Some(File::create(p)?)
1518                }
1519            }
1520
1521            None => None,
1522        };
1523
1524        let n_skipped_intermediates = self.write_files(mf_dest_maybe.as_mut(), status, false)?;
1525
1526        if n_skipped_intermediates > 0 {
1527            status.note_highlighted(
1528                "Skipped writing ",
1529                &format!("{n_skipped_intermediates}"),
1530                " intermediate files (use --keep-intermediates to keep them)",
1531            );
1532        }
1533
1534        // Finish Makefile rules, maybe.
1535
1536        if let Some(ref mut mf_dest) = mf_dest_maybe {
1537            ctry!(write!(mf_dest, ": "); "couldn't write to Makefile-rules file");
1538
1539            if let Some(ref pip) = self.primary_input_path {
1540                let opip = ctry!(pip.to_str(); "Makefile-rules file path must be Unicode-able");
1541                ctry!(mf_dest.write_all(opip.as_bytes()); "couldn't write to Makefile-rules file");
1542            }
1543
1544            // The check above ensures that this is never None.
1545            let root = self.output_path.as_ref().unwrap();
1546
1547            for (name, info) in &self.bs.events {
1548                if info.input_origin != InputOrigin::Filesystem {
1549                    continue;
1550                }
1551
1552                if info.got_written_to_disk {
1553                    // If the file originally came from the filesystem, and it
1554                    // was written as well as read, and we actually wrote it
1555                    // to disk, there's a circular dependency that's
1556                    // inappropriate to express in a Makefile. If it was
1557                    // "written" by the engine but we didn't actually write
1558                    // those modifications to disk, we're OK. If there's a
1559                    // two-stage compilation involving the .aux file, the
1560                    // latter case is what arises unless --keep-intermediates
1561                    // is specified.
1562                    tt_warning!(status, "omitting circular Makefile dependency for {}", name);
1563                    continue;
1564                }
1565
1566                ctry!(write!(mf_dest, " \\\n  {}", root.join(name).display()); "couldn't write to Makefile-rules file");
1567            }
1568
1569            ctry!(writeln!(mf_dest, ""); "couldn't write to Makefile-rules file");
1570        }
1571
1572        // All done.
1573
1574        Ok(())
1575    }
1576
1577    fn write_files(
1578        &mut self,
1579        mut mf_dest_maybe: Option<&mut File>,
1580        status: &mut dyn StatusBackend,
1581        only_logs: bool,
1582    ) -> Result<u32> {
1583        let root = match self.output_path {
1584            Some(ref p) => p,
1585
1586            None => {
1587                // We were told not to write anything!
1588                return Ok(0);
1589            }
1590        };
1591
1592        let mut n_skipped_intermediates = 0;
1593
1594        for (name, file) in &*self.bs.mem.files.borrow() {
1595            if name == self.bs.mem.stdout_key() {
1596                continue;
1597            }
1598
1599            let sname = name;
1600            let summ = self.bs.events.get_mut(name).unwrap();
1601
1602            if !only_logs && (self.output_format == OutputFormat::Aux) {
1603                // In this mode we're only writing the .aux file. I initially
1604                // wanted to be clever-ish and output all auxiliary-type
1605                // files, but doing so ended up causing non-obvious problems
1606                // for my use case, which involves using Ninja to manage
1607                // dependencies.
1608                if !sname.ends_with(".aux") {
1609                    continue;
1610                }
1611            } else if !self.keep_intermediates
1612                && (summ.access_pattern != AccessPattern::Written
1613                    || ALWAYS_INTERMEDIATE_EXTENSIONS
1614                        .iter()
1615                        .any(|ext| sname.ends_with(ext)))
1616            {
1617                n_skipped_intermediates += 1;
1618                continue;
1619            }
1620
1621            let is_logfile = sname.ends_with(".log") || sname.ends_with(".blg");
1622
1623            if is_logfile && !self.keep_logs {
1624                continue;
1625            }
1626
1627            if !is_logfile && only_logs {
1628                continue;
1629            }
1630
1631            if file.data.is_empty() {
1632                status.note_highlighted(
1633                    "Not writing ",
1634                    &format!("`{sname}`"),
1635                    ": it would be empty.",
1636                );
1637                continue;
1638            }
1639
1640            let real_path = root.join(name);
1641            let byte_len = Byte::from_u128(file.data.len() as u128).unwrap();
1642            status.note_highlighted(
1643                "Writing ",
1644                &format!("`{}`", real_path.display()),
1645                &format!(" ({})", byte_len.get_appropriate_unit(UnitType::Binary)),
1646            );
1647
1648            if let Some(parent) = real_path.parent() {
1649                std::fs::create_dir_all(parent)?;
1650            }
1651
1652            let mut f = File::create(&real_path)?;
1653            f.write_all(&file.data)?;
1654            summ.got_written_to_disk = true;
1655
1656            if let Some(ref mut mf_dest) = mf_dest_maybe {
1657                // Maybe it'd be better to have this just be a warning? But if
1658                // the program is supposed to write the file, you don't want
1659                // it exiting with error code zero if it couldn't do that
1660                // successfully.
1661                //
1662                // Not quite sure why, but I can't pull out the target path
1663                // here. I think 'self' is borrow inside the loop?
1664                ctry!(write!(mf_dest, "{} ", real_path.display()); "couldn't write to Makefile-rules file");
1665            }
1666        }
1667
1668        Ok(n_skipped_intermediates)
1669    }
1670
1671    /// The "default" pass really runs a bunch of sub-passes. It is a "Do What
1672    /// I Mean" operation.
1673    fn default_pass(&mut self, bibtex_first: bool, status: &mut dyn StatusBackend) -> Result<i32> {
1674        // If `bibtex_first` is true, we start by running bibtex, and run
1675        // proceed with the standard rerun logic. Otherwise, we run TeX,
1676        // auto-detect whether we need to run bibtex, possibly run it, and
1677        // then go ahead.
1678
1679        let mut warnings = None;
1680        let mut rerun_result = if bibtex_first {
1681            self.bibtex_pass(status)?;
1682            Some(RerunReason::Bibtex)
1683        } else {
1684            warnings = self.tex_pass(None, status)?;
1685            let maybe_biber = self.check_biber_requirement(status)?;
1686
1687            if let Some(biber) = maybe_biber {
1688                self.bs.external_tool_pass(&biber, status)?;
1689                Some(RerunReason::Biber)
1690            } else if self.is_bibtex_needed() {
1691                self.bibtex_pass(status)?;
1692                Some(RerunReason::Bibtex)
1693            } else {
1694                self.is_rerun_needed(status)
1695            }
1696        };
1697
1698        // Now we enter the main rerun loop.
1699
1700        let (pass_count, reruns_fixed) = match self.tex_rerun_specification {
1701            Some(n) => (n, true),
1702            None => (DEFAULT_MAX_TEX_PASSES, false),
1703        };
1704
1705        for i in 0..pass_count {
1706            let rerun_explanation = if reruns_fixed {
1707                "I was told to".to_owned()
1708            } else {
1709                match rerun_result {
1710                    Some(RerunReason::Biber) => "biber was run".to_owned(),
1711                    Some(RerunReason::Bibtex) => "bibtex was run".to_owned(),
1712                    Some(RerunReason::FileChange(ref s)) => format!("\"{s}\" changed"),
1713                    None => break,
1714                }
1715            };
1716
1717            // We're restarting the engine afresh, so clear the read inputs.
1718            // We do *not* clear the entire HashMap since we want to remember,
1719            // e.g., that bibtex wrote out the .bbl file, since that way we
1720            // can later know that it's OK to delete. I am not super confident
1721            // that the access_pattern data can just be left as-is when we do
1722            // this, but, uh, so far it seems to work.
1723            for summ in self.bs.events.values_mut() {
1724                summ.read_digest = None;
1725            }
1726
1727            warnings = self.tex_pass(Some(&rerun_explanation), status)?;
1728
1729            if !reruns_fixed {
1730                rerun_result = self.is_rerun_needed(status);
1731
1732                if rerun_result.is_some() && i == DEFAULT_MAX_TEX_PASSES - 1 {
1733                    tt_warning!(
1734                        status,
1735                        "TeX rerun seems needed, but stopping at {} passes",
1736                        DEFAULT_MAX_TEX_PASSES
1737                    );
1738                    break;
1739                }
1740            }
1741        }
1742
1743        // The last tex pass generated warnings.
1744        if let Some(warnings) = warnings {
1745            tt_warning!(status, "{}", warnings);
1746        }
1747
1748        // And finally, xdvipdfmx or spx2html. Maybe.
1749
1750        if let OutputFormat::Pdf = self.output_format {
1751            self.xdvipdfmx_pass(status)?;
1752        } else if let OutputFormat::Html = self.output_format {
1753            self.spx2html_pass(status)?;
1754        }
1755
1756        Ok(0)
1757    }
1758
1759    fn is_bibtex_needed(&self) -> bool {
1760        const BIBDATA: &[u8] = b"\\bibdata";
1761
1762        self.bs
1763            .mem
1764            .files
1765            .borrow()
1766            .get(&self.tex_aux_path)
1767            .map(|file| {
1768                // We used to use aho-corasick crate here, but it was removed to reduce the code
1769                // size.
1770                file.data.windows(BIBDATA.len()).any(|s| s == BIBDATA)
1771            })
1772            .unwrap_or(false)
1773    }
1774
1775    /// Use the TeX engine to generate a format file.
1776    #[allow(clippy::manual_split_once)] // requires Rust 1.52 (note that we don't actually define our MSRV)
1777    fn make_format_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1778        // PathBuf.file_stem() doesn't do what we want since it only strips
1779        // one extension. As of 1.17, the compiler needs a type annotation for
1780        // some reason, which is why we use the `r` variable.
1781        let r: Result<&str> = self.format_name.split('.').next().ok_or_else(|| {
1782            ErrorKind::Msg(format!(
1783                "incomprehensible format file name \"{}\"",
1784                self.format_name
1785            ))
1786            .into()
1787        });
1788        let stem = r?;
1789
1790        let result = {
1791            self.bs
1792                .enter_format_mode(&format!("tectonic-format-{stem}.tex"));
1793            let mut launcher =
1794                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1795            let r = TexEngine::default()
1796                .halt_on_error_mode(true)
1797                .initex_mode(true)
1798                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1799                .process(&mut launcher, "UNUSED.fmt", "texput");
1800            self.bs.leave_format_mode();
1801            r
1802        };
1803
1804        match result {
1805            Ok(TexOutcome::Spotless) => {}
1806            Ok(TexOutcome::Warnings) => {
1807                tt_warning!(status, "warnings were issued by the TeX engine; use --print and/or --keep-logs for details.");
1808            }
1809            Ok(TexOutcome::Errors) => {
1810                tt_error!(status, "errors were issued by the TeX engine; use --print and/or --keep-logs for details.");
1811                return Err(ErrorKind::Msg("unhandled TeX engine error".to_owned()).into());
1812            }
1813            Err(e) => {
1814                return Err(e.into());
1815            }
1816        }
1817
1818        // Now we can write the format file to its special location. In
1819        // principle we could stream the format file directly to the staging
1820        // area as we ran the TeX engine, but we don't bother.
1821
1822        for (name, file) in &*self.bs.mem.files.borrow() {
1823            if name == self.bs.mem.stdout_key() {
1824                continue;
1825            }
1826
1827            let sname = name;
1828
1829            if !sname.ends_with(".fmt") {
1830                continue;
1831            }
1832
1833            // Note that we intentionally pass 'stem', not 'name'.
1834            ctry!(self.bs.format_cache.write_format(stem, &file.data, status); "cannot write format file {}", sname);
1835        }
1836
1837        // All done. Clear the memory layer since this was a special preparatory step.
1838        self.bs.mem.files.borrow_mut().clear();
1839
1840        Ok(0)
1841    }
1842
1843    /// Run one pass of the TeX engine.
1844    fn tex_pass(
1845        &mut self,
1846        rerun_explanation: Option<&str>,
1847        status: &mut dyn StatusBackend,
1848    ) -> Result<Option<&'static str>> {
1849        let result = {
1850            if let Some(s) = rerun_explanation {
1851                status.note_highlighted("Rerunning ", "TeX", &format!(" because {s} ..."));
1852            } else {
1853                status.note_highlighted("Running ", "TeX", " ...");
1854            }
1855
1856            let mut launcher =
1857                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1858
1859            // In deterministic mode, we stub a few aspects of the environment.
1860            // They default to a "realistic" view, but we override them with static values:
1861            if self.unstables.deterministic_mode {
1862                launcher.with_expose_absolute_paths(false);
1863                launcher.with_mtime_override(Some(
1864                    self.build_date
1865                        .duration_since(SystemTime::UNIX_EPOCH)
1866                        .map(|x| x.as_secs() as i64)
1867                        .expect("invalid build date in deterministic mode"),
1868                ));
1869            }
1870
1871            TexEngine::default()
1872                .halt_on_error_mode(!self.unstables.continue_on_errors)
1873                .initex_mode(self.output_format == OutputFormat::Format)
1874                .synctex(self.synctex_enabled)
1875                .semantic_pagination(self.output_format == OutputFormat::Html)
1876                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1877                .build_date(self.build_date)
1878                .process(
1879                    &mut launcher,
1880                    &self.format_name,
1881                    &self.primary_input_tex_path,
1882                )
1883        };
1884
1885        let warnings = match result {
1886            Ok(TexOutcome::Spotless) => None,
1887            Ok(TexOutcome::Warnings) =>
1888                    Some("warnings were issued by the TeX engine; use --print and/or --keep-logs for details."),
1889            Ok(TexOutcome::Errors) =>
1890                    Some("errors were issued by the TeX engine, but were ignored; \
1891                         use --print and/or --keep-logs for details."),
1892            Err(e) =>
1893                return Err(e.into()),
1894        };
1895
1896        if !self.bs.mem.files.borrow().contains_key(&self.tex_xdv_path) {
1897            // TeX did not produce the expected output file
1898            tt_warning!(
1899                status,
1900                "did not produce \"{}\"; this may mean that your document is empty",
1901                self.tex_xdv_path
1902            )
1903        }
1904
1905        Ok(warnings)
1906    }
1907
1908    // Run Bibtex process for one .aux file.
1909    fn bibtex_pass_for_one_aux_file(
1910        &mut self,
1911        status: &mut dyn StatusBackend,
1912        aux_file: &String,
1913    ) -> Result<i32> {
1914        let result = {
1915            status.note_highlighted("Running ", "BibTeX", &format!(" on {aux_file} ..."));
1916            let mut launcher =
1917                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1918            let mut engine = BibtexEngine::new();
1919            engine.process(&mut launcher, aux_file, &self.unstables)
1920        };
1921
1922        match result {
1923            Ok(TexOutcome::Spotless) => {}
1924            Ok(TexOutcome::Warnings) => {
1925                tt_note!(
1926                    status,
1927                    "warnings were issued by BibTeX; use --print and/or --keep-logs for details."
1928                );
1929            }
1930            Ok(TexOutcome::Errors) => {
1931                tt_warning!(
1932                    status,
1933                    "errors were issued by BibTeX, but were ignored; \
1934                     use --print and/or --keep-logs for details."
1935                );
1936            }
1937            Err(e) => {
1938                return Err(e.chain_err(|| ErrorKind::EngineError("BibTeX")));
1939            }
1940        }
1941
1942        Ok(0)
1943    }
1944
1945    fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1946        let mut aux_files = vec![self.tex_aux_path.clone()];
1947
1948        // find other .aux files generated by tex_pass
1949        for f in self.bs.get_intermediate_file_names() {
1950            if f.ends_with(".aux") && f != self.tex_aux_path {
1951                aux_files.push(f);
1952            }
1953        }
1954
1955        for f in aux_files {
1956            let _r = self.bibtex_pass_for_one_aux_file(status, &f)?;
1957        }
1958
1959        Ok(0)
1960    }
1961
1962    fn xdvipdfmx_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1963        {
1964            status.note_highlighted("Running ", "xdvipdfmx", " ...");
1965
1966            let mut launcher =
1967                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1968            let mut engine = XdvipdfmxEngine::default();
1969
1970            engine.build_date(self.build_date);
1971
1972            if let Some(ref ps) = self.unstables.paper_size {
1973                engine.paper_spec(ps.clone());
1974            }
1975
1976            engine.process(&mut launcher, &self.tex_xdv_path, &self.tex_pdf_path)?;
1977        }
1978
1979        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
1980        Ok(0)
1981    }
1982
1983    fn spx2html_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1984        {
1985            let mut engine = Spx2HtmlEngine::default();
1986
1987            match (self.html_emit_files, self.output_path.as_ref()) {
1988                (true, Some(p)) => engine.output_base(p),
1989                (false, _) => engine.do_not_emit_files(),
1990                (true, None) => return Err(errmsg!("HTML output must be saved directly to disk")),
1991            };
1992
1993            if let Some(p) = self.html_assets_spec_path.as_ref() {
1994                engine.assets_spec_path(p);
1995            } else if !self.html_emit_assets {
1996                engine.do_not_emit_assets();
1997            }
1998
1999            if let Some(a) = self.html_precomputed_assets.as_ref() {
2000                engine.precomputed_assets(a.clone());
2001            }
2002
2003            status.note_highlighted("Running ", "spx2html", " ...");
2004            engine.process_to_filesystem(&mut self.bs, status, &self.tex_xdv_path)?;
2005        }
2006
2007        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2008        Ok(0)
2009    }
2010
2011    /// Get what was printed to standard output, if anything.
2012    pub fn get_stdout_content(&self) -> Vec<u8> {
2013        self.bs
2014            .mem
2015            .files
2016            .borrow()
2017            .get(self.bs.mem.stdout_key())
2018            .map(|mfi| mfi.data.clone())
2019            .unwrap_or_default()
2020    }
2021
2022    /// Consume this session and return the current set of files in memory.
2023    ///
2024    /// This convenience function tries to help with the annoyances of getting
2025    /// access to the in-memory file data after the engine has been run.
2026    pub fn into_file_data(self) -> MemoryFileCollection {
2027        Rc::try_unwrap(self.bs.mem.files)
2028            .expect("multiple strong refs to MemoryIo files")
2029            .into_inner()
2030    }
2031
2032    /// See if we need to run `biber`, and parse the `.run.xml` file from the
2033    /// `loqreq` package to figure out what files `biber` needs. This
2034    /// functionality should probably become more generic, but I don't have a
2035    /// great sense as to how widely-used `logreq` is.
2036    fn check_biber_requirement(
2037        &self,
2038        status: &mut dyn StatusBackend,
2039    ) -> Result<Option<ExternalToolPass>> {
2040        // Is there a `.run.xml` file?
2041
2042        let mut run_xml_path = PathBuf::from(&self.primary_input_tex_path);
2043        run_xml_path.set_extension("run.xml");
2044        let run_xml_path = run_xml_path.display().to_string();
2045
2046        let mem_files = &*self.bs.mem.files.borrow();
2047        let run_xml_entry = match mem_files.get(&run_xml_path) {
2048            Some(e) => e,
2049            None => return Ok(None),
2050        };
2051
2052        // Yes, there is. Set up to potentially run biber. For testing support,
2053        // we let the rig specify a custom executable to use, which lets us
2054        // exercise different pieces of the external-tool behavior.
2055
2056        let s = (
2057            crate::config::is_config_test_mode_activated(),
2058            std::env::var("TECTONIC_TEST_FAKE_BIBER"),
2059        );
2060
2061        let mut argv = match s {
2062            (true, Ok(text)) if !text.trim().is_empty() => {
2063                text.split_whitespace().map(|x| x.to_owned()).collect()
2064            }
2065            // when `TECTONIC_TEST_FAKE_BIBER` is empty, proceed to discover
2066            // the biber binary as follows.
2067            _ => vec!["biber".to_owned()],
2068        };
2069
2070        // Moreover, we allow an override of the biber executable, to cope with
2071        // possible version mismatch of the bundled biblatex package, as filed
2072        // in issue #893. Since PR #1103, the `tectonic-biber` override can
2073        // also be invoked with `tectonic -X biber`.
2074        let find_by = |binary_name: &str| -> Option<String> {
2075            if let Ok(pathbuf) = which(binary_name) {
2076                if let Some(biber_path) = pathbuf.to_str() {
2077                    return Some(biber_path.to_owned());
2078                }
2079            }
2080            None
2081        };
2082
2083        let mut use_tectonic_biber_override = false;
2084        for binary_name in ["./tectonic-biber", "tectonic-biber"] {
2085            if let Some(biber_path) = find_by(binary_name) {
2086                argv = vec![biber_path];
2087                use_tectonic_biber_override = true;
2088                break;
2089            }
2090        }
2091
2092        let mut extra_requires = HashSet::new();
2093
2094        // Do a sketchy XML parse to see if there's info about a biber
2095        // invocation.
2096
2097        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
2098        enum State {
2099            /// Searching for the biber section
2100            Searching,
2101
2102            /// In a <binary> element. Will its value be "biber"??!?
2103            InBinaryName,
2104
2105            /// In the <cmdline> part of the biber section.
2106            InBiberCmdline,
2107
2108            /// About to read an argument to the biber command.
2109            InBiberArgument,
2110
2111            /// Reading through the post-cmdline part of the biber section.
2112            InBiberRemainder,
2113
2114            /// In a "requirement" section like <input> or <requires> that contains
2115            /// filenames we should provide
2116            InBiberRequirementSection,
2117
2118            /// In a <file> requirement
2119            InBiberFileRequirement,
2120        }
2121
2122        let curs = Cursor::new(&run_xml_entry.data[..]);
2123        let mut reader = NsReader::from_reader(curs);
2124        let mut buf = Vec::new();
2125        let mut state = State::Searching;
2126
2127        loop {
2128            let event = ctry!(
2129                reader.read_event_into(&mut buf);
2130                "error parsing run.xml file"
2131            );
2132
2133            if let Event::Eof = event {
2134                break;
2135            }
2136
2137            match (state, event) {
2138                (State::Searching, Event::Start(ref e)) => {
2139                    let name = reader
2140                        .decoder()
2141                        .decode(e.local_name().into_inner())
2142                        .map_err(quick_xml::Error::from)?;
2143
2144                    if name == "binary" {
2145                        state = State::InBinaryName;
2146                    }
2147                }
2148
2149                (State::InBinaryName, Event::Text(ref e)) => {
2150                    let text = e.unescape()?;
2151
2152                    state = if &text == "biber" {
2153                        State::InBiberCmdline
2154                    } else {
2155                        State::Searching
2156                    };
2157                }
2158
2159                (State::InBinaryName, _) => {
2160                    state = State::Searching;
2161                }
2162
2163                (State::InBiberCmdline, Event::Start(ref e)) => {
2164                    let name = reader
2165                        .decoder()
2166                        .decode(e.local_name().into_inner())
2167                        .map_err(quick_xml::Error::from)?;
2168
2169                    // Note that the "infile" might be `foo` without the `.bcf`
2170                    // extension, so we can't use it for file-finding.
2171                    state = match &*name {
2172                        "infile" | "outfile" | "option" => State::InBiberArgument,
2173                        _ => State::InBiberRemainder,
2174                    }
2175                }
2176
2177                (State::InBiberCmdline, Event::End(ref e)) => {
2178                    let name = reader
2179                        .decoder()
2180                        .decode(e.local_name().into_inner())
2181                        .map_err(quick_xml::Error::from)?;
2182
2183                    if name == "cmdline" {
2184                        state = State::InBiberRemainder;
2185                    }
2186                }
2187
2188                (State::InBiberArgument, Event::Text(ref e)) => {
2189                    argv.push(e.unescape()?.to_string());
2190                    state = State::InBiberCmdline;
2191                }
2192
2193                (State::InBiberRemainder, Event::Start(ref e)) => {
2194                    let name = reader
2195                        .decoder()
2196                        .decode(e.local_name().into_inner())
2197                        .map_err(quick_xml::Error::from)?;
2198
2199                    state = match &*name {
2200                        "input" | "requires" => State::InBiberRequirementSection,
2201                        _ => State::InBiberRemainder,
2202                    }
2203                }
2204
2205                (State::InBiberRemainder, Event::End(ref e)) => {
2206                    let name = reader
2207                        .decoder()
2208                        .decode(e.local_name().into_inner())
2209                        .map_err(quick_xml::Error::from)?;
2210
2211                    if name == "external" {
2212                        break;
2213                    }
2214                }
2215
2216                (State::InBiberRequirementSection, Event::Start(ref e)) => {
2217                    let name = reader
2218                        .decoder()
2219                        .decode(e.local_name().into_inner())
2220                        .map_err(quick_xml::Error::from)?;
2221
2222                    state = match &*name {
2223                        "file" => State::InBiberFileRequirement,
2224                        _ => State::InBiberRemainder,
2225                    }
2226                }
2227
2228                (State::InBiberRequirementSection, Event::End(ref e)) => {
2229                    let name = reader
2230                        .decoder()
2231                        .decode(e.local_name().into_inner())
2232                        .map_err(quick_xml::Error::from)?;
2233
2234                    if name == "input" || name == "requires" {
2235                        state = State::InBiberRemainder;
2236                    }
2237                }
2238
2239                (State::InBiberFileRequirement, Event::Text(ref e)) => {
2240                    extra_requires.insert(e.unescape()?.to_string());
2241                    state = State::InBiberRequirementSection;
2242                }
2243
2244                (State::InBiberFileRequirement, _) => {
2245                    state = State::InBiberRequirementSection;
2246                }
2247
2248                _ => {}
2249            }
2250        }
2251
2252        // All done!
2253
2254        Ok(if state == State::Searching {
2255            // No biber invocation, in the end.
2256            None
2257        } else {
2258            if use_tectonic_biber_override {
2259                tt_note!(status, "using `tectonic-biber`, found at {}", argv[0]);
2260            }
2261            Some(ExternalToolPass {
2262                argv,
2263                extra_requires,
2264            })
2265        })
2266    }
2267}