Skip to main content

tectonic/
driver.rs

1// Copyright 2018-2022 the Tectonic Project
2// Licensed under the MIT License.
3
4//! The high-level Tectonic document processing interface.
5//!
6//! The main struct in this module is [`ProcessingSession`], which knows how to
7//! run (and re-run if necessary) the various engines in the right order. Such a
8//! session can be created with a [`ProcessingSessionBuilder`], which you might
9//! obtain from a [`tectonic_docmodel::document::Document`] using the
10//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re
11//! using the Tectonic document model. You can set one up manually if not.
12//!
13//! For an example of how to use this module, see `src/bin/tectonic/main.rs`,
14//! which contains tectonic's main CLI program.
15
16use byte_unit::{Byte, UnitType};
17use quick_xml::{events::Event, NsReader};
18use std::{
19    collections::{HashMap, HashSet},
20    fs::File,
21    io::{Cursor, Read, Write},
22    path::{Path, PathBuf},
23    process::Command,
24    rc::Rc,
25    result::Result as StdResult,
26    str::FromStr,
27    time::{Duration, SystemTime},
28};
29use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError};
30use tectonic_bundles::Bundle;
31use tectonic_engine_spx2html::AssetSpecification;
32use tectonic_io_base::{
33    digest::DigestData,
34    filesystem::{FilesystemIo, FilesystemPrimaryInputIo},
35    stdstreams::{BufferedPrimaryIo, GenuineStdoutIo},
36    InputHandle, IoProvider, OpenResult, OutputHandle,
37};
38use which::which;
39
40use crate::{
41    ctry, errmsg,
42    errors::{ChainErrCompatExt, ErrorKind, Result},
43    io::{
44        format_cache::FormatCache,
45        memory::{MemoryFileCollection, MemoryIo},
46        InputOrigin,
47    },
48    status::StatusBackend,
49    tt_error, tt_note, tt_warning,
50    unstable_opts::UnstableOptions,
51    BibtexEngine, Spx2HtmlEngine, TexEngine, TexOutcome, XdvipdfmxEngine,
52};
53
/// The ways in which the underlying engines may have accessed a file. The
/// compound states `ReadThenWritten` and `WrittenThenRead` are terminal: once
/// a file reaches either of them, its pattern does not evolve further.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AccessPattern {
    /// The file has only ever been read.
    Read,

    /// The file has only ever been written, which suggests that it is a
    /// final output of the processing session.
    Written,

    /// The file was read and subsequently written: a "circular" access
    /// pattern. Multiple passes of an engine will result in outputs that
    /// change if this file's contents change, or if the file did not exist
    /// at the time of the first pass.
    ReadThenWritten,

    /// The file was written and subsequently read: a "temporary" access
    /// pattern. Such a file is likely a temporary buffer that is not of
    /// interest to the user.
    WrittenThenRead,
}
77
78/// A summary of the I/O that happened on a file. We record its access
79/// pattern; where it came from, if it was used as an input; the cryptographic
80/// digest of the file when it was last read; and the cryptographic digest of
81/// the file as it was last written.
82#[derive(Clone, Debug, Eq, PartialEq)]
83struct FileSummary {
84    access_pattern: AccessPattern,
85
86    /// If this file was read, where did it come from?
87    pub input_origin: InputOrigin,
88
89    /// If this file was read, this is the digest of its contents at the time it was *first* read.
90    /// The "first" is significant for files that were read and then written (for example, `.aux`
91    /// files).
92    ///
93    /// There's some chance that this will be `None` even if the file was read. Tectonic makes an
94    /// effort to compute the digest as the data is being read from the file, but this can fail if
95    /// tex decides to seek in the file as it is being written.
96    pub read_digest: Option<DigestData>,
97
98    /// If this file was written, this is the digest of its contents at the time it was last
99    /// written.
100    pub write_digest: Option<DigestData>,
101
102    got_written_to_disk: bool,
103}
104
105impl FileSummary {
106    fn new(access_pattern: AccessPattern, input_origin: InputOrigin) -> FileSummary {
107        FileSummary {
108            access_pattern,
109            input_origin,
110            read_digest: None,
111            write_digest: None,
112            got_written_to_disk: false,
113        }
114    }
115}
116
/// The kinds of output file that tectonic knows how to produce. The default
/// is [`OutputFormat::Pdf`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum OutputFormat {
    /// A '.aux' file.
    Aux,
    /// A '.html' file.
    Html,
    /// An extended DVI file.
    Xdv,
    /// A '.pdf' file.
    #[default]
    Pdf,
    /// A '.fmt' file, for initializing the TeX engine.
    Format,
}

impl FromStr for OutputFormat {
    type Err = &'static str;

    /// Parse one of the exact, lowercase names `aux`, `html`, `xdv`, `pdf`
    /// or `fmt`. Any other text yields a static error message.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        let format = match a_str {
            "aux" => Self::Aux,
            "html" => Self::Html,
            "xdv" => Self::Xdv,
            "pdf" => Self::Pdf,
            "fmt" => Self::Format,
            _ => return Err("unsupported or unknown format"),
        };

        Ok(format)
    }
}
147
/// The kinds of "pass" that [`ProcessingSession`] knows how to run. See
/// [`ProcessingSession::run`] for more details.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum PassSetting {
    /// The default pass, which repeatedly runs TeX and BibTeX until it doesn't need to any more.
    #[default]
    Default,
    /// Just run the TeX engine once.
    Tex,
    /// Like the default pass, but runs BibTeX once first, before doing anything else.
    BibtexFirst,
}

impl FromStr for PassSetting {
    type Err = &'static str;

    /// Parse one of the exact names `default`, `bibtex_first` or `tex`. Any
    /// other text yields a static error message.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        let setting = match a_str {
            "default" => Self::Default,
            "bibtex_first" => Self::BibtexFirst,
            "tex" => Self::Tex,
            _ => return Err("unsupported or unknown pass setting"),
        };

        Ok(setting)
    }
}
173
/// The possible sources of the "primary input". The default is standard
/// input.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum PrimaryInputMode {
    /// The standard input of this process.
    #[default]
    Stdin,

    /// A file at the given filesystem path.
    Path(PathBuf),

    /// A buffer of bytes held in memory.
    Buffer(Vec<u8>),
}
187
/// The possible landing places for output files.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum OutputDestination {
    /// The "sensible" default: files land in the same directory as the input
    /// file, or in the current working directory if the input is something
    /// without a path (such as standard input).
    #[default]
    Default,

    /// Files land in this particular directory.
    Path(PathBuf),

    /// Files are not written to disk at all. The code running the engine
    /// should examine the memory layer of the I/O stack to obtain the output
    /// files.
    Nowhere,
}
204
/// The subset of the driver state that is captured when running a C/C++ engine.
///
/// The main purpose of this type is to implement the [`DriverHooks`] trait,
/// which is defined by the `tectonic_bridge_core` crate and defines the
/// interface that the C/C++ processing engines can use to access the outside
/// world. While these engines are running, they hold a mutable reference to
/// these data, so it is helpful to separate them out into a sub-structure of
/// the larger [`ProcessingSession`] type.
///
/// Due to the needs of the C/C++ engines, this means that [`BridgeState`] must
/// hold the fully-prepared I/O stack information as well as the "event"
/// information that helps the driver implement the rerun logic.
struct BridgeState {
    /// I/O for the primary input source. This is boxed since it can come
    /// from different sources: maybe a file, maybe an in-memory buffer, etc.
    primary_input: Box<dyn IoProvider>,

    /// I/O for the main backing bundle. This is boxed since there are several
    /// different bundle implementations that might be used at runtime.
    bundle: Box<dyn Bundle>,

    /// Memory buffering for files written during processing.
    mem: MemoryIo,

    /// The main filesystem backing for input files in the project.
    filesystem: FilesystemIo,

    /// Extra paths we search through for files. They are consulted in order,
    /// after `filesystem` and the shell-escape work directory but before the
    /// bundle.
    extra_search_paths: Vec<FilesystemIo>,

    /// Additional filesystem backing used if "shell escape" functionality is
    /// activated. If None, we take that to mean that shell-escape is
    /// disallowed. We have to use a persistent filesystem directory for this
    /// since some packages perform a whole series of shell-escape operations
    /// that assume continuity from one to the next.
    shell_escape_work: Option<FilesystemIo>,

    /// I/O for saving any generated format files.
    format_cache: FormatCache,

    /// Possible redirection of "standard output" writes to actual standard
    /// output. When present, this provider is consulted before all of the
    /// others in the provider cascade.
    genuine_stdout: Option<GenuineStdoutIo>,

    // BEGIN AWARE REPORTS PATCH
    /// When set, output files stream directly to the filesystem instead of
    /// buffering in `mem`, giving an O(1) memory footprint independent of
    /// document size (the XDV and PDF of a large report otherwise sit in
    /// RAM in their entirety). Tried before all other providers for output
    /// opens; read-back of written files is served by `filesystem`, which
    /// shares the same root.
    disk_outputs: Option<FilesystemIo>,
    // END AWARE REPORTS PATCH

    /// A possible alternative "primary input" when generating format files. If
    /// Some(), we're in format-file generation mode; in most cases this is
    /// None.
    format_primary: Option<BufferedPrimaryIo>,

    /// The I/O events that occurred while processing, keyed by file name.
    /// Standard output is tracked under the empty-string key.
    events: HashMap<String, FileSummary>,
}
267
268impl BridgeState {
269    /// Tell the IoProvider implementation of the bridge state to enter "format
270    /// mode", in which the "primary input" is fixed, based on the requested
271    /// format file name, and filesystem I/O is bypassed.
272    fn enter_format_mode(&mut self, format_file_name: &str) {
273        self.format_primary = Some(BufferedPrimaryIo::from_text(format!(
274            "\\input {format_file_name}"
275        )));
276    }
277
278    /// Leave "format mode".
279    fn leave_format_mode(&mut self) {
280        self.format_primary = None;
281    }
282
283    /// Invoke an external tool as a pass in the processing pipeline.
284    fn external_tool_pass(
285        &mut self,
286        tool: &ExternalToolPass,
287        status: &mut dyn StatusBackend,
288    ) -> Result<()> {
289        status.note_highlighted("Running external tool ", &tool.argv[0], " ...");
290
291        // Process the command arguments. Filenames appearing in the arguments
292        // are treated as "requirements" that will be placed in the tool's
293        // working directory.
294
295        let mut cmd = Command::new(&tool.argv[0]);
296        let mut read_files = tool.extra_requires.clone();
297
298        {
299            let mem_files = &*self.mem.files.borrow();
300
301            for arg in &tool.argv[1..] {
302                cmd.arg(arg);
303
304                if mem_files.contains_key(arg) {
305                    read_files.insert(arg.to_owned());
306                }
307            }
308        }
309
310        // Now that we're validated, write those files to disk so that the tool
311        // can actually use them.
312
313        let tempdir = ctry!(
314            tempfile::Builder::new().tempdir();
315            "can't create temporary directory for external tool"
316        );
317
318        {
319            for name in &read_files {
320                // If a relative parent is found in the file to open, this fn
321                // does not properly handle that. Thus, throw an error.
322                if name.contains("../") {
323                    return Err(errmsg!(
324                        "relative parent paths are not supported for the \
325                        external tool. Got path `{}`.",
326                        name
327                    ));
328                }
329
330                let mut ih = ctry!(
331                    self.input_open_name(name, status).must_exist();
332                    "can't open path `{}`", name
333                );
334
335                // If the input path is absolute, we don't need to create a
336                // version in the tempdir, and in fact the current
337                // implementation below will blow away the input file. However,
338                // we do want to try to open the input so that it gets
339                // registered with the I/O tracking system.
340
341                let path = Path::new(name);
342                if path.is_absolute() {
343                    continue;
344                }
345
346                let tool_path = tempdir.path().join(name);
347                let tool_parent = tool_path.parent().unwrap();
348
349                if tool_parent != tempdir.path() {
350                    ctry!(
351                        std::fs::create_dir_all(tool_parent);
352                        "failed to create sub directory `{}`", tool_parent.display()
353                    );
354                }
355                let mut f = ctry!(
356                    File::create(&tool_path);
357                    "failed to create file `{}`", tool_path.display()
358                );
359                ctry!(
360                    std::io::copy(&mut ih, &mut f);
361                    "failed to write file `{}`", tool_path.display()
362                );
363            }
364        }
365
366        // Now we can actually run the command.
367
368        let output = cmd.current_dir(tempdir.path()).output()?;
369
370        if let Some(0) = output.status.code() {
371        } else {
372            tt_error!(
373                status,
374                "the external tool exited with an error code; its stdout was:\n"
375            );
376            status.dump_error_logs(&output.stdout[..]);
377            tt_error!(status, "its stderr was:\n");
378            status.dump_error_logs(&output.stderr[..]);
379
380            return if let Some(n) = output.status.code() {
381                Err(errmsg!("the external tool exited with error code {}", n))
382            } else {
383                Err(errmsg!("the external tool was terminated by a signal"))
384            };
385        }
386
387        // Search for any files that the tool created, and import them into the
388        // memory layer.
389
390        for entry in std::fs::read_dir(tempdir.path())? {
391            let entry = entry?;
392
393            if !entry.file_type()?.is_file() {
394                continue;
395            }
396
397            if let Some(basename) = entry.file_name().to_str() {
398                if !self.mem.files.borrow().contains_key(basename) {
399                    let path = entry.path();
400                    let mut data = Vec::new();
401
402                    let mut f = ctry!(
403                        File::open(&path);
404                        "failed to open tool-created file `{}`", path.display()
405                    );
406                    ctry!(
407                        f.read_to_end(&mut data);
408                        "failed to read tool-created file `{}`", path.display()
409                    );
410
411                    self.mem.create_entry(basename, data);
412                    self.events.insert(
413                        basename.to_owned(),
414                        FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
415                    );
416                }
417            }
418        }
419
420        // Mark the input files as having been read, and we're done.
421
422        for name in &read_files {
423            let summ = self.events.get_mut(name).unwrap();
424            summ.access_pattern = match summ.access_pattern {
425                AccessPattern::Written => AccessPattern::WrittenThenRead,
426                c => c, // identity mapping makes sense for remaining options
427            };
428        }
429
430        Ok(())
431    }
432
433    // Get the names of all intermediate files which are generated from
434    // previous passes.
435    fn get_intermediate_file_names(&self) -> Vec<String> {
436        // Currently, we only consider files in memory as intermediate files.
437        return self.mem.files.borrow().keys().cloned().collect();
438    }
439}
440
// Ask a single provider to perform an open operation. Any outcome other than
// `OpenResult::NotAvailable` is returned from the enclosing function or
// closure straight away; `NotAvailable` falls through so that the caller can
// go on to try the next provider.
macro_rules! bridgestate_ioprovider_try {
    ($provider:expr, $($inner:tt)+) => {
        let outcome = $provider.$($inner)+;
        if !matches!(outcome, OpenResult::NotAvailable) {
            return outcome;
        }
    }
}
450
// Run an open operation against every provider in the I/O stack, in priority
// order, returning the first outcome that is not `OpenResult::NotAvailable`
// from the enclosing function or closure. If no provider can satisfy the
// request, `OpenResult::NotAvailable` is returned.
macro_rules! bridgestate_ioprovider_cascade {
    ($self:ident, $($inner:tt)+) => {
        if let Some(stdout) = $self.genuine_stdout.as_mut() {
            bridgestate_ioprovider_try!(stdout, $($inner)+);
        }

        // See enter_format_mode above. If creating a format file, the
        // substitute primary input is used and local filesystem I/O is
        // disabled.
        let use_fs = match $self.format_primary.as_mut() {
            Some(format_primary) => {
                bridgestate_ioprovider_try!(format_primary, $($inner)+);
                false
            }
            None => {
                bridgestate_ioprovider_try!($self.primary_input, $($inner)+);
                true
            }
        };

        bridgestate_ioprovider_try!($self.mem, $($inner)+);

        if use_fs {
            bridgestate_ioprovider_try!($self.filesystem, $($inner)+);

            // With this ordering, we are preventing files created by
            // shell-escape commands from overwriting/replacing source files.
            // This seems very much like the behavior we want, unless there are
            // some freaky shell-escape uses that depend on this behavior.
            if let Some(work) = $self.shell_escape_work.as_mut() {
                bridgestate_ioprovider_try!(work, $($inner)+);
            }

            // Extra search paths. This has higher priority than bundles but lower than current
            // working dir to support the use case of overriding broken bundles (see issue #816).
            for search_path in &mut $self.extra_search_paths {
                bridgestate_ioprovider_try!(search_path, $($inner)+);
            }
        }

        bridgestate_ioprovider_try!($self.bundle.as_ioprovider_mut(), $($inner)+);
        bridgestate_ioprovider_try!($self.format_cache, $($inner)+);

        return OpenResult::NotAvailable;
    }
}
493
494impl IoProvider for BridgeState {
495    fn output_open_name(&mut self, name: &str) -> OpenResult<OutputHandle> {
496        let r = (|| {
497            // BEGIN AWARE REPORTS PATCH
498            if let Some(ref mut p) = self.disk_outputs {
499                bridgestate_ioprovider_try!(p, output_open_name(name));
500            }
501            // END AWARE REPORTS PATCH
502            bridgestate_ioprovider_cascade!(self, output_open_name(name));
503        })();
504
505        if let OpenResult::Ok(_) = r {
506            if let Some(summ) = self.events.get_mut(name) {
507                summ.access_pattern = match summ.access_pattern {
508                    AccessPattern::Read => AccessPattern::ReadThenWritten,
509                    c => c, // identity mapping makes sense for remaining options
510                };
511            } else {
512                self.events.insert(
513                    name.to_owned(),
514                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
515                );
516            }
517        }
518
519        r
520    }
521
522    fn output_open_stdout(&mut self) -> OpenResult<OutputHandle> {
523        let r = (|| {
524            bridgestate_ioprovider_cascade!(self, output_open_stdout());
525        })();
526
527        // Life is easier if we track stdout in the same way that we do other
528        // output files.
529
530        if let OpenResult::Ok(_) = r {
531            if let Some(summ) = self.events.get_mut("") {
532                summ.access_pattern = match summ.access_pattern {
533                    AccessPattern::Read => AccessPattern::ReadThenWritten,
534                    c => c, // identity mapping makes sense for remaining options
535                };
536            } else {
537                self.events.insert(
538                    String::from(""),
539                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
540                );
541            }
542        }
543
544        r
545    }
546
547    fn input_open_name(
548        &mut self,
549        name: &str,
550        status: &mut dyn StatusBackend,
551    ) -> OpenResult<InputHandle> {
552        match self.input_open_name_with_abspath(name, status) {
553            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
554            OpenResult::Err(e) => OpenResult::Err(e),
555            OpenResult::NotAvailable => OpenResult::NotAvailable,
556        }
557    }
558
559    fn input_open_name_with_abspath(
560        &mut self,
561        name: &str,
562        status: &mut dyn StatusBackend,
563    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
564        let r = (|| {
565            bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status));
566        })();
567
568        match r {
569            OpenResult::Ok((ref ih, ref _path)) => {
570                if let Some(summ) = self.events.get_mut(name) {
571                    summ.access_pattern = match summ.access_pattern {
572                        AccessPattern::Written => AccessPattern::WrittenThenRead,
573                        c => c, // identity mapping makes sense for remaining options
574                    };
575                } else {
576                    self.events.insert(
577                        name.to_owned(),
578                        FileSummary::new(AccessPattern::Read, ih.origin()),
579                    );
580                }
581            }
582
583            OpenResult::NotAvailable => {
584                // For the purposes of file access pattern tracking, an attempt to
585                // open a nonexistent file counts as a read of a zero-size file. I
586                // don't see how such a file could have previously been written, but
587                // let's use the full update logic just in case.
588
589                if let Some(summ) = self.events.get_mut(name) {
590                    summ.access_pattern = match summ.access_pattern {
591                        AccessPattern::Written => AccessPattern::WrittenThenRead,
592                        c => c, // identity mapping makes sense for remaining options
593                    };
594                } else {
595                    // Unlike other cases, here we need to fill in the read_digest. `None`
596                    // is not an appropriate value since, if the file is written and then
597                    // read again later, the `None` will be overwritten; but what matters
598                    // is the contents of the file the very first time it was read.
599                    let mut fs = FileSummary::new(AccessPattern::Read, InputOrigin::NotInput);
600                    fs.read_digest = Some(DigestData::of_nothing());
601                    self.events.insert(name.to_owned(), fs);
602                }
603            }
604
605            OpenResult::Err(_) => {}
606        }
607
608        r
609    }
610
611    fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
612        match self.input_open_primary_with_abspath(status) {
613            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
614            OpenResult::Err(e) => OpenResult::Err(e),
615            OpenResult::NotAvailable => OpenResult::NotAvailable,
616        }
617    }
618
619    fn input_open_primary_with_abspath(
620        &mut self,
621        status: &mut dyn StatusBackend,
622    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
623        bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status));
624    }
625
626    fn input_open_format(
627        &mut self,
628        name: &str,
629        status: &mut dyn StatusBackend,
630    ) -> OpenResult<InputHandle> {
631        let r = (|| {
632            bridgestate_ioprovider_cascade!(self, input_open_format(name, status));
633        })();
634
635        if let OpenResult::Ok(ref ih) = r {
636            if let Some(summ) = self.events.get_mut(name) {
637                summ.access_pattern = match summ.access_pattern {
638                    AccessPattern::Written => AccessPattern::WrittenThenRead,
639                    c => c, // identity mapping makes sense for remaining options
640                };
641            } else {
642                self.events.insert(
643                    name.to_owned(),
644                    FileSummary::new(AccessPattern::Read, ih.origin()),
645                );
646            }
647        }
648
649        r
650    }
651}
652
653impl DriverHooks for BridgeState {
654    fn io(&mut self) -> &mut dyn IoProvider {
655        self
656    }
657
658    fn event_output_closed(&mut self, name: String, digest: DigestData) {
659        let summ = self
660            .events
661            .get_mut(&name)
662            .expect("closing file that wasn't opened?");
663        summ.write_digest = Some(digest);
664    }
665
666    fn event_input_closed(
667        &mut self,
668        name: String,
669        digest: Option<DigestData>,
670        _status: &mut dyn StatusBackend,
671    ) {
672        let summ = self
673            .events
674            .get_mut(&name)
675            .expect("closing file that wasn't opened?");
676
677        // It's what was in the file the *first* time that it was read that
678        // matters, so don't replace the read digest if it's already got one.
679
680        if summ.read_digest.is_none() {
681            summ.read_digest = digest;
682        }
683    }
684
685    fn sysrq_shell_escape(
686        &mut self,
687        command: &str,
688        status: &mut dyn StatusBackend,
689    ) -> StdResult<(), SystemRequestError> {
690        #[cfg(unix)]
691        const SHELL: &[&str] = &["sh", "-c"];
692
693        #[cfg(windows)]
694        const SHELL: &[&str] = &["cmd.exe", "/c"];
695
696        // Write any TeX-created files in the memory cache to the shell-escape
697        // working directory, since the shell-escape program may need to use
698        // them. (This is the case for `minted`.) We basically just hope that
699        // nothing will want to access the actual TeX source, which will live in
700        // a different directory.
701        //
702        // This is suboptimally slow since we'll be rewriting the same files
703        // repeatedly for repeated shell-escape invocations, but I don't feel
704        // like optimizing that I/O right now. Shell-escape is a gnarly hack
705        // anyway!
706
707        if let Some(work) = self.shell_escape_work.as_ref() {
708            for (name, file) in &*self.mem.files.borrow() {
709                // If it's in the `mem` backend, it's of interest here ...
710                // unless it's stdout.
711                if name == self.mem.stdout_key() {
712                    continue;
713                }
714
715                let real_path = work.root().join(name);
716                if let Some(prefix) = real_path.parent() {
717                    std::fs::create_dir_all(prefix).map_err(|e| {
718                        tt_error!(status, "failed to create sub directory `{}`", prefix.display(); e.into());
719                        SystemRequestError::Failed
720                    })?;
721                }
722                let mut f = File::create(&real_path).map_err(|e| {
723                    tt_error!(status, "failed to create file `{}`", real_path.display(); e.into());
724                    SystemRequestError::Failed
725                })?;
726                f.write_all(&file.data).map_err(|e| {
727                    tt_error!(status, "failed to write file `{}`", real_path.display(); e.into());
728                    SystemRequestError::Failed
729                })?;
730            }
731
732            // Now we can actually run the command.
733
734            tt_note!(status, "running shell command: `{}`", command);
735
736            match Command::new(SHELL[0])
737                .args(&SHELL[1..])
738                .arg(command)
739                .current_dir(work.root())
740                .status()
741            {
742                Ok(s) => match s.code() {
743                    Some(0) => Ok(()),
744                    Some(n) => {
745                        tt_warning!(status, "command exited with error code {}", n);
746                        Err(SystemRequestError::Failed)
747                    }
748                    None => {
749                        tt_warning!(status, "command was terminated by signal");
750                        Err(SystemRequestError::Failed)
751                    }
752                },
753                Err(err) => {
754                    tt_warning!(status, "failed to run command"; err.into());
755                    Err(SystemRequestError::Failed)
756                }
757            }
758
759            // That's it! We shouldn't clean up here, because there might be
760            // multiple shell-escapes that build up in sequence, and any new
761            // files created by the shell-escape command will be picked up by
762            // the filesystem I/O.
763        } else {
764            // No shell-escape work directory. This "shouldn't happen" but means
765            // that shell-escape is supposed to be disabled anyway!
766            tt_error!(
767                status,
768                "the engine requested a shell-escape invocation but it's currently disabled"
769            );
770            Err(SystemRequestError::NotAllowed)
771        }
772    }
773}
774
/// The possible ways of handling shell-escape functionality.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
enum ShellEscapeMode {
    /// "Default" mode: shell-escape is disabled, unless it's been turned on in
    /// the unstable options, in which case it will be allowed through a
    /// temporary directory.
    #[default]
    Defaulted,

    /// Shell-escape is disabled, regardless of any unstable-option setting.
    Disabled,

    /// Shell-escape is enabled, with a temporary work directory that the
    /// processing session manages and deletes once processing completes.
    TempDir,

    /// Shell-escape is enabled, with a work directory that is managed
    /// externally. The processing session won't delete this directory.
    ExternallyManagedDir(PathBuf),
}
796
/// A custom extra pass that invokes an external tool.
///
/// This is bad for reproducibility but comes in handy.
#[derive(Debug)]
struct ExternalToolPass {
    /// The tool's command line: the first element is the program to run and
    /// the remaining elements are its arguments. Arguments that name files in
    /// the memory layer are also treated as files the tool requires.
    argv: Vec<String>,

    /// Names of additional files to place in the tool's working directory
    /// before it runs.
    extra_requires: HashSet<String>,
}
805
/// A builder-style interface for creating a [`ProcessingSession`].
///
/// This uses standard builder patterns. The `Default` implementation defaults
/// to restrictive security settings that disable all known-insecure features
/// that could be abused by untrusted inputs. Use
/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the
/// option to enable potentially-insecure features such as shell-escape.
#[derive(Default)]
pub struct ProcessingSessionBuilder {
    // The security settings; `new_with_security()` sets this field and leaves
    // every other field at its default.
    security: SecuritySettings,
    primary_input: PrimaryInputMode,
    tex_input_name: Option<String>,
    output_dest: OutputDestination,
    filesystem_root: Option<PathBuf>,
    format_name: Option<String>,
    format_cache_path: Option<PathBuf>,
    output_format: OutputFormat,
    makefile_output_path: Option<PathBuf>,
    hidden_input_paths: HashSet<PathBuf>,
    pass: PassSetting,
    reruns: Option<usize>,
    print_stdout: bool,
    bundle: Option<Box<dyn Bundle>>,
    keep_intermediates: bool,
    outputs_to_filesystem: bool,
    // BEGIN AWARE REPORTS PATCH
    /// Page-origin offset (x, y) in bp passed to the xdvipdfmx pass. When
    /// `None`, the engine keeps its default 1-inch origin. Aware Reports sets
    /// `(0.0, 0.0)` to place the TeX origin at the page corner.
    pdf_origin_offset: Option<(f64, f64)>,
    // END AWARE REPORTS PATCH
    keep_logs: bool,
    synctex: bool,
    build_date: Option<SystemTime>,
    unstables: UnstableOptions,
    shell_escape_mode: ShellEscapeMode,
    html_assets_spec_path: Option<String>,
    html_precomputed_assets: Option<AssetSpecification>,
    html_do_not_emit_files: bool,
    html_do_not_emit_assets: bool,
}
847
848impl ProcessingSessionBuilder {
849    /// Create a new builder with customized security settings.
850    pub fn new_with_security(security: SecuritySettings) -> Self {
851        ProcessingSessionBuilder {
852            security,
853            ..Default::default()
854        }
855    }
856
857    /// Sets the path to the primary input file.
858    ///
859    /// If a primary input path is not specified, we will default to reading it from stdin.
860    pub fn primary_input_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
861        self.primary_input = PrimaryInputMode::Path(p.as_ref().to_owned());
862        self
863    }
864
865    /// Sets the primary input to be a caller-specified buffer.
866    ///
867    /// If neither this nor a primary input path is specified, we will default
868    /// to reading the primary input from stdin.
869    pub fn primary_input_buffer(&mut self, buf: &[u8]) -> &mut Self {
870        self.primary_input = PrimaryInputMode::Buffer(buf.to_owned());
871        self
872    }
873
874    /// Sets the name of the main input file.
875    ///
876    /// This value will be used to infer the names of the output files; for example, if
877    /// `tex_input_name` is set to `"texput.tex"` then the pdf output file will be `"texput.pdf"`.
878    /// As such, this parameter is mandatory, even if the real input is coming from stdin (if it is
879    /// not provided, [`ProcessingSessionBuilder::create`] will panic).
880    pub fn tex_input_name(&mut self, s: &str) -> &mut Self {
881        self.tex_input_name = Some(s.to_owned());
882        self
883    }
884
885    /// Set the directory that serves as the root for finding files on disk.
886    ///
887    /// If unspecified, and there is a primary input file, the directory
888    /// containing that file will serve as the filesystem root. Otherwise, it is
889    /// set to the current directory.
890    pub fn filesystem_root<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
891        self.filesystem_root = Some(p.as_ref().to_owned());
892        self
893    }
894
895    /// A path to the directory where output files should be created.
896    ///
897    /// This will default to the directory containing `primary_input_path`, or
898    /// the current working directory if the primary input is coming from
899    /// stdin.
900    pub fn output_dir<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
901        self.output_dest = OutputDestination::Path(p.as_ref().to_owned());
902        self
903    }
904
905    /// Indicate that output files should not be written to disk.
906    ///
907    /// By default, output files will be written to the directory containing
908    /// `primary_input_path`, or the current working directory if the primary
909    /// input is coming from stdin.
910    pub fn do_not_write_output_files(&mut self) -> &mut Self {
911        self.output_dest = OutputDestination::Nowhere;
912        self
913    }
914
915    /// The name of the `.fmt` file used to initialize the TeX engine.
916    ///
917    /// This file does not necessarily have to exist already; it will be created
918    /// if it doesn't. This parameter is mandatory (if it is not provided,
919    /// [`ProcessingSessionBuilder::create`] will panic).
920    pub fn format_name(&mut self, p: &str) -> &mut Self {
921        self.format_name = Some(p.to_owned());
922        self
923    }
924
925    /// Sets the path to the format file cache.
926    ///
927    /// This is used to, well, cache format files, which are generated as
928    /// needed from the backing bundle. Defaults to the same directory as the
929    /// input file, or PWD if the input is a non-file (such as standard
930    /// input).
931    pub fn format_cache_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
932        self.format_cache_path = Some(p.as_ref().to_owned());
933        self
934    }
935
936    /// The type of output to create.
937    pub fn output_format(&mut self, f: OutputFormat) -> &mut Self {
938        self.output_format = f;
939        self
940    }
941
942    /// If set, a makefile will be written out at the given path.
943    pub fn makefile_output_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
944        self.makefile_output_path = Some(p.as_ref().to_owned());
945        self
946    }
947
948    /// Which kind of pass should the `ProcessingSession` run? Defaults to `PassSetting::Default`
949    /// (duh).
950    pub fn pass(&mut self, p: PassSetting) -> &mut Self {
951        self.pass = p;
952        self
953    }
954
955    /// If set, and if the pass is set to `PassSetting::Default`, the TeX engine will be re-run
956    /// *exactly* this many times.
957    ///
958    /// If `reruns` is unset, we will auto-detect how many times the TeX engine needs to be re-run.
959    pub fn reruns(&mut self, r: usize) -> &mut Self {
960        self.reruns = Some(r);
961        self
962    }
963
964    /// If set to `true`, stdout from the TeX engine will be forwarded to actual stdout. (By
965    /// default, it will be suppressed.)
966    pub fn print_stdout(&mut self, p: bool) -> &mut Self {
967        self.print_stdout = p;
968        self
969    }
970
971    /// Marks a path as hidden, meaning that the TeX engine will pretend that it doesn't exist in
972    /// the filesystem.
973    pub fn hide<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
974        self.hidden_input_paths.insert(p.as_ref().to_owned());
975        self
976    }
977
978    /// Sets the bundle, which the various engines will use for finding style files, font files,
979    /// etc.
980    pub fn bundle(&mut self, b: Box<dyn Bundle>) -> &mut Self {
981        self.bundle = Some(b);
982        self
983    }
984
985    /// If set to `true`, various intermediate files will be written out to the filesystem.
986    pub fn keep_intermediates(&mut self, k: bool) -> &mut Self {
987        self.keep_intermediates = k;
988        self
989    }
990
991    // BEGIN AWARE REPORTS PATCH
992    /// If set to `true`, output files (XDV, PDF, logs, aux files) are
993    /// streamed directly to the filesystem root as the engines write them,
994    /// instead of being buffered in memory until the session ends. This
995    /// keeps the session's memory footprint independent of the document
996    /// size. Files land in the filesystem root regardless of
997    /// `keep_intermediates`.
998    pub fn outputs_to_filesystem(&mut self, k: bool) -> &mut Self {
999        self.outputs_to_filesystem = k;
1000        self
1001    }
1002
1003    /// Set the PDF page-origin offset (x, y), in PostScript points (bp),
1004    /// applied during the xdvipdfmx pass. The engine's default is 1 inch
1005    /// (72.0, 72.0), the standard TeX origin; pass `(0.0, 0.0)` to place the
1006    /// TeX origin at the physical page corner.
1007    pub fn pdf_origin_offset(&mut self, x: f64, y: f64) -> &mut Self {
1008        self.pdf_origin_offset = Some((x, y));
1009        self
1010    }
1011    // END AWARE REPORTS PATCH
1012
1013    /// If set to `true`, '.log' and '.blg' files will be written out to the filesystem.
1014    pub fn keep_logs(&mut self, k: bool) -> &mut Self {
1015        self.keep_logs = k;
1016        self
1017    }
1018
1019    /// If set to `true`, tex files will be compiled using synctex information.
1020    pub fn synctex(&mut self, s: bool) -> &mut Self {
1021        self.synctex = s;
1022        self
1023    }
1024
1025    /// Sets the date and time of the processing session.
1026    /// See `TexEngine::build_date` for mor information.
1027    pub fn build_date(&mut self, date: SystemTime) -> &mut Self {
1028        self.build_date = Some(date);
1029        self
1030    }
1031
1032    /// Configures the date and time of the processing session from the environment:
1033    /// If `SOURCE_DATE_EPOCH` is set, it's used as the build date.
1034    /// If `force_deterministic` is set, we fall back to UNIX_EPOCH.
1035    /// Otherwise, we use the current system time.
1036    pub fn build_date_from_env(&mut self, force_deterministic: bool) -> &mut Self {
1037        let build_date_str = std::env::var("SOURCE_DATE_EPOCH").ok();
1038        let build_date = match (force_deterministic, build_date_str) {
1039            (_, Some(s)) => {
1040                let epoch = s
1041                    .parse::<u64>()
1042                    .expect("invalid SOURCE_DATE_EPOCH (not a number)");
1043
1044                SystemTime::UNIX_EPOCH
1045                    .checked_add(Duration::from_secs(epoch))
1046                    .expect("time overflow")
1047            }
1048            (true, None) => SystemTime::UNIX_EPOCH,
1049            (false, None) => SystemTime::now(),
1050        };
1051        self.build_date(build_date)
1052    }
1053
1054    /// Loads unstable options into the processing session
1055    pub fn unstables(&mut self, opts: UnstableOptions) -> &mut Self {
1056        self.unstables = opts;
1057        self
1058    }
1059
1060    /// Enable "shell escape" commands in the engines, and use the specified
1061    /// directory for shell-escape work. The caller is responsible for the
1062    /// creation and/or destruction of this directory. The default is to
1063    /// disable shell-escape unless the [`UnstableOptions`] say otherwise,
1064    /// in which case a driver-managed temporary directory will be used.
1065    pub fn shell_escape_with_work_dir<P: AsRef<Path>>(&mut self, path: P) -> &mut Self {
1066        if self.security.allow_shell_escape() {
1067            self.shell_escape_mode =
1068                ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned());
1069        }
1070        self
1071    }
1072
1073    /// Forcibly enable shell-escape mode with a temporary directory, overriding
1074    /// any [`UnstableOptions`] settings. The default is to disable shell-escape
1075    /// unless the [`UnstableOptions`] say otherwise, in which case a
1076    /// driver-managed temporary directory will be used.
1077    pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self {
1078        if self.security.allow_shell_escape() {
1079            self.shell_escape_mode = ShellEscapeMode::TempDir;
1080        }
1081        self
1082    }
1083
1084    /// Forcibly disable shell-escape mode, overriding any [`UnstableOptions`]
1085    /// settings. The default is to disable shell-escape unless the
1086    /// [`UnstableOptions`] say otherwise, in which case a driver-managed
1087    /// temporary directory will be used.
1088    pub fn shell_escape_disabled(&mut self) -> &mut Self {
1089        self.shell_escape_mode = ShellEscapeMode::Disabled;
1090        self
1091    }
1092
1093    /// When using HTML mode, emit an asset specification file instead of actual
1094    /// asset files.
1095    ///
1096    /// "Assets" are files like fonts and images that accompany the HTML output
1097    /// generated during processing. By default, these are emitted during
1098    /// processing. If this method is called, the assets will *not* be created.
1099    /// Instead, an "asset specification" file will be emitted to the given
1100    /// output path. This specification file contains the information needed to
1101    /// generate the assets upon a later invocation. Asset specification files
1102    /// can be merged, allowing the results of multiple separate TeX
1103    /// compilations to be synthesized into one HTML output tree.
1104    ///
1105    /// If the build does not use HTML mode, this setting has no effect.
1106    pub fn html_assets_spec_path<S: ToString>(&mut self, path: S) -> &mut Self {
1107        self.html_assets_spec_path = Some(path.to_string());
1108        self
1109    }
1110
1111    /// In HTML mode, use a precomputed asset specification.
1112    ///
1113    /// "Assets" are files like fonts and images that accompany the HTML output
1114    /// generated during processing. By default, the engine gathers these during
1115    /// processing and emits them at the end. After this method is used,
1116    /// however, it will generate HTML outputs assuming the information given in
1117    /// the asset specification given here. If the input calls for new assets or
1118    /// different options inconsistent with the specification, processing will
1119    /// abort with an error.
1120    ///
1121    /// The purpose of this mode is to allow for a unified set of assets to be
1122    /// created from multiple independent runs of the SPX-to-HTML stage. First,
1123    /// the different inputs should be processed independently, and their
1124    /// individual assets should saved. These should then be merged. Then the
1125    /// inputs should be reprocessed, all using the merged asset specification.
1126    /// In one — but only one — of these sessions, the assets should actually be
1127    /// emitted.
1128    pub fn html_precomputed_assets(&mut self, assets: AssetSpecification) -> &mut Self {
1129        self.html_precomputed_assets = Some(assets);
1130        self
1131    }
1132
1133    /// Set whether templated outputs should be created during HTML processing.
1134    ///
1135    /// This mode can be useful if you want to analyze what *would* be created
1136    /// during HTML processing without actually creating the files.
1137    pub fn html_emit_files(&mut self, do_emit: bool) -> &mut Self {
1138        self.html_do_not_emit_files = !do_emit;
1139        self
1140    }
1141
1142    /// Set whether supporting asset files should be created during HTML
1143    /// processing.
1144    ///
1145    /// This mode can be useful if you want to analyze what *would* be created
1146    /// during HTML processing without actually creating the files. If you call
1147    /// [`Self::html_assets_spec_path`], this setting will ignored, and no
1148    /// assets will be emitted to disk.
1149    pub fn html_emit_assets(&mut self, do_emit: bool) -> &mut Self {
1150        self.html_do_not_emit_assets = !do_emit;
1151        self
1152    }
1153
1154    /// Creates a `ProcessingSession`.
1155    pub fn create(self, status: &mut dyn StatusBackend) -> Result<ProcessingSession> {
1156        // First, work on the "bridge state", which gathers the subset of our
1157        // state that has to be held in a mutable reference while running the
1158        // C/C++ engines:
1159
1160        let mut bundle = self.bundle.expect("a bundle must be specified");
1161
1162        let mut filesystem_root = self.filesystem_root.unwrap_or_default();
1163
1164        let (pio, primary_input_path, default_output_path) = match self.primary_input {
1165            PrimaryInputMode::Path(p) => {
1166                // Set the filesystem root (that's the directory we'll search
1167                // for files in) to be the same directory as the main input
1168                // file.
1169                let parent = match p.parent() {
1170                    Some(parent) => parent.to_owned(),
1171                    None => {
1172                        return Err(errmsg!(
1173                            "can't figure out a parent directory for input path \"{}\"",
1174                            p.display()
1175                        ));
1176                    }
1177                };
1178
1179                filesystem_root.clone_from(&parent);
1180                let pio: Box<dyn IoProvider> = Box::new(FilesystemPrimaryInputIo::new(&p));
1181                (pio, Some(p), parent)
1182            }
1183
1184            PrimaryInputMode::Stdin => {
1185                // If the main input file is stdin, we don't set a filesystem
1186                // root, which means we'll default to the current working
1187                // directory.
1188                //
1189                // Note that, due to the expected need to rerun the engine
1190                // multiple times, we'll need to buffer stdin in its entirety,
1191                // so we might as well do that now.
1192                let pio = ctry!(BufferedPrimaryIo::from_stdin(); "error reading standard input");
1193                let pio: Box<dyn IoProvider> = Box::new(pio);
1194                (pio, None, "".into())
1195            }
1196
1197            PrimaryInputMode::Buffer(buf) => {
1198                // Same behavior as with stdin.
1199                let pio: Box<dyn IoProvider> = Box::new(BufferedPrimaryIo::from_buffer(buf));
1200                (pio, None, "".into())
1201            }
1202        };
1203
1204        let format_cache_path = self
1205            .format_cache_path
1206            .unwrap_or_else(|| filesystem_root.clone());
1207        let format_cache = FormatCache::new(bundle.get_digest()?, format_cache_path);
1208
1209        let genuine_stdout = if self.print_stdout {
1210            Some(GenuineStdoutIo::new())
1211        } else {
1212            None
1213        };
1214
1215        // move this out of self to get around borrow checker issues
1216        let hidden_input_paths = self.hidden_input_paths;
1217
1218        let extra_search_paths = if self.security.allow_extra_search_paths() {
1219            self.unstables
1220                .extra_search_paths
1221                .iter()
1222                .map(|p| FilesystemIo::new(p, false, false, hidden_input_paths.clone()))
1223                .collect()
1224        } else {
1225            if !self.unstables.extra_search_paths.is_empty() {
1226                tt_warning!(status, "Extra search path(s) ignored due to security");
1227            }
1228            Vec::new()
1229        };
1230
1231        let filesystem = FilesystemIo::new(&filesystem_root, false, true, hidden_input_paths);
1232
1233        let mem = MemoryIo::new(true);
1234
1235        // BEGIN AWARE REPORTS PATCH
1236        let disk_outputs = if self.outputs_to_filesystem {
1237            Some(FilesystemIo::new(
1238                &filesystem_root,
1239                true,
1240                false,
1241                HashSet::new(),
1242            ))
1243        } else {
1244            None
1245        };
1246        // END AWARE REPORTS PATCH
1247
1248        let bs = BridgeState {
1249            primary_input: pio,
1250            mem,
1251            filesystem,
1252            extra_search_paths,
1253            shell_escape_work: None,
1254            format_cache,
1255            bundle,
1256            genuine_stdout,
1257            format_primary: None,
1258            events: HashMap::new(),
1259            disk_outputs,
1260        };
1261
1262        // Now we can do the rest.
1263
1264        let output_path = match self.output_dest {
1265            OutputDestination::Default => Some(default_output_path),
1266            OutputDestination::Path(p) => Some(p),
1267            OutputDestination::Nowhere => None,
1268        };
1269
1270        let tex_input_name = self
1271            .tex_input_name
1272            .expect("tex_input_name must be specified");
1273        let mut aux_path = PathBuf::from(tex_input_name.clone());
1274        aux_path.set_extension("aux");
1275        let mut xdv_path = aux_path.clone();
1276        xdv_path.set_extension(if self.output_format == OutputFormat::Html {
1277            "spx"
1278        } else {
1279            "xdv"
1280        });
1281        let mut pdf_path = aux_path.clone();
1282        pdf_path.set_extension("pdf");
1283
1284        let shell_escape_mode = if !self.security.allow_shell_escape() {
1285            ShellEscapeMode::Disabled
1286        } else {
1287            match self.shell_escape_mode {
1288                ShellEscapeMode::Defaulted => {
1289                    if let Some(ref cwd) = self.unstables.shell_escape_cwd {
1290                        ShellEscapeMode::ExternallyManagedDir(cwd.into())
1291                    } else if self.unstables.shell_escape {
1292                        ShellEscapeMode::TempDir
1293                    } else {
1294                        ShellEscapeMode::Disabled
1295                    }
1296                }
1297
1298                other => other,
1299            }
1300        };
1301
1302        Ok(ProcessingSession {
1303            security: self.security,
1304            bs,
1305            pass: self.pass,
1306            primary_input_path,
1307            primary_input_tex_path: tex_input_name,
1308            format_name: self.format_name.unwrap(),
1309            tex_aux_path: aux_path.display().to_string(),
1310            tex_xdv_path: xdv_path.display().to_string(),
1311            tex_pdf_path: pdf_path.display().to_string(),
1312            output_format: self.output_format,
1313            makefile_output_path: self.makefile_output_path,
1314            output_path,
1315            tex_rerun_specification: self.reruns,
1316            keep_intermediates: self.keep_intermediates,
1317            keep_logs: self.keep_logs,
1318            synctex_enabled: self.synctex,
1319            build_date: self.build_date.unwrap_or(SystemTime::UNIX_EPOCH),
1320            // BEGIN AWARE REPORTS PATCH
1321            pdf_origin_offset: self.pdf_origin_offset,
1322            // END AWARE REPORTS PATCH
1323            unstables: self.unstables,
1324            shell_escape_mode,
1325            html_assets_spec_path: self.html_assets_spec_path,
1326            html_precomputed_assets: self.html_precomputed_assets,
1327            html_emit_files: !self.html_do_not_emit_files,
1328            html_emit_assets: !self.html_do_not_emit_assets,
1329        })
1330    }
1331}
1332
/// Why another engine pass is considered necessary.
#[derive(Debug, Clone)]
enum RerunReason {
    // NOTE(review): presumably "biber needs to run / has run" -- the code
    // producing this variant is outside this excerpt.
    Biber,
    // NOTE(review): presumably the BibTeX counterpart of `Biber` -- confirm
    // at the use site.
    Bibtex,
    /// A file that was read and then rewritten ended up with different
    /// contents; the payload is the name of that file.
    FileChange(String),
}
1339
/// The ProcessingSession struct runs the whole show when we're actually
/// processing a file. It understands, for example, the need to re-run the TeX
/// engine if the `.aux` file changed.
pub struct ProcessingSession {
    /// Security settings, carried over from the builder.
    security: SecuritySettings,

    /// The subset of the session state that can be mutated while the C/C++
    /// engines are running. Importantly, this includes the full I/O stack.
    bs: BridgeState,

    /// If our primary input is an actual file on disk, this is its path.
    primary_input_path: Option<PathBuf>,

    /// This is the name of the input that we tell TeX. It is the basename of
    /// the UTF8-ified version of `primary_input_path`; or something anodyne
    /// if the latter is None (e.g. "texput.tex").
    primary_input_tex_path: String,

    /// This is the name of the format file to use. TeX has to open it by name
    /// internally, so it has to be String compatible.
    format_name: String,

    /// These are the paths of the various output files as TeX knows them --
    /// just `primary_input_tex_path` with the extension changed.
    tex_aux_path: String,
    /// Same stem with an `xdv` extension, or `spx` when producing HTML.
    tex_xdv_path: String,
    /// Same stem with a `pdf` extension.
    tex_pdf_path: String,

    /// If we're writing out Makefile rules, this is where they go. The TeX
    /// engine doesn't know about this path at all.
    makefile_output_path: Option<PathBuf>,

    /// This is the path that the processed file will be saved at. It defaults
    /// to the path of `primary_input_path` or `.` if STDIN is used. If set to
    /// None, the output files will not be saved to disk — in which case, the
    /// caller should access the memory layer of the I/O stack held in `bs` to
    /// gain access to the output files.
    output_path: Option<PathBuf>,

    /// Which kind of pass to run.
    pass: PassSetting,
    /// The type of output to create.
    output_format: OutputFormat,
    /// The rerun count requested through the builder, if any.
    tex_rerun_specification: Option<usize>,
    /// Whether intermediate files should be kept.
    keep_intermediates: bool,
    /// Whether log files should be kept.
    keep_logs: bool,
    /// Whether SyncTeX output was requested.
    synctex_enabled: bool,

    /// See `TexEngine::with_date` and `XdvipdfmxEngine::with_date`. The
    /// builder substitutes the Unix epoch when no date was configured.
    build_date: SystemTime,

    // BEGIN AWARE REPORTS PATCH
    /// Page-origin offset (x, y) in bp for the xdvipdfmx pass; `None` keeps the
    /// engine default (1 inch). See `ProcessingSessionBuilder::pdf_origin_offset`.
    pdf_origin_offset: Option<(f64, f64)>,
    // END AWARE REPORTS PATCH

    /// Unstable options, carried over from the builder.
    unstables: UnstableOptions,

    /// How to handle shell-escape. The `Defaulted` option will never
    /// be used here.
    shell_escape_mode: ShellEscapeMode,

    /// If set, where the HTML asset specification should be emitted.
    html_assets_spec_path: Option<String>,
    /// Precomputed HTML asset specification, if one was supplied.
    html_precomputed_assets: Option<AssetSpecification>,
    /// Whether templated HTML outputs should be emitted.
    html_emit_files: bool,
    /// Whether HTML asset files should be emitted.
    html_emit_assets: bool,
}
1407
// NOTE(review): presumably the cap on the number of TeX passes when the
// rerun count is auto-detected -- the use site is outside this excerpt.
const DEFAULT_MAX_TEX_PASSES: usize = 6;
// NOTE(review): judging by the name, files whose names end in one of these
// suffixes are always treated as intermediates -- confirm at the use site.
const ALWAYS_INTERMEDIATE_EXTENSIONS: &[&str] = &[
    ".snm", ".toc", // generated by Beamer
];
1412
1413impl ProcessingSession {
1414    /// Assess whether we need to rerun an engine. This is the case if there
1415    /// was a file that the engine read and then rewrote, and the rewritten
1416    /// version is different than the version that it read in.
1417    fn is_rerun_needed(&self, status: &mut dyn StatusBackend) -> Option<RerunReason> {
1418        // TODO: we should probably wire up diagnostics since I expect this
1419        // stuff could get finicky and we're going to want to be able to
1420        // figure out why rerun detection is breaking.
1421
1422        for (name, info) in &self.bs.events {
1423            if info.access_pattern == AccessPattern::ReadThenWritten {
1424                let file_changed = match (&info.read_digest, &info.write_digest) {
1425                    (Some(d1), Some(d2)) => d1 != d2,
1426                    (&None, &Some(_)) => true,
1427                    (_, _) => {
1428                        // Other cases shouldn't happen.
1429                        tt_warning!(
1430                            status,
1431                            "internal consistency problem when checking if {} changed",
1432                            name
1433                        );
1434                        true
1435                    }
1436                };
1437
1438                if file_changed {
1439                    return Some(RerunReason::FileChange(name.clone()));
1440                }
1441            }
1442        }
1443
1444        None
1445    }
1446
1447    #[allow(dead_code)]
1448    fn _dump_access_info(&self, status: &mut dyn StatusBackend) {
1449        for (name, info) in &self.bs.events {
1450            if info.access_pattern != AccessPattern::Read {
1451                let r = match info.read_digest {
1452                    Some(ref d) => d.to_string(),
1453                    None => "-".into(),
1454                };
1455                let w = match info.write_digest {
1456                    Some(ref d) => d.to_string(),
1457                    None => "-".into(),
1458                };
1459                tt_note!(
1460                    status,
1461                    "ACCESS: {} {:?} {:?} {:?}",
1462                    name,
1463                    info.access_pattern,
1464                    r,
1465                    w
1466                );
1467            }
1468        }
1469    }
1470
1471    /// Runs the session, generating the desired outputs.
1472    ///
1473    /// What this does depends on which [`PassSetting`] you asked for. The most common choice is
1474    /// `PassSetting::Default`, in which case this method does the following:
1475    ///
1476    /// - if a `.fmt` file does not yet exist, generate one and cache it
1477    /// - run the TeX engine once
1478    /// - run BibTeX, if it seems to be required
1479    /// - repeat the last two steps as often as needed
1480    /// - write the output files to disk, including a Makefile if it was requested.
1481    pub fn run(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1482        // Pre-invocation setup that requires cleanup even if the processing errors out.
1483
1484        let (shell_escape_work, clean_up_shell_escape) = match self.shell_escape_mode {
1485            ShellEscapeMode::Disabled => (None, false),
1486
1487            ShellEscapeMode::ExternallyManagedDir(ref p) => (
1488                Some(FilesystemIo::new(p, false, false, HashSet::new())),
1489                false,
1490            ),
1491
1492            ShellEscapeMode::TempDir => {
1493                let tempdir = ctry!(tempfile::Builder::new().tempdir(); "can't create temporary directory for shell-escape work");
1494                (
1495                    Some(FilesystemIo::new(
1496                        &tempdir.keep(),
1497                        false,
1498                        false,
1499                        HashSet::new(),
1500                    )),
1501                    true,
1502                )
1503            }
1504
1505            ShellEscapeMode::Defaulted => unreachable!(),
1506        };
1507
1508        self.bs.shell_escape_work = shell_escape_work;
1509
1510        // Go-time!
1511        let result = self.run_inner(status);
1512
1513        // Do that cleanup.
1514
1515        if clean_up_shell_escape {
1516            let shell_escape_work = self.bs.shell_escape_work.take().unwrap();
1517            let shell_escape_err = std::fs::remove_dir_all(shell_escape_work.root());
1518
1519            if let Err(e) = shell_escape_err {
1520                tt_warning!(status, "an error occurred while cleaning up the \
1521                    shell-escape temporary directory `{}`", shell_escape_work.root().display(); e.into());
1522            }
1523        }
1524
1525        // Propagate the actual result.
1526        result
1527    }
1528
1529    /// The bulk of the `run` implementation. We need to wrap it to manage the
1530    /// lifecycle of resources like the shell-escape temporary directory, if
1531    /// needed.
1532    fn run_inner(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1533        // Do we need to generate the format file?
1534
1535        let generate_format = if self.output_format == OutputFormat::Format {
1536            false
1537        } else {
1538            match self.bs.input_open_format(&self.format_name, status) {
1539                OpenResult::Ok(_) => false,
1540                OpenResult::NotAvailable => true,
1541                OpenResult::Err(e) => {
1542                    return Err(e)
1543                        .chain_err(|| format!("could not open format file {}", self.format_name));
1544                }
1545            }
1546        };
1547
1548        if generate_format {
1549            tt_note!(status, "generating format \"{}\"", self.format_name);
1550            self.make_format_pass(status)?;
1551        }
1552
1553        // Do the meat of the work.
1554
1555        let result = match self.pass {
1556            PassSetting::Tex => match self.tex_pass(None, status) {
1557                Ok(Some(warnings)) => {
1558                    tt_warning!(status, "{}", warnings);
1559                    Ok(0)
1560                }
1561                Ok(None) => Ok(0),
1562                Err(e) => Err(e),
1563            },
1564            PassSetting::Default => self.default_pass(false, status),
1565            PassSetting::BibtexFirst => self.default_pass(true, status),
1566        };
1567
1568        if let Err(e) = result {
1569            self.write_files(None, status, true)?;
1570            return Err(e);
1571        };
1572
1573        // Write output files and the first line of our Makefile output.
1574
1575        let mut mf_dest_maybe = match self.makefile_output_path {
1576            Some(ref p) => {
1577                if self.output_path.is_none() {
1578                    tt_warning!(
1579                        status,
1580                        "requested to generate Makefile rules, but no files written to disk!"
1581                    );
1582                    None
1583                } else {
1584                    Some(File::create(p)?)
1585                }
1586            }
1587
1588            None => None,
1589        };
1590
1591        let n_skipped_intermediates = self.write_files(mf_dest_maybe.as_mut(), status, false)?;
1592
1593        if n_skipped_intermediates > 0 {
1594            status.note_highlighted(
1595                "Skipped writing ",
1596                &format!("{n_skipped_intermediates}"),
1597                " intermediate files (use --keep-intermediates to keep them)",
1598            );
1599        }
1600
1601        // Finish Makefile rules, maybe.
1602
1603        if let Some(ref mut mf_dest) = mf_dest_maybe {
1604            ctry!(write!(mf_dest, ": "); "couldn't write to Makefile-rules file");
1605
1606            if let Some(ref pip) = self.primary_input_path {
1607                let opip = ctry!(pip.to_str(); "Makefile-rules file path must be Unicode-able");
1608                ctry!(mf_dest.write_all(opip.as_bytes()); "couldn't write to Makefile-rules file");
1609            }
1610
1611            // The check above ensures that this is never None.
1612            let root = self.output_path.as_ref().unwrap();
1613
1614            for (name, info) in &self.bs.events {
1615                if info.input_origin != InputOrigin::Filesystem {
1616                    continue;
1617                }
1618
1619                if info.got_written_to_disk {
1620                    // If the file originally came from the filesystem, and it
1621                    // was written as well as read, and we actually wrote it
1622                    // to disk, there's a circular dependency that's
1623                    // inappropriate to express in a Makefile. If it was
1624                    // "written" by the engine but we didn't actually write
1625                    // those modifications to disk, we're OK. If there's a
1626                    // two-stage compilation involving the .aux file, the
1627                    // latter case is what arises unless --keep-intermediates
1628                    // is specified.
1629                    tt_warning!(status, "omitting circular Makefile dependency for {}", name);
1630                    continue;
1631                }
1632
1633                ctry!(write!(mf_dest, " \\\n  {}", root.join(name).display()); "couldn't write to Makefile-rules file");
1634            }
1635
1636            ctry!(writeln!(mf_dest, ""); "couldn't write to Makefile-rules file");
1637        }
1638
1639        // All done.
1640
1641        Ok(())
1642    }
1643
1644    fn write_files(
1645        &mut self,
1646        mut mf_dest_maybe: Option<&mut File>,
1647        status: &mut dyn StatusBackend,
1648        only_logs: bool,
1649    ) -> Result<u32> {
1650        let root = match self.output_path {
1651            Some(ref p) => p,
1652
1653            None => {
1654                // We were told not to write anything!
1655                return Ok(0);
1656            }
1657        };
1658
1659        let mut n_skipped_intermediates = 0;
1660
1661        for (name, file) in &*self.bs.mem.files.borrow() {
1662            if name == self.bs.mem.stdout_key() {
1663                continue;
1664            }
1665
1666            let sname = name;
1667            let summ = self.bs.events.get_mut(name).unwrap();
1668
1669            if !only_logs && (self.output_format == OutputFormat::Aux) {
1670                // In this mode we're only writing the .aux file. I initially
1671                // wanted to be clever-ish and output all auxiliary-type
1672                // files, but doing so ended up causing non-obvious problems
1673                // for my use case, which involves using Ninja to manage
1674                // dependencies.
1675                if !sname.ends_with(".aux") {
1676                    continue;
1677                }
1678            } else if !self.keep_intermediates
1679                && (summ.access_pattern != AccessPattern::Written
1680                    || ALWAYS_INTERMEDIATE_EXTENSIONS
1681                        .iter()
1682                        .any(|ext| sname.ends_with(ext)))
1683            {
1684                n_skipped_intermediates += 1;
1685                continue;
1686            }
1687
1688            let is_logfile = sname.ends_with(".log") || sname.ends_with(".blg");
1689
1690            if is_logfile && !self.keep_logs {
1691                continue;
1692            }
1693
1694            if !is_logfile && only_logs {
1695                continue;
1696            }
1697
1698            if file.data.is_empty() {
1699                status.note_highlighted(
1700                    "Not writing ",
1701                    &format!("`{sname}`"),
1702                    ": it would be empty.",
1703                );
1704                continue;
1705            }
1706
1707            let real_path = root.join(name);
1708            let byte_len = Byte::from_u128(file.data.len() as u128).unwrap();
1709            status.note_highlighted(
1710                "Writing ",
1711                &format!("`{}`", real_path.display()),
1712                &format!(" ({})", byte_len.get_appropriate_unit(UnitType::Binary)),
1713            );
1714
1715            if let Some(parent) = real_path.parent() {
1716                std::fs::create_dir_all(parent)?;
1717            }
1718
1719            let mut f = File::create(&real_path)?;
1720            f.write_all(&file.data)?;
1721            summ.got_written_to_disk = true;
1722
1723            if let Some(ref mut mf_dest) = mf_dest_maybe {
1724                // Maybe it'd be better to have this just be a warning? But if
1725                // the program is supposed to write the file, you don't want
1726                // it exiting with error code zero if it couldn't do that
1727                // successfully.
1728                //
1729                // Not quite sure why, but I can't pull out the target path
1730                // here. I think 'self' is borrow inside the loop?
1731                ctry!(write!(mf_dest, "{} ", real_path.display()); "couldn't write to Makefile-rules file");
1732            }
1733        }
1734
1735        Ok(n_skipped_intermediates)
1736    }
1737
1738    /// The "default" pass really runs a bunch of sub-passes. It is a "Do What
1739    /// I Mean" operation.
1740    fn default_pass(&mut self, bibtex_first: bool, status: &mut dyn StatusBackend) -> Result<i32> {
1741        // If `bibtex_first` is true, we start by running bibtex, and run
1742        // proceed with the standard rerun logic. Otherwise, we run TeX,
1743        // auto-detect whether we need to run bibtex, possibly run it, and
1744        // then go ahead.
1745
1746        let mut warnings = None;
1747        let mut rerun_result = if bibtex_first {
1748            self.bibtex_pass(status)?;
1749            Some(RerunReason::Bibtex)
1750        } else {
1751            warnings = self.tex_pass(None, status)?;
1752            let maybe_biber = self.check_biber_requirement(status)?;
1753
1754            if let Some(biber) = maybe_biber {
1755                self.bs.external_tool_pass(&biber, status)?;
1756                Some(RerunReason::Biber)
1757            } else if self.is_bibtex_needed() {
1758                self.bibtex_pass(status)?;
1759                Some(RerunReason::Bibtex)
1760            } else {
1761                self.is_rerun_needed(status)
1762            }
1763        };
1764
1765        // Now we enter the main rerun loop.
1766
1767        let (pass_count, reruns_fixed) = match self.tex_rerun_specification {
1768            Some(n) => (n, true),
1769            None => (DEFAULT_MAX_TEX_PASSES, false),
1770        };
1771
1772        for i in 0..pass_count {
1773            let rerun_explanation = if reruns_fixed {
1774                "I was told to".to_owned()
1775            } else {
1776                match rerun_result {
1777                    Some(RerunReason::Biber) => "biber was run".to_owned(),
1778                    Some(RerunReason::Bibtex) => "bibtex was run".to_owned(),
1779                    Some(RerunReason::FileChange(ref s)) => format!("\"{s}\" changed"),
1780                    None => break,
1781                }
1782            };
1783
1784            // We're restarting the engine afresh, so clear the read inputs.
1785            // We do *not* clear the entire HashMap since we want to remember,
1786            // e.g., that bibtex wrote out the .bbl file, since that way we
1787            // can later know that it's OK to delete. I am not super confident
1788            // that the access_pattern data can just be left as-is when we do
1789            // this, but, uh, so far it seems to work.
1790            for summ in self.bs.events.values_mut() {
1791                summ.read_digest = None;
1792            }
1793
1794            warnings = self.tex_pass(Some(&rerun_explanation), status)?;
1795
1796            if !reruns_fixed {
1797                rerun_result = self.is_rerun_needed(status);
1798
1799                if rerun_result.is_some() && i == DEFAULT_MAX_TEX_PASSES - 1 {
1800                    tt_warning!(
1801                        status,
1802                        "TeX rerun seems needed, but stopping at {} passes",
1803                        DEFAULT_MAX_TEX_PASSES
1804                    );
1805                    break;
1806                }
1807            }
1808        }
1809
1810        // The last tex pass generated warnings.
1811        if let Some(warnings) = warnings {
1812            tt_warning!(status, "{}", warnings);
1813        }
1814
1815        // And finally, xdvipdfmx or spx2html. Maybe.
1816
1817        if let OutputFormat::Pdf = self.output_format {
1818            self.xdvipdfmx_pass(status)?;
1819        } else if let OutputFormat::Html = self.output_format {
1820            self.spx2html_pass(status)?;
1821        }
1822
1823        Ok(0)
1824    }
1825
1826    fn is_bibtex_needed(&self) -> bool {
1827        const BIBDATA: &[u8] = b"\\bibdata";
1828
1829        self.bs
1830            .mem
1831            .files
1832            .borrow()
1833            .get(&self.tex_aux_path)
1834            .map(|file| {
1835                // We used to use aho-corasick crate here, but it was removed to reduce the code
1836                // size.
1837                file.data.windows(BIBDATA.len()).any(|s| s == BIBDATA)
1838            })
1839            .unwrap_or(false)
1840    }
1841
1842    /// Use the TeX engine to generate a format file.
1843    #[allow(clippy::manual_split_once)] // requires Rust 1.52 (note that we don't actually define our MSRV)
1844    fn make_format_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1845        // PathBuf.file_stem() doesn't do what we want since it only strips
1846        // one extension. As of 1.17, the compiler needs a type annotation for
1847        // some reason, which is why we use the `r` variable.
1848        let r: Result<&str> = self.format_name.split('.').next().ok_or_else(|| {
1849            ErrorKind::Msg(format!(
1850                "incomprehensible format file name \"{}\"",
1851                self.format_name
1852            ))
1853            .into()
1854        });
1855        let stem = r?;
1856
1857        let result = {
1858            self.bs
1859                .enter_format_mode(&format!("tectonic-format-{stem}.tex"));
1860            let mut launcher =
1861                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1862            let r = TexEngine::default()
1863                .halt_on_error_mode(true)
1864                .initex_mode(true)
1865                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1866                .process(&mut launcher, "UNUSED.fmt", "texput");
1867            self.bs.leave_format_mode();
1868            r
1869        };
1870
1871        match result {
1872            Ok(TexOutcome::Spotless) => {}
1873            Ok(TexOutcome::Warnings) => {
1874                tt_warning!(status, "warnings were issued by the TeX engine; use --print and/or --keep-logs for details.");
1875            }
1876            Ok(TexOutcome::Errors) => {
1877                tt_error!(status, "errors were issued by the TeX engine; use --print and/or --keep-logs for details.");
1878                return Err(ErrorKind::Msg("unhandled TeX engine error".to_owned()).into());
1879            }
1880            Err(e) => {
1881                return Err(e.into());
1882            }
1883        }
1884
1885        // Now we can write the format file to its special location. In
1886        // principle we could stream the format file directly to the staging
1887        // area as we ran the TeX engine, but we don't bother.
1888
1889        for (name, file) in &*self.bs.mem.files.borrow() {
1890            if name == self.bs.mem.stdout_key() {
1891                continue;
1892            }
1893
1894            let sname = name;
1895
1896            if !sname.ends_with(".fmt") {
1897                continue;
1898            }
1899
1900            // Note that we intentionally pass 'stem', not 'name'.
1901            ctry!(self.bs.format_cache.write_format(stem, &file.data, status); "cannot write format file {}", sname);
1902        }
1903
1904        // All done. Clear the memory layer since this was a special preparatory step.
1905        self.bs.mem.files.borrow_mut().clear();
1906
1907        Ok(0)
1908    }
1909
1910    /// Run one pass of the TeX engine.
1911    fn tex_pass(
1912        &mut self,
1913        rerun_explanation: Option<&str>,
1914        status: &mut dyn StatusBackend,
1915    ) -> Result<Option<&'static str>> {
1916        let result = {
1917            if let Some(s) = rerun_explanation {
1918                status.note_highlighted("Rerunning ", "TeX", &format!(" because {s} ..."));
1919            } else {
1920                status.note_highlighted("Running ", "TeX", " ...");
1921            }
1922
1923            let mut launcher =
1924                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1925
1926            // In deterministic mode, we stub a few aspects of the environment.
1927            // They default to a "realistic" view, but we override them with static values:
1928            if self.unstables.deterministic_mode {
1929                launcher.with_expose_absolute_paths(false);
1930                launcher.with_mtime_override(Some(
1931                    self.build_date
1932                        .duration_since(SystemTime::UNIX_EPOCH)
1933                        .map(|x| x.as_secs() as i64)
1934                        .expect("invalid build date in deterministic mode"),
1935                ));
1936            }
1937
1938            TexEngine::default()
1939                .halt_on_error_mode(!self.unstables.continue_on_errors)
1940                .initex_mode(self.output_format == OutputFormat::Format)
1941                .synctex(self.synctex_enabled)
1942                .semantic_pagination(self.output_format == OutputFormat::Html)
1943                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1944                .build_date(self.build_date)
1945                .process(
1946                    &mut launcher,
1947                    &self.format_name,
1948                    &self.primary_input_tex_path,
1949                )
1950        };
1951
1952        let warnings = match result {
1953            Ok(TexOutcome::Spotless) => None,
1954            Ok(TexOutcome::Warnings) =>
1955                    Some("warnings were issued by the TeX engine; use --print and/or --keep-logs for details."),
1956            Ok(TexOutcome::Errors) =>
1957                    Some("errors were issued by the TeX engine, but were ignored; \
1958                         use --print and/or --keep-logs for details."),
1959            Err(e) =>
1960                return Err(e.into()),
1961        };
1962
1963        if !self.bs.mem.files.borrow().contains_key(&self.tex_xdv_path) {
1964            // TeX did not produce the expected output file
1965            tt_warning!(
1966                status,
1967                "did not produce \"{}\"; this may mean that your document is empty",
1968                self.tex_xdv_path
1969            )
1970        }
1971
1972        Ok(warnings)
1973    }
1974
1975    // Run Bibtex process for one .aux file.
1976    fn bibtex_pass_for_one_aux_file(
1977        &mut self,
1978        status: &mut dyn StatusBackend,
1979        aux_file: &String,
1980    ) -> Result<i32> {
1981        let result = {
1982            status.note_highlighted("Running ", "BibTeX", &format!(" on {aux_file} ..."));
1983            let mut launcher =
1984                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1985            let mut engine = BibtexEngine::new();
1986            engine.process(&mut launcher, aux_file, &self.unstables)
1987        };
1988
1989        match result {
1990            Ok(TexOutcome::Spotless) => {}
1991            Ok(TexOutcome::Warnings) => {
1992                tt_note!(
1993                    status,
1994                    "warnings were issued by BibTeX; use --print and/or --keep-logs for details."
1995                );
1996            }
1997            Ok(TexOutcome::Errors) => {
1998                tt_warning!(
1999                    status,
2000                    "errors were issued by BibTeX, but were ignored; \
2001                     use --print and/or --keep-logs for details."
2002                );
2003            }
2004            Err(e) => {
2005                return Err(e.chain_err(|| ErrorKind::EngineError("BibTeX")));
2006            }
2007        }
2008
2009        Ok(0)
2010    }
2011
2012    fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2013        let mut aux_files = vec![self.tex_aux_path.clone()];
2014
2015        // find other .aux files generated by tex_pass
2016        for f in self.bs.get_intermediate_file_names() {
2017            if f.ends_with(".aux") && f != self.tex_aux_path {
2018                aux_files.push(f);
2019            }
2020        }
2021
2022        for f in aux_files {
2023            let _r = self.bibtex_pass_for_one_aux_file(status, &f)?;
2024        }
2025
2026        Ok(0)
2027    }
2028
2029    fn xdvipdfmx_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2030        {
2031            status.note_highlighted("Running ", "xdvipdfmx", " ...");
2032
2033            let mut launcher =
2034                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
2035            let mut engine = XdvipdfmxEngine::default();
2036
2037            engine.build_date(self.build_date);
2038
2039            if let Some(ref ps) = self.unstables.paper_size {
2040                engine.paper_spec(ps.clone());
2041            }
2042
2043            // BEGIN AWARE REPORTS PATCH
2044            if let Some((x, y)) = self.pdf_origin_offset {
2045                engine.origin_offset(x, y);
2046            }
2047            // END AWARE REPORTS PATCH
2048
2049            engine.process(&mut launcher, &self.tex_xdv_path, &self.tex_pdf_path)?;
2050        }
2051
2052        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2053        Ok(0)
2054    }
2055
2056    fn spx2html_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2057        {
2058            let mut engine = Spx2HtmlEngine::default();
2059
2060            match (self.html_emit_files, self.output_path.as_ref()) {
2061                (true, Some(p)) => engine.output_base(p),
2062                (false, _) => engine.do_not_emit_files(),
2063                (true, None) => return Err(errmsg!("HTML output must be saved directly to disk")),
2064            };
2065
2066            if let Some(p) = self.html_assets_spec_path.as_ref() {
2067                engine.assets_spec_path(p);
2068            } else if !self.html_emit_assets {
2069                engine.do_not_emit_assets();
2070            }
2071
2072            if let Some(a) = self.html_precomputed_assets.as_ref() {
2073                engine.precomputed_assets(a.clone());
2074            }
2075
2076            status.note_highlighted("Running ", "spx2html", " ...");
2077            engine.process_to_filesystem(&mut self.bs, status, &self.tex_xdv_path)?;
2078        }
2079
2080        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2081        Ok(0)
2082    }
2083
2084    /// Get what was printed to standard output, if anything.
2085    pub fn get_stdout_content(&self) -> Vec<u8> {
2086        self.bs
2087            .mem
2088            .files
2089            .borrow()
2090            .get(self.bs.mem.stdout_key())
2091            .map(|mfi| mfi.data.clone())
2092            .unwrap_or_default()
2093    }
2094
2095    /// Consume this session and return the current set of files in memory.
2096    ///
2097    /// This convenience function tries to help with the annoyances of getting
2098    /// access to the in-memory file data after the engine has been run.
2099    pub fn into_file_data(self) -> MemoryFileCollection {
2100        Rc::try_unwrap(self.bs.mem.files)
2101            .expect("multiple strong refs to MemoryIo files")
2102            .into_inner()
2103    }
2104
2105    /// See if we need to run `biber`, and parse the `.run.xml` file from the
2106    /// `loqreq` package to figure out what files `biber` needs. This
2107    /// functionality should probably become more generic, but I don't have a
2108    /// great sense as to how widely-used `logreq` is.
2109    fn check_biber_requirement(
2110        &self,
2111        status: &mut dyn StatusBackend,
2112    ) -> Result<Option<ExternalToolPass>> {
2113        // Is there a `.run.xml` file?
2114
2115        let mut run_xml_path = PathBuf::from(&self.primary_input_tex_path);
2116        run_xml_path.set_extension("run.xml");
2117        let run_xml_path = run_xml_path.display().to_string();
2118
2119        let mem_files = &*self.bs.mem.files.borrow();
2120        let run_xml_entry = match mem_files.get(&run_xml_path) {
2121            Some(e) => e,
2122            None => return Ok(None),
2123        };
2124
2125        // Yes, there is. Set up to potentially run biber. For testing support,
2126        // we let the rig specify a custom executable to use, which lets us
2127        // exercise different pieces of the external-tool behavior.
2128
2129        let s = (
2130            crate::config::is_config_test_mode_activated(),
2131            std::env::var("TECTONIC_TEST_FAKE_BIBER"),
2132        );
2133
2134        let mut argv = match s {
2135            (true, Ok(text)) if !text.trim().is_empty() => {
2136                text.split_whitespace().map(|x| x.to_owned()).collect()
2137            }
2138            // when `TECTONIC_TEST_FAKE_BIBER` is empty, proceed to discover
2139            // the biber binary as follows.
2140            _ => vec!["biber".to_owned()],
2141        };
2142
2143        // Moreover, we allow an override of the biber executable, to cope with
2144        // possible version mismatch of the bundled biblatex package, as filed
2145        // in issue #893. Since PR #1103, the `tectonic-biber` override can
2146        // also be invoked with `tectonic -X biber`.
2147        let find_by = |binary_name: &str| -> Option<String> {
2148            if let Ok(pathbuf) = which(binary_name) {
2149                if let Some(biber_path) = pathbuf.to_str() {
2150                    return Some(biber_path.to_owned());
2151                }
2152            }
2153            None
2154        };
2155
2156        let mut use_tectonic_biber_override = false;
2157        for binary_name in ["./tectonic-biber", "tectonic-biber"] {
2158            if let Some(biber_path) = find_by(binary_name) {
2159                argv = vec![biber_path];
2160                use_tectonic_biber_override = true;
2161                break;
2162            }
2163        }
2164
2165        let mut extra_requires = HashSet::new();
2166
2167        // Do a sketchy XML parse to see if there's info about a biber
2168        // invocation.
2169
2170        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
2171        enum State {
2172            /// Searching for the biber section
2173            Searching,
2174
2175            /// In a <binary> element. Will its value be "biber"??!?
2176            InBinaryName,
2177
2178            /// In the <cmdline> part of the biber section.
2179            InBiberCmdline,
2180
2181            /// About to read an argument to the biber command.
2182            InBiberArgument,
2183
2184            /// Reading through the post-cmdline part of the biber section.
2185            InBiberRemainder,
2186
2187            /// In a "requirement" section like <input> or <requires> that contains
2188            /// filenames we should provide
2189            InBiberRequirementSection,
2190
2191            /// In a <file> requirement
2192            InBiberFileRequirement,
2193        }
2194
2195        let curs = Cursor::new(&run_xml_entry.data[..]);
2196        let mut reader = NsReader::from_reader(curs);
2197        let mut buf = Vec::new();
2198        let mut state = State::Searching;
2199
2200        loop {
2201            let event = ctry!(
2202                reader.read_event_into(&mut buf);
2203                "error parsing run.xml file"
2204            );
2205
2206            if let Event::Eof = event {
2207                break;
2208            }
2209
2210            match (state, event) {
2211                (State::Searching, Event::Start(ref e)) => {
2212                    let name = reader
2213                        .decoder()
2214                        .decode(e.local_name().into_inner())
2215                        .map_err(quick_xml::Error::from)?;
2216
2217                    if name == "binary" {
2218                        state = State::InBinaryName;
2219                    }
2220                }
2221
2222                (State::InBinaryName, Event::Text(ref e)) => {
2223                    let text = e.unescape()?;
2224
2225                    state = if &text == "biber" {
2226                        State::InBiberCmdline
2227                    } else {
2228                        State::Searching
2229                    };
2230                }
2231
2232                (State::InBinaryName, _) => {
2233                    state = State::Searching;
2234                }
2235
2236                (State::InBiberCmdline, Event::Start(ref e)) => {
2237                    let name = reader
2238                        .decoder()
2239                        .decode(e.local_name().into_inner())
2240                        .map_err(quick_xml::Error::from)?;
2241
2242                    // Note that the "infile" might be `foo` without the `.bcf`
2243                    // extension, so we can't use it for file-finding.
2244                    state = match &*name {
2245                        "infile" | "outfile" | "option" => State::InBiberArgument,
2246                        _ => State::InBiberRemainder,
2247                    }
2248                }
2249
2250                (State::InBiberCmdline, Event::End(ref e)) => {
2251                    let name = reader
2252                        .decoder()
2253                        .decode(e.local_name().into_inner())
2254                        .map_err(quick_xml::Error::from)?;
2255
2256                    if name == "cmdline" {
2257                        state = State::InBiberRemainder;
2258                    }
2259                }
2260
2261                (State::InBiberArgument, Event::Text(ref e)) => {
2262                    argv.push(e.unescape()?.to_string());
2263                    state = State::InBiberCmdline;
2264                }
2265
2266                (State::InBiberRemainder, Event::Start(ref e)) => {
2267                    let name = reader
2268                        .decoder()
2269                        .decode(e.local_name().into_inner())
2270                        .map_err(quick_xml::Error::from)?;
2271
2272                    state = match &*name {
2273                        "input" | "requires" => State::InBiberRequirementSection,
2274                        _ => State::InBiberRemainder,
2275                    }
2276                }
2277
2278                (State::InBiberRemainder, Event::End(ref e)) => {
2279                    let name = reader
2280                        .decoder()
2281                        .decode(e.local_name().into_inner())
2282                        .map_err(quick_xml::Error::from)?;
2283
2284                    if name == "external" {
2285                        break;
2286                    }
2287                }
2288
2289                (State::InBiberRequirementSection, Event::Start(ref e)) => {
2290                    let name = reader
2291                        .decoder()
2292                        .decode(e.local_name().into_inner())
2293                        .map_err(quick_xml::Error::from)?;
2294
2295                    state = match &*name {
2296                        "file" => State::InBiberFileRequirement,
2297                        _ => State::InBiberRemainder,
2298                    }
2299                }
2300
2301                (State::InBiberRequirementSection, Event::End(ref e)) => {
2302                    let name = reader
2303                        .decoder()
2304                        .decode(e.local_name().into_inner())
2305                        .map_err(quick_xml::Error::from)?;
2306
2307                    if name == "input" || name == "requires" {
2308                        state = State::InBiberRemainder;
2309                    }
2310                }
2311
2312                (State::InBiberFileRequirement, Event::Text(ref e)) => {
2313                    extra_requires.insert(e.unescape()?.to_string());
2314                    state = State::InBiberRequirementSection;
2315                }
2316
2317                (State::InBiberFileRequirement, _) => {
2318                    state = State::InBiberRequirementSection;
2319                }
2320
2321                _ => {}
2322            }
2323        }
2324
2325        // All done!
2326
2327        Ok(if state == State::Searching {
2328            // No biber invocation, in the end.
2329            None
2330        } else {
2331            if use_tectonic_biber_override {
2332                tt_note!(status, "using `tectonic-biber`, found at {}", argv[0]);
2333            }
2334            Some(ExternalToolPass {
2335                argv,
2336                extra_requires,
2337            })
2338        })
2339    }
2340}