Skip to main content

tectonic/
driver.rs

1// Copyright 2018-2022 the Tectonic Project
2// Licensed under the MIT License.
3
4//! The high-level Tectonic document processing interface.
5//!
6//! The main struct in this module is [`ProcessingSession`], which knows how to
7//! run (and re-run if necessary) the various engines in the right order. Such a
8//! session can be created with a [`ProcessingSessionBuilder`], which you might
9//! obtain from a [`tectonic_docmodel::document::Document`] using the
10//! [`crate::docmodel::DocumentExt::setup_session`] extension method, if you’re
11//! using the Tectonic document model. You can set one up manually if not.
12//!
13//! For an example of how to use this module, see `src/bin/tectonic/main.rs`,
14//! which contains tectonic's main CLI program.
15
16use byte_unit::{Byte, UnitType};
17use quick_xml::{events::Event, NsReader};
18use std::{
19    collections::{HashMap, HashSet},
20    fs::File,
21    io::{Cursor, Read, Write},
22    path::{Path, PathBuf},
23    process::Command,
24    rc::Rc,
25    result::Result as StdResult,
26    str::FromStr,
27    time::{Duration, SystemTime},
28};
29use tectonic_bridge_core::{CoreBridgeLauncher, DriverHooks, SecuritySettings, SystemRequestError};
30use tectonic_bundles::Bundle;
31use tectonic_engine_spx2html::AssetSpecification;
32use tectonic_io_base::{
33    digest::DigestData,
34    filesystem::{FilesystemIo, FilesystemPrimaryInputIo},
35    stdstreams::{BufferedPrimaryIo, GenuineStdoutIo},
36    InputHandle, IoProvider, OpenResult, OutputHandle,
37};
38use which::which;
39
40use crate::{
41    ctry, errmsg,
42    errors::{ChainErrCompatExt, ErrorKind, Result},
43    io::{
44        format_cache::FormatCache,
45        memory::{MemoryFileCollection, MemoryIo},
46        InputOrigin,
47    },
48    status::StatusBackend,
49    tt_error, tt_note, tt_warning,
50    unstable_opts::UnstableOptions,
51    BibtexEngine, Spx2HtmlEngine, TexEngine, TexOutcome, XdvipdfmxEngine,
52};
53
/// Different patterns with which files may have been accessed by the
/// underlying engines. Once a file is marked as `ReadThenWritten` or
/// `WrittenThenRead`, its pattern does not evolve further.
///
/// The transitions are applied by the [`IoProvider`] implementation of
/// [`BridgeState`]: a successful output open turns `Read` into
/// `ReadThenWritten`, an input open turns `Written` into `WrittenThenRead`,
/// and every other state is left as it is.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum AccessPattern {
    /// This file is only ever read.
    Read,

    /// This file is only ever written. This suggests that it is
    /// a final output of the processing session.
    Written,

    /// This file is read, then written. We call this a "circular" access
    /// pattern. Multiple passes of an engine will result in outputs that
    /// change if this file's contents change, or if the file did not exist at
    /// the time of the first pass.
    ReadThenWritten,

    /// This file is written, then read. We call this a "temporary" access
    /// pattern. This file is likely a temporary buffer that is not of
    /// interest to the user.
    WrittenThenRead,
}
77
/// A summary of the I/O that happened on a file. We record its access
/// pattern; where it came from, if it was used as an input; the cryptographic
/// digest of the file when it was first read; and the cryptographic digest of
/// the file as it was last written.
#[derive(Clone, Debug, Eq, PartialEq)]
struct FileSummary {
    /// How the engines have accessed this file so far.
    access_pattern: AccessPattern,

    /// If this file was read, where did it come from?
    pub input_origin: InputOrigin,

    /// If this file was read, this is the digest of its contents at the time it was *first* read.
    /// The "first" is significant for files that were read and then written (for example, `.aux`
    /// files).
    ///
    /// There's some chance that this will be `None` even if the file was read. Tectonic makes an
    /// effort to compute the digest as the data is being read from the file, but this can fail if
    /// tex decides to seek in the file as it is being read.
    pub read_digest: Option<DigestData>,

    /// If this file was written, this is the digest of its contents at the time it was last
    /// written.
    pub write_digest: Option<DigestData>,

    /// Starts out `false`. NOTE(review): presumably set once the session has
    /// written this file out to disk; the code that updates it is not part of
    /// this section, so confirm against the output-writing logic.
    got_written_to_disk: bool,
}
104
105impl FileSummary {
106    fn new(access_pattern: AccessPattern, input_origin: InputOrigin) -> FileSummary {
107        FileSummary {
108            access_pattern,
109            input_origin,
110            read_digest: None,
111            write_digest: None,
112            got_written_to_disk: false,
113        }
114    }
115}
116
/// The different types of output files that tectonic knows how to produce.
///
/// The default is [`OutputFormat::Pdf`].
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum OutputFormat {
    /// A '.aux' file.
    Aux,
    /// A '.html' file.
    Html,
    /// An extended DVI file.
    Xdv,
    /// A '.pdf' file.
    #[default]
    Pdf,
    /// A '.fmt' file, for initializing the TeX engine.
    Format,
}

impl FromStr for OutputFormat {
    type Err = &'static str;

    /// Parse the short, lowercase name of an output format (`"aux"`,
    /// `"html"`, `"xdv"`, `"pdf"` or `"fmt"`). The comparison is exact, so
    /// any other spelling is rejected.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        // Every recognized name, paired with the format it selects.
        const NAMES: [(&str, OutputFormat); 5] = [
            ("aux", OutputFormat::Aux),
            ("html", OutputFormat::Html),
            ("xdv", OutputFormat::Xdv),
            ("pdf", OutputFormat::Pdf),
            ("fmt", OutputFormat::Format),
        ];

        NAMES
            .iter()
            .find(|(name, _)| *name == a_str)
            .map(|&(_, format)| format)
            .ok_or("unsupported or unknown format")
    }
}
147
/// The different types of "passes" that [`ProcessingSession`] knows how to run. See
/// [`ProcessingSession::run`] for more details.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum PassSetting {
    /// The default pass, which repeatedly runs TeX and BibTeX until it doesn't need to any more.
    #[default]
    Default,
    /// Just run the TeX engine once.
    Tex,
    /// Like the default pass, but runs BibTeX once first, before doing anything else.
    BibtexFirst,
}

impl FromStr for PassSetting {
    type Err = &'static str;

    /// Parse the name of a pass setting: `"default"`, `"bibtex_first"` or
    /// `"tex"`. The comparison is exact, so any other spelling is rejected.
    fn from_str(a_str: &str) -> StdResult<Self, Self::Err> {
        // Every recognized name, paired with the setting it selects.
        const NAMES: [(&str, PassSetting); 3] = [
            ("default", PassSetting::Default),
            ("bibtex_first", PassSetting::BibtexFirst),
            ("tex", PassSetting::Tex),
        ];

        NAMES
            .iter()
            .find(|(name, _)| *name == a_str)
            .map(|&(_, setting)| setting)
            .ok_or("unsupported or unknown pass setting")
    }
}
173
/// Different places from which the "primary input" might originate.
///
/// The default is [`PrimaryInputMode::Stdin`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum PrimaryInputMode {
    /// This process's standard input.
    #[default]
    Stdin,

    /// A path on the filesystem.
    Path(PathBuf),

    /// An in-memory buffer holding the raw bytes of the input.
    Buffer(Vec<u8>),
}
187
/// Different places where the output files might land.
///
/// The default is [`OutputDestination::Default`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum OutputDestination {
    /// The "sensible" default. Files will land in the same directory as the
    /// input file, or the current working directory if the input is something
    /// without a path (such as standard input).
    #[default]
    Default,

    /// Files should land in this particular directory.
    Path(PathBuf),

    /// Files will not be written to disk. The code running the engine should
    /// examine the memory layer of the I/O stack to obtain the output files.
    Nowhere,
}
204
/// The subset of the driver state that is captured when running a C/C++ engine.
///
/// The main purpose of this type is to implement the [`DriverHooks`] trait,
/// which is defined by the `tectonic_bridge_core` crate and defines the
/// interface that the C/C++ processing engines can use to access the outside
/// world. While these engines are running, they hold a mutable reference to
/// these data, so it is helpful to separate them out into a sub-structure of
/// the larger [`ProcessingSession`] type.
///
/// Due to the needs of the C/C++ engines, this means that [`BridgeState`] must
/// hold the fully-prepared I/O stack information as well as the "event"
/// information that helps the driver implement the rerun logic.
struct BridgeState {
    /// I/O for the primary input source. This is boxed since it can come
    /// from different sources: maybe a file, maybe an in-memory buffer, etc.
    primary_input: Box<dyn IoProvider>,

    /// I/O for the main backing bundle. This is boxed since there are several
    /// different bundle implementations that might be used at runtime.
    bundle: Box<dyn Bundle>,

    /// Memory buffering for files written during processing.
    mem: MemoryIo,

    /// The main filesystem backing for input files in the project.
    filesystem: FilesystemIo,

    /// Extra paths we search through for files. They are consulted in order,
    /// after the main filesystem and before the bundle.
    extra_search_paths: Vec<FilesystemIo>,

    /// Additional filesystem backing used if "shell escape" functionality is
    /// activated. If None, we take that to mean that shell-escape is
    /// disallowed. We have to use a persistent filesystem directory for this
    /// since some packages perform a whole series of shell-escape operations
    /// that assume continuity from one to the next.
    shell_escape_work: Option<FilesystemIo>,

    /// I/O for saving any generated format files.
    format_cache: FormatCache,

    /// Possible redirection of "standard output" writes to actual standard
    /// output.
    genuine_stdout: Option<GenuineStdoutIo>,

    // BEGIN AWARE REPORTS PATCH
    /// When set, output files stream directly to the filesystem instead of
    /// buffering in `mem`, giving an O(1) memory footprint independent of
    /// document size (the XDV and PDF of a large report otherwise sit in
    /// RAM in their entirety). Tried before all other providers for output
    /// opens, except while in format-file generation mode; read-back of
    /// written files is served by `filesystem`, which shares the same root.
    disk_outputs: Option<FilesystemIo>,
    // END AWARE REPORTS PATCH

    /// A possible alternative "primary input" when generating format files. If
    /// Some(), we're in format-file generation mode; in most cases this is
    /// None.
    format_primary: Option<BufferedPrimaryIo>,

    /// The I/O events that occurred while processing, keyed by the file name
    /// that was passed to the open call. Standard output is tracked under the
    /// empty-string key.
    events: HashMap<String, FileSummary>,
}
267
268impl BridgeState {
269    /// Tell the IoProvider implementation of the bridge state to enter "format
270    /// mode", in which the "primary input" is fixed, based on the requested
271    /// format file name, and filesystem I/O is bypassed.
272    fn enter_format_mode(&mut self, format_file_name: &str) {
273        self.format_primary = Some(BufferedPrimaryIo::from_text(format!(
274            "\\input {format_file_name}"
275        )));
276    }
277
278    /// Leave "format mode".
279    fn leave_format_mode(&mut self) {
280        self.format_primary = None;
281    }
282
283    /// Invoke an external tool as a pass in the processing pipeline.
284    fn external_tool_pass(
285        &mut self,
286        tool: &ExternalToolPass,
287        status: &mut dyn StatusBackend,
288    ) -> Result<()> {
289        status.note_highlighted("Running external tool ", &tool.argv[0], " ...");
290
291        // Process the command arguments. Filenames appearing in the arguments
292        // are treated as "requirements" that will be placed in the tool's
293        // working directory.
294
295        let mut cmd = Command::new(&tool.argv[0]);
296        let mut read_files = tool.extra_requires.clone();
297
298        {
299            let mem_files = &*self.mem.files.borrow();
300
301            for arg in &tool.argv[1..] {
302                cmd.arg(arg);
303
304                if mem_files.contains_key(arg) {
305                    read_files.insert(arg.to_owned());
306                }
307            }
308        }
309
310        // Now that we're validated, write those files to disk so that the tool
311        // can actually use them.
312
313        let tempdir = ctry!(
314            tempfile::Builder::new().tempdir();
315            "can't create temporary directory for external tool"
316        );
317
318        {
319            for name in &read_files {
320                // If a relative parent is found in the file to open, this fn
321                // does not properly handle that. Thus, throw an error.
322                if name.contains("../") {
323                    return Err(errmsg!(
324                        "relative parent paths are not supported for the \
325                        external tool. Got path `{}`.",
326                        name
327                    ));
328                }
329
330                let mut ih = ctry!(
331                    self.input_open_name(name, status).must_exist();
332                    "can't open path `{}`", name
333                );
334
335                // If the input path is absolute, we don't need to create a
336                // version in the tempdir, and in fact the current
337                // implementation below will blow away the input file. However,
338                // we do want to try to open the input so that it gets
339                // registered with the I/O tracking system.
340
341                let path = Path::new(name);
342                if path.is_absolute() {
343                    continue;
344                }
345
346                let tool_path = tempdir.path().join(name);
347                let tool_parent = tool_path.parent().unwrap();
348
349                if tool_parent != tempdir.path() {
350                    ctry!(
351                        std::fs::create_dir_all(tool_parent);
352                        "failed to create sub directory `{}`", tool_parent.display()
353                    );
354                }
355                let mut f = ctry!(
356                    File::create(&tool_path);
357                    "failed to create file `{}`", tool_path.display()
358                );
359                ctry!(
360                    std::io::copy(&mut ih, &mut f);
361                    "failed to write file `{}`", tool_path.display()
362                );
363            }
364        }
365
366        // Now we can actually run the command.
367
368        let output = cmd.current_dir(tempdir.path()).output()?;
369
370        if let Some(0) = output.status.code() {
371        } else {
372            tt_error!(
373                status,
374                "the external tool exited with an error code; its stdout was:\n"
375            );
376            status.dump_error_logs(&output.stdout[..]);
377            tt_error!(status, "its stderr was:\n");
378            status.dump_error_logs(&output.stderr[..]);
379
380            return if let Some(n) = output.status.code() {
381                Err(errmsg!("the external tool exited with error code {}", n))
382            } else {
383                Err(errmsg!("the external tool was terminated by a signal"))
384            };
385        }
386
387        // Search for any files that the tool created, and import them into the
388        // memory layer.
389
390        for entry in std::fs::read_dir(tempdir.path())? {
391            let entry = entry?;
392
393            if !entry.file_type()?.is_file() {
394                continue;
395            }
396
397            if let Some(basename) = entry.file_name().to_str() {
398                if !self.mem.files.borrow().contains_key(basename) {
399                    let path = entry.path();
400                    let mut data = Vec::new();
401
402                    let mut f = ctry!(
403                        File::open(&path);
404                        "failed to open tool-created file `{}`", path.display()
405                    );
406                    ctry!(
407                        f.read_to_end(&mut data);
408                        "failed to read tool-created file `{}`", path.display()
409                    );
410
411                    self.mem.create_entry(basename, data);
412                    self.events.insert(
413                        basename.to_owned(),
414                        FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
415                    );
416                }
417            }
418        }
419
420        // Mark the input files as having been read, and we're done.
421
422        for name in &read_files {
423            let summ = self.events.get_mut(name).unwrap();
424            summ.access_pattern = match summ.access_pattern {
425                AccessPattern::Written => AccessPattern::WrittenThenRead,
426                c => c, // identity mapping makes sense for remaining options
427            };
428        }
429
430        Ok(())
431    }
432
433    // Get the names of all intermediate files which are generated from
434    // previous passes.
435    fn get_intermediate_file_names(&self) -> Vec<String> {
436        // Currently, we only consider files in memory as intermediate files.
437        return self.mem.files.borrow().keys().cloned().collect();
438    }
439}
440
/// Try one provider in an I/O cascade.
///
/// Expands to a call of the method `$inner` on `$provider`. If the result is
/// anything other than `OpenResult::NotAvailable` (that is, a success or an
/// error), it is `return`ed from the enclosing function or closure;
/// otherwise execution falls through so that the next provider can be tried.
///
/// Because the expansion contains a `return`, this macro may only be used
/// where the enclosing function or closure returns the matching `OpenResult`
/// type.
macro_rules! bridgestate_ioprovider_try {
    ($provider:expr, $($inner:tt)+) => {
        let r = $provider.$($inner)+;
        match r {
            OpenResult::NotAvailable => {},
            _ => return r,
        };
    }
}
450
/// Run an I/O request through the full provider stack of a `BridgeState`.
///
/// `$self` is the bridge state and `$inner` is the `IoProvider` method call to
/// attempt. Providers are tried in this order, and the first one that returns
/// something other than `OpenResult::NotAvailable` wins:
///
/// 1. `genuine_stdout`, if set;
/// 2. `format_primary` if in format mode, otherwise `primary_input`;
/// 3. the memory layer `mem`;
/// 4. unless in format mode: `filesystem`, then `shell_escape_work` if set,
///    then each of `extra_search_paths` in order;
/// 5. the bundle;
/// 6. the format cache.
///
/// The expansion always ends in a `return` (of `OpenResult::NotAvailable` if
/// nothing matched), so it must be the last thing in the enclosing function
/// or closure.
macro_rules! bridgestate_ioprovider_cascade {
    ($self:ident, $($inner:tt)+) => {
        if let Some(ref mut p) = $self.genuine_stdout {
            bridgestate_ioprovider_try!(p, $($inner)+);
        }

        // See enter_format_mode above. If creating a format file, disable local
        // filesystem I/O.
        let use_fs = if let Some(ref mut p) = $self.format_primary {
            bridgestate_ioprovider_try!(p, $($inner)+);
            false
        } else {
            bridgestate_ioprovider_try!($self.primary_input, $($inner)+);
            true
        };

        bridgestate_ioprovider_try!($self.mem, $($inner)+);

        if use_fs {
            bridgestate_ioprovider_try!($self.filesystem, $($inner)+);

            // With this ordering, we are preventing files created by
            // shell-escape commands from overwriting/replacing source files.
            // This seems very much like the behavior we want, unless there are
            // some freaky shell-escape uses that depend on this behavior.
            if let Some(ref mut p) = $self.shell_escape_work {
                bridgestate_ioprovider_try!(p, $($inner)+);
            }

            // Extra search paths. This has higher priority than bundles but lower than current
            // working dir to support the use case of overriding broken bundles (see issue #816).
            for fsio in $self.extra_search_paths.iter_mut() {
                bridgestate_ioprovider_try!(fsio, $($inner)+);
            }
        }

        bridgestate_ioprovider_try!($self.bundle.as_ioprovider_mut(), $($inner)+);
        bridgestate_ioprovider_try!($self.format_cache, $($inner)+);

        return OpenResult::NotAvailable;
    }
}
493
494impl IoProvider for BridgeState {
495    fn output_open_name(&mut self, name: &str) -> OpenResult<OutputHandle> {
496        let r = (|| {
497            // BEGIN AWARE REPORTS PATCH
498            // Stream outputs to disk only for normal passes. During format
499            // generation (format_primary is Some) the engine dumps the .fmt as
500            // an output; make_format_pass harvests it from the in-memory layer
501            // (self.mem.files) to populate the format cache. Diverting it to
502            // disk_outputs would leave the cache empty and break cold-start
503            // format builds ("cannot open the format file").
504            if self.format_primary.is_none() {
505                if let Some(ref mut p) = self.disk_outputs {
506                    bridgestate_ioprovider_try!(p, output_open_name(name));
507                }
508            }
509            // END AWARE REPORTS PATCH
510            bridgestate_ioprovider_cascade!(self, output_open_name(name));
511        })();
512
513        if let OpenResult::Ok(_) = r {
514            if let Some(summ) = self.events.get_mut(name) {
515                summ.access_pattern = match summ.access_pattern {
516                    AccessPattern::Read => AccessPattern::ReadThenWritten,
517                    c => c, // identity mapping makes sense for remaining options
518                };
519            } else {
520                self.events.insert(
521                    name.to_owned(),
522                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
523                );
524            }
525        }
526
527        r
528    }
529
530    fn output_open_stdout(&mut self) -> OpenResult<OutputHandle> {
531        let r = (|| {
532            bridgestate_ioprovider_cascade!(self, output_open_stdout());
533        })();
534
535        // Life is easier if we track stdout in the same way that we do other
536        // output files.
537
538        if let OpenResult::Ok(_) = r {
539            if let Some(summ) = self.events.get_mut("") {
540                summ.access_pattern = match summ.access_pattern {
541                    AccessPattern::Read => AccessPattern::ReadThenWritten,
542                    c => c, // identity mapping makes sense for remaining options
543                };
544            } else {
545                self.events.insert(
546                    String::from(""),
547                    FileSummary::new(AccessPattern::Written, InputOrigin::NotInput),
548                );
549            }
550        }
551
552        r
553    }
554
555    fn input_open_name(
556        &mut self,
557        name: &str,
558        status: &mut dyn StatusBackend,
559    ) -> OpenResult<InputHandle> {
560        match self.input_open_name_with_abspath(name, status) {
561            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
562            OpenResult::Err(e) => OpenResult::Err(e),
563            OpenResult::NotAvailable => OpenResult::NotAvailable,
564        }
565    }
566
567    fn input_open_name_with_abspath(
568        &mut self,
569        name: &str,
570        status: &mut dyn StatusBackend,
571    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
572        let r = (|| {
573            bridgestate_ioprovider_cascade!(self, input_open_name_with_abspath(name, status));
574        })();
575
576        match r {
577            OpenResult::Ok((ref ih, ref _path)) => {
578                if let Some(summ) = self.events.get_mut(name) {
579                    summ.access_pattern = match summ.access_pattern {
580                        AccessPattern::Written => AccessPattern::WrittenThenRead,
581                        c => c, // identity mapping makes sense for remaining options
582                    };
583                } else {
584                    self.events.insert(
585                        name.to_owned(),
586                        FileSummary::new(AccessPattern::Read, ih.origin()),
587                    );
588                }
589            }
590
591            OpenResult::NotAvailable => {
592                // For the purposes of file access pattern tracking, an attempt to
593                // open a nonexistent file counts as a read of a zero-size file. I
594                // don't see how such a file could have previously been written, but
595                // let's use the full update logic just in case.
596
597                if let Some(summ) = self.events.get_mut(name) {
598                    summ.access_pattern = match summ.access_pattern {
599                        AccessPattern::Written => AccessPattern::WrittenThenRead,
600                        c => c, // identity mapping makes sense for remaining options
601                    };
602                } else {
603                    // Unlike other cases, here we need to fill in the read_digest. `None`
604                    // is not an appropriate value since, if the file is written and then
605                    // read again later, the `None` will be overwritten; but what matters
606                    // is the contents of the file the very first time it was read.
607                    let mut fs = FileSummary::new(AccessPattern::Read, InputOrigin::NotInput);
608                    fs.read_digest = Some(DigestData::of_nothing());
609                    self.events.insert(name.to_owned(), fs);
610                }
611            }
612
613            OpenResult::Err(_) => {}
614        }
615
616        r
617    }
618
619    fn input_open_primary(&mut self, status: &mut dyn StatusBackend) -> OpenResult<InputHandle> {
620        match self.input_open_primary_with_abspath(status) {
621            OpenResult::Ok((ih, _path)) => OpenResult::Ok(ih),
622            OpenResult::Err(e) => OpenResult::Err(e),
623            OpenResult::NotAvailable => OpenResult::NotAvailable,
624        }
625    }
626
    /// Open the primary input, also reporting its path if one is known.
    ///
    /// The request is run through the full provider cascade. Unlike the
    /// named-file opens, nothing is recorded in `events` here.
    fn input_open_primary_with_abspath(
        &mut self,
        status: &mut dyn StatusBackend,
    ) -> OpenResult<(InputHandle, Option<PathBuf>)> {
        // The cascade macro ends in a `return`, so it forms the whole body.
        bridgestate_ioprovider_cascade!(self, input_open_primary_with_abspath(status));
    }
633
634    fn input_open_format(
635        &mut self,
636        name: &str,
637        status: &mut dyn StatusBackend,
638    ) -> OpenResult<InputHandle> {
639        let r = (|| {
640            bridgestate_ioprovider_cascade!(self, input_open_format(name, status));
641        })();
642
643        if let OpenResult::Ok(ref ih) = r {
644            if let Some(summ) = self.events.get_mut(name) {
645                summ.access_pattern = match summ.access_pattern {
646                    AccessPattern::Written => AccessPattern::WrittenThenRead,
647                    c => c, // identity mapping makes sense for remaining options
648                };
649            } else {
650                self.events.insert(
651                    name.to_owned(),
652                    FileSummary::new(AccessPattern::Read, ih.origin()),
653                );
654            }
655        }
656
657        r
658    }
659}
660
661impl DriverHooks for BridgeState {
    /// Get the I/O provider that the engines should use. The bridge state is
    /// its own provider, so this hands back `self`.
    fn io(&mut self) -> &mut dyn IoProvider {
        self
    }
665
666    fn event_output_closed(&mut self, name: String, digest: DigestData) {
667        let summ = self
668            .events
669            .get_mut(&name)
670            .expect("closing file that wasn't opened?");
671        summ.write_digest = Some(digest);
672    }
673
674    fn event_input_closed(
675        &mut self,
676        name: String,
677        digest: Option<DigestData>,
678        _status: &mut dyn StatusBackend,
679    ) {
680        let summ = self
681            .events
682            .get_mut(&name)
683            .expect("closing file that wasn't opened?");
684
685        // It's what was in the file the *first* time that it was read that
686        // matters, so don't replace the read digest if it's already got one.
687
688        if summ.read_digest.is_none() {
689            summ.read_digest = digest;
690        }
691    }
692
693    fn sysrq_shell_escape(
694        &mut self,
695        command: &str,
696        status: &mut dyn StatusBackend,
697    ) -> StdResult<(), SystemRequestError> {
698        #[cfg(unix)]
699        const SHELL: &[&str] = &["sh", "-c"];
700
701        #[cfg(windows)]
702        const SHELL: &[&str] = &["cmd.exe", "/c"];
703
704        // Write any TeX-created files in the memory cache to the shell-escape
705        // working directory, since the shell-escape program may need to use
706        // them. (This is the case for `minted`.) We basically just hope that
707        // nothing will want to access the actual TeX source, which will live in
708        // a different directory.
709        //
710        // This is suboptimally slow since we'll be rewriting the same files
711        // repeatedly for repeated shell-escape invocations, but I don't feel
712        // like optimizing that I/O right now. Shell-escape is a gnarly hack
713        // anyway!
714
715        if let Some(work) = self.shell_escape_work.as_ref() {
716            for (name, file) in &*self.mem.files.borrow() {
717                // If it's in the `mem` backend, it's of interest here ...
718                // unless it's stdout.
719                if name == self.mem.stdout_key() {
720                    continue;
721                }
722
723                let real_path = work.root().join(name);
724                if let Some(prefix) = real_path.parent() {
725                    std::fs::create_dir_all(prefix).map_err(|e| {
726                        tt_error!(status, "failed to create sub directory `{}`", prefix.display(); e.into());
727                        SystemRequestError::Failed
728                    })?;
729                }
730                let mut f = File::create(&real_path).map_err(|e| {
731                    tt_error!(status, "failed to create file `{}`", real_path.display(); e.into());
732                    SystemRequestError::Failed
733                })?;
734                f.write_all(&file.data).map_err(|e| {
735                    tt_error!(status, "failed to write file `{}`", real_path.display(); e.into());
736                    SystemRequestError::Failed
737                })?;
738            }
739
740            // Now we can actually run the command.
741
742            tt_note!(status, "running shell command: `{}`", command);
743
744            match Command::new(SHELL[0])
745                .args(&SHELL[1..])
746                .arg(command)
747                .current_dir(work.root())
748                .status()
749            {
750                Ok(s) => match s.code() {
751                    Some(0) => Ok(()),
752                    Some(n) => {
753                        tt_warning!(status, "command exited with error code {}", n);
754                        Err(SystemRequestError::Failed)
755                    }
756                    None => {
757                        tt_warning!(status, "command was terminated by signal");
758                        Err(SystemRequestError::Failed)
759                    }
760                },
761                Err(err) => {
762                    tt_warning!(status, "failed to run command"; err.into());
763                    Err(SystemRequestError::Failed)
764                }
765            }
766
767            // That's it! We shouldn't clean up here, because there might be
768            // multiple shell-escapes that build up in sequence, and any new
769            // files created by the shell-escape command will be picked up by
770            // the filesystem I/O.
771        } else {
772            // No shell-escape work directory. This "shouldn't happen" but means
773            // that shell-escape is supposed to be disabled anyway!
774            tt_error!(
775                status,
776                "the engine requested a shell-escape invocation but it's currently disabled"
777            );
778            Err(SystemRequestError::NotAllowed)
779        }
780    }
781}
782
/// Possible modes for handling shell-escape functionality.
///
/// The default is [`ShellEscapeMode::Defaulted`].
#[derive(Clone, Debug, Default, Eq, PartialEq)]
enum ShellEscapeMode {
    /// "Default" mode: shell-escape is disabled, unless it's been turned on in
    /// the unstable options, in which case it will be allowed through a
    /// temporary directory.
    #[default]
    Defaulted,

    /// Shell-escape is disabled, overriding any unstable-option setting.
    Disabled,

    /// Shell-escape is enabled, using a temporary work directory managed by the
    /// processing session. The work directory will be deleted after processing
    /// completes.
    TempDir,

    /// Shell-escape is enabled, using some other work directory that is managed
    /// externally. The processing session won't delete this directory.
    ExternallyManagedDir(PathBuf),
}
804
/// A custom extra pass that invokes an external tool.
///
/// This is bad for reproducibility but comes in handy.
#[derive(Debug)]
struct ExternalToolPass {
    // NOTE(review): presumably the tool's command line (program followed by
    // its arguments); the code that consumes it is outside this view.
    argv: Vec<String>,
    // NOTE(review): presumably names of additional files the tool needs to
    // have available; confirm against the code that runs the pass.
    extra_requires: HashSet<String>,
}
813
/// A builder-style interface for creating a [`ProcessingSession`].
///
/// This uses standard builder patterns. The `Default` implementation defaults
/// to restrictive security settings that disable all known-insecure features
/// that could be abused by untrusted inputs. Use
/// [`ProcessingSessionBuilder::new_with_security()`] in order to have the
/// option to enable potentially-insecure features such as shell-escape.
#[derive(Default)]
pub struct ProcessingSessionBuilder {
    /// Security settings; these gate shell-escape and extra search paths.
    security: SecuritySettings,
    /// Where the primary input comes from: a path, a buffer, or stdin.
    primary_input: PrimaryInputMode,
    /// Name of the main input file as told to TeX. Mandatory.
    tex_input_name: Option<String>,
    /// Where output files are written, if anywhere.
    output_dest: OutputDestination,
    /// Root directory used for finding files on disk.
    filesystem_root: Option<PathBuf>,
    /// Name of the `.fmt` file used to initialize the TeX engine. Mandatory.
    format_name: Option<String>,
    /// Directory holding cached format files; `None` falls back to the
    /// filesystem root.
    format_cache_path: Option<PathBuf>,
    /// The type of output to create.
    output_format: OutputFormat,
    /// If set, a Makefile is written out at this path.
    makefile_output_path: Option<PathBuf>,
    /// Paths that the filesystem I/O layers pretend do not exist.
    hidden_input_paths: HashSet<PathBuf>,
    /// Which kind of pass the session should run.
    pass: PassSetting,
    /// Exact number of TeX reruns; `None` means auto-detect.
    reruns: Option<usize>,
    /// Whether engine stdout is forwarded to the real stdout.
    print_stdout: bool,
    /// The bundle providing support files. Mandatory.
    bundle: Option<Box<dyn Bundle>>,
    /// Whether intermediate files are written out to the filesystem.
    keep_intermediates: bool,
    /// Whether outputs are streamed to the filesystem root instead of being
    /// buffered in memory.
    outputs_to_filesystem: bool,
    // BEGIN AWARE REPORTS PATCH
    /// Page-origin offset (x, y) in bp passed to the xdvipdfmx pass. When
    /// `None`, the engine keeps its default 1-inch origin. Aware Reports sets
    /// `(0.0, 0.0)` to place the TeX origin at the page corner.
    pdf_origin_offset: Option<(f64, f64)>,
    // END AWARE REPORTS PATCH
    /// Whether `.log` and `.blg` files are written out to the filesystem.
    keep_logs: bool,
    /// Whether SyncTeX information is generated.
    synctex: bool,
    /// Date and time of the session; `None` becomes the UNIX epoch when the
    /// session is created.
    build_date: Option<SystemTime>,
    /// Unstable options (extra search paths, shell-escape settings, ...).
    unstables: UnstableOptions,
    /// Requested shell-escape handling; resolved when the session is created.
    shell_escape_mode: ShellEscapeMode,
    /// HTML mode: path at which to emit an asset specification file.
    html_assets_spec_path: Option<String>,
    /// HTML mode: precomputed asset specification to use.
    html_precomputed_assets: Option<AssetSpecification>,
    /// HTML mode: suppress creation of templated output files. Stored negated
    /// so that the derived `Default` means "emit".
    html_do_not_emit_files: bool,
    /// HTML mode: suppress creation of asset files. Stored negated so that the
    /// derived `Default` means "emit".
    html_do_not_emit_assets: bool,
}
855
856impl ProcessingSessionBuilder {
857    /// Create a new builder with customized security settings.
858    pub fn new_with_security(security: SecuritySettings) -> Self {
859        ProcessingSessionBuilder {
860            security,
861            ..Default::default()
862        }
863    }
864
865    /// Sets the path to the primary input file.
866    ///
867    /// If a primary input path is not specified, we will default to reading it from stdin.
868    pub fn primary_input_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
869        self.primary_input = PrimaryInputMode::Path(p.as_ref().to_owned());
870        self
871    }
872
873    /// Sets the primary input to be a caller-specified buffer.
874    ///
875    /// If neither this nor a primary input path is specified, we will default
876    /// to reading the primary input from stdin.
877    pub fn primary_input_buffer(&mut self, buf: &[u8]) -> &mut Self {
878        self.primary_input = PrimaryInputMode::Buffer(buf.to_owned());
879        self
880    }
881
882    /// Sets the name of the main input file.
883    ///
884    /// This value will be used to infer the names of the output files; for example, if
885    /// `tex_input_name` is set to `"texput.tex"` then the pdf output file will be `"texput.pdf"`.
886    /// As such, this parameter is mandatory, even if the real input is coming from stdin (if it is
887    /// not provided, [`ProcessingSessionBuilder::create`] will panic).
888    pub fn tex_input_name(&mut self, s: &str) -> &mut Self {
889        self.tex_input_name = Some(s.to_owned());
890        self
891    }
892
893    /// Set the directory that serves as the root for finding files on disk.
894    ///
895    /// If unspecified, and there is a primary input file, the directory
896    /// containing that file will serve as the filesystem root. Otherwise, it is
897    /// set to the current directory.
898    pub fn filesystem_root<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
899        self.filesystem_root = Some(p.as_ref().to_owned());
900        self
901    }
902
903    /// A path to the directory where output files should be created.
904    ///
905    /// This will default to the directory containing `primary_input_path`, or
906    /// the current working directory if the primary input is coming from
907    /// stdin.
908    pub fn output_dir<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
909        self.output_dest = OutputDestination::Path(p.as_ref().to_owned());
910        self
911    }
912
913    /// Indicate that output files should not be written to disk.
914    ///
915    /// By default, output files will be written to the directory containing
916    /// `primary_input_path`, or the current working directory if the primary
917    /// input is coming from stdin.
918    pub fn do_not_write_output_files(&mut self) -> &mut Self {
919        self.output_dest = OutputDestination::Nowhere;
920        self
921    }
922
923    /// The name of the `.fmt` file used to initialize the TeX engine.
924    ///
925    /// This file does not necessarily have to exist already; it will be created
926    /// if it doesn't. This parameter is mandatory (if it is not provided,
927    /// [`ProcessingSessionBuilder::create`] will panic).
928    pub fn format_name(&mut self, p: &str) -> &mut Self {
929        self.format_name = Some(p.to_owned());
930        self
931    }
932
933    /// Sets the path to the format file cache.
934    ///
935    /// This is used to, well, cache format files, which are generated as
936    /// needed from the backing bundle. Defaults to the same directory as the
937    /// input file, or PWD if the input is a non-file (such as standard
938    /// input).
939    pub fn format_cache_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
940        self.format_cache_path = Some(p.as_ref().to_owned());
941        self
942    }
943
944    /// The type of output to create.
945    pub fn output_format(&mut self, f: OutputFormat) -> &mut Self {
946        self.output_format = f;
947        self
948    }
949
950    /// If set, a makefile will be written out at the given path.
951    pub fn makefile_output_path<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
952        self.makefile_output_path = Some(p.as_ref().to_owned());
953        self
954    }
955
956    /// Which kind of pass should the `ProcessingSession` run? Defaults to `PassSetting::Default`
957    /// (duh).
958    pub fn pass(&mut self, p: PassSetting) -> &mut Self {
959        self.pass = p;
960        self
961    }
962
963    /// If set, and if the pass is set to `PassSetting::Default`, the TeX engine will be re-run
964    /// *exactly* this many times.
965    ///
966    /// If `reruns` is unset, we will auto-detect how many times the TeX engine needs to be re-run.
967    pub fn reruns(&mut self, r: usize) -> &mut Self {
968        self.reruns = Some(r);
969        self
970    }
971
972    /// If set to `true`, stdout from the TeX engine will be forwarded to actual stdout. (By
973    /// default, it will be suppressed.)
974    pub fn print_stdout(&mut self, p: bool) -> &mut Self {
975        self.print_stdout = p;
976        self
977    }
978
979    /// Marks a path as hidden, meaning that the TeX engine will pretend that it doesn't exist in
980    /// the filesystem.
981    pub fn hide<P: AsRef<Path>>(&mut self, p: P) -> &mut Self {
982        self.hidden_input_paths.insert(p.as_ref().to_owned());
983        self
984    }
985
986    /// Sets the bundle, which the various engines will use for finding style files, font files,
987    /// etc.
988    pub fn bundle(&mut self, b: Box<dyn Bundle>) -> &mut Self {
989        self.bundle = Some(b);
990        self
991    }
992
993    /// If set to `true`, various intermediate files will be written out to the filesystem.
994    pub fn keep_intermediates(&mut self, k: bool) -> &mut Self {
995        self.keep_intermediates = k;
996        self
997    }
998
999    // BEGIN AWARE REPORTS PATCH
1000    /// If set to `true`, output files (XDV, PDF, logs, aux files) are
1001    /// streamed directly to the filesystem root as the engines write them,
1002    /// instead of being buffered in memory until the session ends. This
1003    /// keeps the session's memory footprint independent of the document
1004    /// size. Files land in the filesystem root regardless of
1005    /// `keep_intermediates`.
1006    pub fn outputs_to_filesystem(&mut self, k: bool) -> &mut Self {
1007        self.outputs_to_filesystem = k;
1008        self
1009    }
1010
1011    /// Set the PDF page-origin offset (x, y), in PostScript points (bp),
1012    /// applied during the xdvipdfmx pass. The engine's default is 1 inch
1013    /// (72.0, 72.0), the standard TeX origin; pass `(0.0, 0.0)` to place the
1014    /// TeX origin at the physical page corner.
1015    pub fn pdf_origin_offset(&mut self, x: f64, y: f64) -> &mut Self {
1016        self.pdf_origin_offset = Some((x, y));
1017        self
1018    }
1019    // END AWARE REPORTS PATCH
1020
1021    /// If set to `true`, '.log' and '.blg' files will be written out to the filesystem.
1022    pub fn keep_logs(&mut self, k: bool) -> &mut Self {
1023        self.keep_logs = k;
1024        self
1025    }
1026
1027    /// If set to `true`, tex files will be compiled using synctex information.
1028    pub fn synctex(&mut self, s: bool) -> &mut Self {
1029        self.synctex = s;
1030        self
1031    }
1032
1033    /// Sets the date and time of the processing session.
1034    /// See `TexEngine::build_date` for mor information.
1035    pub fn build_date(&mut self, date: SystemTime) -> &mut Self {
1036        self.build_date = Some(date);
1037        self
1038    }
1039
1040    /// Configures the date and time of the processing session from the environment:
1041    /// If `SOURCE_DATE_EPOCH` is set, it's used as the build date.
1042    /// If `force_deterministic` is set, we fall back to UNIX_EPOCH.
1043    /// Otherwise, we use the current system time.
1044    pub fn build_date_from_env(&mut self, force_deterministic: bool) -> &mut Self {
1045        let build_date_str = std::env::var("SOURCE_DATE_EPOCH").ok();
1046        let build_date = match (force_deterministic, build_date_str) {
1047            (_, Some(s)) => {
1048                let epoch = s
1049                    .parse::<u64>()
1050                    .expect("invalid SOURCE_DATE_EPOCH (not a number)");
1051
1052                SystemTime::UNIX_EPOCH
1053                    .checked_add(Duration::from_secs(epoch))
1054                    .expect("time overflow")
1055            }
1056            (true, None) => SystemTime::UNIX_EPOCH,
1057            (false, None) => SystemTime::now(),
1058        };
1059        self.build_date(build_date)
1060    }
1061
1062    /// Loads unstable options into the processing session
1063    pub fn unstables(&mut self, opts: UnstableOptions) -> &mut Self {
1064        self.unstables = opts;
1065        self
1066    }
1067
1068    /// Enable "shell escape" commands in the engines, and use the specified
1069    /// directory for shell-escape work. The caller is responsible for the
1070    /// creation and/or destruction of this directory. The default is to
1071    /// disable shell-escape unless the [`UnstableOptions`] say otherwise,
1072    /// in which case a driver-managed temporary directory will be used.
1073    pub fn shell_escape_with_work_dir<P: AsRef<Path>>(&mut self, path: P) -> &mut Self {
1074        if self.security.allow_shell_escape() {
1075            self.shell_escape_mode =
1076                ShellEscapeMode::ExternallyManagedDir(path.as_ref().to_owned());
1077        }
1078        self
1079    }
1080
1081    /// Forcibly enable shell-escape mode with a temporary directory, overriding
1082    /// any [`UnstableOptions`] settings. The default is to disable shell-escape
1083    /// unless the [`UnstableOptions`] say otherwise, in which case a
1084    /// driver-managed temporary directory will be used.
1085    pub fn shell_escape_with_temp_dir(&mut self) -> &mut Self {
1086        if self.security.allow_shell_escape() {
1087            self.shell_escape_mode = ShellEscapeMode::TempDir;
1088        }
1089        self
1090    }
1091
1092    /// Forcibly disable shell-escape mode, overriding any [`UnstableOptions`]
1093    /// settings. The default is to disable shell-escape unless the
1094    /// [`UnstableOptions`] say otherwise, in which case a driver-managed
1095    /// temporary directory will be used.
1096    pub fn shell_escape_disabled(&mut self) -> &mut Self {
1097        self.shell_escape_mode = ShellEscapeMode::Disabled;
1098        self
1099    }
1100
1101    /// When using HTML mode, emit an asset specification file instead of actual
1102    /// asset files.
1103    ///
1104    /// "Assets" are files like fonts and images that accompany the HTML output
1105    /// generated during processing. By default, these are emitted during
1106    /// processing. If this method is called, the assets will *not* be created.
1107    /// Instead, an "asset specification" file will be emitted to the given
1108    /// output path. This specification file contains the information needed to
1109    /// generate the assets upon a later invocation. Asset specification files
1110    /// can be merged, allowing the results of multiple separate TeX
1111    /// compilations to be synthesized into one HTML output tree.
1112    ///
1113    /// If the build does not use HTML mode, this setting has no effect.
1114    pub fn html_assets_spec_path<S: ToString>(&mut self, path: S) -> &mut Self {
1115        self.html_assets_spec_path = Some(path.to_string());
1116        self
1117    }
1118
1119    /// In HTML mode, use a precomputed asset specification.
1120    ///
1121    /// "Assets" are files like fonts and images that accompany the HTML output
1122    /// generated during processing. By default, the engine gathers these during
1123    /// processing and emits them at the end. After this method is used,
1124    /// however, it will generate HTML outputs assuming the information given in
1125    /// the asset specification given here. If the input calls for new assets or
1126    /// different options inconsistent with the specification, processing will
1127    /// abort with an error.
1128    ///
1129    /// The purpose of this mode is to allow for a unified set of assets to be
1130    /// created from multiple independent runs of the SPX-to-HTML stage. First,
1131    /// the different inputs should be processed independently, and their
1132    /// individual assets should saved. These should then be merged. Then the
1133    /// inputs should be reprocessed, all using the merged asset specification.
1134    /// In one — but only one — of these sessions, the assets should actually be
1135    /// emitted.
1136    pub fn html_precomputed_assets(&mut self, assets: AssetSpecification) -> &mut Self {
1137        self.html_precomputed_assets = Some(assets);
1138        self
1139    }
1140
1141    /// Set whether templated outputs should be created during HTML processing.
1142    ///
1143    /// This mode can be useful if you want to analyze what *would* be created
1144    /// during HTML processing without actually creating the files.
1145    pub fn html_emit_files(&mut self, do_emit: bool) -> &mut Self {
1146        self.html_do_not_emit_files = !do_emit;
1147        self
1148    }
1149
1150    /// Set whether supporting asset files should be created during HTML
1151    /// processing.
1152    ///
1153    /// This mode can be useful if you want to analyze what *would* be created
1154    /// during HTML processing without actually creating the files. If you call
1155    /// [`Self::html_assets_spec_path`], this setting will ignored, and no
1156    /// assets will be emitted to disk.
1157    pub fn html_emit_assets(&mut self, do_emit: bool) -> &mut Self {
1158        self.html_do_not_emit_assets = !do_emit;
1159        self
1160    }
1161
1162    /// Creates a `ProcessingSession`.
1163    pub fn create(self, status: &mut dyn StatusBackend) -> Result<ProcessingSession> {
1164        // First, work on the "bridge state", which gathers the subset of our
1165        // state that has to be held in a mutable reference while running the
1166        // C/C++ engines:
1167
1168        let mut bundle = self.bundle.expect("a bundle must be specified");
1169
1170        let mut filesystem_root = self.filesystem_root.unwrap_or_default();
1171
1172        let (pio, primary_input_path, default_output_path) = match self.primary_input {
1173            PrimaryInputMode::Path(p) => {
1174                // Set the filesystem root (that's the directory we'll search
1175                // for files in) to be the same directory as the main input
1176                // file.
1177                let parent = match p.parent() {
1178                    Some(parent) => parent.to_owned(),
1179                    None => {
1180                        return Err(errmsg!(
1181                            "can't figure out a parent directory for input path \"{}\"",
1182                            p.display()
1183                        ));
1184                    }
1185                };
1186
1187                filesystem_root.clone_from(&parent);
1188                let pio: Box<dyn IoProvider> = Box::new(FilesystemPrimaryInputIo::new(&p));
1189                (pio, Some(p), parent)
1190            }
1191
1192            PrimaryInputMode::Stdin => {
1193                // If the main input file is stdin, we don't set a filesystem
1194                // root, which means we'll default to the current working
1195                // directory.
1196                //
1197                // Note that, due to the expected need to rerun the engine
1198                // multiple times, we'll need to buffer stdin in its entirety,
1199                // so we might as well do that now.
1200                let pio = ctry!(BufferedPrimaryIo::from_stdin(); "error reading standard input");
1201                let pio: Box<dyn IoProvider> = Box::new(pio);
1202                (pio, None, "".into())
1203            }
1204
1205            PrimaryInputMode::Buffer(buf) => {
1206                // Same behavior as with stdin.
1207                let pio: Box<dyn IoProvider> = Box::new(BufferedPrimaryIo::from_buffer(buf));
1208                (pio, None, "".into())
1209            }
1210        };
1211
1212        let format_cache_path = self
1213            .format_cache_path
1214            .unwrap_or_else(|| filesystem_root.clone());
1215        let format_cache = FormatCache::new(bundle.get_digest()?, format_cache_path);
1216
1217        let genuine_stdout = if self.print_stdout {
1218            Some(GenuineStdoutIo::new())
1219        } else {
1220            None
1221        };
1222
1223        // move this out of self to get around borrow checker issues
1224        let hidden_input_paths = self.hidden_input_paths;
1225
1226        let extra_search_paths = if self.security.allow_extra_search_paths() {
1227            self.unstables
1228                .extra_search_paths
1229                .iter()
1230                .map(|p| FilesystemIo::new(p, false, false, hidden_input_paths.clone()))
1231                .collect()
1232        } else {
1233            if !self.unstables.extra_search_paths.is_empty() {
1234                tt_warning!(status, "Extra search path(s) ignored due to security");
1235            }
1236            Vec::new()
1237        };
1238
1239        let filesystem = FilesystemIo::new(&filesystem_root, false, true, hidden_input_paths);
1240
1241        let mem = MemoryIo::new(true);
1242
1243        // BEGIN AWARE REPORTS PATCH
1244        let disk_outputs = if self.outputs_to_filesystem {
1245            Some(FilesystemIo::new(
1246                &filesystem_root,
1247                true,
1248                false,
1249                HashSet::new(),
1250            ))
1251        } else {
1252            None
1253        };
1254        // END AWARE REPORTS PATCH
1255
1256        let bs = BridgeState {
1257            primary_input: pio,
1258            mem,
1259            filesystem,
1260            extra_search_paths,
1261            shell_escape_work: None,
1262            format_cache,
1263            bundle,
1264            genuine_stdout,
1265            format_primary: None,
1266            events: HashMap::new(),
1267            disk_outputs,
1268        };
1269
1270        // Now we can do the rest.
1271
1272        let output_path = match self.output_dest {
1273            OutputDestination::Default => Some(default_output_path),
1274            OutputDestination::Path(p) => Some(p),
1275            OutputDestination::Nowhere => None,
1276        };
1277
1278        let tex_input_name = self
1279            .tex_input_name
1280            .expect("tex_input_name must be specified");
1281        let mut aux_path = PathBuf::from(tex_input_name.clone());
1282        aux_path.set_extension("aux");
1283        let mut xdv_path = aux_path.clone();
1284        xdv_path.set_extension(if self.output_format == OutputFormat::Html {
1285            "spx"
1286        } else {
1287            "xdv"
1288        });
1289        let mut pdf_path = aux_path.clone();
1290        pdf_path.set_extension("pdf");
1291
1292        let shell_escape_mode = if !self.security.allow_shell_escape() {
1293            ShellEscapeMode::Disabled
1294        } else {
1295            match self.shell_escape_mode {
1296                ShellEscapeMode::Defaulted => {
1297                    if let Some(ref cwd) = self.unstables.shell_escape_cwd {
1298                        ShellEscapeMode::ExternallyManagedDir(cwd.into())
1299                    } else if self.unstables.shell_escape {
1300                        ShellEscapeMode::TempDir
1301                    } else {
1302                        ShellEscapeMode::Disabled
1303                    }
1304                }
1305
1306                other => other,
1307            }
1308        };
1309
1310        Ok(ProcessingSession {
1311            security: self.security,
1312            bs,
1313            pass: self.pass,
1314            primary_input_path,
1315            primary_input_tex_path: tex_input_name,
1316            format_name: self.format_name.unwrap(),
1317            tex_aux_path: aux_path.display().to_string(),
1318            tex_xdv_path: xdv_path.display().to_string(),
1319            tex_pdf_path: pdf_path.display().to_string(),
1320            output_format: self.output_format,
1321            makefile_output_path: self.makefile_output_path,
1322            output_path,
1323            tex_rerun_specification: self.reruns,
1324            keep_intermediates: self.keep_intermediates,
1325            keep_logs: self.keep_logs,
1326            synctex_enabled: self.synctex,
1327            build_date: self.build_date.unwrap_or(SystemTime::UNIX_EPOCH),
1328            // BEGIN AWARE REPORTS PATCH
1329            pdf_origin_offset: self.pdf_origin_offset,
1330            // END AWARE REPORTS PATCH
1331            unstables: self.unstables,
1332            shell_escape_mode,
1333            html_assets_spec_path: self.html_assets_spec_path,
1334            html_precomputed_assets: self.html_precomputed_assets,
1335            html_emit_files: !self.html_do_not_emit_files,
1336            html_emit_assets: !self.html_do_not_emit_assets,
1337        })
1338    }
1339}
1340
/// Why another engine pass is considered necessary.
#[derive(Debug, Clone)]
enum RerunReason {
    // NOTE(review): presumably reported after a biber run; the code that
    // produces this variant is outside this view.
    Biber,
    // NOTE(review): presumably reported after a BibTeX run; the code that
    // produces this variant is outside this view.
    Bibtex,
    /// A file that the engine read and then rewrote has changed; carries the
    /// name of that file.
    FileChange(String),
}
1347
/// The ProcessingSession struct runs the whole show when we're actually
/// processing a file. It understands, for example, the need to re-run the TeX
/// engine if the `.aux` file changed.
pub struct ProcessingSession {
    /// Security settings.
    security: SecuritySettings,

    /// The subset of the session state that can be mutated while the C/C++
    /// engines are running. Importantly, this includes the full I/O stack.
    bs: BridgeState,

    /// If our primary input is an actual file on disk, this is its path.
    primary_input_path: Option<PathBuf>,

    /// This is the name of the input that we tell TeX. It is the basename of
    /// the UTF8-ified version of `primary_input_path`; or something anodyne
    /// if the latter is None. (Name, "texput.tex").
    primary_input_tex_path: String,

    /// This is the name of the format file to use. TeX has to open it by name
    /// internally, so it has to be String compatible.
    format_name: String,

    /// These are the paths of the various output files as TeX knows them --
    /// just `primary_input_tex_path` with the extension changed.
    tex_aux_path: String,
    /// Like `tex_aux_path`, but with the `xdv` extension (`spx` in HTML mode).
    tex_xdv_path: String,
    /// Like `tex_aux_path`, but with the `pdf` extension.
    tex_pdf_path: String,

    /// If we're writing out Makefile rules, this is where they go. The TeX
    /// engine doesn't know about this path at all.
    makefile_output_path: Option<PathBuf>,

    /// This is the path that the processed file will be saved at. It defaults
    /// to the path of `primary_input_path` or `.` if STDIN is used. If set to
    /// None, the output files will not be saved to disk — in which case, the
    /// caller should access the in-memory I/O layer held in the `bs` field to
    /// gain access to the output files.
    output_path: Option<PathBuf>,

    /// Which kind of pass to run.
    pass: PassSetting,
    /// The type of output to create.
    output_format: OutputFormat,
    /// Exact number of TeX reruns requested by the caller, if any.
    tex_rerun_specification: Option<usize>,
    /// Whether intermediate files are written out to the filesystem.
    keep_intermediates: bool,
    /// Whether `.log` and `.blg` files are written out to the filesystem.
    keep_logs: bool,
    /// Whether SyncTeX information is generated.
    synctex_enabled: bool,

    /// See `TexEngine::with_date` and `XdvipdfmxEngine::with_date`.
    build_date: SystemTime,

    // BEGIN AWARE REPORTS PATCH
    /// Page-origin offset (x, y) in bp for the xdvipdfmx pass; `None` keeps the
    /// engine default (1 inch). See `ProcessingSessionBuilder::pdf_origin_offset`.
    pdf_origin_offset: Option<(f64, f64)>,
    // END AWARE REPORTS PATCH

    /// Unstable options that were loaded into the builder.
    unstables: UnstableOptions,

    /// How to handle shell-escape. The `Defaulted` option will never
    /// be used here.
    shell_escape_mode: ShellEscapeMode,

    /// HTML mode: path at which to emit an asset specification file.
    html_assets_spec_path: Option<String>,
    /// HTML mode: precomputed asset specification to use.
    html_precomputed_assets: Option<AssetSpecification>,
    /// HTML mode: whether templated output files are created.
    html_emit_files: bool,
    /// HTML mode: whether asset files are created.
    html_emit_assets: bool,
}
1415
// NOTE(review): presumably the upper bound on TeX passes when the rerun count
// is auto-detected; the code that uses it is outside this view.
const DEFAULT_MAX_TEX_PASSES: usize = 6;
// File extensions that are always classified as intermediate files.
// NOTE(review): the classification logic itself is outside this view.
const ALWAYS_INTERMEDIATE_EXTENSIONS: &[&str] = &[
    ".snm", ".toc", // generated by Beamer
];
1420
1421impl ProcessingSession {
1422    /// Assess whether we need to rerun an engine. This is the case if there
1423    /// was a file that the engine read and then rewrote, and the rewritten
1424    /// version is different than the version that it read in.
1425    fn is_rerun_needed(&self, status: &mut dyn StatusBackend) -> Option<RerunReason> {
1426        // TODO: we should probably wire up diagnostics since I expect this
1427        // stuff could get finicky and we're going to want to be able to
1428        // figure out why rerun detection is breaking.
1429
1430        for (name, info) in &self.bs.events {
1431            if info.access_pattern == AccessPattern::ReadThenWritten {
1432                let file_changed = match (&info.read_digest, &info.write_digest) {
1433                    (Some(d1), Some(d2)) => d1 != d2,
1434                    (&None, &Some(_)) => true,
1435                    (_, _) => {
1436                        // Other cases shouldn't happen.
1437                        tt_warning!(
1438                            status,
1439                            "internal consistency problem when checking if {} changed",
1440                            name
1441                        );
1442                        true
1443                    }
1444                };
1445
1446                if file_changed {
1447                    return Some(RerunReason::FileChange(name.clone()));
1448                }
1449            }
1450        }
1451
1452        None
1453    }
1454
1455    #[allow(dead_code)]
1456    fn _dump_access_info(&self, status: &mut dyn StatusBackend) {
1457        for (name, info) in &self.bs.events {
1458            if info.access_pattern != AccessPattern::Read {
1459                let r = match info.read_digest {
1460                    Some(ref d) => d.to_string(),
1461                    None => "-".into(),
1462                };
1463                let w = match info.write_digest {
1464                    Some(ref d) => d.to_string(),
1465                    None => "-".into(),
1466                };
1467                tt_note!(
1468                    status,
1469                    "ACCESS: {} {:?} {:?} {:?}",
1470                    name,
1471                    info.access_pattern,
1472                    r,
1473                    w
1474                );
1475            }
1476        }
1477    }
1478
1479    /// Runs the session, generating the desired outputs.
1480    ///
1481    /// What this does depends on which [`PassSetting`] you asked for. The most common choice is
1482    /// `PassSetting::Default`, in which case this method does the following:
1483    ///
1484    /// - if a `.fmt` file does not yet exist, generate one and cache it
1485    /// - run the TeX engine once
1486    /// - run BibTeX, if it seems to be required
1487    /// - repeat the last two steps as often as needed
1488    /// - write the output files to disk, including a Makefile if it was requested.
1489    pub fn run(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1490        // Pre-invocation setup that requires cleanup even if the processing errors out.
1491
1492        let (shell_escape_work, clean_up_shell_escape) = match self.shell_escape_mode {
1493            ShellEscapeMode::Disabled => (None, false),
1494
1495            ShellEscapeMode::ExternallyManagedDir(ref p) => (
1496                Some(FilesystemIo::new(p, false, false, HashSet::new())),
1497                false,
1498            ),
1499
1500            ShellEscapeMode::TempDir => {
1501                let tempdir = ctry!(tempfile::Builder::new().tempdir(); "can't create temporary directory for shell-escape work");
1502                (
1503                    Some(FilesystemIo::new(
1504                        &tempdir.keep(),
1505                        false,
1506                        false,
1507                        HashSet::new(),
1508                    )),
1509                    true,
1510                )
1511            }
1512
1513            ShellEscapeMode::Defaulted => unreachable!(),
1514        };
1515
1516        self.bs.shell_escape_work = shell_escape_work;
1517
1518        // Go-time!
1519        let result = self.run_inner(status);
1520
1521        // Do that cleanup.
1522
1523        if clean_up_shell_escape {
1524            let shell_escape_work = self.bs.shell_escape_work.take().unwrap();
1525            let shell_escape_err = std::fs::remove_dir_all(shell_escape_work.root());
1526
1527            if let Err(e) = shell_escape_err {
1528                tt_warning!(status, "an error occurred while cleaning up the \
1529                    shell-escape temporary directory `{}`", shell_escape_work.root().display(); e.into());
1530            }
1531        }
1532
1533        // Propagate the actual result.
1534        result
1535    }
1536
1537    /// The bulk of the `run` implementation. We need to wrap it to manage the
1538    /// lifecycle of resources like the shell-escape temporary directory, if
1539    /// needed.
1540    fn run_inner(&mut self, status: &mut dyn StatusBackend) -> Result<()> {
1541        // Do we need to generate the format file?
1542
1543        let generate_format = if self.output_format == OutputFormat::Format {
1544            false
1545        } else {
1546            match self.bs.input_open_format(&self.format_name, status) {
1547                OpenResult::Ok(_) => false,
1548                OpenResult::NotAvailable => true,
1549                OpenResult::Err(e) => {
1550                    return Err(e)
1551                        .chain_err(|| format!("could not open format file {}", self.format_name));
1552                }
1553            }
1554        };
1555
1556        if generate_format {
1557            tt_note!(status, "generating format \"{}\"", self.format_name);
1558            self.make_format_pass(status)?;
1559        }
1560
1561        // Do the meat of the work.
1562
1563        let result = match self.pass {
1564            PassSetting::Tex => match self.tex_pass(None, status) {
1565                Ok(Some(warnings)) => {
1566                    tt_warning!(status, "{}", warnings);
1567                    Ok(0)
1568                }
1569                Ok(None) => Ok(0),
1570                Err(e) => Err(e),
1571            },
1572            PassSetting::Default => self.default_pass(false, status),
1573            PassSetting::BibtexFirst => self.default_pass(true, status),
1574        };
1575
1576        if let Err(e) = result {
1577            self.write_files(None, status, true)?;
1578            return Err(e);
1579        };
1580
1581        // Write output files and the first line of our Makefile output.
1582
1583        let mut mf_dest_maybe = match self.makefile_output_path {
1584            Some(ref p) => {
1585                if self.output_path.is_none() {
1586                    tt_warning!(
1587                        status,
1588                        "requested to generate Makefile rules, but no files written to disk!"
1589                    );
1590                    None
1591                } else {
1592                    Some(File::create(p)?)
1593                }
1594            }
1595
1596            None => None,
1597        };
1598
1599        let n_skipped_intermediates = self.write_files(mf_dest_maybe.as_mut(), status, false)?;
1600
1601        if n_skipped_intermediates > 0 {
1602            status.note_highlighted(
1603                "Skipped writing ",
1604                &format!("{n_skipped_intermediates}"),
1605                " intermediate files (use --keep-intermediates to keep them)",
1606            );
1607        }
1608
1609        // Finish Makefile rules, maybe.
1610
1611        if let Some(ref mut mf_dest) = mf_dest_maybe {
1612            ctry!(write!(mf_dest, ": "); "couldn't write to Makefile-rules file");
1613
1614            if let Some(ref pip) = self.primary_input_path {
1615                let opip = ctry!(pip.to_str(); "Makefile-rules file path must be Unicode-able");
1616                ctry!(mf_dest.write_all(opip.as_bytes()); "couldn't write to Makefile-rules file");
1617            }
1618
1619            // The check above ensures that this is never None.
1620            let root = self.output_path.as_ref().unwrap();
1621
1622            for (name, info) in &self.bs.events {
1623                if info.input_origin != InputOrigin::Filesystem {
1624                    continue;
1625                }
1626
1627                if info.got_written_to_disk {
1628                    // If the file originally came from the filesystem, and it
1629                    // was written as well as read, and we actually wrote it
1630                    // to disk, there's a circular dependency that's
1631                    // inappropriate to express in a Makefile. If it was
1632                    // "written" by the engine but we didn't actually write
1633                    // those modifications to disk, we're OK. If there's a
1634                    // two-stage compilation involving the .aux file, the
1635                    // latter case is what arises unless --keep-intermediates
1636                    // is specified.
1637                    tt_warning!(status, "omitting circular Makefile dependency for {}", name);
1638                    continue;
1639                }
1640
1641                ctry!(write!(mf_dest, " \\\n  {}", root.join(name).display()); "couldn't write to Makefile-rules file");
1642            }
1643
1644            ctry!(writeln!(mf_dest, ""); "couldn't write to Makefile-rules file");
1645        }
1646
1647        // All done.
1648
1649        Ok(())
1650    }
1651
1652    fn write_files(
1653        &mut self,
1654        mut mf_dest_maybe: Option<&mut File>,
1655        status: &mut dyn StatusBackend,
1656        only_logs: bool,
1657    ) -> Result<u32> {
1658        let root = match self.output_path {
1659            Some(ref p) => p,
1660
1661            None => {
1662                // We were told not to write anything!
1663                return Ok(0);
1664            }
1665        };
1666
1667        let mut n_skipped_intermediates = 0;
1668
1669        for (name, file) in &*self.bs.mem.files.borrow() {
1670            if name == self.bs.mem.stdout_key() {
1671                continue;
1672            }
1673
1674            let sname = name;
1675            let summ = self.bs.events.get_mut(name).unwrap();
1676
1677            if !only_logs && (self.output_format == OutputFormat::Aux) {
1678                // In this mode we're only writing the .aux file. I initially
1679                // wanted to be clever-ish and output all auxiliary-type
1680                // files, but doing so ended up causing non-obvious problems
1681                // for my use case, which involves using Ninja to manage
1682                // dependencies.
1683                if !sname.ends_with(".aux") {
1684                    continue;
1685                }
1686            } else if !self.keep_intermediates
1687                && (summ.access_pattern != AccessPattern::Written
1688                    || ALWAYS_INTERMEDIATE_EXTENSIONS
1689                        .iter()
1690                        .any(|ext| sname.ends_with(ext)))
1691            {
1692                n_skipped_intermediates += 1;
1693                continue;
1694            }
1695
1696            let is_logfile = sname.ends_with(".log") || sname.ends_with(".blg");
1697
1698            if is_logfile && !self.keep_logs {
1699                continue;
1700            }
1701
1702            if !is_logfile && only_logs {
1703                continue;
1704            }
1705
1706            if file.data.is_empty() {
1707                status.note_highlighted(
1708                    "Not writing ",
1709                    &format!("`{sname}`"),
1710                    ": it would be empty.",
1711                );
1712                continue;
1713            }
1714
1715            let real_path = root.join(name);
1716            let byte_len = Byte::from_u128(file.data.len() as u128).unwrap();
1717            status.note_highlighted(
1718                "Writing ",
1719                &format!("`{}`", real_path.display()),
1720                &format!(" ({})", byte_len.get_appropriate_unit(UnitType::Binary)),
1721            );
1722
1723            if let Some(parent) = real_path.parent() {
1724                std::fs::create_dir_all(parent)?;
1725            }
1726
1727            let mut f = File::create(&real_path)?;
1728            f.write_all(&file.data)?;
1729            summ.got_written_to_disk = true;
1730
1731            if let Some(ref mut mf_dest) = mf_dest_maybe {
1732                // Maybe it'd be better to have this just be a warning? But if
1733                // the program is supposed to write the file, you don't want
1734                // it exiting with error code zero if it couldn't do that
1735                // successfully.
1736                //
1737                // Not quite sure why, but I can't pull out the target path
1738                // here. I think 'self' is borrow inside the loop?
1739                ctry!(write!(mf_dest, "{} ", real_path.display()); "couldn't write to Makefile-rules file");
1740            }
1741        }
1742
1743        Ok(n_skipped_intermediates)
1744    }
1745
1746    /// The "default" pass really runs a bunch of sub-passes. It is a "Do What
1747    /// I Mean" operation.
1748    fn default_pass(&mut self, bibtex_first: bool, status: &mut dyn StatusBackend) -> Result<i32> {
1749        // If `bibtex_first` is true, we start by running bibtex, and run
1750        // proceed with the standard rerun logic. Otherwise, we run TeX,
1751        // auto-detect whether we need to run bibtex, possibly run it, and
1752        // then go ahead.
1753
1754        let mut warnings = None;
1755        let mut rerun_result = if bibtex_first {
1756            self.bibtex_pass(status)?;
1757            Some(RerunReason::Bibtex)
1758        } else {
1759            warnings = self.tex_pass(None, status)?;
1760            let maybe_biber = self.check_biber_requirement(status)?;
1761
1762            if let Some(biber) = maybe_biber {
1763                self.bs.external_tool_pass(&biber, status)?;
1764                Some(RerunReason::Biber)
1765            } else if self.is_bibtex_needed() {
1766                self.bibtex_pass(status)?;
1767                Some(RerunReason::Bibtex)
1768            } else {
1769                self.is_rerun_needed(status)
1770            }
1771        };
1772
1773        // Now we enter the main rerun loop.
1774
1775        let (pass_count, reruns_fixed) = match self.tex_rerun_specification {
1776            Some(n) => (n, true),
1777            None => (DEFAULT_MAX_TEX_PASSES, false),
1778        };
1779
1780        for i in 0..pass_count {
1781            let rerun_explanation = if reruns_fixed {
1782                "I was told to".to_owned()
1783            } else {
1784                match rerun_result {
1785                    Some(RerunReason::Biber) => "biber was run".to_owned(),
1786                    Some(RerunReason::Bibtex) => "bibtex was run".to_owned(),
1787                    Some(RerunReason::FileChange(ref s)) => format!("\"{s}\" changed"),
1788                    None => break,
1789                }
1790            };
1791
1792            // We're restarting the engine afresh, so clear the read inputs.
1793            // We do *not* clear the entire HashMap since we want to remember,
1794            // e.g., that bibtex wrote out the .bbl file, since that way we
1795            // can later know that it's OK to delete. I am not super confident
1796            // that the access_pattern data can just be left as-is when we do
1797            // this, but, uh, so far it seems to work.
1798            for summ in self.bs.events.values_mut() {
1799                summ.read_digest = None;
1800            }
1801
1802            warnings = self.tex_pass(Some(&rerun_explanation), status)?;
1803
1804            if !reruns_fixed {
1805                rerun_result = self.is_rerun_needed(status);
1806
1807                if rerun_result.is_some() && i == DEFAULT_MAX_TEX_PASSES - 1 {
1808                    tt_warning!(
1809                        status,
1810                        "TeX rerun seems needed, but stopping at {} passes",
1811                        DEFAULT_MAX_TEX_PASSES
1812                    );
1813                    break;
1814                }
1815            }
1816        }
1817
1818        // The last tex pass generated warnings.
1819        if let Some(warnings) = warnings {
1820            tt_warning!(status, "{}", warnings);
1821        }
1822
1823        // And finally, xdvipdfmx or spx2html. Maybe.
1824
1825        if let OutputFormat::Pdf = self.output_format {
1826            self.xdvipdfmx_pass(status)?;
1827        } else if let OutputFormat::Html = self.output_format {
1828            self.spx2html_pass(status)?;
1829        }
1830
1831        Ok(0)
1832    }
1833
1834    fn is_bibtex_needed(&self) -> bool {
1835        const BIBDATA: &[u8] = b"\\bibdata";
1836
1837        self.bs
1838            .mem
1839            .files
1840            .borrow()
1841            .get(&self.tex_aux_path)
1842            .map(|file| {
1843                // We used to use aho-corasick crate here, but it was removed to reduce the code
1844                // size.
1845                file.data.windows(BIBDATA.len()).any(|s| s == BIBDATA)
1846            })
1847            .unwrap_or(false)
1848    }
1849
1850    /// Use the TeX engine to generate a format file.
1851    #[allow(clippy::manual_split_once)] // requires Rust 1.52 (note that we don't actually define our MSRV)
1852    fn make_format_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
1853        // PathBuf.file_stem() doesn't do what we want since it only strips
1854        // one extension. As of 1.17, the compiler needs a type annotation for
1855        // some reason, which is why we use the `r` variable.
1856        let r: Result<&str> = self.format_name.split('.').next().ok_or_else(|| {
1857            ErrorKind::Msg(format!(
1858                "incomprehensible format file name \"{}\"",
1859                self.format_name
1860            ))
1861            .into()
1862        });
1863        let stem = r?;
1864
1865        let result = {
1866            self.bs
1867                .enter_format_mode(&format!("tectonic-format-{stem}.tex"));
1868            let mut launcher =
1869                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1870            let r = TexEngine::default()
1871                .halt_on_error_mode(true)
1872                .initex_mode(true)
1873                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1874                .process(&mut launcher, "UNUSED.fmt", "texput");
1875            self.bs.leave_format_mode();
1876            r
1877        };
1878
1879        match result {
1880            Ok(TexOutcome::Spotless) => {}
1881            Ok(TexOutcome::Warnings) => {
1882                tt_warning!(status, "warnings were issued by the TeX engine; use --print and/or --keep-logs for details.");
1883            }
1884            Ok(TexOutcome::Errors) => {
1885                tt_error!(status, "errors were issued by the TeX engine; use --print and/or --keep-logs for details.");
1886                return Err(ErrorKind::Msg("unhandled TeX engine error".to_owned()).into());
1887            }
1888            Err(e) => {
1889                return Err(e.into());
1890            }
1891        }
1892
1893        // Now we can write the format file to its special location. In
1894        // principle we could stream the format file directly to the staging
1895        // area as we ran the TeX engine, but we don't bother.
1896
1897        for (name, file) in &*self.bs.mem.files.borrow() {
1898            if name == self.bs.mem.stdout_key() {
1899                continue;
1900            }
1901
1902            let sname = name;
1903
1904            if !sname.ends_with(".fmt") {
1905                continue;
1906            }
1907
1908            // Note that we intentionally pass 'stem', not 'name'.
1909            ctry!(self.bs.format_cache.write_format(stem, &file.data, status); "cannot write format file {}", sname);
1910        }
1911
1912        // All done. Clear the memory layer since this was a special preparatory step.
1913        self.bs.mem.files.borrow_mut().clear();
1914
1915        Ok(0)
1916    }
1917
1918    /// Run one pass of the TeX engine.
1919    fn tex_pass(
1920        &mut self,
1921        rerun_explanation: Option<&str>,
1922        status: &mut dyn StatusBackend,
1923    ) -> Result<Option<&'static str>> {
1924        let result = {
1925            if let Some(s) = rerun_explanation {
1926                status.note_highlighted("Rerunning ", "TeX", &format!(" because {s} ..."));
1927            } else {
1928                status.note_highlighted("Running ", "TeX", " ...");
1929            }
1930
1931            let mut launcher =
1932                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1933
1934            // In deterministic mode, we stub a few aspects of the environment.
1935            // They default to a "realistic" view, but we override them with static values:
1936            if self.unstables.deterministic_mode {
1937                launcher.with_expose_absolute_paths(false);
1938                launcher.with_mtime_override(Some(
1939                    self.build_date
1940                        .duration_since(SystemTime::UNIX_EPOCH)
1941                        .map(|x| x.as_secs() as i64)
1942                        .expect("invalid build date in deterministic mode"),
1943                ));
1944            }
1945
1946            TexEngine::default()
1947                .halt_on_error_mode(!self.unstables.continue_on_errors)
1948                .initex_mode(self.output_format == OutputFormat::Format)
1949                .synctex(self.synctex_enabled)
1950                .semantic_pagination(self.output_format == OutputFormat::Html)
1951                .shell_escape(self.shell_escape_mode != ShellEscapeMode::Disabled)
1952                .build_date(self.build_date)
1953                .process(
1954                    &mut launcher,
1955                    &self.format_name,
1956                    &self.primary_input_tex_path,
1957                )
1958        };
1959
1960        let warnings = match result {
1961            Ok(TexOutcome::Spotless) => None,
1962            Ok(TexOutcome::Warnings) =>
1963                    Some("warnings were issued by the TeX engine; use --print and/or --keep-logs for details."),
1964            Ok(TexOutcome::Errors) =>
1965                    Some("errors were issued by the TeX engine, but were ignored; \
1966                         use --print and/or --keep-logs for details."),
1967            Err(e) =>
1968                return Err(e.into()),
1969        };
1970
1971        if !self.bs.mem.files.borrow().contains_key(&self.tex_xdv_path) {
1972            // TeX did not produce the expected output file
1973            tt_warning!(
1974                status,
1975                "did not produce \"{}\"; this may mean that your document is empty",
1976                self.tex_xdv_path
1977            )
1978        }
1979
1980        Ok(warnings)
1981    }
1982
1983    // Run Bibtex process for one .aux file.
1984    fn bibtex_pass_for_one_aux_file(
1985        &mut self,
1986        status: &mut dyn StatusBackend,
1987        aux_file: &String,
1988    ) -> Result<i32> {
1989        let result = {
1990            status.note_highlighted("Running ", "BibTeX", &format!(" on {aux_file} ..."));
1991            let mut launcher =
1992                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
1993            let mut engine = BibtexEngine::new();
1994            engine.process(&mut launcher, aux_file, &self.unstables)
1995        };
1996
1997        match result {
1998            Ok(TexOutcome::Spotless) => {}
1999            Ok(TexOutcome::Warnings) => {
2000                tt_note!(
2001                    status,
2002                    "warnings were issued by BibTeX; use --print and/or --keep-logs for details."
2003                );
2004            }
2005            Ok(TexOutcome::Errors) => {
2006                tt_warning!(
2007                    status,
2008                    "errors were issued by BibTeX, but were ignored; \
2009                     use --print and/or --keep-logs for details."
2010                );
2011            }
2012            Err(e) => {
2013                return Err(e.chain_err(|| ErrorKind::EngineError("BibTeX")));
2014            }
2015        }
2016
2017        Ok(0)
2018    }
2019
2020    fn bibtex_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2021        let mut aux_files = vec![self.tex_aux_path.clone()];
2022
2023        // find other .aux files generated by tex_pass
2024        for f in self.bs.get_intermediate_file_names() {
2025            if f.ends_with(".aux") && f != self.tex_aux_path {
2026                aux_files.push(f);
2027            }
2028        }
2029
2030        for f in aux_files {
2031            let _r = self.bibtex_pass_for_one_aux_file(status, &f)?;
2032        }
2033
2034        Ok(0)
2035    }
2036
2037    fn xdvipdfmx_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2038        {
2039            status.note_highlighted("Running ", "xdvipdfmx", " ...");
2040
2041            let mut launcher =
2042                CoreBridgeLauncher::new_with_security(&mut self.bs, status, self.security.clone());
2043            let mut engine = XdvipdfmxEngine::default();
2044
2045            engine.build_date(self.build_date);
2046
2047            if let Some(ref ps) = self.unstables.paper_size {
2048                engine.paper_spec(ps.clone());
2049            }
2050
2051            // BEGIN AWARE REPORTS PATCH
2052            if let Some((x, y)) = self.pdf_origin_offset {
2053                engine.origin_offset(x, y);
2054            }
2055            // END AWARE REPORTS PATCH
2056
2057            engine.process(&mut launcher, &self.tex_xdv_path, &self.tex_pdf_path)?;
2058        }
2059
2060        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2061        Ok(0)
2062    }
2063
2064    fn spx2html_pass(&mut self, status: &mut dyn StatusBackend) -> Result<i32> {
2065        {
2066            let mut engine = Spx2HtmlEngine::default();
2067
2068            match (self.html_emit_files, self.output_path.as_ref()) {
2069                (true, Some(p)) => engine.output_base(p),
2070                (false, _) => engine.do_not_emit_files(),
2071                (true, None) => return Err(errmsg!("HTML output must be saved directly to disk")),
2072            };
2073
2074            if let Some(p) = self.html_assets_spec_path.as_ref() {
2075                engine.assets_spec_path(p);
2076            } else if !self.html_emit_assets {
2077                engine.do_not_emit_assets();
2078            }
2079
2080            if let Some(a) = self.html_precomputed_assets.as_ref() {
2081                engine.precomputed_assets(a.clone());
2082            }
2083
2084            status.note_highlighted("Running ", "spx2html", " ...");
2085            engine.process_to_filesystem(&mut self.bs, status, &self.tex_xdv_path)?;
2086        }
2087
2088        self.bs.mem.files.borrow_mut().remove(&self.tex_xdv_path);
2089        Ok(0)
2090    }
2091
2092    /// Get what was printed to standard output, if anything.
2093    pub fn get_stdout_content(&self) -> Vec<u8> {
2094        self.bs
2095            .mem
2096            .files
2097            .borrow()
2098            .get(self.bs.mem.stdout_key())
2099            .map(|mfi| mfi.data.clone())
2100            .unwrap_or_default()
2101    }
2102
2103    /// Consume this session and return the current set of files in memory.
2104    ///
2105    /// This convenience function tries to help with the annoyances of getting
2106    /// access to the in-memory file data after the engine has been run.
2107    pub fn into_file_data(self) -> MemoryFileCollection {
2108        Rc::try_unwrap(self.bs.mem.files)
2109            .expect("multiple strong refs to MemoryIo files")
2110            .into_inner()
2111    }
2112
2113    /// See if we need to run `biber`, and parse the `.run.xml` file from the
2114    /// `loqreq` package to figure out what files `biber` needs. This
2115    /// functionality should probably become more generic, but I don't have a
2116    /// great sense as to how widely-used `logreq` is.
2117    fn check_biber_requirement(
2118        &self,
2119        status: &mut dyn StatusBackend,
2120    ) -> Result<Option<ExternalToolPass>> {
2121        // Is there a `.run.xml` file?
2122
2123        let mut run_xml_path = PathBuf::from(&self.primary_input_tex_path);
2124        run_xml_path.set_extension("run.xml");
2125        let run_xml_path = run_xml_path.display().to_string();
2126
2127        let mem_files = &*self.bs.mem.files.borrow();
2128        let run_xml_entry = match mem_files.get(&run_xml_path) {
2129            Some(e) => e,
2130            None => return Ok(None),
2131        };
2132
2133        // Yes, there is. Set up to potentially run biber. For testing support,
2134        // we let the rig specify a custom executable to use, which lets us
2135        // exercise different pieces of the external-tool behavior.
2136
2137        let s = (
2138            crate::config::is_config_test_mode_activated(),
2139            std::env::var("TECTONIC_TEST_FAKE_BIBER"),
2140        );
2141
2142        let mut argv = match s {
2143            (true, Ok(text)) if !text.trim().is_empty() => {
2144                text.split_whitespace().map(|x| x.to_owned()).collect()
2145            }
2146            // when `TECTONIC_TEST_FAKE_BIBER` is empty, proceed to discover
2147            // the biber binary as follows.
2148            _ => vec!["biber".to_owned()],
2149        };
2150
2151        // Moreover, we allow an override of the biber executable, to cope with
2152        // possible version mismatch of the bundled biblatex package, as filed
2153        // in issue #893. Since PR #1103, the `tectonic-biber` override can
2154        // also be invoked with `tectonic -X biber`.
2155        let find_by = |binary_name: &str| -> Option<String> {
2156            if let Ok(pathbuf) = which(binary_name) {
2157                if let Some(biber_path) = pathbuf.to_str() {
2158                    return Some(biber_path.to_owned());
2159                }
2160            }
2161            None
2162        };
2163
2164        let mut use_tectonic_biber_override = false;
2165        for binary_name in ["./tectonic-biber", "tectonic-biber"] {
2166            if let Some(biber_path) = find_by(binary_name) {
2167                argv = vec![biber_path];
2168                use_tectonic_biber_override = true;
2169                break;
2170            }
2171        }
2172
2173        let mut extra_requires = HashSet::new();
2174
2175        // Do a sketchy XML parse to see if there's info about a biber
2176        // invocation.
2177
2178        #[derive(Clone, Copy, Debug, Eq, PartialEq)]
2179        enum State {
2180            /// Searching for the biber section
2181            Searching,
2182
2183            /// In a <binary> element. Will its value be "biber"??!?
2184            InBinaryName,
2185
2186            /// In the <cmdline> part of the biber section.
2187            InBiberCmdline,
2188
2189            /// About to read an argument to the biber command.
2190            InBiberArgument,
2191
2192            /// Reading through the post-cmdline part of the biber section.
2193            InBiberRemainder,
2194
2195            /// In a "requirement" section like <input> or <requires> that contains
2196            /// filenames we should provide
2197            InBiberRequirementSection,
2198
2199            /// In a <file> requirement
2200            InBiberFileRequirement,
2201        }
2202
2203        let curs = Cursor::new(&run_xml_entry.data[..]);
2204        let mut reader = NsReader::from_reader(curs);
2205        let mut buf = Vec::new();
2206        let mut state = State::Searching;
2207
2208        loop {
2209            let event = ctry!(
2210                reader.read_event_into(&mut buf);
2211                "error parsing run.xml file"
2212            );
2213
2214            if let Event::Eof = event {
2215                break;
2216            }
2217
2218            match (state, event) {
2219                (State::Searching, Event::Start(ref e)) => {
2220                    let name = reader
2221                        .decoder()
2222                        .decode(e.local_name().into_inner())
2223                        .map_err(quick_xml::Error::from)?;
2224
2225                    if name == "binary" {
2226                        state = State::InBinaryName;
2227                    }
2228                }
2229
2230                (State::InBinaryName, Event::Text(ref e)) => {
2231                    let text = e.unescape()?;
2232
2233                    state = if &text == "biber" {
2234                        State::InBiberCmdline
2235                    } else {
2236                        State::Searching
2237                    };
2238                }
2239
2240                (State::InBinaryName, _) => {
2241                    state = State::Searching;
2242                }
2243
2244                (State::InBiberCmdline, Event::Start(ref e)) => {
2245                    let name = reader
2246                        .decoder()
2247                        .decode(e.local_name().into_inner())
2248                        .map_err(quick_xml::Error::from)?;
2249
2250                    // Note that the "infile" might be `foo` without the `.bcf`
2251                    // extension, so we can't use it for file-finding.
2252                    state = match &*name {
2253                        "infile" | "outfile" | "option" => State::InBiberArgument,
2254                        _ => State::InBiberRemainder,
2255                    }
2256                }
2257
2258                (State::InBiberCmdline, Event::End(ref e)) => {
2259                    let name = reader
2260                        .decoder()
2261                        .decode(e.local_name().into_inner())
2262                        .map_err(quick_xml::Error::from)?;
2263
2264                    if name == "cmdline" {
2265                        state = State::InBiberRemainder;
2266                    }
2267                }
2268
2269                (State::InBiberArgument, Event::Text(ref e)) => {
2270                    argv.push(e.unescape()?.to_string());
2271                    state = State::InBiberCmdline;
2272                }
2273
2274                (State::InBiberRemainder, Event::Start(ref e)) => {
2275                    let name = reader
2276                        .decoder()
2277                        .decode(e.local_name().into_inner())
2278                        .map_err(quick_xml::Error::from)?;
2279
2280                    state = match &*name {
2281                        "input" | "requires" => State::InBiberRequirementSection,
2282                        _ => State::InBiberRemainder,
2283                    }
2284                }
2285
2286                (State::InBiberRemainder, Event::End(ref e)) => {
2287                    let name = reader
2288                        .decoder()
2289                        .decode(e.local_name().into_inner())
2290                        .map_err(quick_xml::Error::from)?;
2291
2292                    if name == "external" {
2293                        break;
2294                    }
2295                }
2296
2297                (State::InBiberRequirementSection, Event::Start(ref e)) => {
2298                    let name = reader
2299                        .decoder()
2300                        .decode(e.local_name().into_inner())
2301                        .map_err(quick_xml::Error::from)?;
2302
2303                    state = match &*name {
2304                        "file" => State::InBiberFileRequirement,
2305                        _ => State::InBiberRemainder,
2306                    }
2307                }
2308
2309                (State::InBiberRequirementSection, Event::End(ref e)) => {
2310                    let name = reader
2311                        .decoder()
2312                        .decode(e.local_name().into_inner())
2313                        .map_err(quick_xml::Error::from)?;
2314
2315                    if name == "input" || name == "requires" {
2316                        state = State::InBiberRemainder;
2317                    }
2318                }
2319
2320                (State::InBiberFileRequirement, Event::Text(ref e)) => {
2321                    extra_requires.insert(e.unescape()?.to_string());
2322                    state = State::InBiberRequirementSection;
2323                }
2324
2325                (State::InBiberFileRequirement, _) => {
2326                    state = State::InBiberRequirementSection;
2327                }
2328
2329                _ => {}
2330            }
2331        }
2332
2333        // All done!
2334
2335        Ok(if state == State::Searching {
2336            // No biber invocation, in the end.
2337            None
2338        } else {
2339            if use_tectonic_biber_override {
2340                tt_note!(status, "using `tectonic-biber`, found at {}", argv[0]);
2341            }
2342            Some(ExternalToolPass {
2343                argv,
2344                extra_requires,
2345            })
2346        })
2347    }
2348}