Skip to main content

tectonic/io/
memory.rs

1// src/io/memory.rs -- I/O to in-memory buffers
2// Copyright 2016-2020 the Tectonic Project
3// Licensed under the MIT License.
4
5//! MemoryIo is an IoProvider that stores "files" in in-memory buffers.
6
7use std::{
8    cell::RefCell,
9    collections::HashMap,
10    io::{self, Cursor, Read, Seek, SeekFrom, Write},
11    rc::Rc,
12    time::SystemTime,
13};
14use tectonic_errors::Result;
15use tectonic_status_base::StatusBackend;
16
17use super::{
18    normalize_tex_path, InputFeatures, InputHandle, InputOrigin, IoProvider, OpenResult,
19    OutputHandle,
20};
21
/// Contents and metadata of a single file held by the memory-backed I/O
/// provider.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct MemoryFileInfo {
    // TODO: a chunked buffer (e.g. a list of 4k pages) might serve better
    // than a flat Vec<u8>, whose reallocations get costly as a file grows.
    /// The file's contents as raw bytes.
    pub data: Vec<u8>,
    /// Unix modification time of the in-memory file, if one is recorded.
    pub unix_mtime: Option<i64>,
}
34
/// A collection of files created or used inside a memory-backed I/O provider,
/// keyed by file name.
pub type MemoryFileCollection = HashMap<String, MemoryFileInfo>;
37
/// When a file is "opened", we create a MemoryIoItem struct that tracks the
/// data, seek cursor state, etc.
struct MemoryIoItem {
    // This is the best way I can come up with to allow the file object to
    // update its data in its parent data structure.
    /// Shared handle to the provider's file table. The item's entry is
    /// removed from the table on open and written back when the item is
    /// dropped.
    files: Rc<RefCell<MemoryFileCollection>>,

    /// Key under which this file is stored in `files`.
    name: String,
    /// The file's bytes together with the current read/write position.
    state: Cursor<Vec<u8>>,
    /// Modification time captured at open: the stored value for an existing
    /// file, or the current time for a new or truncated one.
    unix_mtime: Option<i64>,
    /// Set once `write` has been called on this item; makes the drop handler
    /// stamp the stored entry with a fresh modification time.
    was_modified: bool,
}
50
/// Return the current system time as whole seconds since the Unix epoch,
/// consistent with our Unix file modification time API.
///
/// The function is deliberately infallible so that callers do not have to
/// thread `Result`s around: if the system clock reads earlier than the epoch,
/// zero is returned instead.
fn now_as_unix_time() -> i64 {
    // There does not seem to be a tidier way to turn a SystemTime into a
    // time_t-style value. The zero fallback indicates an error to C code, if
    // it cares.
    SystemTime::now()
        .duration_since(SystemTime::UNIX_EPOCH)
        .map_or(0, |elapsed| elapsed.as_secs() as i64)
}
64
65impl MemoryIoItem {
66    pub fn new(
67        files: &Rc<RefCell<MemoryFileCollection>>,
68        name: &str,
69        truncate: bool,
70    ) -> MemoryIoItem {
71        let (cur_data, cur_mtime) = match files.borrow_mut().remove(name) {
72            Some(info) => {
73                if truncate {
74                    (Vec::new(), Some(now_as_unix_time()))
75                } else {
76                    (info.data, info.unix_mtime)
77                }
78            }
79            None => (Vec::new(), Some(now_as_unix_time())),
80        };
81
82        MemoryIoItem {
83            files: files.clone(),
84            name: name.to_owned(),
85            state: Cursor::new(cur_data),
86            unix_mtime: cur_mtime,
87            was_modified: false,
88        }
89    }
90}
91
92impl Read for MemoryIoItem {
93    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
94        self.state.read(buf)
95    }
96}
97
98impl Write for MemoryIoItem {
99    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
100        self.was_modified = true;
101        self.state.write(buf)
102    }
103
104    fn flush(&mut self) -> io::Result<()> {
105        self.state.flush()
106    }
107}
108
109impl Seek for MemoryIoItem {
110    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
111        self.state.seek(pos)
112    }
113}
114
115impl InputFeatures for MemoryIoItem {
116    fn get_size(&mut self) -> Result<usize> {
117        Ok(self.state.get_ref().len())
118    }
119
120    fn get_unix_mtime(&mut self) -> Result<Option<i64>> {
121        Ok(self.unix_mtime)
122    }
123
124    fn try_seek(&mut self, pos: SeekFrom) -> Result<u64> {
125        Ok(self.state.seek(pos)?)
126    }
127}
128
129impl Drop for MemoryIoItem {
130    fn drop(&mut self) {
131        let unix_mtime = if self.was_modified {
132            Some(now_as_unix_time())
133        } else {
134            self.unix_mtime
135        };
136
137        // I think split_off() is an efficient way to move our data vector
138        // back into the hashmap? Ideally we could "consume" self but I don't
139        // believe that's possible in a Drop implementation.
140        let mut mfiles = self.files.borrow_mut();
141        mfiles.insert(
142            self.name.clone(),
143            MemoryFileInfo {
144                data: self.state.get_mut().split_off(0),
145                unix_mtime,
146            },
147        );
148    }
149}
150
/// An I/O driver backed by a collection of in-memory files.
pub struct MemoryIo {
    /// Map of file paths to in-memory file data in this I/O provider.
    pub files: Rc<RefCell<MemoryFileCollection>>,
    /// Whether `output_open_stdout` hands out a handle (`true`) or reports
    /// `OpenResult::NotAvailable` (`false`).
    stdout_allowed: bool,
}
157
158impl MemoryIo {
159    /// Create a new memory-backed I/O. `stdout_allowed` controls whether attempts to open stdout
160    /// on this type succeed or fail.
161    pub fn new(stdout_allowed: bool) -> MemoryIo {
162        MemoryIo {
163            files: Rc::new(RefCell::new(HashMap::new())),
164            stdout_allowed,
165        }
166    }
167
168    /// Create a new entry into the backing file collection, with automatically generated
169    /// modification time.
170    pub fn create_entry(&mut self, name: &str, data: Vec<u8>) {
171        let mut mfiles = self.files.borrow_mut();
172        mfiles.insert(
173            name.to_owned(),
174            MemoryFileInfo {
175                data,
176                unix_mtime: Some(now_as_unix_time()),
177            },
178        );
179    }
180
181    /// Name to use for stdout file
182    pub fn stdout_key(&self) -> &str {
183        ""
184    }
185}
186
187impl IoProvider for MemoryIo {
188    fn output_open_name(&mut self, name: &str) -> OpenResult<OutputHandle> {
189        if name.is_empty() {
190            return OpenResult::NotAvailable;
191        }
192
193        let name = normalize_tex_path(name);
194
195        let oh = OutputHandle::new(name.clone(), MemoryIoItem::new(&self.files, &name, true));
196
197        // `hyperxmp.sty` does a thing where it tries to get today's date by
198        // calling \filemoddate on `\jobname.log`. That essentially relies on it
199        // being possible to \openin an \openout file that hasn't yet been
200        // closed. I think that it's reasonable to allow that, if we just
201        // provide null data and don't try to support intermixed reads and
202        // writes. We have to do this after creating the MemoryIoItem since that
203        // step removes any preexisting entry from the table of files.
204        self.create_entry(&name, Vec::new());
205
206        OpenResult::Ok(oh)
207    }
208
209    fn output_open_stdout(&mut self) -> OpenResult<OutputHandle> {
210        if !self.stdout_allowed {
211            return OpenResult::NotAvailable;
212        }
213
214        OpenResult::Ok(OutputHandle::new(
215            self.stdout_key(),
216            MemoryIoItem::new(&self.files, self.stdout_key(), true),
217        ))
218    }
219
220    fn input_open_name(
221        &mut self,
222        name: &str,
223        _status: &mut dyn StatusBackend,
224    ) -> OpenResult<InputHandle> {
225        if name.is_empty() {
226            return OpenResult::NotAvailable;
227        }
228
229        let name = normalize_tex_path(name);
230
231        if self.files.borrow().contains_key(&*name) {
232            OpenResult::Ok(InputHandle::new(
233                name.clone(),
234                MemoryIoItem::new(&self.files, &name, false),
235                InputOrigin::Other,
236            ))
237        } else {
238            OpenResult::NotAvailable
239        }
240    }
241}
242
#[cfg(test)]
mod tests {
    use super::*;
    use crate::status::NoopStatusBackend;
    use std::io::{BufRead, BufReader};

    /// Open `name` for output, write `line` plus a newline, and close the
    /// handle again by letting it fall out of scope.
    fn write_line(mem: &mut MemoryIo, name: &str, line: &str) {
        let mut out = mem.output_open_name(name).unwrap();
        writeln!(out, "{}", line).unwrap();
    }

    /// Open `name` for input, perform `count` successive `read_line` calls,
    /// and return the length of what each call produced. The handle is closed
    /// on return.
    fn line_lengths(mem: &mut MemoryIo, name: &str, count: usize) -> Vec<usize> {
        let mut sb = NoopStatusBackend::default();
        let handle = mem.input_open_name(name, &mut sb).unwrap();
        let mut reader = BufReader::new(handle);
        let mut line = String::new();

        (0..count)
            .map(|_| {
                line.clear();
                reader.read_line(&mut line).unwrap();
                line.len()
            })
            .collect()
    }

    /// Regression test: early versions failed to truncate files opened for
    /// writing, leaving junk past the intended EOF when the engine ran for
    /// several passes and a file shrank from one pass to the next.
    #[test]
    fn shrinking_file() {
        let mut mem = MemoryIo::new(false);
        let name = "test.tex";

        // Write a line to the file; the handle is closed implicitly.
        write_line(&mut mem, name, "0123456789");

        // Reopen the file for input and close it straight away.
        {
            let mut sb = NoopStatusBackend::default();
            mem.input_open_name(name, &mut sb).unwrap();
        }

        // Opening for input must *not* have truncated the file.
        assert_eq!(line_lengths(&mut mem, name, 1), vec![11]);

        // Now reopen for output and write a shorter line.
        write_line(&mut mem, name, "0123");

        // The file should have been truncated: one short line, then EOF.
        assert_eq!(line_lengths(&mut mem, name, 2), vec![5, 0]);
    }
}
296}