Skip to main content

tectonic_bundles/
itar.rs

1// Copyright 2017-2021 the Tectonic Project
2// Licensed under the MIT License.
3
4//! The web-friendly "indexed tar" bundle backend.
5//!
6//! The main type offered by this module is the [`ItarBundle`] struct,
7//! which can (but should not) be used directly as any other bundle.
8//!
9//! Instead, wrap it in a [`crate::BundleCache`] for filesystem-backed
10//! caching.
11//!
12//! While the on-server file format backing the "indexed tar" backend is indeed
13//! a standard `tar` file, as far as the client is concerned, this backend is
14//! centered on HTTP byte-range requests. For each file contained in the backing
15//! resource, the index file merely contains a byte offset and length that are
16//! then used to construct an HTTP Range request to obtain the file as needed.
17
18use crate::{Bundle, CachableBundle, FileIndex, FileInfo, NET_RETRY_ATTEMPTS, NET_RETRY_SLEEP_MS};
19use flate2::read::GzDecoder;
20use std::{
21    collections::HashMap,
22    io::{BufRead, BufReader, Cursor, Read},
23    str::FromStr,
24    thread,
25    time::Duration,
26};
27use tectonic_errors::prelude::*;
28use tectonic_geturl::{DefaultBackend, DefaultRangeReader, GetUrlBackend, RangeReader};
29use tectonic_io_base::{digest, InputHandle, InputOrigin, IoProvider, OpenResult};
30use tectonic_status_base::{tt_note, tt_warning, NoopStatusBackend, StatusBackend};
31
/// Per-file record kept by the [`ItarBundle`] index.
///
/// One record is created for every line of the bundle's index file.
#[derive(Debug, Clone)]
pub struct ItarFileInfo {
    /// File name as listed in the index. Also used as the file's path.
    name: String,
    /// Byte offset of the file's data; handed to the range reader as the
    /// start of the read.
    offset: u64,
    /// Number of bytes to request for this file.
    length: usize,
}
39
40impl FileInfo for ItarFileInfo {
41    fn name(&self) -> &str {
42        &self.name
43    }
44    fn path(&self) -> &str {
45        &self.name
46    }
47}
48
49/// A simple FileIndex for compatiblity with [`crate::BundleCache`]
50#[derive(Default, Debug, Clone)]
51pub struct ItarFileIndex {
52    content: HashMap<String, ItarFileInfo>,
53}
54
55impl<'this> FileIndex<'this> for ItarFileIndex {
56    type InfoType = ItarFileInfo;
57
58    fn iter(&'this self) -> Box<dyn Iterator<Item = &'this ItarFileInfo> + 'this> {
59        Box::new(self.content.values())
60    }
61
62    fn len(&self) -> usize {
63        self.content.len()
64    }
65
66    fn initialize(&mut self, reader: &mut dyn Read) -> Result<()> {
67        self.content.clear();
68
69        for line in BufReader::new(reader).lines() {
70            let line = line?;
71            let mut bits = line.split_whitespace();
72
73            if let (Some(name), Some(offset), Some(length)) =
74                (bits.next(), bits.next(), bits.next())
75            {
76                self.content.insert(
77                    name.to_owned(),
78                    ItarFileInfo {
79                        name: name.to_owned(),
80                        offset: offset.parse::<u64>()?,
81                        length: length.parse::<usize>()?,
82                    },
83                );
84            } else {
85                // TODO: preserve the warning info or something!
86                bail!("malformed index line");
87            }
88        }
89        Ok(())
90    }
91
92    /// Find a file in this index
93    fn search(&'this mut self, name: &str) -> Option<ItarFileInfo> {
94        self.content.get(name).cloned()
95    }
96}
97
98/// The old-fashoned Tectonic web bundle format.
99pub struct ItarBundle {
100    url: String,
101    /// Maps all available file names to [`FileInfo`]s.
102    /// This is empty after we create this bundle, so we don't need network
103    /// to make an object. It is automatically filled by get_index when we need it.
104    index: ItarFileIndex,
105
106    /// RangeReader object, responsible for sending queries.
107    /// Will be None when the object is created, automatically
108    /// replaced with Some(...) once needed.
109    reader: Option<DefaultRangeReader>,
110}
111
112impl ItarBundle {
113    /// Make a new ItarBundle.
114    /// This method does not require network access.
115    /// It will succeed even in we can't connect to the bundle, or if we're given a bad url.
116    pub fn new(url: String) -> Result<ItarBundle> {
117        Ok(ItarBundle {
118            index: ItarFileIndex::default(),
119            reader: None,
120            url,
121        })
122    }
123
124    fn connect_reader(&mut self) {
125        let geturl_backend = DefaultBackend::default();
126        // Connect reader if it is not already connected
127        if self.reader.is_none() {
128            self.reader = Some(geturl_backend.open_range_reader(&self.url));
129        }
130    }
131
132    /// Fill this bundle's index, if it is empty.
133    fn ensure_index(&mut self) -> Result<()> {
134        // Fetch index if it is empty
135        if self.index.is_initialized() {
136            return Ok(());
137        }
138        self.connect_reader();
139
140        let mut reader = self.get_index_reader()?;
141        self.index.initialize(&mut reader)?;
142
143        Ok(())
144    }
145}
146
147impl IoProvider for ItarBundle {
148    fn input_open_name(
149        &mut self,
150        name: &str,
151        status: &mut dyn StatusBackend,
152    ) -> OpenResult<InputHandle> {
153        if let Err(e) = self.ensure_index() {
154            return OpenResult::Err(e);
155        };
156
157        let info = match self.index.search(name) {
158            Some(a) => a,
159            None => return OpenResult::NotAvailable,
160        };
161
162        // Retries are handled in open_fileinfo,
163        // since BundleCache never calls input_open_name.
164        self.open_fileinfo(&info, status)
165    }
166}
167
168impl Bundle for ItarBundle {
169    fn all_files(&self) -> Vec<String> {
170        self.index.iter().map(|x| x.path().to_owned()).collect()
171    }
172
173    fn get_digest(&mut self) -> Result<tectonic_io_base::digest::DigestData> {
174        let digest_text = match self.input_open_name(digest::DIGEST_NAME, &mut NoopStatusBackend {})
175        {
176            OpenResult::Ok(h) => {
177                let mut text = String::new();
178                h.take(64).read_to_string(&mut text)?;
179                text
180            }
181
182            OpenResult::NotAvailable => {
183                // Broken or un-cacheable backend.
184                bail!("bundle does not provide needed SHA256SUM file");
185            }
186
187            OpenResult::Err(e) => {
188                return Err(e);
189            }
190        };
191
192        Ok(atry!(digest::DigestData::from_str(&digest_text); ["corrupted SHA256 digest data"]))
193    }
194}
195
196impl CachableBundle<'_, ItarFileIndex> for ItarBundle {
197    fn get_location(&mut self) -> String {
198        self.url.clone()
199    }
200
201    fn initialize_index(&mut self, source: &mut dyn Read) -> Result<()> {
202        // BEGIN AWARE REPORTS PATCH
203        // Parsing the bundle index (tens of thousands of entries) costs tens
204        // of milliseconds, and a process that renders in several passes (the
205        // ARL measurement pass + final pass) builds a fresh bundle per pass,
206        // re-parsing the identical index each time. Memoize the parsed index
207        // process-wide, keyed by bundle URL. The index for a given URL is
208        // immutable for the life of a process.
209        {
210            let memo = index_memo().lock().unwrap();
211            if let Some(idx) = memo.get(&self.url) {
212                self.index = idx.clone();
213                return Ok(());
214            }
215        }
216        self.index.initialize(source)?;
217        index_memo()
218            .lock()
219            .unwrap()
220            .insert(self.url.clone(), self.index.clone());
221        // END AWARE REPORTS PATCH
222        Ok(())
223    }
224
225    fn index(&mut self) -> &mut ItarFileIndex {
226        &mut self.index
227    }
228
229    fn search(&mut self, name: &str) -> Option<ItarFileInfo> {
230        self.index.search(name)
231    }
232
233    fn get_index_reader(&mut self) -> Result<Box<dyn Read>> {
234        let mut geturl_backend = DefaultBackend::default();
235        let index_url = format!("{}.index.gz", &self.url);
236        let reader = GzDecoder::new(geturl_backend.get_url(&index_url)?);
237        Ok(Box::new(reader))
238    }
239
240    fn open_fileinfo(
241        &mut self,
242        info: &ItarFileInfo,
243        status: &mut dyn StatusBackend,
244    ) -> OpenResult<InputHandle> {
245        match self.ensure_index() {
246            Ok(_) => {}
247            Err(e) => return OpenResult::Err(e),
248        };
249
250        // BEGIN AWARE REPORTS PATCH
251        // When BundleCache pre-loads the index from the local cache,
252        // ensure_index() early-returns without ever connecting the range
253        // reader, and the unwrap below panics on any cache miss. Upstream
254        // 0.4.x only avoided this because its warm-cache get_digest()
255        // phone-home connected the reader as a side effect. Connect lazily
256        // here — exactly when a miss actually needs the network.
257        self.connect_reader();
258        // END AWARE REPORTS PATCH
259
260        let mut v = Vec::with_capacity(info.length);
261        tt_note!(status, "downloading {}", info.name);
262
263        // Edge case for zero-sized reads
264        // (these cause errors on some web hosts)
265        if info.length == 0 {
266            return OpenResult::Ok(InputHandle::new_read_only(
267                info.name.to_owned(),
268                Cursor::new(v),
269                InputOrigin::Other,
270            ));
271        }
272
273        // Get file with retries
274        for i in 0..NET_RETRY_ATTEMPTS {
275            let mut stream = match self
276                .reader
277                .as_mut()
278                .unwrap()
279                .read_range(info.offset, info.length)
280            {
281                Ok(r) => r,
282                Err(e) => {
283                    tt_warning!(status,
284                        "failure fetching \"{}\" from network ({}/{NET_RETRY_ATTEMPTS})",
285                        info.name, i+1; e
286                    );
287                    thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS));
288                    continue;
289                }
290            };
291
292            match stream.read_to_end(&mut v) {
293                Ok(_) => {}
294                Err(e) => {
295                    tt_warning!(status,
296                        "failure downloading \"{}\" from network ({}/{NET_RETRY_ATTEMPTS})",
297                        info.name, i+1; e.into()
298                    );
299                    thread::sleep(Duration::from_millis(NET_RETRY_SLEEP_MS));
300                    continue;
301                }
302            };
303
304            return OpenResult::Ok(InputHandle::new_read_only(
305                info.name.to_owned(),
306                Cursor::new(v),
307                InputOrigin::Other,
308            ));
309        }
310
311        OpenResult::Err(anyhow!(
312            "failed to download \"{}\"; please check your network connection.",
313            info.name
314        ))
315    }
316}
317
318// BEGIN AWARE REPORTS PATCH
319/// Process-wide memo of parsed bundle indices, keyed by bundle URL.
320fn index_memo() -> &'static std::sync::Mutex<std::collections::HashMap<String, ItarFileIndex>> {
321    static MEMO: std::sync::OnceLock<
322        std::sync::Mutex<std::collections::HashMap<String, ItarFileIndex>>,
323    > = std::sync::OnceLock::new();
324    MEMO.get_or_init(Default::default)
325}
326// END AWARE REPORTS PATCH