1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
use std::io::{Error, ErrorKind, Read, Result, Seek, SeekFrom};

use byteorder::{LittleEndian, ReadBytesExt};
use shallow_tees::ShallowTees;
use takes::Ext;
use typed_builder::TypedBuilder;

use super::util::read_find_bstr;
use super::{index, Entry, FileIndex, Section};
use crate::signature::{self, Signature};
use crate::util::{tell, PHAR_TERMINATOR, STUB_TERMINATOR};

/// The metadata of a phar file.
///
/// A `Reader` owns the input stream together with the header values
/// extracted from it by [`Reader::read`].
#[derive(Debug)]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "reader")))]
pub struct Reader<R: Read + Seek, FileIndexT: FileIndex = index::NameHashMap> {
    /// The stream handed to `read`; later accessors read from it again on demand.
    stream: R,
    /// The stub section, which starts at offset 0 and ends at `STUB_TERMINATOR`.
    stub: Section,
    /// File count read from the manifest.
    num_files: u32,
    /// The 16-bit API field read from the manifest.
    api: u16,
    /// The 32-bit flags field read from the manifest.
    flags: u32,
    /// The length-prefixed alias section of the manifest.
    alias: Section,
    /// The length-prefixed metadata section of the manifest.
    metadata: Section,
    /// The index fed with file entries while the manifest is parsed.
    file_index: FileIndexT,
}

impl<R: Read + Seek, FileIndexT: FileIndex> Reader<R, FileIndexT> {
    /// Parses the phar file.
    ///
    /// This optionally validates the signature.
    /// Stub, metadata and file metadata are not fully validated,
    /// and may not be saved in memory depending on `options`.
    pub fn read(mut read: R, options: Options) -> Result<Self> {
        let mut expected_sig = None;
        let mut sig_offset = None;

        let mut sig = if options.verify_signature {
            let _ = read.seek(SeekFrom::End(-4))?;
            let mut gbmb = [0u8; 4];
            read.read_exact(&mut gbmb[..])?;
            if gbmb != PHAR_TERMINATOR {
                return Err(Error::new(ErrorKind::Other, "corrupted file"));
            }

            let _ = read.seek(SeekFrom::End(-8))?;
            let discrim = read.read_u32::<LittleEndian>()?;
            let sig = Signature::from_u32(discrim).ok_or_else(|| {
                Error::new(
                    ErrorKind::Other,
                    format!("unsupported signature type {:x}", discrim),
                )
            })?;

            let mut expect = vec![0u8; sig.size().into()];
            let offset = read.seek(SeekFrom::End(-8i64 - i64::from(sig.size())))?;
            sig_offset = Some(offset);
            read.read_exact(&mut expect[..])?;
            expected_sig = Some(expect);

            signature::MaybeDummy::Real(sig)
        } else {
            signature::MaybeDummy::Dummy(signature::NullDevice)
        };

        let _ = read.seek(SeekFrom::Start(0))?;

        let mut tee = ShallowTees::new(&mut read, sig.write());

        let mut stub = Section::create(options.cache_stub, 0);
        read_find_bstr(&mut tee, &mut stub, STUB_TERMINATOR)?;

        let manifest_size = tee.read_u32::<LittleEndian>()?;
        let mut manifest = (&mut tee).takes(manifest_size.into())?;

        let num_files = manifest.read_u32::<LittleEndian>()?;
        let api = manifest.read_u16::<LittleEndian>()?;
        let flags = manifest.read_u32::<LittleEndian>()?;

        let alias_len = manifest.read_u32::<LittleEndian>()?;
        let mut alias = Section::create(options.cache_alias, tell(&mut manifest)?);
        alias.from_read(&mut manifest, alias_len)?;

        let metadata_len = manifest.read_u32::<LittleEndian>()?;
        let mut metadata = Section::create(options.cache_metadata, tell(&mut manifest)?);
        metadata.from_read(&mut manifest, metadata_len)?;

        let mut file_index = FileIndexT::default();
        if FileIndexT::scan_files() {
            for _ in 0..num_files {
                let start = tell(&mut manifest)?;
                let entry = Entry::parse(
                    &mut manifest,
                    FileIndexT::requires_name(),
                    FileIndexT::requires_metadata(),
                )?;
                file_index.feed_entry(start, entry)?;
            }
        }
        file_index.end_of_header(tell(&mut manifest)?);

        if let (Some(expected_sig), Some(sig_offset)) = (expected_sig, sig_offset) {
            let _ = tee.seek(SeekFrom::Start(sig_offset))?;
            drop(tee);
            let sig = match sig {
                signature::MaybeDummy::Real(sig) => sig,
                signature::MaybeDummy::Dummy(_) => {
                    unreachable!("expected_sig, sig_offset should be None")
                }
            };
            let ret = sig.finalize();

            if ret[..] != expected_sig[..] {
                return Err(Error::new(ErrorKind::Other, "signature mismatch"));
            }
        }

        Ok(Reader {
            stream: read,
            stub,
            num_files,
            api,
            flags,
            alias,
            metadata,
            file_index,
        })
    }

    /// Returns the stub as a slice.
    ///
    /// If the stub was previously not stored in memory, it is stored in a new Vec.
    /// Consider using `stub_read()` instead if `cache_stub` is false
    /// and storing the stub in memory is not intended.
    pub fn stub_bytes(&mut self) -> Result<impl AsRef<[u8]> + '_> {
        self.stub.as_memory(&mut self.stream)
    }

    /// Returns the stub as an `io::Read`.
    pub fn stub_read(&mut self) -> Result<impl Read + '_> {
        self.stub.as_read(&mut self.stream)
    }

    /// Returns the metadata as a slice.
    ///
    /// If the metadata was previously not stored in memory, it is stored in a new Vec.
    /// Consider using `metadata_read()` instead if `cache_metadata` is false
    /// and storing the metadata in memory is not intended.
    pub fn metadata_bytes(&mut self) -> Result<impl AsRef<[u8]> + '_> {
        self.metadata.as_memory(&mut self.stream)
    }

    /// Returns the metadata as an `io::Read`.
    pub fn metadata_read(&mut self) -> Result<impl Read + '_> {
        self.metadata.as_read(&mut self.stream)
    }
}

impl<R: Read + Seek, FileIndexT: index::Iterable> Reader<R, FileIndexT> {
    /// Calls `f` once for every file in this archive.
    ///
    /// The work is delegated to the file index, which receives the underlying stream.
    /// `f` is handed a byte slice and a reader per file
    /// (presumably the file name and its contents; confirm against the index implementation).
    pub fn for_each_file<F>(&mut self, f: F) -> Result<()>
    where
        F: FnMut(&[u8], &mut (dyn Read)) -> Result<()>,
    {
        let Self {
            file_index, stream, ..
        } = self;
        file_index.for_each_file(stream, f)
    }

    /// Calls `f` once for every file in this archive and combines the results with `fold`.
    ///
    /// `fold` receives the accumulator so far (an `Option`) and the value
    /// produced by `f`; the exact folding order is defined by the file index.
    pub fn for_each_file_fold<F, G, T, U>(&mut self, f: F, fold: G) -> Result<Option<T>>
    where
        F: FnMut(&[u8], &mut (dyn Read)) -> Result<U>,
        G: FnMut(Option<T>, U) -> T,
    {
        let Self {
            file_index, stream, ..
        } = self;
        file_index.for_each_file_fold(stream, f, fold)
    }
}

/// Options for reading phar archives
#[derive(Default, TypedBuilder)]
pub struct Options {
    /// Whether to cache the phar stub in memory
    ///
    /// Default true.
    /// If set to false, stub is read from the input `R` again
    /// when it is queried by the user.
    /// False is only recommended when stub is not going to be used.
    #[builder(default = true)]
    cache_stub: bool,
    /// Whether to cache the phar alias in memory
    ///
    /// Default true.
    /// If set to false, alias is read from the input `R` again
    /// when it is queried by the user.
    /// False is only recommended when stub is not going to be used.
    #[builder(default = true)]
    cache_alias: bool,
    /// Whether to cache the phar metadata string in memory
    ///
    /// Default true.
    /// If set to false, metadata is read from the input `R` again
    /// when it is queried by the user.
    /// False is only recommended when stub is not going to be used.
    #[builder(default = true)]
    cache_metadata: bool,

    /// Whether to verify the phar signature.
    ///
    /// Default true.
    /// When true, the whole file is scanned at least once
    /// when the file is first parsed.
    /// When false, unused bytes would be skipped (with `fseek(3)`)
    /// instead of being read into buffer.
    #[builder(default = true)]
    verify_signature: bool,
}