Skip to main content

phar/write/
writer.rs

1use std::borrow::Cow;
2use std::convert::TryInto;
3use std::ffi::OsStr;
4use std::io::{self, Error, ErrorKind, Read, Result, Seek, SeekFrom, Write};
5use std::path::Path;
6use std::time::{SystemTime, UNIX_EPOCH};
7
8use byteorder::{LittleEndian, WriteBytesExt};
9use walkdir::WalkDir;
10
11use super::util::{write_bstr, Crc32Writer, MultiWrite};
12use crate::signature::Signature;
13use crate::util::{tell, PHAR_TERMINATOR, STUB_TERMINATOR};
14use crate::Compression;
15
16/// Creates a phar file.
17///
18/// The `stream` must support *random-access read AND write*.
19/// When passing `fs::File`, it should be created with
20/// `fs::OpenOptions::new().read(true).write(true).create(true)`,
21/// optionally with `truncate(true)` as well.
22///
23/// For performance reasons, the name and metadata of _all_ entries
24/// must be known at the beginning before writing any file content.
25/// Editing previous writes is _not_ supported,
26/// because that would require repacking all subsequent contents.
27pub fn create<W: Read + Write + Seek>(stream: W, signature: Signature) -> NeedStub<W> {
28    NeedStub { stream, signature }
29}
30
31/// Intermediate type for writing phar.
32///
33/// Call `stub` to progress to the next builder step.
34pub struct NeedStub<W: Read + Write + Seek> {
35    stream: W,
36    signature: Signature,
37}
38
39impl<W: Read + Write + Seek> NeedStub<W> {
40    /// Sets the stub for the phar archive.
41    ///
42    /// It is not necessary to append the `__HALT_COMPILER();`,
43    /// as the `phar` library will insert it automatically.
44    ///
45    /// It is not strictly necessary for the stub to start with `<?php`,
46    /// but lack of `<?php` prevents the `php` command from running the file directly.
47    ///
48    /// Consider adding a line `#!/usr/bin/env php\n` before the `<?php` tag
49    /// to allow direct shebang execution of the output file.
50    pub fn stub(mut self, mut stub: impl Read) -> Result<NeedAlias<W>> {
51        let _ = io::copy(&mut stub, &mut self.stream)?;
52        self.stream.write_all(STUB_TERMINATOR)?;
53        let manifest_size_offset = tell(&mut self.stream)?;
54
55        let _ = self.stream.seek(SeekFrom::Current(8))?; // manifest size, num_files
56        self.stream.write_all(&[0x11, 0])?; // api
57        self.stream.write_u32::<LittleEndian>(0x00010000)?; // flag
58
59        Ok(NeedAlias {
60            manifest_size_offset,
61            stream: self.stream,
62            signature: self.signature,
63        })
64    }
65}
66
67/// Intermediate type for writing phar.
68///
69/// Call `alias` or `metadata` to progress to the next builder step.
70pub struct NeedAlias<W: Read + Write + Seek> {
71    manifest_size_offset: u64,
72    stream: W,
73    signature: Signature,
74}
75
76impl<W: Read + Write + Seek> NeedAlias<W> {
77    /// Sets the alias for the phar archive.
78    pub fn alias(mut self, alias: impl Read) -> Result<NeedGlobMeta<W>> {
79        write_bstr(&mut self.stream, alias, "alias is too long")?;
80        Ok(NeedGlobMeta {
81            manifest_size_offset: self.manifest_size_offset,
82            stream: self.stream,
83            signature: self.signature,
84        })
85    }
86
87    /// Sets the metadata for the phar archive.
88    ///
89    /// The `phar` crate does not validate the contents,
90    /// but they should either be empty string or comply to PHP serialization format.
91    pub fn metadata(self, metadata: impl Read) -> Result<NeedEntries<W>> {
92        self.alias(io::empty())?.metadata(metadata)
93    }
94}
95
96/// Intermediate type for writing phar.
97pub struct NeedGlobMeta<W: Read + Write + Seek> {
98    manifest_size_offset: u64,
99    stream: W,
100    signature: Signature,
101}
102
103impl<W: Read + Write + Seek> NeedGlobMeta<W> {
104    /// Sets the metadata for the phar archive.
105    ///
106    /// The `phar` crate does not validate the contents,
107    /// but they should either be empty string or comply to PHP serialization format.
108    pub fn metadata(mut self, metadata: impl Read) -> Result<NeedEntries<W>> {
109        write_bstr(&mut self.stream, metadata, "metadata is too long")?;
110        Ok(NeedEntries {
111            manifest_size_offset: self.manifest_size_offset,
112            stream: self.stream,
113            signature: self.signature,
114            entries: Vec::new(),
115            global_flags: 0x00010000,
116        })
117    }
118}
119
120/// Preparation step for writing phar entries.
121///
122/// For performance reasons, users need to first provide all file metadata
123/// before providing all file contents.
124/// Consider using `build_from_*`
125/// if the data source is located on the filesystem.
126pub struct NeedEntries<W: Read + Write + Seek> {
127    manifest_size_offset: u64,
128    stream: W,
129    signature: Signature,
130    entries: Vec<WriteEntry>,
131    global_flags: u32,
132}
133
134impl<W: Read + Write + Seek> NeedEntries<W> {
135    /// Adds an entry to the phar.
136    ///
137    /// The file contents shall be later passed with the `Contents::feed` method in the same order.
138    pub fn entry(
139        &mut self,
140        name: impl Read,
141        metadata: impl Read,
142        timestamp: SystemTime,
143        mode: u32,
144        compression: Compression,
145    ) -> Result<()> {
146        write_bstr(&mut self.stream, name, "file name is too long")?;
147        let uncompressed_offset = tell(&mut self.stream)?;
148
149        let _ = self.stream.seek(SeekFrom::Current(4))?; // uncompressed filesize
150
151        self.stream.write_u32::<LittleEndian>(
152            #[allow(clippy::as_conversions)]
153            // explicit truncation to u32, since we have no better solution
154            match timestamp.duration_since(UNIX_EPOCH) {
155                Ok(duration) => duration.as_secs() as u32,
156                Err(err) => {
157                    let secs = err.duration().as_secs() as u32;
158                    secs.wrapping_neg()
159                }
160            },
161        )?;
162
163        let _ = self.stream.seek(SeekFrom::Current(8))?; // compressed filesize, crc32
164        self.stream.write_u32::<LittleEndian>({
165            let mut out = mode & 0x1FF; // should we panic if mode >= 0x200?
166            out |= compression.bit();
167            out
168        })?;
169
170        self.global_flags |= compression.bit();
171
172        write_bstr(&mut self.stream, metadata, "file metadata is too large")?;
173
174        self.entries.push(WriteEntry {
175            uncompressed_offset,
176            compression,
177        });
178
179        Ok(())
180    }
181
182    /// Starts writing the contents section of the phar.
183    ///
184    /// Users should call `feed` on the returned `Contents` value with the file contents
185    /// in the exact same order as entries declared with `entry`.
186    pub fn contents(mut self) -> Result<Contents<W>> {
187        let content_offset = tell(&mut self.stream)?;
188        let manifest_size = content_offset - (self.manifest_size_offset + 4);
189
190        let _ = self
191            .stream
192            .seek(SeekFrom::Start(self.manifest_size_offset))?;
193        self.stream.write_u32::<LittleEndian>(
194            manifest_size
195                .try_into()
196                .map_err(|_| Error::new(ErrorKind::Other, "manifest too large"))?,
197        )?;
198        self.stream.write_u32::<LittleEndian>(
199            self.entries
200                .len()
201                .try_into()
202                .map_err(|_| Error::new(ErrorKind::Other, "too many file entries"))?,
203        )?;
204        let _ = self.stream.seek(SeekFrom::Current(2))?; // phar api version
205        self.stream.write_u32::<LittleEndian>(self.global_flags)?;
206
207        Ok(Contents {
208            stream: self.stream,
209            entries: self.entries,
210            ptr: Some(0),
211            signature: Some(self.signature),
212            end_offset: content_offset,
213        })
214    }
215
216    /// Builds the phar from a directory on the filesystem.
217    pub fn build_from_directory(self, path: &Path, compression: Compression) -> Result<()> {
218        let vec: Result<Vec<(_, _)>> = WalkDir::new(path)
219            .into_iter()
220            .map(|entry| {
221                let entry = entry?;
222                Ok((
223                    entry
224                        .path()
225                        .strip_prefix(path)
226                        .map_err(|_| {
227                            Error::new(ErrorKind::Other, "path is not a prefix of walked entry")
228                        })?
229                        .as_os_str()
230                        .to_owned(),
231                    entry.path().to_owned(),
232                ))
233            })
234            .collect();
235        let vec = vec?;
236        self.build_from_path_iter(|| vec.iter().map(|(a, b)| Ok((a, b))), compression)
237    }
238
239    /// Builds the phar from an iterator of file paths.
240    ///
241    /// The iterator parameter yields `(S, P)` pairs,
242    /// where each `S` is an `OsStr` representing the path inside the archive
243    /// and each `P` is a `Path` that resolves to the actual file to include
244    /// (at least relative to the current working directory).
245    pub fn build_from_path_iter<S, P, I>(
246        mut self,
247        iter: impl Fn() -> I,
248        compression: Compression,
249    ) -> Result<()>
250    where
251        I: Iterator<Item = Result<(S, P)>>,
252        S: AsRef<OsStr>,
253        P: AsRef<Path>,
254    {
255        use std::fs;
256
257        #[cfg(unix)]
258        fn os_str_to_bytes(name: &OsStr) -> impl AsRef<[u8]> + '_ {
259            use std::os::unix::ffi::OsStrExt;
260            Cow::Borrowed(name.as_bytes())
261        }
262
263        #[cfg(not(unix))]
264        fn os_str_to_bytes(name: &OsStr) -> impl AsRef<[u8]> + '_ {
265            match name.to_string_lossy() {
266                Cow::Borrowed(name) => Cow::Borrowed(name.as_bytes()),
267                Cow::Owned(name) => Cow::Owned(name.into_bytes()),
268            }
269        }
270
271        #[cfg(unix)]
272        fn stat_to_mode(permissions: fs::Permissions) -> u32 {
273            use std::os::unix::fs::PermissionsExt;
274            permissions.mode()
275        }
276        #[cfg(not(unix))]
277        fn stat_to_mode(permissions: fs::Permissions) -> u32 {
278            if permissions.readonly() {
279                0o444
280            } else {
281                0o664
282            }
283        }
284
285        for pair in iter() {
286            let (name, file) = pair?;
287            let stat = file.as_ref().metadata()?;
288            if stat.is_file() {
289                self.entry(
290                    os_str_to_bytes(name.as_ref()).as_ref(),
291                    &b""[..],
292                    stat.modified()?,
293                    stat_to_mode(stat.permissions()),
294                    compression,
295                )?;
296            }
297        }
298        let mut contents = self.contents()?;
299        for pair in iter() {
300            let (_, file) = pair?;
301            contents.feed(fs::File::open(file)?)?;
302        }
303        Ok(())
304    }
305}
306
307struct WriteEntry {
308    uncompressed_offset: u64,
309    compression: Compression,
310}
311
312/// Step for writing phar file contents.
313///
314/// See also the documentation of `NeedEntries`.
315///
316/// The file signature is automatically appended when all files have been written.
317/// File state is _undefined_ before the last entry is written.
318pub struct Contents<W: Read + Write + Seek> {
319    stream: W,
320    entries: Vec<WriteEntry>,
321    ptr: Option<usize>,
322    signature: Option<Signature>,
323    end_offset: u64,
324}
325
326impl<W: Read + Write + Seek> Contents<W> {
327    /// Passes the content source for the next file entry.
328    pub fn feed(&mut self, read: impl Read) -> Result<()> {
329        fn try_feed(
330            entry: &WriteEntry,
331            mut read: impl Read,
332            mut write: impl Write + Seek,
333            start_offset: u64,
334        ) -> Result<u64> {
335            let start = write.seek(SeekFrom::Start(start_offset))?;
336
337            let mut comp_write = entry.compression.make_write(&mut write)?;
338
339            let mut cksum = Crc32Writer::default();
340
341            #[allow(clippy::as_conversions)]
342            let uncompressed_size = io::copy(
343                &mut read,
344                &mut MultiWrite([
345                    &mut comp_write as &mut dyn Write,
346                    &mut cksum as &mut dyn Write,
347                ]),
348            )?;
349            drop(comp_write);
350
351            let end = tell(&mut write)?;
352            let compressed_size = end - start;
353
354            let _ = write.seek(SeekFrom::Start(entry.uncompressed_offset))?;
355            write.write_u32::<LittleEndian>(
356                uncompressed_size
357                    .try_into()
358                    .map_err(|_| Error::new(ErrorKind::Other, "content is too large"))?,
359            )?;
360            let _ = write.seek(SeekFrom::Current(4))?; // unix timestamp already written
361            write.write_u32::<LittleEndian>(
362                compressed_size
363                    .try_into()
364                    .map_err(|_| Error::new(ErrorKind::Other, "content is too large"))?,
365            )?;
366            write.write_u32::<LittleEndian>(cksum.finish())?;
367
368            Ok(end)
369        }
370
371        fn write_signature(
372            mut stream: impl Read + Write + Seek,
373            end_offset: u64,
374            mut signature: Signature,
375        ) -> Result<()> {
376            let _ = stream.seek(SeekFrom::Start(0))?;
377            let _ = io::copy(&mut (&mut stream).take(end_offset), &mut signature.write())?;
378            let sig_id = signature.to_u32();
379            let bytes = signature.finalize();
380            let _ = stream.seek(SeekFrom::Start(end_offset))?;
381            stream.write_all(&bytes[..])?;
382            stream.write_u32::<LittleEndian>(sig_id)?;
383            stream.write_all(PHAR_TERMINATOR)?;
384            Ok(())
385        }
386
387        let ptr = match self.ptr {
388            Some(ptr) => ptr,
389            None => {
390                return Err(Error::new(
391                    ErrorKind::Other,
392                    "feed() called again after returning Err",
393                ))
394            }
395        };
396        let Contents {
397            stream: write,
398            entries,
399            end_offset,
400            signature,
401            ..
402        } = self;
403        let entry = match entries.get(ptr) {
404            Some(entry) => entry,
405            None => return Err(Error::new(ErrorKind::Other, "feed() called too many times")),
406        };
407        let ret = try_feed(entry, read, &mut *write, *end_offset);
408        match &ret {
409            Ok(new_end_offset) => {
410                self.ptr = Some(ptr + 1);
411                if entries.get(ptr + 1).is_none() {
412                    write_signature(write, *new_end_offset, signature.take().expect("last call"))?;
413                }
414                self.end_offset = *new_end_offset;
415            }
416            Err(_) => self.ptr = None,
417        }
418        ret.map(|_| ())
419    }
420}