binstall_tar/
builder.rs

1use std::fs;
2use std::io;
3use std::io::prelude::*;
4use std::path::Path;
5use std::str;
6
7use crate::header::{path2bytes, HeaderMode};
8use crate::{other, EntryType, Header};
9
10/// A structure for building archives
11///
12/// This structure has methods for building up an archive from scratch into any
13/// arbitrary writer.
14pub struct Builder<W: Write> {
15    mode: HeaderMode,
16    follow: bool,
17    finished: bool,
18    obj: Option<W>,
19}
20
21impl<W: Write> Builder<W> {
22    /// Create a new archive builder with the underlying object as the
23    /// destination of all data written. The builder will use
24    /// `HeaderMode::Complete` by default.
25    pub fn new(obj: W) -> Builder<W> {
26        Builder {
27            mode: HeaderMode::Complete,
28            follow: true,
29            finished: false,
30            obj: Some(obj),
31        }
32    }
33
34    /// Changes the HeaderMode that will be used when reading fs Metadata for
35    /// methods that implicitly read metadata for an input Path. Notably, this
36    /// does _not_ apply to `append(Header)`.
37    pub fn mode(&mut self, mode: HeaderMode) {
38        self.mode = mode;
39    }
40
41    /// Follow symlinks, archiving the contents of the file they point to rather
42    /// than adding a symlink to the archive. Defaults to true.
43    pub fn follow_symlinks(&mut self, follow: bool) {
44        self.follow = follow;
45    }
46
47    /// Gets shared reference to the underlying object.
48    pub fn get_ref(&self) -> &W {
49        self.obj.as_ref().unwrap()
50    }
51
52    /// Gets mutable reference to the underlying object.
53    ///
54    /// Note that care must be taken while writing to the underlying
55    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
56    /// useful in the situations when one needs to be ensured that
57    /// tar entry was flushed to the disk.
58    pub fn get_mut(&mut self) -> &mut W {
59        self.obj.as_mut().unwrap()
60    }
61
62    /// Unwrap this archive, returning the underlying object.
63    ///
64    /// This function will finish writing the archive if the `finish` function
65    /// hasn't yet been called, returning any I/O error which happens during
66    /// that operation.
67    pub fn into_inner(mut self) -> io::Result<W> {
68        if !self.finished {
69            self.finish()?;
70        }
71        Ok(self.obj.take().unwrap())
72    }
73
74    /// Adds a new entry to this archive.
75    ///
76    /// This function will append the header specified, followed by contents of
77    /// the stream specified by `data`. To produce a valid archive the `size`
78    /// field of `header` must be the same as the length of the stream that's
79    /// being written. Additionally the checksum for the header should have been
80    /// set via the `set_cksum` method.
81    ///
82    /// Note that this will not attempt to seek the archive to a valid position,
83    /// so if the archive is in the middle of a read or some other similar
84    /// operation then this may corrupt the archive.
85    ///
86    /// Also note that after all entries have been written to an archive the
87    /// `finish` function needs to be called to finish writing the archive.
88    ///
89    /// # Errors
90    ///
91    /// This function will return an error for any intermittent I/O error which
92    /// occurs when either reading or writing.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// use binstall_tar::{Builder, Header};
98    ///
99    /// let mut header = Header::new_gnu();
100    /// header.set_path("foo").unwrap();
101    /// header.set_size(4);
102    /// header.set_cksum();
103    ///
104    /// let mut data: &[u8] = &[1, 2, 3, 4];
105    ///
106    /// let mut ar = Builder::new(Vec::new());
107    /// ar.append(&header, data).unwrap();
108    /// let data = ar.into_inner().unwrap();
109    /// ```
110    pub fn append<R: Read>(&mut self, header: &Header, mut data: R) -> io::Result<()> {
111        append(self.get_mut(), header, &mut data)
112    }
113
114    /// Adds a new entry to this archive with the specified path.
115    ///
116    /// This function will set the specified path in the given header, which may
117    /// require appending a GNU long-name extension entry to the archive first.
118    /// The checksum for the header will be automatically updated via the
119    /// `set_cksum` method after setting the path. No other metadata in the
120    /// header will be modified.
121    ///
122    /// Then it will append the header, followed by contents of the stream
123    /// specified by `data`. To produce a valid archive the `size` field of
124    /// `header` must be the same as the length of the stream that's being
125    /// written.
126    ///
127    /// Note that this will not attempt to seek the archive to a valid position,
128    /// so if the archive is in the middle of a read or some other similar
129    /// operation then this may corrupt the archive.
130    ///
131    /// Also note that after all entries have been written to an archive the
132    /// `finish` function needs to be called to finish writing the archive.
133    ///
134    /// # Errors
135    ///
136    /// This function will return an error for any intermittent I/O error which
137    /// occurs when either reading or writing.
138    ///
139    /// # Examples
140    ///
141    /// ```
142    /// use binstall_tar::{Builder, Header};
143    ///
144    /// let mut header = Header::new_gnu();
145    /// header.set_size(4);
146    /// header.set_cksum();
147    ///
148    /// let mut data: &[u8] = &[1, 2, 3, 4];
149    ///
150    /// let mut ar = Builder::new(Vec::new());
151    /// ar.append_data(&mut header, "really/long/path/to/foo", data).unwrap();
152    /// let data = ar.into_inner().unwrap();
153    /// ```
154    pub fn append_data<P: AsRef<Path>, R: Read>(
155        &mut self,
156        header: &mut Header,
157        path: P,
158        data: R,
159    ) -> io::Result<()> {
160        prepare_header_path(self.get_mut(), header, path.as_ref())?;
161        header.set_cksum();
162        self.append(&header, data)
163    }
164
165    /// Adds a new link (symbolic or hard) entry to this archive with the specified path and target.
166    ///
167    /// This function is similar to [`Self::append_data`] which supports long filenames,
168    /// but also supports long link targets using GNU extensions if necessary.
169    /// You must set the entry type to either [`EntryType::Link`] or [`EntryType::Symlink`].
170    /// The `set_cksum` method will be invoked after setting the path. No other metadata in the
171    /// header will be modified.
172    ///
173    /// If you are intending to use GNU extensions, you must use this method over calling
174    /// [`Header::set_link_name`] because that function will fail on long links.
175    ///
176    /// Similar constraints around the position of the archive and completion
177    /// apply as with [`Self::append_data`].
178    ///
179    /// # Errors
180    ///
181    /// This function will return an error for any intermittent I/O error which
182    /// occurs when either reading or writing.
183    ///
184    /// # Examples
185    ///
186    /// ```
187    /// use binstall_tar::{Builder, Header, EntryType};
188    ///
189    /// let mut ar = Builder::new(Vec::new());
190    /// let mut header = Header::new_gnu();
191    /// header.set_username("foo");
192    /// header.set_entry_type(EntryType::Symlink);
193    /// header.set_size(0);
194    /// ar.append_link(&mut header, "really/long/path/to/foo", "other/really/long/target").unwrap();
195    /// let data = ar.into_inner().unwrap();
196    /// ```
197    pub fn append_link<P: AsRef<Path>, T: AsRef<Path>>(
198        &mut self,
199        header: &mut Header,
200        path: P,
201        target: T,
202    ) -> io::Result<()> {
203        self._append_link(header, path.as_ref(), target.as_ref())
204    }
205
206    fn _append_link(&mut self, header: &mut Header, path: &Path, target: &Path) -> io::Result<()> {
207        prepare_header_path(self.get_mut(), header, path)?;
208        prepare_header_link(self.get_mut(), header, target)?;
209        header.set_cksum();
210        self.append(&header, std::io::empty())
211    }
212
213    /// Adds a file on the local filesystem to this archive.
214    ///
215    /// This function will open the file specified by `path` and insert the file
216    /// into the archive with the appropriate metadata set, returning any I/O
217    /// error which occurs while writing. The path name for the file inside of
218    /// this archive will be the same as `path`, and it is required that the
219    /// path is a relative path.
220    ///
221    /// Note that this will not attempt to seek the archive to a valid position,
222    /// so if the archive is in the middle of a read or some other similar
223    /// operation then this may corrupt the archive.
224    ///
225    /// Also note that after all files have been written to an archive the
226    /// `finish` function needs to be called to finish writing the archive.
227    ///
228    /// # Examples
229    ///
230    /// ```no_run
231    /// use binstall_tar::Builder;
232    ///
233    /// let mut ar = Builder::new(Vec::new());
234    ///
235    /// ar.append_path("foo/bar.txt").unwrap();
236    /// ```
237    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
238        let mode = self.mode.clone();
239        let follow = self.follow;
240        append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow)
241    }
242
243    /// Adds a file on the local filesystem to this archive under another name.
244    ///
245    /// This function will open the file specified by `path` and insert the file
246    /// into the archive as `name` with appropriate metadata set, returning any
247    /// I/O error which occurs while writing. The path name for the file inside
248    /// of this archive will be `name` is required to be a relative path.
249    ///
250    /// Note that this will not attempt to seek the archive to a valid position,
251    /// so if the archive is in the middle of a read or some other similar
252    /// operation then this may corrupt the archive.
253    ///
254    /// Note if the `path` is a directory. This will just add an entry to the archive,
255    /// rather than contents of the directory.
256    ///
257    /// Also note that after all files have been written to an archive the
258    /// `finish` function needs to be called to finish writing the archive.
259    ///
260    /// # Examples
261    ///
262    /// ```no_run
263    /// use binstall_tar::Builder;
264    ///
265    /// let mut ar = Builder::new(Vec::new());
266    ///
267    /// // Insert the local file "foo/bar.txt" in the archive but with the name
268    /// // "bar/foo.txt".
269    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").unwrap();
270    /// ```
271    pub fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
272        &mut self,
273        path: P,
274        name: N,
275    ) -> io::Result<()> {
276        let mode = self.mode.clone();
277        let follow = self.follow;
278        append_path_with_name(
279            self.get_mut(),
280            path.as_ref(),
281            Some(name.as_ref()),
282            mode,
283            follow,
284        )
285    }
286
287    /// Adds a file to this archive with the given path as the name of the file
288    /// in the archive.
289    ///
290    /// This will use the metadata of `file` to populate a `Header`, and it will
291    /// then append the file to the archive with the name `path`.
292    ///
293    /// Note that this will not attempt to seek the archive to a valid position,
294    /// so if the archive is in the middle of a read or some other similar
295    /// operation then this may corrupt the archive.
296    ///
297    /// Also note that after all files have been written to an archive the
298    /// `finish` function needs to be called to finish writing the archive.
299    ///
300    /// # Examples
301    ///
302    /// ```no_run
303    /// use std::fs::File;
304    /// use binstall_tar::Builder;
305    ///
306    /// let mut ar = Builder::new(Vec::new());
307    ///
308    /// // Open the file at one location, but insert it into the archive with a
309    /// // different name.
310    /// let mut f = File::open("foo/bar/baz.txt").unwrap();
311    /// ar.append_file("bar/baz.txt", &mut f).unwrap();
312    /// ```
313    pub fn append_file<P: AsRef<Path>>(&mut self, path: P, file: &mut fs::File) -> io::Result<()> {
314        let mode = self.mode.clone();
315        append_file(self.get_mut(), path.as_ref(), file, mode)
316    }
317
318    /// Adds a directory to this archive with the given path as the name of the
319    /// directory in the archive.
320    ///
321    /// This will use `stat` to populate a `Header`, and it will then append the
322    /// directory to the archive with the name `path`.
323    ///
324    /// Note that this will not attempt to seek the archive to a valid position,
325    /// so if the archive is in the middle of a read or some other similar
326    /// operation then this may corrupt the archive.
327    ///
328    /// Note this will not add the contents of the directory to the archive.
329    /// See `append_dir_all` for recusively adding the contents of the directory.
330    ///
331    /// Also note that after all files have been written to an archive the
332    /// `finish` function needs to be called to finish writing the archive.
333    ///
334    /// # Examples
335    ///
336    /// ```
337    /// use std::fs;
338    /// use binstall_tar::Builder;
339    ///
340    /// let mut ar = Builder::new(Vec::new());
341    ///
342    /// // Use the directory at one location, but insert it into the archive
343    /// // with a different name.
344    /// ar.append_dir("bardir", ".").unwrap();
345    /// ```
346    pub fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
347    where
348        P: AsRef<Path>,
349        Q: AsRef<Path>,
350    {
351        let mode = self.mode.clone();
352        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode)
353    }
354
355    /// Adds a directory and all of its contents (recursively) to this archive
356    /// with the given path as the name of the directory in the archive.
357    ///
358    /// Note that this will not attempt to seek the archive to a valid position,
359    /// so if the archive is in the middle of a read or some other similar
360    /// operation then this may corrupt the archive.
361    ///
362    /// Also note that after all files have been written to an archive the
363    /// `finish` function needs to be called to finish writing the archive.
364    ///
365    /// # Examples
366    ///
367    /// ```
368    /// use std::fs;
369    /// use binstall_tar::Builder;
370    ///
371    /// let mut ar = Builder::new(Vec::new());
372    ///
373    /// // Use the directory at one location, but insert it into the archive
374    /// // with a different name.
375    /// ar.append_dir_all("bardir", ".").unwrap();
376    /// ```
377    pub fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
378    where
379        P: AsRef<Path>,
380        Q: AsRef<Path>,
381    {
382        let mode = self.mode.clone();
383        let follow = self.follow;
384        append_dir_all(
385            self.get_mut(),
386            path.as_ref(),
387            src_path.as_ref(),
388            mode,
389            follow,
390        )
391    }
392
393    /// Finish writing this archive, emitting the termination sections.
394    ///
395    /// This function should only be called when the archive has been written
396    /// entirely and if an I/O error happens the underlying object still needs
397    /// to be acquired.
398    ///
399    /// In most situations the `into_inner` method should be preferred.
400    pub fn finish(&mut self) -> io::Result<()> {
401        if self.finished {
402            return Ok(());
403        }
404        self.finished = true;
405        self.get_mut().write_all(&[0; 1024])
406    }
407}
408
409fn append(mut dst: &mut dyn Write, header: &Header, mut data: &mut dyn Read) -> io::Result<()> {
410    dst.write_all(header.as_bytes())?;
411    let len = io::copy(&mut data, &mut dst)?;
412
413    // Pad with zeros if necessary.
414    let buf = [0; 512];
415    let remaining = 512 - (len % 512);
416    if remaining < 512 {
417        dst.write_all(&buf[..remaining as usize])?;
418    }
419
420    Ok(())
421}
422
423fn append_path_with_name(
424    dst: &mut dyn Write,
425    path: &Path,
426    name: Option<&Path>,
427    mode: HeaderMode,
428    follow: bool,
429) -> io::Result<()> {
430    let stat = if follow {
431        fs::metadata(path).map_err(|err| {
432            io::Error::new(
433                err.kind(),
434                format!("{} when getting metadata for {}", err, path.display()),
435            )
436        })?
437    } else {
438        fs::symlink_metadata(path).map_err(|err| {
439            io::Error::new(
440                err.kind(),
441                format!("{} when getting metadata for {}", err, path.display()),
442            )
443        })?
444    };
445    let ar_name = name.unwrap_or(path);
446    if stat.is_file() {
447        append_fs(dst, ar_name, &stat, &mut fs::File::open(path)?, mode, None)
448    } else if stat.is_dir() {
449        append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None)
450    } else if stat.file_type().is_symlink() {
451        let link_name = fs::read_link(path)?;
452        append_fs(
453            dst,
454            ar_name,
455            &stat,
456            &mut io::empty(),
457            mode,
458            Some(&link_name),
459        )
460    } else {
461        #[cfg(unix)]
462        {
463            append_special(dst, path, &stat, mode)
464        }
465        #[cfg(not(unix))]
466        {
467            Err(other(&format!("{} has unknown file type", path.display())))
468        }
469    }
470}
471
472#[cfg(unix)]
473fn append_special(
474    dst: &mut dyn Write,
475    path: &Path,
476    stat: &fs::Metadata,
477    mode: HeaderMode,
478) -> io::Result<()> {
479    use ::std::os::unix::fs::{FileTypeExt, MetadataExt};
480
481    let file_type = stat.file_type();
482    let entry_type;
483    if file_type.is_socket() {
484        // sockets can't be archived
485        return Err(other(&format!(
486            "{}: socket can not be archived",
487            path.display()
488        )));
489    } else if file_type.is_fifo() {
490        entry_type = EntryType::Fifo;
491    } else if file_type.is_char_device() {
492        entry_type = EntryType::Char;
493    } else if file_type.is_block_device() {
494        entry_type = EntryType::Block;
495    } else {
496        return Err(other(&format!("{} has unknown file type", path.display())));
497    }
498
499    let mut header = Header::new_gnu();
500    header.set_metadata_in_mode(stat, mode);
501    prepare_header_path(dst, &mut header, path)?;
502
503    header.set_entry_type(entry_type);
504    let dev_id = stat.rdev();
505    let dev_major = ((dev_id >> 32) & 0xffff_f000) | ((dev_id >> 8) & 0x0000_0fff);
506    let dev_minor = ((dev_id >> 12) & 0xffff_ff00) | ((dev_id) & 0x0000_00ff);
507    header.set_device_major(dev_major as u32)?;
508    header.set_device_minor(dev_minor as u32)?;
509
510    header.set_cksum();
511    dst.write_all(header.as_bytes())?;
512
513    Ok(())
514}
515
516fn append_file(
517    dst: &mut dyn Write,
518    path: &Path,
519    file: &mut fs::File,
520    mode: HeaderMode,
521) -> io::Result<()> {
522    let stat = file.metadata()?;
523    append_fs(dst, path, &stat, file, mode, None)
524}
525
526fn append_dir(
527    dst: &mut dyn Write,
528    path: &Path,
529    src_path: &Path,
530    mode: HeaderMode,
531) -> io::Result<()> {
532    let stat = fs::metadata(src_path)?;
533    append_fs(dst, path, &stat, &mut io::empty(), mode, None)
534}
535
536fn prepare_header(size: u64, entry_type: u8) -> Header {
537    let mut header = Header::new_gnu();
538    let name = b"././@LongLink";
539    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
540    header.set_mode(0o644);
541    header.set_uid(0);
542    header.set_gid(0);
543    header.set_mtime(0);
544    // + 1 to be compliant with GNU tar
545    header.set_size(size + 1);
546    header.set_entry_type(EntryType::new(entry_type));
547    header.set_cksum();
548    header
549}
550
551fn prepare_header_path(dst: &mut dyn Write, header: &mut Header, path: &Path) -> io::Result<()> {
552    // Try to encode the path directly in the header, but if it ends up not
553    // working (probably because it's too long) then try to use the GNU-specific
554    // long name extension by emitting an entry which indicates that it's the
555    // filename.
556    if let Err(e) = header.set_path(path) {
557        let data = path2bytes(&path)?;
558        let max = header.as_old().name.len();
559        // Since `e` isn't specific enough to let us know the path is indeed too
560        // long, verify it first before using the extension.
561        if data.len() < max {
562            return Err(e);
563        }
564        let header2 = prepare_header(data.len() as u64, b'L');
565        // null-terminated string
566        let mut data2 = data.chain(io::repeat(0).take(1));
567        append(dst, &header2, &mut data2)?;
568
569        // Truncate the path to store in the header we're about to emit to
570        // ensure we've got something at least mentioned. Note that we use
571        // `str`-encoding to be compatible with Windows, but in general the
572        // entry in the header itself shouldn't matter too much since extraction
573        // doesn't look at it.
574        let truncated = match str::from_utf8(&data[..max]) {
575            Ok(s) => s,
576            Err(e) => str::from_utf8(&data[..e.valid_up_to()]).unwrap(),
577        };
578        header.set_path(truncated)?;
579    }
580    Ok(())
581}
582
583fn prepare_header_link(
584    dst: &mut dyn Write,
585    header: &mut Header,
586    link_name: &Path,
587) -> io::Result<()> {
588    // Same as previous function but for linkname
589    if let Err(e) = header.set_link_name(&link_name) {
590        let data = path2bytes(&link_name)?;
591        if data.len() < header.as_old().linkname.len() {
592            return Err(e);
593        }
594        let header2 = prepare_header(data.len() as u64, b'K');
595        let mut data2 = data.chain(io::repeat(0).take(1));
596        append(dst, &header2, &mut data2)?;
597    }
598    Ok(())
599}
600
601fn append_fs(
602    dst: &mut dyn Write,
603    path: &Path,
604    meta: &fs::Metadata,
605    read: &mut dyn Read,
606    mode: HeaderMode,
607    link_name: Option<&Path>,
608) -> io::Result<()> {
609    let mut header = Header::new_gnu();
610
611    prepare_header_path(dst, &mut header, path)?;
612    header.set_metadata_in_mode(meta, mode);
613    if let Some(link_name) = link_name {
614        prepare_header_link(dst, &mut header, link_name)?;
615    }
616    header.set_cksum();
617    append(dst, &header, read)
618}
619
620fn append_dir_all(
621    dst: &mut dyn Write,
622    path: &Path,
623    src_path: &Path,
624    mode: HeaderMode,
625    follow: bool,
626) -> io::Result<()> {
627    let mut stack = vec![(src_path.to_path_buf(), true, false)];
628    while let Some((src, is_dir, is_symlink)) = stack.pop() {
629        let dest = path.join(src.strip_prefix(&src_path).unwrap());
630        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
631        if is_dir || (is_symlink && follow && src.is_dir()) {
632            for entry in fs::read_dir(&src)? {
633                let entry = entry?;
634                let file_type = entry.file_type()?;
635                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
636            }
637            if dest != Path::new("") {
638                append_dir(dst, &dest, &src, mode)?;
639            }
640        } else if !follow && is_symlink {
641            let stat = fs::symlink_metadata(&src)?;
642            let link_name = fs::read_link(&src)?;
643            append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name))?;
644        } else {
645            #[cfg(unix)]
646            {
647                let stat = fs::metadata(&src)?;
648                if !stat.is_file() {
649                    append_special(dst, &dest, &stat, mode)?;
650                    continue;
651                }
652            }
653            append_file(dst, &dest, &mut fs::File::open(src)?, mode)?;
654        }
655    }
656    Ok(())
657}
658
659impl<W: Write> Drop for Builder<W> {
660    fn drop(&mut self) {
661        let _ = self.finish();
662    }
663}