async_tar_wasm/
builder.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "fs")]
4use async_std::fs;
5use async_std::{
6    io::{self, Read, Write},
7    path::Path,
8    prelude::*,
9};
10
11#[cfg(feature = "fs")]
12use crate::other;
13use crate::{
14    header::{bytes2path, path2bytes, HeaderMode},
15    EntryType, Header,
16};
17
18/// A structure for building archives
19///
20/// This structure has methods for building up an archive from scratch into any
21/// arbitrary writer.
22pub struct Builder<W: Write + Unpin + Send + Sync> {
23    mode: HeaderMode,
24    follow: bool,
25    finished: bool,
26    obj: Option<W>,
27}
28
29impl<W: Write + Unpin + Send + Sync> Builder<W> {
30    /// Create a new archive builder with the underlying object as the
31    /// destination of all data written. The builder will use
32    /// `HeaderMode::Complete` by default.
33    pub fn new(obj: W) -> Builder<W> {
34        Builder {
35            mode: HeaderMode::Complete,
36            follow: true,
37            finished: false,
38            obj: Some(obj),
39        }
40    }
41
42    /// Changes the HeaderMode that will be used when reading fs Metadata for
43    /// methods that implicitly read metadata for an input Path. Notably, this
44    /// does _not_ apply to `append(Header)`.
45    pub fn mode(&mut self, mode: HeaderMode) {
46        self.mode = mode;
47    }
48
49    /// Follow symlinks, archiving the contents of the file they point to rather
50    /// than adding a symlink to the archive. Defaults to true.
51    pub fn follow_symlinks(&mut self, follow: bool) {
52        self.follow = follow;
53    }
54
55    /// Gets shared reference to the underlying object.
56    pub fn get_ref(&self) -> &W {
57        self.obj.as_ref().unwrap()
58    }
59
60    /// Gets mutable reference to the underlying object.
61    ///
62    /// Note that care must be taken while writing to the underlying
63    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
64    /// useful in the situations when one needs to be ensured that
65    /// tar entry was flushed to the disk.
66    pub fn get_mut(&mut self) -> &mut W {
67        self.obj.as_mut().unwrap()
68    }
69
70    /// Unwrap this archive, returning the underlying object.
71    ///
72    /// This function will finish writing the archive if the `finish` function
73    /// hasn't yet been called, returning any I/O error which happens during
74    /// that operation.
75    pub async fn into_inner(mut self) -> io::Result<W> {
76        if !self.finished {
77            self.finish().await?;
78        }
79        Ok(self.obj.take().unwrap())
80    }
81
82    /// Adds a new entry to this archive.
83    ///
84    /// This function will append the header specified, followed by contents of
85    /// the stream specified by `data`. To produce a valid archive the `size`
86    /// field of `header` must be the same as the length of the stream that's
87    /// being written. Additionally the checksum for the header should have been
88    /// set via the `set_cksum` method.
89    ///
90    /// Note that this will not attempt to seek the archive to a valid position,
91    /// so if the archive is in the middle of a read or some other similar
92    /// operation then this may corrupt the archive.
93    ///
94    /// Also note that after all entries have been written to an archive the
95    /// `finish` function needs to be called to finish writing the archive.
96    ///
97    /// # Errors
98    ///
99    /// This function will return an error for any intermittent I/O error which
100    /// occurs when either reading or writing.
101    ///
102    /// # Examples
103    ///
104    /// ```
105    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
106    /// #
107    /// use async_tar::{Builder, Header};
108    ///
109    /// let mut header = Header::new_gnu();
110    /// header.set_path("foo")?;
111    /// header.set_size(4);
112    /// header.set_cksum();
113    ///
114    /// let mut data: &[u8] = &[1, 2, 3, 4];
115    ///
116    /// let mut ar = Builder::new(Vec::new());
117    /// ar.append(&header, data).await?;
118    /// let data = ar.into_inner().await?;
119    /// #
120    /// # Ok(()) }) }
121    /// ```
122    pub async fn append<R: Read + Unpin + Send>(
123        &mut self,
124        header: &Header,
125        mut data: R,
126    ) -> io::Result<()> {
127        append(self.get_mut(), header, &mut data).await?;
128
129        Ok(())
130    }
131
132    /// Adds a new entry to this archive with the specified path.
133    ///
134    /// This function will set the specified path in the given header, which may
135    /// require appending a GNU long-name extension entry to the archive first.
136    /// The checksum for the header will be automatically updated via the
137    /// `set_cksum` method after setting the path. No other metadata in the
138    /// header will be modified.
139    ///
140    /// Then it will append the header, followed by contents of the stream
141    /// specified by `data`. To produce a valid archive the `size` field of
142    /// `header` must be the same as the length of the stream that's being
143    /// written.
144    ///
145    /// Note that this will not attempt to seek the archive to a valid position,
146    /// so if the archive is in the middle of a read or some other similar
147    /// operation then this may corrupt the archive.
148    ///
149    /// Also note that after all entries have been written to an archive the
150    /// `finish` function needs to be called to finish writing the archive.
151    ///
152    /// # Errors
153    ///
154    /// This function will return an error for any intermittent I/O error which
155    /// occurs when either reading or writing.
156    ///
157    /// # Examples
158    ///
159    /// ```
160    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
161    /// #
162    /// use async_tar::{Builder, Header};
163    ///
164    /// let mut header = Header::new_gnu();
165    /// header.set_size(4);
166    /// header.set_cksum();
167    ///
168    /// let mut data: &[u8] = &[1, 2, 3, 4];
169    ///
170    /// let mut ar = Builder::new(Vec::new());
171    /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
172    /// let data = ar.into_inner().await?;
173    /// #
174    /// # Ok(()) }) }
175    /// ```
176    pub async fn append_data<P: AsRef<Path>, R: Read + Unpin + Send>(
177        &mut self,
178        header: &mut Header,
179        path: P,
180        data: R,
181    ) -> io::Result<()> {
182        prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
183        header.set_cksum();
184        self.append(header, data).await?;
185
186        Ok(())
187    }
188
189    /// Adds a file on the local filesystem to this archive.
190    ///
191    /// This function will open the file specified by `path` and insert the file
192    /// into the archive with the appropriate metadata set, returning any I/O
193    /// error which occurs while writing. The path name for the file inside of
194    /// this archive will be the same as `path`, and it is required that the
195    /// path is a relative path.
196    ///
197    /// Note that this will not attempt to seek the archive to a valid position,
198    /// so if the archive is in the middle of a read or some other similar
199    /// operation then this may corrupt the archive.
200    ///
201    /// Also note that after all files have been written to an archive the
202    /// `finish` function needs to be called to finish writing the archive.
203    ///
204    /// # Examples
205    ///
206    /// ```no_run
207    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
208    /// #
209    /// use async_tar::Builder;
210    ///
211    /// let mut ar = Builder::new(Vec::new());
212    ///
213    /// ar.append_path("foo/bar.txt").await?;
214    /// #
215    /// # Ok(()) }) }
216    /// ```
217    #[cfg(feature = "fs")]
218    pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
219        let mode = self.mode;
220        let follow = self.follow;
221        append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
222        Ok(())
223    }
224
225    /// Adds a file on the local filesystem to this archive under another name.
226    ///
227    /// This function will open the file specified by `path` and insert the file
228    /// into the archive as `name` with appropriate metadata set, returning any
229    /// I/O error which occurs while writing. The path name for the file inside
230    /// of this archive will be `name` is required to be a relative path.
231    ///
232    /// Note that this will not attempt to seek the archive to a valid position,
233    /// so if the archive is in the middle of a read or some other similar
234    /// operation then this may corrupt the archive.
235    ///
236    /// Also note that after all files have been written to an archive the
237    /// `finish` function needs to be called to finish writing the archive.
238    ///
239    /// # Examples
240    ///
241    /// ```no_run
242    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
243    /// #
244    /// use async_tar::Builder;
245    ///
246    /// let mut ar = Builder::new(Vec::new());
247    ///
248    /// // Insert the local file "foo/bar.txt" in the archive but with the name
249    /// // "bar/foo.txt".
250    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
251    /// #
252    /// # Ok(()) }) }
253    /// ```
254    #[cfg(feature = "fs")]
255    pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
256        &mut self,
257        path: P,
258        name: N,
259    ) -> io::Result<()> {
260        let mode = self.mode;
261        let follow = self.follow;
262        append_path_with_name(
263            self.get_mut(),
264            path.as_ref(),
265            Some(name.as_ref()),
266            mode,
267            follow,
268        )
269        .await?;
270        Ok(())
271    }
272
273    /// Adds a file to this archive with the given path as the name of the file
274    /// in the archive.
275    ///
276    /// This will use the metadata of `file` to populate a `Header`, and it will
277    /// then append the file to the archive with the name `path`.
278    ///
279    /// Note that this will not attempt to seek the archive to a valid position,
280    /// so if the archive is in the middle of a read or some other similar
281    /// operation then this may corrupt the archive.
282    ///
283    /// Also note that after all files have been written to an archive the
284    /// `finish` function needs to be called to finish writing the archive.
285    ///
286    /// # Examples
287    ///
288    /// ```no_run
289    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
290    /// #
291    /// use async_std::fs::File;
292    /// use async_tar::Builder;
293    ///
294    /// let mut ar = Builder::new(Vec::new());
295    ///
296    /// // Open the file at one location, but insert it into the archive with a
297    /// // different name.
298    /// let mut f = File::open("foo/bar/baz.txt").await?;
299    /// ar.append_file("bar/baz.txt", &mut f).await?;
300    /// #
301    /// # Ok(()) }) }
302    /// ```
303    #[cfg(feature = "fs")]
304    pub async fn append_file<P: AsRef<Path>>(
305        &mut self,
306        path: P,
307        file: &mut fs::File,
308    ) -> io::Result<()> {
309        let mode = self.mode;
310        append_file(self.get_mut(), path.as_ref(), file, mode).await?;
311        Ok(())
312    }
313
314    /// Adds a directory to this archive with the given path as the name of the
315    /// directory in the archive.
316    ///
317    /// This will use `stat` to populate a `Header`, and it will then append the
318    /// directory to the archive with the name `path`.
319    ///
320    /// Note that this will not attempt to seek the archive to a valid position,
321    /// so if the archive is in the middle of a read or some other similar
322    /// operation then this may corrupt the archive.
323    ///
324    /// Also note that after all files have been written to an archive the
325    /// `finish` function needs to be called to finish writing the archive.
326    ///
327    /// # Examples
328    ///
329    /// ```
330    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
331    /// #
332    /// use async_std::fs;
333    /// use async_tar::Builder;
334    ///
335    /// let mut ar = Builder::new(Vec::new());
336    ///
337    /// // Use the directory at one location, but insert it into the archive
338    /// // with a different name.
339    /// ar.append_dir("bardir", ".").await?;
340    /// #
341    /// # Ok(()) }) }
342    /// ```
343    #[cfg(feature = "fs")]
344    pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
345    where
346        P: AsRef<Path>,
347        Q: AsRef<Path>,
348    {
349        let mode = self.mode;
350        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
351        Ok(())
352    }
353
354    /// Adds a directory and all of its contents (recursively) to this archive
355    /// with the given path as the name of the directory in the archive.
356    ///
357    /// Note that this will not attempt to seek the archive to a valid position,
358    /// so if the archive is in the middle of a read or some other similar
359    /// operation then this may corrupt the archive.
360    ///
361    /// Also note that after all files have been written to an archive the
362    /// `finish` function needs to be called to finish writing the archive.
363    ///
364    /// # Examples
365    ///
366    /// ```
367    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
368    /// #
369    /// use async_std::fs;
370    /// use async_tar::Builder;
371    ///
372    /// let mut ar = Builder::new(Vec::new());
373    ///
374    /// // Use the directory at one location, but insert it into the archive
375    /// // with a different name.
376    /// ar.append_dir_all("bardir", ".").await?;
377    /// #
378    /// # Ok(()) }) }
379    /// ```
380    #[cfg(feature = "fs")]
381    pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
382    where
383        P: AsRef<Path>,
384        Q: AsRef<Path>,
385    {
386        let mode = self.mode;
387        let follow = self.follow;
388        append_dir_all(
389            self.get_mut(),
390            path.as_ref(),
391            src_path.as_ref(),
392            mode,
393            follow,
394        )
395        .await?;
396        Ok(())
397    }
398
399    /// Finish writing this archive, emitting the termination sections.
400    ///
401    /// This function should only be called when the archive has been written
402    /// entirely and if an I/O error happens the underlying object still needs
403    /// to be acquired.
404    ///
405    /// In most situations the `into_inner` method should be preferred.
406    pub async fn finish(&mut self) -> io::Result<()> {
407        if self.finished {
408            return Ok(());
409        }
410        self.finished = true;
411        self.get_mut().write_all(&[0; 1024]).await?;
412        Ok(())
413    }
414}
415
416async fn append(
417    mut dst: &mut (dyn Write + Unpin + Send),
418    header: &Header,
419    mut data: &mut (dyn Read + Unpin + Send),
420) -> io::Result<()> {
421    dst.write_all(header.as_bytes()).await?;
422    let len = io::copy(&mut data, &mut dst).await?;
423
424    // Pad with zeros if necessary.
425    let buf = [0; 512];
426    let remaining = 512 - (len % 512);
427    if remaining < 512 {
428        dst.write_all(&buf[..remaining as usize]).await?;
429    }
430
431    Ok(())
432}
433
/// Appends the filesystem object at `path` to `dst`, stored as `name` when
/// given and as `path` otherwise.
///
/// With `follow` set, metadata is read through symlinks (`fs::metadata`);
/// otherwise the link itself is inspected (`fs::symlink_metadata`). Regular
/// files are appended with their contents, directories and symlinks with an
/// empty body. Any other file type is reported as an error.
#[cfg(feature = "fs")]
async fn append_path_with_name(
    dst: &mut (dyn Write + Unpin + Sync + Send),
    path: &Path,
    name: Option<&Path>,
    mode: HeaderMode,
    follow: bool,
) -> io::Result<()> {
    let lookup = if follow {
        fs::metadata(path).await
    } else {
        fs::symlink_metadata(path).await
    };
    // Keep the original error kind but mention which path was being inspected.
    let stat = lookup.map_err(|err| {
        io::Error::new(
            err.kind(),
            format!("{} when getting metadata for {}", err, path.display()),
        )
    })?;
    let ar_name = name.unwrap_or(path);

    if stat.is_file() {
        let mut file = fs::File::open(path).await?;
        return append_fs(dst, ar_name, &stat, &mut file, mode, None).await;
    }
    if stat.is_dir() {
        return append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await;
    }
    if stat.file_type().is_symlink() {
        let link_name = fs::read_link(path).await?;
        return append_fs(
            dst,
            ar_name,
            &stat,
            &mut io::empty(),
            mode,
            Some(&link_name),
        )
        .await;
    }

    Err(other(&format!("{} has unknown file type", path.display())))
}
488
/// Appends the already opened `file` to `dst` under the archive name `path`,
/// taking the header fields from the file's own metadata.
#[cfg(feature = "fs")]
async fn append_file(
    dst: &mut (dyn Write + Unpin + Send + Sync),
    path: &Path,
    file: &mut fs::File,
    mode: HeaderMode,
) -> io::Result<()> {
    let meta = file.metadata().await?;
    append_fs(dst, path, &meta, file, mode, None).await
}
500
/// Appends an entry named `path` to `dst`, with header fields taken from the
/// metadata of `src_path` and an empty body.
#[cfg(feature = "fs")]
async fn append_dir(
    dst: &mut (dyn Write + Unpin + Send + Sync),
    path: &Path,
    src_path: &Path,
    mode: HeaderMode,
) -> io::Result<()> {
    let meta = fs::metadata(src_path).await?;
    let mut no_body = io::empty();
    append_fs(dst, path, &meta, &mut no_body, mode, None).await
}
512
513fn prepare_header(size: u64, entry_type: EntryType) -> Header {
514    let mut header = Header::new_gnu();
515    let name = b"././@LongLink";
516    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
517    header.set_mode(0o644);
518    header.set_uid(0);
519    header.set_gid(0);
520    header.set_mtime(0);
521    // + 1 to be compliant with GNU tar
522    header.set_size(size + 1);
523    header.set_entry_type(entry_type);
524    header.set_cksum();
525    header
526}
527
528async fn prepare_header_path(
529    dst: &mut (dyn Write + Unpin + Send + Sync),
530    header: &mut Header,
531    path: &Path,
532) -> io::Result<()> {
533    // Try to encode the path directly in the header, but if it ends up not
534    // working (probably because it's too long) then try to use the GNU-specific
535    // long name extension by emitting an entry which indicates that it's the
536    // filename.
537    if let Err(e) = header.set_path(path) {
538        let data = path2bytes(path)?;
539        let max = header.as_old().name.len();
540        //  Since e isn't specific enough to let us know the path is indeed too
541        //  long, verify it first before using the extension.
542        if data.len() < max {
543            return Err(e);
544        }
545        let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
546        // null-terminated string
547        let mut data2 = data.chain(io::repeat(0).take(1));
548        append(dst, &header2, &mut data2).await?;
549        // Truncate the path to store in the header we're about to emit to
550        // ensure we've got something at least mentioned.
551        let path = bytes2path(Cow::Borrowed(&data[..max]))?;
552        header.set_path(&path)?;
553    }
554    Ok(())
555}
556
/// Stores `link_name` in `header`, falling back to the GNU long-link
/// extension.
///
/// Works like `prepare_header_path`, but for the link name: when the name
/// cannot be encoded directly and is at least as long as the header's
/// `linkname` field, a `GNULongLink` entry with the full, NUL-terminated name
/// is written to `dst`. Shorter names that fail to encode are reported with
/// the original error.
#[cfg(feature = "fs")]
async fn prepare_header_link(
    dst: &mut (dyn Write + Unpin + Send + Sync),
    header: &mut Header,
    link_name: &Path,
) -> io::Result<()> {
    let set_err = match header.set_link_name(&link_name) {
        Ok(_) => return Ok(()),
        Err(err) => err,
    };

    let raw = path2bytes(link_name)?;
    if raw.len() < header.as_old().linkname.len() {
        return Err(set_err);
    }

    let long_link_header = prepare_header(raw.len() as u64, EntryType::GNULongLink);
    let mut payload = raw.chain(io::repeat(0).take(1));
    append(dst, &long_link_header, &mut payload).await?;
    Ok(())
}
575
/// Builds a fresh GNU header for a filesystem object and appends it to `dst`
/// together with the body read from `read`.
///
/// The header gets `path` as its name, the fields of `meta` according to
/// `mode`, and — when given — `link_name` as its link target. Long names and
/// link targets may cause extension entries to be written to `dst` first.
#[cfg(feature = "fs")]
async fn append_fs(
    dst: &mut (dyn Write + Unpin + Send + Sync),
    path: &Path,
    meta: &fs::Metadata,
    read: &mut (dyn Read + Unpin + Sync + Send),
    mode: HeaderMode,
    link_name: Option<&Path>,
) -> io::Result<()> {
    let mut header = Header::new_gnu();

    prepare_header_path(dst, &mut header, path).await?;
    header.set_metadata_in_mode(meta, mode);
    if let Some(target) = link_name {
        prepare_header_link(dst, &mut header, target).await?;
    }
    // All fields are final at this point, so the checksum can be computed.
    header.set_cksum();

    append(dst, &header, read).await
}
597
/// Appends the directory tree rooted at `src_path` to `dst`, with `path`
/// taking the place of `src_path` in the archived names.
///
/// The tree is walked with an explicit stack instead of recursion. With
/// `follow` set, symlinks are archived as what they point to (directories are
/// descended into); otherwise they are stored as symlink entries.
#[cfg(feature = "fs")]
async fn append_dir_all(
    dst: &mut (dyn Write + Unpin + Send + Sync),
    path: &Path,
    src_path: &Path,
    mode: HeaderMode,
    follow: bool,
) -> io::Result<()> {
    // Work items are (source path, is_dir, is_symlink); the root counts as a directory.
    let mut pending = vec![(src_path.to_path_buf(), true, false)];

    while let Some((src, is_dir, is_symlink)) = pending.pop() {
        let dest = path.join(src.strip_prefix(&src_path).unwrap());

        // For a symlink pointing to a directory `is_dir` is false, but
        // `src.is_dir()` returns true.
        let descend = is_dir || (is_symlink && follow && src.is_dir().await);
        if descend {
            let mut entries = fs::read_dir(&src).await?;
            while let Some(entry) = entries.next().await {
                let entry = entry?;
                let kind = entry.file_type().await?;
                pending.push((entry.path(), kind.is_dir(), kind.is_symlink()));
            }
            // No entry is written for a directory whose archive name is empty.
            if dest != Path::new("") {
                append_dir(dst, &dest, &src, mode).await?;
            }
            continue;
        }

        if !follow && is_symlink {
            let stat = fs::symlink_metadata(&src).await?;
            let link_name = fs::read_link(&src).await?;
            append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
        } else {
            let mut file = fs::File::open(src).await?;
            append_file(dst, &dest, &mut file, mode).await?;
        }
    }

    Ok(())
}
631
#[cfg(feature = "fs")]
impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
    /// Finishes the archive when the builder goes out of scope, blocking the
    /// current thread until `finish` completes.
    fn drop(&mut self) {
        // An error cannot be propagated out of `drop`, so it is discarded.
        let _ = async_std::task::block_on(self.finish());
    }
}
640
#[cfg(test)]
mod tests {
    use super::*;
    use async_std::fs::File;

    // Compile-time assertions: both the file type and a builder wrapping it
    // must be `Send` and `Sync`.
    assert_impl_all!(File: Send, Sync);
    assert_impl_all!(Builder<File>: Send, Sync);
}