async_tar/builder.rs
1use std::borrow::Cow;
2
3use async_std::{
4 fs,
5 io::{self, Read, Write},
6 path::Path,
7 prelude::*,
8};
9
10use crate::{
11 header::{bytes2path, path2bytes, HeaderMode},
12 other, EntryType, Header,
13};
14
15/// A structure for building archives
16///
17/// This structure has methods for building up an archive from scratch into any
18/// arbitrary writer.
19pub struct Builder<W: Write + Unpin + Send + Sync> {
20 mode: HeaderMode,
21 follow: bool,
22 finished: bool,
23 obj: Option<W>,
24}
25
26impl<W: Write + Unpin + Send + Sync> Builder<W> {
27 /// Create a new archive builder with the underlying object as the
28 /// destination of all data written. The builder will use
29 /// `HeaderMode::Complete` by default.
30 pub fn new(obj: W) -> Builder<W> {
31 Builder {
32 mode: HeaderMode::Complete,
33 follow: true,
34 finished: false,
35 obj: Some(obj),
36 }
37 }
38
39 /// Changes the HeaderMode that will be used when reading fs Metadata for
40 /// methods that implicitly read metadata for an input Path. Notably, this
41 /// does _not_ apply to `append(Header)`.
42 pub fn mode(&mut self, mode: HeaderMode) {
43 self.mode = mode;
44 }
45
46 /// Follow symlinks, archiving the contents of the file they point to rather
47 /// than adding a symlink to the archive. Defaults to true.
48 pub fn follow_symlinks(&mut self, follow: bool) {
49 self.follow = follow;
50 }
51
52 /// Gets shared reference to the underlying object.
53 pub fn get_ref(&self) -> &W {
54 self.obj.as_ref().unwrap()
55 }
56
57 /// Gets mutable reference to the underlying object.
58 ///
59 /// Note that care must be taken while writing to the underlying
60 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
61 /// useful in the situations when one needs to be ensured that
62 /// tar entry was flushed to the disk.
63 pub fn get_mut(&mut self) -> &mut W {
64 self.obj.as_mut().unwrap()
65 }
66
67 /// Unwrap this archive, returning the underlying object.
68 ///
69 /// This function will finish writing the archive if the `finish` function
70 /// hasn't yet been called, returning any I/O error which happens during
71 /// that operation.
72 pub async fn into_inner(mut self) -> io::Result<W> {
73 if !self.finished {
74 self.finish().await?;
75 }
76 Ok(self.obj.take().unwrap())
77 }
78
79 /// Adds a new entry to this archive.
80 ///
81 /// This function will append the header specified, followed by contents of
82 /// the stream specified by `data`. To produce a valid archive the `size`
83 /// field of `header` must be the same as the length of the stream that's
84 /// being written. Additionally the checksum for the header should have been
85 /// set via the `set_cksum` method.
86 ///
87 /// Note that this will not attempt to seek the archive to a valid position,
88 /// so if the archive is in the middle of a read or some other similar
89 /// operation then this may corrupt the archive.
90 ///
91 /// Also note that after all entries have been written to an archive the
92 /// `finish` function needs to be called to finish writing the archive.
93 ///
94 /// # Errors
95 ///
96 /// This function will return an error for any intermittent I/O error which
97 /// occurs when either reading or writing.
98 ///
99 /// # Examples
100 ///
101 /// ```
102 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
103 /// #
104 /// use async_tar::{Builder, Header};
105 ///
106 /// let mut header = Header::new_gnu();
107 /// header.set_path("foo")?;
108 /// header.set_size(4);
109 /// header.set_cksum();
110 ///
111 /// let mut data: &[u8] = &[1, 2, 3, 4];
112 ///
113 /// let mut ar = Builder::new(Vec::new());
114 /// ar.append(&header, data).await?;
115 /// let data = ar.into_inner().await?;
116 /// #
117 /// # Ok(()) }) }
118 /// ```
119 pub async fn append<R: Read + Unpin + Send>(
120 &mut self,
121 header: &Header,
122 mut data: R,
123 ) -> io::Result<()> {
124 append(self.get_mut(), header, &mut data).await?;
125
126 Ok(())
127 }
128
129 /// Adds a new entry to this archive with the specified path.
130 ///
131 /// This function will set the specified path in the given header, which may
132 /// require appending a GNU long-name extension entry to the archive first.
133 /// The checksum for the header will be automatically updated via the
134 /// `set_cksum` method after setting the path. No other metadata in the
135 /// header will be modified.
136 ///
137 /// Then it will append the header, followed by contents of the stream
138 /// specified by `data`. To produce a valid archive the `size` field of
139 /// `header` must be the same as the length of the stream that's being
140 /// written.
141 ///
142 /// Note that this will not attempt to seek the archive to a valid position,
143 /// so if the archive is in the middle of a read or some other similar
144 /// operation then this may corrupt the archive.
145 ///
146 /// Also note that after all entries have been written to an archive the
147 /// `finish` function needs to be called to finish writing the archive.
148 ///
149 /// # Errors
150 ///
151 /// This function will return an error for any intermittent I/O error which
152 /// occurs when either reading or writing.
153 ///
154 /// # Examples
155 ///
156 /// ```
157 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
158 /// #
159 /// use async_tar::{Builder, Header};
160 ///
161 /// let mut header = Header::new_gnu();
162 /// header.set_size(4);
163 /// header.set_cksum();
164 ///
165 /// let mut data: &[u8] = &[1, 2, 3, 4];
166 ///
167 /// let mut ar = Builder::new(Vec::new());
168 /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
169 /// let data = ar.into_inner().await?;
170 /// #
171 /// # Ok(()) }) }
172 /// ```
173 pub async fn append_data<P: AsRef<Path>, R: Read + Unpin + Send>(
174 &mut self,
175 header: &mut Header,
176 path: P,
177 data: R,
178 ) -> io::Result<()> {
179 prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
180 header.set_cksum();
181 self.append(header, data).await?;
182
183 Ok(())
184 }
185
186 /// Adds a file on the local filesystem to this archive.
187 ///
188 /// This function will open the file specified by `path` and insert the file
189 /// into the archive with the appropriate metadata set, returning any I/O
190 /// error which occurs while writing. The path name for the file inside of
191 /// this archive will be the same as `path`, and it is required that the
192 /// path is a relative path.
193 ///
194 /// Note that this will not attempt to seek the archive to a valid position,
195 /// so if the archive is in the middle of a read or some other similar
196 /// operation then this may corrupt the archive.
197 ///
198 /// Also note that after all files have been written to an archive the
199 /// `finish` function needs to be called to finish writing the archive.
200 ///
201 /// # Examples
202 ///
203 /// ```no_run
204 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
205 /// #
206 /// use async_tar::Builder;
207 ///
208 /// let mut ar = Builder::new(Vec::new());
209 ///
210 /// ar.append_path("foo/bar.txt").await?;
211 /// #
212 /// # Ok(()) }) }
213 /// ```
214 pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
215 let mode = self.mode;
216 let follow = self.follow;
217 append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
218 Ok(())
219 }
220
221 /// Adds a file on the local filesystem to this archive under another name.
222 ///
223 /// This function will open the file specified by `path` and insert the file
224 /// into the archive as `name` with appropriate metadata set, returning any
225 /// I/O error which occurs while writing. The path name for the file inside
226 /// of this archive will be `name` is required to be a relative path.
227 ///
228 /// Note that this will not attempt to seek the archive to a valid position,
229 /// so if the archive is in the middle of a read or some other similar
230 /// operation then this may corrupt the archive.
231 ///
232 /// Also note that after all files have been written to an archive the
233 /// `finish` function needs to be called to finish writing the archive.
234 ///
235 /// # Examples
236 ///
237 /// ```no_run
238 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
239 /// #
240 /// use async_tar::Builder;
241 ///
242 /// let mut ar = Builder::new(Vec::new());
243 ///
244 /// // Insert the local file "foo/bar.txt" in the archive but with the name
245 /// // "bar/foo.txt".
246 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
247 /// #
248 /// # Ok(()) }) }
249 /// ```
250 pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
251 &mut self,
252 path: P,
253 name: N,
254 ) -> io::Result<()> {
255 let mode = self.mode;
256 let follow = self.follow;
257 append_path_with_name(
258 self.get_mut(),
259 path.as_ref(),
260 Some(name.as_ref()),
261 mode,
262 follow,
263 )
264 .await?;
265 Ok(())
266 }
267
268 /// Adds a file to this archive with the given path as the name of the file
269 /// in the archive.
270 ///
271 /// This will use the metadata of `file` to populate a `Header`, and it will
272 /// then append the file to the archive with the name `path`.
273 ///
274 /// Note that this will not attempt to seek the archive to a valid position,
275 /// so if the archive is in the middle of a read or some other similar
276 /// operation then this may corrupt the archive.
277 ///
278 /// Also note that after all files have been written to an archive the
279 /// `finish` function needs to be called to finish writing the archive.
280 ///
281 /// # Examples
282 ///
283 /// ```no_run
284 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
285 /// #
286 /// use async_std::fs::File;
287 /// use async_tar::Builder;
288 ///
289 /// let mut ar = Builder::new(Vec::new());
290 ///
291 /// // Open the file at one location, but insert it into the archive with a
292 /// // different name.
293 /// let mut f = File::open("foo/bar/baz.txt").await?;
294 /// ar.append_file("bar/baz.txt", &mut f).await?;
295 /// #
296 /// # Ok(()) }) }
297 /// ```
298 pub async fn append_file<P: AsRef<Path>>(
299 &mut self,
300 path: P,
301 file: &mut fs::File,
302 ) -> io::Result<()> {
303 let mode = self.mode;
304 append_file(self.get_mut(), path.as_ref(), file, mode).await?;
305 Ok(())
306 }
307
308 /// Adds a directory to this archive with the given path as the name of the
309 /// directory in the archive.
310 ///
311 /// This will use `stat` to populate a `Header`, and it will then append the
312 /// directory to the archive with the name `path`.
313 ///
314 /// Note that this will not attempt to seek the archive to a valid position,
315 /// so if the archive is in the middle of a read or some other similar
316 /// operation then this may corrupt the archive.
317 ///
318 /// Also note that after all files have been written to an archive the
319 /// `finish` function needs to be called to finish writing the archive.
320 ///
321 /// # Examples
322 ///
323 /// ```
324 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
325 /// #
326 /// use async_std::fs;
327 /// use async_tar::Builder;
328 ///
329 /// let mut ar = Builder::new(Vec::new());
330 ///
331 /// // Use the directory at one location, but insert it into the archive
332 /// // with a different name.
333 /// ar.append_dir("bardir", ".").await?;
334 /// #
335 /// # Ok(()) }) }
336 /// ```
337 pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
338 where
339 P: AsRef<Path>,
340 Q: AsRef<Path>,
341 {
342 let mode = self.mode;
343 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
344 Ok(())
345 }
346
347 /// Adds a directory and all of its contents (recursively) to this archive
348 /// with the given path as the name of the directory in the archive.
349 ///
350 /// Note that this will not attempt to seek the archive to a valid position,
351 /// so if the archive is in the middle of a read or some other similar
352 /// operation then this may corrupt the archive.
353 ///
354 /// Also note that after all files have been written to an archive the
355 /// `finish` function needs to be called to finish writing the archive.
356 ///
357 /// # Examples
358 ///
359 /// ```
360 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
361 /// #
362 /// use async_std::fs;
363 /// use async_tar::Builder;
364 ///
365 /// let mut ar = Builder::new(Vec::new());
366 ///
367 /// // Use the directory at one location, but insert it into the archive
368 /// // with a different name.
369 /// ar.append_dir_all("bardir", ".").await?;
370 /// #
371 /// # Ok(()) }) }
372 /// ```
373 pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
374 where
375 P: AsRef<Path>,
376 Q: AsRef<Path>,
377 {
378 let mode = self.mode;
379 let follow = self.follow;
380 append_dir_all(
381 self.get_mut(),
382 path.as_ref(),
383 src_path.as_ref(),
384 mode,
385 follow,
386 )
387 .await?;
388 Ok(())
389 }
390
391 /// Finish writing this archive, emitting the termination sections.
392 ///
393 /// This function should only be called when the archive has been written
394 /// entirely and if an I/O error happens the underlying object still needs
395 /// to be acquired.
396 ///
397 /// In most situations the `into_inner` method should be preferred.
398 pub async fn finish(&mut self) -> io::Result<()> {
399 if self.finished {
400 return Ok(());
401 }
402 self.finished = true;
403 self.get_mut().write_all(&[0; 1024]).await?;
404 Ok(())
405 }
406}
407
408async fn append(
409 mut dst: &mut (dyn Write + Unpin + Send),
410 header: &Header,
411 mut data: &mut (dyn Read + Unpin + Send),
412) -> io::Result<()> {
413 dst.write_all(header.as_bytes()).await?;
414 let len = io::copy(&mut data, &mut dst).await?;
415
416 // Pad with zeros if necessary.
417 let buf = [0; 512];
418 let remaining = 512 - (len % 512);
419 if remaining < 512 {
420 dst.write_all(&buf[..remaining as usize]).await?;
421 }
422
423 Ok(())
424}
425
426async fn append_path_with_name(
427 dst: &mut (dyn Write + Unpin + Sync + Send),
428 path: &Path,
429 name: Option<&Path>,
430 mode: HeaderMode,
431 follow: bool,
432) -> io::Result<()> {
433 let stat = if follow {
434 fs::metadata(path).await.map_err(|err| {
435 io::Error::new(
436 err.kind(),
437 format!("{} when getting metadata for {}", err, path.display()),
438 )
439 })?
440 } else {
441 fs::symlink_metadata(path).await.map_err(|err| {
442 io::Error::new(
443 err.kind(),
444 format!("{} when getting metadata for {}", err, path.display()),
445 )
446 })?
447 };
448 let ar_name = name.unwrap_or(path);
449 if stat.is_file() {
450 append_fs(
451 dst,
452 ar_name,
453 &stat,
454 &mut fs::File::open(path).await?,
455 mode,
456 None,
457 )
458 .await?;
459 Ok(())
460 } else if stat.is_dir() {
461 append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
462 Ok(())
463 } else if stat.file_type().is_symlink() {
464 let link_name = fs::read_link(path).await?;
465 append_fs(
466 dst,
467 ar_name,
468 &stat,
469 &mut io::empty(),
470 mode,
471 Some(&link_name),
472 )
473 .await?;
474 Ok(())
475 } else {
476 Err(other(&format!("{} has unknown file type", path.display())))
477 }
478}
479
480async fn append_file(
481 dst: &mut (dyn Write + Unpin + Send + Sync),
482 path: &Path,
483 file: &mut fs::File,
484 mode: HeaderMode,
485) -> io::Result<()> {
486 let stat = file.metadata().await?;
487 append_fs(dst, path, &stat, file, mode, None).await?;
488 Ok(())
489}
490
491async fn append_dir(
492 dst: &mut (dyn Write + Unpin + Send + Sync),
493 path: &Path,
494 src_path: &Path,
495 mode: HeaderMode,
496) -> io::Result<()> {
497 let stat = fs::metadata(src_path).await?;
498 append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
499 Ok(())
500}
501
502fn prepare_header(size: u64, entry_type: EntryType) -> Header {
503 let mut header = Header::new_gnu();
504 let name = b"././@LongLink";
505 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
506 header.set_mode(0o644);
507 header.set_uid(0);
508 header.set_gid(0);
509 header.set_mtime(0);
510 // + 1 to be compliant with GNU tar
511 header.set_size(size + 1);
512 header.set_entry_type(entry_type);
513 header.set_cksum();
514 header
515}
516
517async fn prepare_header_path(
518 dst: &mut (dyn Write + Unpin + Send + Sync),
519 header: &mut Header,
520 path: &Path,
521) -> io::Result<()> {
522 // Try to encode the path directly in the header, but if it ends up not
523 // working (probably because it's too long) then try to use the GNU-specific
524 // long name extension by emitting an entry which indicates that it's the
525 // filename.
526 if let Err(e) = header.set_path(path) {
527 let data = path2bytes(path)?;
528 let max = header.as_old().name.len();
529 // Since e isn't specific enough to let us know the path is indeed too
530 // long, verify it first before using the extension.
531 if data.len() < max {
532 return Err(e);
533 }
534 let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
535 // null-terminated string
536 let mut data2 = data.chain(io::repeat(0).take(1));
537 append(dst, &header2, &mut data2).await?;
538 // Truncate the path to store in the header we're about to emit to
539 // ensure we've got something at least mentioned.
540 let path = bytes2path(Cow::Borrowed(&data[..max]))?;
541 header.set_path(&path)?;
542 }
543 Ok(())
544}
545
546async fn prepare_header_link(
547 dst: &mut (dyn Write + Unpin + Send + Sync),
548 header: &mut Header,
549 link_name: &Path,
550) -> io::Result<()> {
551 // Same as previous function but for linkname
552 if let Err(e) = header.set_link_name(link_name) {
553 let data = path2bytes(link_name)?;
554 if data.len() < header.as_old().linkname.len() {
555 return Err(e);
556 }
557 let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
558 let mut data2 = data.chain(io::repeat(0).take(1));
559 append(dst, &header2, &mut data2).await?;
560 }
561 Ok(())
562}
563
564async fn append_fs(
565 dst: &mut (dyn Write + Unpin + Send + Sync),
566 path: &Path,
567 meta: &fs::Metadata,
568 read: &mut (dyn Read + Unpin + Sync + Send),
569 mode: HeaderMode,
570 link_name: Option<&Path>,
571) -> io::Result<()> {
572 let mut header = Header::new_gnu();
573
574 prepare_header_path(dst, &mut header, path).await?;
575 header.set_metadata_in_mode(meta, mode);
576 if let Some(link_name) = link_name {
577 prepare_header_link(dst, &mut header, link_name).await?;
578 }
579 header.set_cksum();
580 append(dst, &header, read).await?;
581
582 Ok(())
583}
584
585async fn append_dir_all(
586 dst: &mut (dyn Write + Unpin + Send + Sync),
587 path: &Path,
588 src_path: &Path,
589 mode: HeaderMode,
590 follow: bool,
591) -> io::Result<()> {
592 let mut stack = vec![(src_path.to_path_buf(), true, false)];
593 while let Some((src, is_dir, is_symlink)) = stack.pop() {
594 let dest = path.join(src.strip_prefix(src_path).unwrap());
595
596 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
597 if is_dir || (is_symlink && follow && src.is_dir().await) {
598 let mut entries = fs::read_dir(&src).await?;
599 while let Some(entry) = entries.next().await {
600 let entry = entry?;
601 let file_type = entry.file_type().await?;
602 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
603 }
604 if dest != Path::new("") {
605 append_dir(dst, &dest, &src, mode).await?;
606 }
607 } else if !follow && is_symlink {
608 let stat = fs::symlink_metadata(&src).await?;
609 let link_name = fs::read_link(&src).await?;
610 append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
611 } else {
612 append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
613 }
614 }
615 Ok(())
616}
617
618impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
619 fn drop(&mut self) {
620 async_std::task::block_on(async move {
621 let _ = self.finish().await;
622 });
623 }
624}
625
// Type-level checks only; this module contains no runtime tests.
#[cfg(test)]
mod tests {
    use super::*;

    // NOTE(review): `assert_impl_all!` is not defined in this file; presumably
    // it is the `static_assertions` macro brought in at the crate root.
    // The file handle used as a writer must itself be `Send + Sync` ...
    assert_impl_all!(async_std::fs::File: Send, Sync);
    // ... and a builder wrapping it must be `Send + Sync` as well.
    assert_impl_all!(Builder<async_std::fs::File>: Send, Sync);
}