1use std::{
13 collections::BTreeMap,
14 ffi::{OsStr, OsString},
15 fmt,
16 fs::File,
17 os::unix::prelude::{OsStrExt, OsStringExt},
18 path::PathBuf,
19 sync::Arc,
20};
21
22use anyhow::{Context, Result, bail, ensure};
23use bytes::{Bytes, BytesMut};
24use rustix::fs::makedev;
25use tar_core::{
26 EntryType, HEADER_SIZE, PaxExtensions,
27 parse::{ParseEvent, Parser},
28};
29use tokio::{
30 io::{AsyncRead, AsyncReadExt},
31 sync::mpsc,
32};
33
34use composefs::{
35 INLINE_CONTENT_MAX_V0, dumpfile,
36 fsverity::FsVerityHashValue,
37 repository::{ObjectStoreMethod, Repository},
38 shared_internals::IO_BUF_CAPACITY,
39 splitstream::{SplitStreamBuilder, SplitStreamData, SplitStreamReader},
40 tree::{LeafContent, RegularFile, Stat},
41};
42
43use crate::ImportStats;
44
45fn pax_mtime_nsec(pax: &[u8]) -> u32 {
55 for ext in PaxExtensions::new(pax).flatten() {
56 if ext.key_bytes() == b"mtime" {
57 let Ok(value) = ext.value() else { return 0 };
58 let Some(frac) = value.split_once('.').map(|(_, f)| f) else {
60 return 0;
61 };
62 let frac = if frac.len() >= 9 {
64 &frac[..9]
65 } else {
66 return frac
68 .parse::<u32>()
69 .ok()
70 .map_or(0, |v| v * 10u32.pow(9 - frac.len() as u32));
71 };
72 return frac.parse::<u32>().unwrap_or(0);
73 }
74 }
75 0
76}
77
78fn receive_and_finalize_object<ObjectID: FsVerityHashValue>(
82 rx: mpsc::Receiver<Bytes>,
83 size: u64,
84 repo: &Repository<ObjectID>,
85) -> Result<(ObjectID, ObjectStoreMethod)> {
86 use std::io::Write;
87
88 let tmpfile_fd = repo.create_object_tmpfile()?;
90 let mut tmpfile = std::io::BufWriter::with_capacity(IO_BUF_CAPACITY, File::from(tmpfile_fd));
91
92 let mut rx = rx;
94 while let Some(chunk) = rx.blocking_recv() {
95 tmpfile.write_all(&chunk)?;
96 }
97
98 let tmpfile = tmpfile.into_inner()?;
100
101 repo.finalize_object_tmpfile(tmpfile, size)
103}
104
105async fn stream_large_file<ObjectID: FsVerityHashValue>(
111 tx: mpsc::Sender<Bytes>,
112 handle: tokio::task::JoinHandle<Result<(ObjectID, ObjectStoreMethod)>>,
113 builder: &mut SplitStreamBuilder<ObjectID>,
114 buf: &mut BytesMut,
115 tar_stream: &mut (impl AsyncRead + Unpin),
116 actual_size: usize,
117 storage_size: usize,
118) -> Result<()> {
119 let from_buf = std::cmp::min(buf.len(), actual_size);
121 if from_buf > 0 && tx.send(buf.split_to(from_buf).freeze()).await.is_err() {
122 drop(tx);
124 return handle
125 .await?
126 .map(|_| ())
127 .context("Object write task failed");
128 }
129
130 let mut remaining = actual_size.checked_sub(from_buf).unwrap();
132 while remaining > 0 {
133 buf.reserve(std::cmp::min(remaining, IO_BUF_CAPACITY));
135 let n = tar_stream.read_buf(buf).await?;
136 if n == 0 {
137 bail!("unexpected EOF reading tar entry");
138 }
139 let chunk_size = std::cmp::min(remaining, buf.len());
140 if tx.send(buf.split_to(chunk_size).freeze()).await.is_err() {
141 drop(tx);
146 return handle
147 .await?
148 .map(|_| ())
149 .context("Object write task failed");
150 }
151 remaining = remaining.checked_sub(chunk_size).unwrap();
153 }
154 drop(tx);
155
156 builder.push_external(handle, actual_size as u64);
157
158 let padding_size = storage_size.checked_sub(actual_size).unwrap();
161 if padding_size > 0 {
162 let pad_from_buf = std::cmp::min(buf.len(), padding_size);
163 if pad_from_buf > 0 {
164 builder.push_inline(&buf.split_to(pad_from_buf));
165 }
166 let stream_padding = padding_size - pad_from_buf;
167 if stream_padding > 0 {
168 buf.reserve(stream_padding);
169 while buf.len() < stream_padding {
170 let n = tar_stream.read_buf(buf).await?;
171 if n == 0 {
172 bail!("unexpected EOF reading tar padding");
173 }
174 }
175 builder.push_inline(&buf.split_to(stream_padding));
176 }
177 }
178
179 Ok(())
180}
181
182pub async fn split_async<ObjectID: FsVerityHashValue>(
202 mut tar_stream: impl AsyncRead + Unpin,
203 repo: Arc<Repository<ObjectID>>,
204 content_type: u64,
205) -> Result<(ObjectID, ImportStats)> {
206 let semaphore = repo.write_semaphore();
207 let mut builder = SplitStreamBuilder::new(repo.clone(), content_type)?;
208 let mut parser = Parser::with_defaults();
209 let mut buf = BytesMut::with_capacity(IO_BUF_CAPACITY);
210 let mut need = HEADER_SIZE;
211
212 loop {
213 while buf.len() < need {
215 buf.reserve(need - buf.len());
216 let n = tar_stream.read_buf(&mut buf).await?;
217 if n == 0 {
218 if buf.is_empty() {
219 let (object_id, ss_stats) = builder.finish().await?;
221 return Ok((object_id, ImportStats::from_split_stream_stats(&ss_stats)));
222 }
223 bail!("unexpected EOF in tar stream");
224 }
225 }
226
227 match parser.parse(&buf)? {
228 ParseEvent::NeedData { min_bytes } => {
229 need = min_bytes;
230 continue;
231 }
232 ParseEvent::GlobalExtensions { consumed, .. } => {
233 builder.push_inline(&buf.split_to(consumed));
234 need = HEADER_SIZE;
235 continue;
236 }
237 ParseEvent::End { consumed } => {
238 builder.push_inline(&buf.split_to(consumed));
239 if !buf.is_empty() {
249 builder.push_inline(&buf.split());
250 }
251 loop {
252 buf.reserve(IO_BUF_CAPACITY);
253 let n = tar_stream.read_buf(&mut buf).await?;
254 if n == 0 {
255 break;
256 }
257 builder.push_inline(&buf.split());
258 }
259 break;
260 }
261 ParseEvent::SparseEntry { .. } => {
262 bail!("sparse tar entries are not supported");
263 }
264 ParseEvent::Entry { consumed, entry } => {
265 let actual_size = entry.size as usize;
267 let is_large_file =
268 entry.entry_type.is_file() && actual_size > INLINE_CONTENT_MAX_V0;
269
270 builder.push_inline(&buf.split_to(consumed));
272
273 let storage_size = actual_size.next_multiple_of(512);
274
275 if is_large_file {
276 let permit = semaphore.clone().acquire_owned().await?;
277 let (tx, rx) = mpsc::channel::<Bytes>(4);
278 let repo_clone = repo.clone();
279 let handle = tokio::task::spawn_blocking(move || {
280 let result =
281 receive_and_finalize_object(rx, actual_size as u64, &repo_clone);
282 drop(permit);
283 result
284 });
285
286 stream_large_file(
287 tx,
288 handle,
289 &mut builder,
290 &mut buf,
291 &mut tar_stream,
292 actual_size,
293 storage_size,
294 )
295 .await?;
296 } else {
297 if storage_size > 0 {
299 let from_buf = std::cmp::min(buf.len(), storage_size);
301 if from_buf > 0 {
302 builder.push_inline(&buf.split_to(from_buf));
303 }
304 let mut remaining = storage_size.checked_sub(from_buf).unwrap();
306 while remaining > 0 {
307 buf.reserve(std::cmp::min(remaining, IO_BUF_CAPACITY));
308 let n = tar_stream.read_buf(&mut buf).await?;
309 if n == 0 {
310 bail!("unexpected EOF reading tar entry");
311 }
312 let n = std::cmp::min(remaining, buf.len());
313 builder.push_inline(&buf.split_to(n));
314 remaining = remaining.checked_sub(n).unwrap();
316 }
317 }
318 }
319
320 need = HEADER_SIZE;
321 }
322 }
323 }
324
325 let (object_id, ss_stats) = builder.finish().await?;
326 Ok((object_id, ImportStats::from_split_stream_stats(&ss_stats)))
327}
328
329#[derive(Debug)]
334pub enum TarItem<ObjectID: FsVerityHashValue> {
335 Directory,
337 Leaf(LeafContent<ObjectID>),
339 Hardlink(OsString),
341}
342
343#[derive(Debug)]
348pub struct TarEntry<ObjectID: FsVerityHashValue> {
349 pub path: PathBuf,
351 pub stat: Stat,
353 pub item: TarItem<ObjectID>,
355}
356
357impl<ObjectID: FsVerityHashValue> fmt::Display for TarEntry<ObjectID> {
358 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
359 match self.item {
360 TarItem::Hardlink(ref target) => dumpfile::write_hardlink(fmt, &self.path, target),
361 TarItem::Directory => dumpfile::write_directory(fmt, &self.path, &self.stat, 1),
362 TarItem::Leaf(ref content) => {
363 dumpfile::write_leaf(fmt, &self.path, &self.stat, content, 1)
364 }
365 }
366 }
367}
368
/// Converts a path as stored in a tar header into an absolute path.
///
/// At most one leading `/` is removed from the input, all trailing `/` bytes are
/// trimmed, and the remainder is prefixed with a single `/`. When nothing remains
/// (input was empty or consisted only of slashes) the result is the empty path
/// rather than `/`.
fn make_absolute_path(tar_path: &[u8]) -> PathBuf {
    // Drop a single leading slash, if any.
    let relative = match tar_path {
        [b'/', rest @ ..] => rest,
        _ => tar_path,
    };

    // Length of `relative` once trailing slashes are cut off (0 if it is all slashes).
    let kept = relative
        .iter()
        .rposition(|&byte| byte != b'/')
        .map_or(0, |last| last + 1);

    let mut absolute = Vec::with_capacity(1 + kept);
    if kept > 0 {
        absolute.push(b'/');
        absolute.extend_from_slice(&relative[..kept]);
    }
    PathBuf::from(OsString::from_vec(absolute))
}
384
385pub fn get_entry<ObjectID: FsVerityHashValue>(
394 reader: &mut SplitStreamReader<ObjectID>,
395) -> Result<Option<TarEntry<ObjectID>>> {
396 let mut parser = Parser::with_defaults();
397 let mut header_buf: Vec<u8> = Vec::new();
398 let mut block = [0u8; 512];
399
400 loop {
403 if !reader.read_inline_exact(&mut block)? {
404 return Ok(None);
405 }
406 header_buf.extend_from_slice(&block);
407
408 loop {
410 match parser.parse(&header_buf)? {
411 ParseEvent::NeedData { .. } => {
412 break;
414 }
415 ParseEvent::GlobalExtensions { consumed, .. } => {
416 header_buf.drain(..consumed);
418 continue;
419 }
420 ParseEvent::End { .. } => {
421 return Ok(None);
422 }
423 ParseEvent::Entry { entry, .. } => {
424 let size = entry.size;
425 let stored_size = size.next_multiple_of(512);
426
427 let item = match reader.read_exact(size as usize, stored_size as usize)? {
428 SplitStreamData::External(id) => match entry.entry_type {
429 EntryType::Regular | EntryType::Continuous => {
430 ensure!(
431 size as usize > INLINE_CONTENT_MAX_V0,
432 "Splitstream incorrectly stored a small ({size} byte) file external"
433 );
434 TarItem::Leaf(LeafContent::Regular(RegularFile::External(id, size)))
435 }
436 _ => bail!(
437 "Unsupported external-chunked entry {:?} {id:?}",
438 entry.entry_type
439 ),
440 },
441 SplitStreamData::Inline(content) => match entry.entry_type {
442 EntryType::Directory => TarItem::Directory,
443 EntryType::Regular | EntryType::Continuous => {
444 ensure!(
445 content.len() <= INLINE_CONTENT_MAX_V0,
446 "Splitstream incorrectly stored a large ({} byte) file inline",
447 content.len()
448 );
449 TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(content)))
450 }
451 EntryType::Link => TarItem::Hardlink({
452 let link_target = entry.link_target.as_deref().unwrap_or_default();
453 make_absolute_path(link_target).into_os_string()
454 }),
455 EntryType::Symlink => TarItem::Leaf(LeafContent::Symlink({
456 let link_target = entry.link_target.as_deref().unwrap_or_default();
457 OsStr::from_bytes(link_target).into()
458 })),
459 EntryType::Block => TarItem::Leaf(LeafContent::BlockDevice(
460 match (entry.dev_major, entry.dev_minor) {
461 (Some(major), Some(minor)) => makedev(major, minor),
462 _ => bail!("Device entry without device numbers?"),
463 },
464 )),
465 EntryType::Char => TarItem::Leaf(LeafContent::CharacterDevice(match (
466 entry.dev_major,
467 entry.dev_minor,
468 ) {
469 (Some(major), Some(minor)) => makedev(major, minor),
470 _ => bail!("Device entry without device numbers?"),
471 })),
472 EntryType::Fifo => TarItem::Leaf(LeafContent::Fifo),
473 _ => {
474 bail!("Unsupported entry type {:?}", entry.entry_type);
475 }
476 },
477 };
478
479 let xattrs: BTreeMap<_, _> = entry
480 .xattrs
481 .into_iter()
482 .map(|(k, v)| (Box::from(OsStr::from_bytes(&k)), Box::from(v.as_ref())))
483 .collect();
484
485 return Ok(Some(TarEntry {
486 path: make_absolute_path(&entry.path),
487 stat: Stat {
488 st_uid: entry.uid as u32,
489 st_gid: entry.gid as u32,
490 st_mode: entry.mode,
491 st_mtim_sec: entry.mtime as i64,
492 st_mtim_nsec: entry.pax.map_or(0, pax_mtime_nsec),
493 xattrs,
494 },
495 item,
496 }));
497 }
498 ParseEvent::SparseEntry { .. } => {
499 bail!("Sparse tar entries are not supported");
500 }
501 }
502 }
503 }
504}
505
506#[cfg(test)]
507mod tests {
508 use crate::TAR_LAYER_CONTENT_TYPE;
509
510 use super::*;
511 use composefs::{
512 fsverity::Sha256HashValue,
513 generic_tree::LeafContent,
514 repository::{Repository, RepositoryConfig},
515 splitstream::SplitStreamReader,
516 };
517 use std::{io::Read, path::Path, sync::Arc};
518 use tar::Builder;
519
520 use once_cell::sync::Lazy;
521 use std::sync::Mutex;
522
523 static TEST_TEMPDIRS: Lazy<Mutex<Vec<tempfile::TempDir>>> =
524 Lazy::new(|| Mutex::new(Vec::new()));
525
526 pub(crate) fn create_test_repository() -> Result<Arc<Repository<Sha256HashValue>>> {
527 let tempdir = tempfile::TempDir::new().unwrap();
528 let repo_path = tempdir.path().join("repo");
529 let (repo, _) = Repository::init_path(
530 rustix::fs::CWD,
531 &repo_path,
532 RepositoryConfig::default().set_insecure(),
533 )?;
534
535 {
537 let mut guard = TEST_TEMPDIRS.lock().unwrap();
538 guard.push(tempdir);
539 }
540
541 Ok(Arc::new(repo))
542 }
543
544 fn append_file(
546 builder: &mut Builder<&mut Vec<u8>>,
547 path: &str,
548 content: &[u8],
549 ) -> Result<tar::Header> {
550 let mut header = tar::Header::new_gnu();
551 header.set_mode(0o644);
552 header.set_uid(1000);
553 header.set_gid(1000);
554 header.set_mtime(1234567890);
555 header.set_size(content.len() as u64);
556 header.set_entry_type(tar::EntryType::Regular);
557 builder.append_data(&mut header, path, content)?;
558 Ok(header)
559 }
560
561 async fn read_all_via_splitstream(tar_data: Vec<u8>) -> Result<Vec<TarEntry<Sha256HashValue>>> {
563 let repo = create_test_repository()?;
564
565 let (object_id, _stats) =
566 split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE).await?;
567
568 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
569 repo.open_object(&object_id)?.into(),
570 Some(TAR_LAYER_CONTENT_TYPE),
571 )?;
572
573 let mut entries = Vec::new();
574 while let Some(entry) = get_entry(&mut reader)? {
575 entries.push(entry);
576 }
577 Ok(entries)
578 }
579
/// Checks nanosecond extraction from PAX `mtime` records for a range of fraction shapes.
#[test]
fn test_pax_mtime_nsec_parsing() {
    // (raw PAX data, expected nanoseconds, description used in the failure message)
    let cases: &[(&[u8], u32, &str)] = &[
        (
            b"30 mtime=1234567890.123456789\n",
            123_456_789,
            "9-digit fraction",
        ),
        (b"22 mtime=1234567890.5\n", 500_000_000, "1-digit fraction"),
        (
            b"30 mtime=1234567890.000000001\n",
            1,
            "trailing single non-zero digit",
        ),
        (
            b"31 mtime=1234567890.1234567899\n",
            123_456_789,
            "10-digit fraction truncated",
        ),
        (b"20 mtime=1234567890\n", 0, "no fractional part"),
        (b"16 path=foo.txt\n", 0, "no mtime key"),
        (b"", 0, "empty pax"),
    ];

    for (pax, expected, label) in cases {
        assert_eq!(pax_mtime_nsec(pax), *expected, "{label}");
    }
}
619
/// Table-driven check of `make_absolute_path`: leading-slash handling, trailing-slash
/// trimming, and the empty result for the root.
#[test]
fn test_make_absolute_path() {
    // (path as stored in the archive, expected absolute path)
    const CASES: &[(&[u8], &str)] = &[
        (b"foo/bar", "/foo/bar"),
        (b"/foo/bar", "/foo/bar"),
        (b"dir/", "/dir"),
        (b"/dir/", "/dir"),
        (b"a", "/a"),
        (b"/a", "/a"),
        (
            b"usr/lib/python3/dist-packages/foo",
            "/usr/lib/python3/dist-packages/foo",
        ),
        (b"dir//", "/dir"),
        (b"file.txt", "/file.txt"),
        (b"a/b/c/", "/a/b/c"),
        (b"", ""),
        (b"/", ""),
    ];

    CASES.iter().for_each(|&(input, expected)| {
        assert_eq!(
            make_absolute_path(input),
            PathBuf::from(expected),
            "make_absolute_path({:?})",
            String::from_utf8_lossy(input),
        );
    });
}
653
654 #[tokio::test]
655 async fn test_empty_tar() {
656 let mut tar_data = Vec::new();
657 {
658 let mut builder = Builder::new(&mut tar_data);
659 builder.finish().unwrap();
660 }
661
662 let repo = create_test_repository().unwrap();
663
664 let (object_id, stats) = split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
665 .await
666 .unwrap();
667 assert_eq!(
668 stats.objects_copied, 0,
669 "empty tar should have no external objects"
670 );
671
672 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
673 repo.open_object(&object_id).unwrap().into(),
674 Some(TAR_LAYER_CONTENT_TYPE),
675 )
676 .unwrap();
677 assert!(get_entry(&mut reader).unwrap().is_none());
678 }
679
/// Round-trips an archive whose length is not a multiple of the GNU record size.
#[test]
fn test_no_record_padding_roundtrip() {
    /// 20 blocks of 512 bytes.
    const GNU_RECORD_SIZE: usize = 20 * 512;

    let mut archive = Vec::new();
    {
        let mut builder = Builder::new(&mut archive);
        append_file(&mut builder, "hello.txt", b"hello world").unwrap();
        builder.finish().unwrap();
    }

    // Guard the premise of the test: the builder must not have padded to a full record.
    assert_ne!(
        archive.len() % GNU_RECORD_SIZE,
        0,
        "expected tar without GNU record padding for this test"
    );

    roundtrip_tar_bytes(&archive);
}
700
/// Round-trips an archive that has been zero-padded to a whole GNU record.
#[test]
fn test_gnu_record_padding_roundtrip() {
    /// 20 blocks of 512 bytes.
    const GNU_RECORD_SIZE: usize = 20 * 512;

    let mut archive = Vec::new();
    {
        let mut builder = Builder::new(&mut archive);
        append_file(&mut builder, "hello.txt", b"hello world").unwrap();
        builder.finish().unwrap();
    }

    // Zero-fill up to the next record boundary (a no-op when already aligned).
    let padded_len = archive.len().next_multiple_of(GNU_RECORD_SIZE);
    archive.resize(padded_len, 0);

    assert_eq!(archive.len() % GNU_RECORD_SIZE, 0);

    roundtrip_tar_bytes(&archive);
}
729
730 #[tokio::test]
731 async fn test_single_small_file() {
732 let mut tar_data = Vec::new();
733 let original_header = {
734 let mut builder = Builder::new(&mut tar_data);
735
736 let content = b"Hello, World!";
738 let header = append_file(&mut builder, "hello.txt", content).unwrap();
739
740 builder.finish().unwrap();
741 header
742 };
743
744 let repo = create_test_repository().unwrap();
745
746 let (object_id, stats) = split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
747 .await
748 .unwrap();
749 assert_eq!(
750 stats.objects_copied, 0,
751 "small file should be inline, not external"
752 );
753
754 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
755 repo.open_object(&object_id).unwrap().into(),
756 Some(TAR_LAYER_CONTENT_TYPE),
757 )
758 .unwrap();
759
760 let entry = get_entry(&mut reader)
762 .unwrap()
763 .expect("Should have one entry");
764 assert_eq!(entry.path, PathBuf::from("/hello.txt"));
765 assert!(matches!(
766 entry.item,
767 TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(_)))
768 ));
769
770 assert_header_stat_equal(&original_header, &entry.stat, "hello.txt");
772
773 if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) = entry.item {
774 assert_eq!(content.as_ref(), b"Hello, World!");
775 }
776
777 assert!(get_entry(&mut reader).unwrap().is_none());
779 }
780
781 #[tokio::test]
782 async fn test_inline_threshold() {
783 let mut tar_data = Vec::new();
784 let (threshold_header, over_threshold_header) = {
785 let mut builder = Builder::new(&mut tar_data);
786
787 let threshold_content = vec![b'X'; INLINE_CONTENT_MAX_V0];
789 let header1 =
790 append_file(&mut builder, "threshold_file.txt", &threshold_content).unwrap();
791
792 let over_threshold_content = vec![b'Y'; INLINE_CONTENT_MAX_V0 + 1];
794 let header2 = append_file(
795 &mut builder,
796 "over_threshold_file.txt",
797 &over_threshold_content,
798 )
799 .unwrap();
800
801 builder.finish().unwrap();
802 (header1, header2)
803 };
804
805 let repo = create_test_repository().unwrap();
806
807 let (object_id, stats) = split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
808 .await
809 .unwrap();
810 assert_eq!(
811 stats.objects_copied, 1,
812 "one file over threshold should be external"
813 );
814
815 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
816 repo.open_object(&object_id).unwrap().into(),
817 Some(TAR_LAYER_CONTENT_TYPE),
818 )
819 .unwrap();
820
821 let mut object_refs = Vec::new();
822 reader
823 .get_object_refs(|id| object_refs.push(id.clone()))
824 .unwrap();
825 assert_eq!(
826 object_refs.len(),
827 1,
828 "should have exactly 1 external object ref"
829 );
830
831 let mut entries = Vec::new();
832
833 while let Some(entry) = get_entry(&mut reader).unwrap() {
834 entries.push(entry);
835 }
836
837 assert_eq!(entries.len(), 2);
838
839 assert_eq!(entries[0].path, PathBuf::from("/threshold_file.txt"));
841 assert_header_stat_equal(&threshold_header, &entries[0].stat, "threshold_file.txt");
842 if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
843 entries[0].item
844 {
845 assert_eq!(content.len(), INLINE_CONTENT_MAX_V0);
846 assert_eq!(content[0], b'X');
847 } else {
848 panic!("Expected inline regular file for threshold file");
849 }
850
851 assert_eq!(entries[1].path, PathBuf::from("/over_threshold_file.txt"));
853 assert_header_stat_equal(
854 &over_threshold_header,
855 &entries[1].stat,
856 "over_threshold_file.txt",
857 );
858 if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(_, size))) = entries[1].item
859 {
860 assert_eq!(size, (INLINE_CONTENT_MAX_V0 + 1) as u64);
861 } else {
862 panic!("Expected external regular file for over-threshold file");
863 }
864 }
865
866 #[tokio::test]
867 async fn test_round_trip_simple() {
868 let mut original_tar = Vec::new();
870 let (small_header, large_header) = {
871 let mut builder = Builder::new(&mut original_tar);
872
873 let small_content = b"Small file content";
875 let header1 = append_file(&mut builder, "small.txt", small_content).unwrap();
876
877 let large_content = vec![b'L'; INLINE_CONTENT_MAX_V0 + 100];
879 let header2 = append_file(&mut builder, "large.txt", &large_content).unwrap();
880
881 builder.finish().unwrap();
882 (header1, header2)
883 };
884
885 let repo = create_test_repository().unwrap();
886
887 let (object_id, stats) =
888 split_async(&original_tar[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
889 .await
890 .unwrap();
891 assert_eq!(
892 stats.objects_copied, 1,
893 "only the large file should be external"
894 );
895
896 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
898 repo.open_object(&object_id).unwrap().into(),
899 Some(TAR_LAYER_CONTENT_TYPE),
900 )
901 .unwrap();
902
903 let mut object_refs = Vec::new();
904 reader
905 .get_object_refs(|id| object_refs.push(id.clone()))
906 .unwrap();
907 assert_eq!(
908 object_refs.len(),
909 1,
910 "should have exactly 1 external object ref"
911 );
912
913 let mut entries = Vec::new();
914
915 while let Some(entry) = get_entry(&mut reader).unwrap() {
916 entries.push(entry);
917 }
918
919 assert_eq!(entries.len(), 2, "Should have exactly 2 entries");
920
921 assert_eq!(entries[0].path, PathBuf::from("/small.txt"));
923 assert_header_stat_equal(&small_header, &entries[0].stat, "small.txt");
924
925 if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
926 entries[0].item
927 {
928 assert_eq!(content.as_ref(), b"Small file content");
929 } else {
930 panic!("Expected inline regular file for small.txt");
931 }
932
933 assert_eq!(entries[1].path, PathBuf::from("/large.txt"));
935 assert_header_stat_equal(&large_header, &entries[1].stat, "large.txt");
936
937 if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(ref id, size))) =
938 entries[1].item
939 {
940 assert_eq!(size, (INLINE_CONTENT_MAX_V0 + 100) as u64);
941 use std::io::Read;
943 let mut external_data = Vec::new();
944 std::fs::File::from(repo.open_object(id).unwrap())
945 .read_to_end(&mut external_data)
946 .unwrap();
947 let expected_content = vec![b'L'; INLINE_CONTENT_MAX_V0 + 100];
948 assert_eq!(
949 external_data, expected_content,
950 "External file content should match"
951 );
952 } else {
953 panic!("Expected external regular file for large.txt");
954 }
955 }
956
957 #[tokio::test]
958 async fn test_special_filename_cases() {
959 let mut tar_data = Vec::new();
960 {
961 let mut builder = Builder::new(&mut tar_data);
962
963 let content1 = b"Special chars content";
965 append_file(&mut builder, "file-with_special.chars@123", content1).unwrap();
966
967 let long_name = "a".repeat(100);
969 let content2 = b"Long filename content";
970 append_file(&mut builder, &long_name, content2).unwrap();
971
972 builder.finish().unwrap();
973 };
974
975 let entries = read_all_via_splitstream(tar_data).await.unwrap();
976 assert_eq!(entries.len(), 2);
977
978 assert_eq!(
980 entries[0].path,
981 PathBuf::from("/file-with_special.chars@123")
982 );
983 assert_eq!(
984 entries[0].path.file_name().unwrap(),
985 "file-with_special.chars@123"
986 );
987
988 let expected_long_path = format!("/{}", "a".repeat(100));
990 assert_eq!(entries[1].path, PathBuf::from(expected_long_path));
991 assert_eq!(entries[1].path.file_name().unwrap(), &*"a".repeat(100));
992 }
993
994 #[tokio::test]
995 async fn test_gnu_long_filename_reproduction() {
996 let very_long_path = format!(
998 "very/long/path/that/exceeds/the/normal/tar/header/limit/{}",
999 "x".repeat(120)
1000 );
1001 let content = b"Content for very long path";
1002
1003 let mut tar_data = Vec::new();
1005 {
1006 let mut builder = Builder::new(&mut tar_data);
1007 append_file(&mut builder, &very_long_path, content).unwrap();
1008 builder.finish().unwrap();
1009 };
1010
1011 let entries = read_all_via_splitstream(tar_data).await.unwrap();
1012 assert_eq!(entries.len(), 1);
1013 let abspath = format!("/{very_long_path}");
1014 assert_eq!(entries[0].path, Path::new(&abspath));
1015 }
1016
1017 #[tokio::test]
1018 async fn test_gnu_longlink() {
1019 let very_long_path = format!(
1020 "very/long/path/that/exceeds/the/normal/tar/header/limit/{}",
1021 "x".repeat(120)
1022 );
1023
1024 let mut tar_data = Vec::new();
1026 {
1027 let mut builder = Builder::new(&mut tar_data);
1028 let mut header = tar::Header::new_gnu();
1029 header.set_mode(0o777);
1030 header.set_entry_type(tar::EntryType::Symlink);
1031 header.set_size(0);
1032 header.set_uid(0);
1033 header.set_gid(0);
1034 builder
1035 .append_link(&mut header, "long-symlink", &very_long_path)
1036 .unwrap();
1037 builder.finish().unwrap();
1038 };
1039
1040 let entries = read_all_via_splitstream(tar_data).await.unwrap();
1041 assert_eq!(entries.len(), 1);
1042 match &entries[0].item {
1043 TarItem::Leaf(LeafContent::Symlink(target)) => {
1044 assert_eq!(&**target, OsStr::new(&very_long_path));
1045 }
1046 _ => unreachable!(),
1047 };
1048 }
1049
1050 fn assert_header_stat_equal(header: &tar::Header, stat: &Stat, msg_prefix: &str) {
1052 assert_eq!(
1053 header.mode().unwrap(),
1054 stat.st_mode,
1055 "{msg_prefix}: mode mismatch"
1056 );
1057 assert_eq!(
1058 header.uid().unwrap() as u32,
1059 stat.st_uid,
1060 "{msg_prefix}: uid mismatch"
1061 );
1062 assert_eq!(
1063 header.gid().unwrap() as u32,
1064 stat.st_gid,
1065 "{msg_prefix}: gid mismatch"
1066 );
1067 assert_eq!(
1068 header.mtime().unwrap() as i64,
1069 stat.st_mtim_sec,
1070 "{msg_prefix}: mtime mismatch"
1071 );
1072 }
1073
/// Manual throughput benchmark for `split_async`; ignored by default.
///
/// Builds an archive of `NUM_FILES` files of `FILE_SIZE` bytes each, imports it
/// `ITERATIONS` times into fresh repositories, and prints per-iteration and average
/// timings.
#[test]
#[ignore]
fn bench_tar_split() {
    use std::time::{Duration, Instant};

    const NUM_FILES: usize = 10000;
    const FILE_SIZE: usize = 200 * 1024;
    const ITERATIONS: usize = 3;
    const MIB: f64 = 1024.0 * 1024.0;

    // Deterministic filler that differs per file through `seed`.
    fn generate_test_data(size: usize, seed: u8) -> Vec<u8> {
        (0..size)
            .map(|i| ((i as u8).wrapping_add(seed)).wrapping_mul(17))
            .collect()
    }

    println!("\n=== Tar Split Benchmark ===");
    println!(
        "Configuration: {} files of {}KB each, {} iterations",
        NUM_FILES,
        FILE_SIZE / 1024,
        ITERATIONS
    );

    let mut tar_data = Vec::new();
    {
        let mut builder = Builder::new(&mut tar_data);
        for i in 0..NUM_FILES {
            let content = generate_test_data(FILE_SIZE, i as u8);
            let filename = format!("file_{:04}.bin", i);
            append_file(&mut builder, &filename, &content).unwrap();
        }
        builder.finish().unwrap();
    }

    let tar_size = tar_data.len();
    let tar_mib = tar_size as f64 / MIB;
    println!("Tar archive size: {} bytes ({:.2} MB)", tar_size, tar_mib);

    let rt = tokio::runtime::Builder::new_multi_thread()
        .enable_all()
        .build()
        .unwrap();

    let times: Vec<Duration> = (0..ITERATIONS)
        .map(|i| {
            // Repository setup and the input copy happen outside the timed section.
            let repo = create_test_repository().unwrap();
            let tar_data_clone = tar_data.clone();

            let start = Instant::now();
            rt.block_on(async {
                split_async(&tar_data_clone[..], repo, TAR_LAYER_CONTENT_TYPE)
                    .await
                    .map(|(id, _stats)| id)
            })
            .unwrap();
            let elapsed = start.elapsed();

            println!("Iteration {}: {:?}", i + 1, elapsed);
            elapsed
        })
        .collect();

    let total: Duration = times.iter().sum();
    let avg = total / ITERATIONS as u32;
    println!("\n=== Summary ===");
    println!(
        "Average: {:?} ({:.2} MB/s)",
        avg,
        tar_mib / avg.as_secs_f64()
    );
}
1152
1153 #[tokio::test]
1155 async fn test_split_streaming_roundtrip() {
1156 let mut tar_data = Vec::new();
1158 {
1159 let mut builder = Builder::new(&mut tar_data);
1160
1161 let small_content = b"Small file content";
1163 append_file(&mut builder, "small.txt", small_content).unwrap();
1164
1165 let large_content = vec![b'L'; INLINE_CONTENT_MAX_V0 + 100];
1167 append_file(&mut builder, "large.txt", &large_content).unwrap();
1168
1169 let small2_content = b"Another small file";
1171 append_file(&mut builder, "small2.txt", small2_content).unwrap();
1172
1173 builder.finish().unwrap();
1174 }
1175
1176 let repo = create_test_repository().unwrap();
1177
1178 let (object_id, stats) = split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
1180 .await
1181 .unwrap();
1182 assert_eq!(
1183 stats.objects_copied, 1,
1184 "only the large file should be external"
1185 );
1186
1187 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
1189 repo.open_object(&object_id).unwrap().into(),
1190 Some(TAR_LAYER_CONTENT_TYPE),
1191 )
1192 .unwrap();
1193
1194 let mut object_refs = Vec::new();
1195 reader
1196 .get_object_refs(|id| object_refs.push(id.clone()))
1197 .unwrap();
1198 assert_eq!(
1199 object_refs.len(),
1200 1,
1201 "should have exactly 1 external object ref"
1202 );
1203
1204 let mut entries = Vec::new();
1205 while let Some(entry) = get_entry(&mut reader).unwrap() {
1206 entries.push(entry);
1207 }
1208
1209 assert_eq!(entries.len(), 3, "Should have 3 entries");
1210
1211 assert_eq!(entries[0].path, PathBuf::from("/small.txt"));
1213 if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
1214 entries[0].item
1215 {
1216 assert_eq!(content.as_ref(), b"Small file content");
1217 } else {
1218 panic!("Expected inline regular file for small.txt");
1219 }
1220
1221 assert_eq!(entries[1].path, PathBuf::from("/large.txt"));
1223 if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(ref id, size))) =
1224 entries[1].item
1225 {
1226 assert_eq!(size, (INLINE_CONTENT_MAX_V0 + 100) as u64);
1227 let mut external_data = Vec::new();
1229 std::fs::File::from(repo.open_object(id).unwrap())
1230 .read_to_end(&mut external_data)
1231 .unwrap();
1232 let expected_content = vec![b'L'; INLINE_CONTENT_MAX_V0 + 100];
1233 assert_eq!(
1234 external_data, expected_content,
1235 "External file content should match"
1236 );
1237 } else {
1238 panic!("Expected external regular file for large.txt");
1239 }
1240
1241 assert_eq!(entries[2].path, PathBuf::from("/small2.txt"));
1243 if let TarItem::Leaf(LeafContent::Regular(RegularFile::Inline(ref content))) =
1244 entries[2].item
1245 {
1246 assert_eq!(content.as_ref(), b"Another small file");
1247 } else {
1248 panic!("Expected inline regular file for small2.txt");
1249 }
1250 }
1251
1252 #[tokio::test]
1254 async fn test_split_streaming_multiple_large_files() {
1255 let mut tar_data = Vec::new();
1256 {
1257 let mut builder = Builder::new(&mut tar_data);
1258
1259 for i in 0..3 {
1261 let content = vec![(i + 0x41) as u8; INLINE_CONTENT_MAX_V0 + 1000]; let filename = format!("file{}.bin", i);
1263 append_file(&mut builder, &filename, &content).unwrap();
1264 }
1265
1266 builder.finish().unwrap();
1267 }
1268
1269 let repo = create_test_repository().unwrap();
1270
1271 let (object_id, stats) = split_async(&tar_data[..], repo.clone(), TAR_LAYER_CONTENT_TYPE)
1272 .await
1273 .unwrap();
1274 assert_eq!(
1275 stats.objects_copied, 3,
1276 "all 3 large files should be external"
1277 );
1278
1279 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
1281 repo.open_object(&object_id).unwrap().into(),
1282 Some(TAR_LAYER_CONTENT_TYPE),
1283 )
1284 .unwrap();
1285
1286 let mut object_refs = Vec::new();
1287 reader
1288 .get_object_refs(|id| object_refs.push(id.clone()))
1289 .unwrap();
1290 assert_eq!(
1291 object_refs.len(),
1292 3,
1293 "should have exactly 3 external object refs"
1294 );
1295
1296 let mut entries = Vec::new();
1297 while let Some(entry) = get_entry(&mut reader).unwrap() {
1298 entries.push(entry);
1299 }
1300
1301 assert_eq!(entries.len(), 3, "Should have 3 entries");
1302
1303 for (i, entry) in entries.iter().enumerate() {
1304 let expected_path = format!("/file{}.bin", i);
1305 assert_eq!(entry.path, PathBuf::from(&expected_path));
1306
1307 if let TarItem::Leaf(LeafContent::Regular(RegularFile::External(ref id, size))) =
1308 entry.item
1309 {
1310 assert_eq!(size, (INLINE_CONTENT_MAX_V0 + 1000) as u64);
1311 let mut external_data = Vec::new();
1312 std::fs::File::from(repo.open_object(id).unwrap())
1313 .read_to_end(&mut external_data)
1314 .unwrap();
1315 let expected_content = vec![(i + 0x41) as u8; INLINE_CONTENT_MAX_V0 + 1000];
1316 assert_eq!(
1317 external_data, expected_content,
1318 "External file {} content should match",
1319 i
1320 );
1321 } else {
1322 panic!("Expected external regular file for file{}.bin", i);
1323 }
1324 }
1325 }
1326
1327 #[tokio::test]
1338 async fn test_longpath_formats() {
1339 let cases: &[(&str, fn() -> String, bool)] = &[
1342 ("short path", || "short.txt".to_string(), false),
1344 ("exactly 100 chars", || "x".repeat(100), false),
1345 (
1347 "ustar prefix",
1348 || format!("{}/{}", "dir".repeat(40), "file.txt"),
1349 false,
1350 ),
1351 (
1352 "max ustar (~254 chars)",
1353 || format!("{}/{}", "p".repeat(154), "n".repeat(99)),
1354 false,
1355 ),
1356 (
1358 "gnu longname",
1359 || format!("{}/{}", "a".repeat(80), "b".repeat(50)),
1360 true,
1361 ),
1362 (
1364 "pax extended",
1365 || format!("{}/{}", "sub/".repeat(60), "file.txt"),
1366 false,
1367 ),
1368 ];
1369
1370 for (desc, make_path, use_gnu) in cases {
1371 let path = make_path();
1372 let content = b"test content";
1373
1374 let mut tar_data = Vec::new();
1375 {
1376 let mut builder = Builder::new(&mut tar_data);
1377 let mut header = if *use_gnu {
1378 tar::Header::new_gnu()
1379 } else {
1380 tar::Header::new_ustar()
1381 };
1382 header.set_mode(0o644);
1383 header.set_uid(1000);
1384 header.set_gid(1000);
1385 header.set_mtime(1234567890);
1386 header.set_size(content.len() as u64);
1387 header.set_entry_type(tar::EntryType::Regular);
1388 builder
1389 .append_data(&mut header, &path, &content[..])
1390 .unwrap();
1391 builder.finish().unwrap();
1392 }
1393
1394 let entries = read_all_via_splitstream(tar_data).await.unwrap();
1395 assert_eq!(entries.len(), 1, "{desc}: expected 1 entry");
1396 assert_eq!(
1397 entries[0].path,
1398 PathBuf::from(format!("/{}", path)),
1399 "{desc}: path mismatch (len={})",
1400 path.len()
1401 );
1402 }
1403 }
1404
1405 #[tokio::test]
1407 async fn test_longpath_hardlinks() {
1408 let cases: &[(&str, fn() -> String, bool)] = &[
1409 ("short target", || "target.txt".to_string(), true),
1410 (
1411 "gnu longlink",
1412 || format!("{}/{}", "c".repeat(80), "d".repeat(50)),
1413 true,
1414 ),
1415 (
1416 "pax linkpath",
1417 || format!("{}/{}", "sub/".repeat(60), "target.txt"),
1418 false,
1419 ),
1420 ];
1421
1422 for (desc, make_target, use_gnu) in cases {
1423 let target_path = make_target();
1424 let link_name = "hardlink";
1425 let content = b"target content";
1426
1427 let mut tar_data = Vec::new();
1428 {
1429 let mut builder = Builder::new(&mut tar_data);
1430
1431 let mut header = if *use_gnu {
1433 tar::Header::new_gnu()
1434 } else {
1435 tar::Header::new_ustar()
1436 };
1437 header.set_mode(0o644);
1438 header.set_uid(1000);
1439 header.set_gid(1000);
1440 header.set_mtime(1234567890);
1441 header.set_size(content.len() as u64);
1442 header.set_entry_type(tar::EntryType::Regular);
1443 builder
1444 .append_data(&mut header, &target_path, &content[..])
1445 .unwrap();
1446
1447 let mut link_header = if *use_gnu {
1449 tar::Header::new_gnu()
1450 } else {
1451 tar::Header::new_ustar()
1452 };
1453 link_header.set_mode(0o644);
1454 link_header.set_uid(1000);
1455 link_header.set_gid(1000);
1456 link_header.set_mtime(1234567890);
1457 link_header.set_size(0);
1458 link_header.set_entry_type(tar::EntryType::Link);
1459 builder
1460 .append_link(&mut link_header, link_name, &target_path)
1461 .unwrap();
1462
1463 builder.finish().unwrap();
1464 }
1465
1466 let entries = read_all_via_splitstream(tar_data).await.unwrap();
1467 assert_eq!(entries.len(), 2, "{desc}: expected 2 entries");
1468 assert_eq!(
1469 entries[0].path,
1470 PathBuf::from(format!("/{}", target_path)),
1471 "{desc}"
1472 );
1473 assert_eq!(
1474 entries[1].path,
1475 PathBuf::from(format!("/{}", link_name)),
1476 "{desc}"
1477 );
1478
1479 match &entries[1].item {
1480 TarItem::Hardlink(target) => {
1481 assert_eq!(
1482 target.to_str().unwrap(),
1483 format!("/{}", target_path),
1484 "{desc}: hardlink target mismatch"
1485 );
1486 }
1487 _ => panic!("{desc}: expected hardlink entry"),
1488 }
1489 }
1490 }
1491
1492 #[tokio::test]
1494 async fn test_ustar_prefix_field_used() {
1495 let dir_path =
1497 "usr/lib/python3.12/site-packages/some-very-long-package-name-here/__pycache__/subdir";
1498 let filename = "module_name_with_extra_stuff.cpython-312.opt-2.pyc";
1499 let full_path = format!("{dir_path}/{filename}");
1500
1501 assert!(
1503 full_path.len() > 100,
1504 "full path must exceed 100 chars to use prefix"
1505 );
1506 assert!(filename.len() <= 100, "filename must fit in name field");
1507
1508 let mut tar_data = Vec::new();
1509 {
1510 let mut builder = Builder::new(&mut tar_data);
1511 let mut header = tar::Header::new_ustar();
1512 header.set_mode(0o644);
1513 header.set_size(4);
1514 header.set_entry_type(tar::EntryType::Regular);
1515 header.set_path(&full_path).unwrap();
1516 header.set_cksum();
1517 builder.append(&header, b"test".as_slice()).unwrap();
1518 builder.finish().unwrap();
1519 }
1520
1521 let prefix_field = &tar_data[345..500];
1523 let prefix_str = std::str::from_utf8(prefix_field)
1524 .unwrap()
1525 .trim_end_matches('\0');
1526 assert_eq!(
1527 prefix_str, dir_path,
1528 "UStar prefix field should contain directory"
1529 );
1530
1531 let entries = read_all_via_splitstream(tar_data).await.unwrap();
1532 assert_eq!(entries[0].path, PathBuf::from(format!("/{full_path}")));
1533 }
1534
1535 fn roundtrip_tar_bytes(tar_data: &[u8]) {
1539 let rt = tokio::runtime::Runtime::new().unwrap();
1540 rt.block_on(async {
1541 let repo = create_test_repository().unwrap();
1542 let (object_id, _stats) = split_async(tar_data, repo.clone(), TAR_LAYER_CONTENT_TYPE)
1543 .await
1544 .unwrap();
1545
1546 let mut reader: SplitStreamReader<Sha256HashValue> = SplitStreamReader::new(
1547 repo.open_object(&object_id).unwrap().into(),
1548 Some(TAR_LAYER_CONTENT_TYPE),
1549 )
1550 .unwrap();
1551
1552 let mut reassembled = Vec::new();
1553 reader.cat(&repo, &mut reassembled).unwrap();
1554 assert_eq!(
1555 reassembled.len(),
1556 tar_data.len(),
1557 "reassembled tar length mismatch"
1558 );
1559 assert_eq!(
1560 reassembled, tar_data,
1561 "reassembled tar bytes differ from original"
1562 );
1563 });
1564 }
1565
1566 mod proptest_tests {
1568 use super::*;
1569 use proptest::prelude::*;
1570
1571 fn path_component() -> impl Strategy<Value = String> {
1573 proptest::string::string_regex("[a-zA-Z0-9_][a-zA-Z0-9_.-]{0,30}")
1574 .expect("valid regex")
1575 .prop_filter("non-empty", |s| !s.is_empty())
1576 }
1577
1578 fn path_with_length(min_len: usize, max_len: usize) -> impl Strategy<Value = String> {
1580 prop::collection::vec(path_component(), 1..20)
1581 .prop_map(|components| components.join("/"))
1582 .prop_filter("length in range", move |p| {
1583 p.len() >= min_len && p.len() <= max_len
1584 })
1585 }
1586
1587 fn roundtrip_path(path: &str) {
1589 let content = b"proptest content";
1590
1591 let mut tar_data = Vec::new();
1592 {
1593 let mut builder = Builder::new(&mut tar_data);
1594 let mut header = tar::Header::new_ustar();
1595 header.set_mode(0o644);
1596 header.set_uid(1000);
1597 header.set_gid(1000);
1598 header.set_mtime(1234567890);
1599 header.set_size(content.len() as u64);
1600 header.set_entry_type(tar::EntryType::Regular);
1601 builder
1602 .append_data(&mut header, path, &content[..])
1603 .unwrap();
1604 builder.finish().unwrap();
1605 }
1606
1607 let rt = tokio::runtime::Runtime::new().unwrap();
1608 let entries = rt.block_on(read_all_via_splitstream(tar_data)).unwrap();
1609 assert_eq!(entries.len(), 1, "expected 1 entry for path: {path}");
1610 assert_eq!(
1611 entries[0].path,
1612 PathBuf::from(format!("/{path}")),
1613 "path mismatch"
1614 );
1615 }
1616
1617 fn roundtrip_hardlink(target_path: &str) {
1619 let link_name = "link";
1620 let content = b"target content";
1621
1622 let mut tar_data = Vec::new();
1623 {
1624 let mut builder = Builder::new(&mut tar_data);
1625
1626 let mut header = tar::Header::new_ustar();
1627 header.set_mode(0o644);
1628 header.set_uid(1000);
1629 header.set_gid(1000);
1630 header.set_mtime(1234567890);
1631 header.set_size(content.len() as u64);
1632 header.set_entry_type(tar::EntryType::Regular);
1633 builder
1634 .append_data(&mut header, target_path, &content[..])
1635 .unwrap();
1636
1637 let mut link_header = tar::Header::new_ustar();
1638 link_header.set_mode(0o644);
1639 link_header.set_uid(1000);
1640 link_header.set_gid(1000);
1641 link_header.set_mtime(1234567890);
1642 link_header.set_size(0);
1643 link_header.set_entry_type(tar::EntryType::Link);
1644 builder
1645 .append_link(&mut link_header, link_name, target_path)
1646 .unwrap();
1647
1648 builder.finish().unwrap();
1649 }
1650
1651 let rt = tokio::runtime::Runtime::new().unwrap();
1652 let entries = rt.block_on(read_all_via_splitstream(tar_data)).unwrap();
1653 assert_eq!(entries.len(), 2);
1654 assert_eq!(entries[0].path, PathBuf::from(format!("/{target_path}")));
1655
1656 match &entries[1].item {
1657 TarItem::Hardlink(target) => {
1658 assert_eq!(target.to_str().unwrap(), format!("/{target_path}"));
1659 }
1660 _ => panic!("expected hardlink"),
1661 }
1662 }
1663
1664 fn file_size_strategy() -> impl Strategy<Value = usize> {
1668 prop_oneof![
1669 3 => 0..=INLINE_CONTENT_MAX_V0, 2 => (INLINE_CONTENT_MAX_V0 + 1)..=(INLINE_CONTENT_MAX_V0 + 2048), 1 => (INLINE_CONTENT_MAX_V0 + 2048)..=100_000usize, 2 => prop::sample::select(vec![
1674 0, 1, 63, 64, 65, 511, 512, 513, 1023, 1024, 1025, ]),
1678 ]
1679 }
1680
1681 fn tar_entry_strategy() -> impl Strategy<Value = (String, Vec<u8>)> {
1683 (file_size_strategy(), any::<u8>()).prop_flat_map(|(size, fill)| {
1684 (0..10000u32).prop_map(move |id| {
1686 let name = format!("file_{:05}.bin", id);
1687 let content = vec![fill.wrapping_add(id as u8); size];
1688 (name, content)
1689 })
1690 })
1691 }
1692
1693 fn build_tar(entries: &[(String, Vec<u8>)]) -> Vec<u8> {
1695 let mut tar_data = Vec::new();
1696 {
1697 let mut builder = Builder::new(&mut tar_data);
1698 for (name, content) in entries {
1699 append_file(&mut builder, name, content).unwrap();
1700 }
1701 builder.finish().unwrap();
1702 }
1703 tar_data
1704 }
1705
1706 proptest! {
1707 #![proptest_config(ProptestConfig::with_cases(64))]
1708
1709 #[test]
1710 fn test_short_paths(path in path_with_length(1, 100)) {
1711 roundtrip_path(&path);
1712 }
1713
1714 #[test]
1715 fn test_medium_paths(path in path_with_length(101, 255)) {
1716 roundtrip_path(&path);
1717 }
1718
1719 #[test]
1720 fn test_long_paths(path in path_with_length(256, 500)) {
1721 roundtrip_path(&path);
1722 }
1723
1724 #[test]
1725 fn test_hardlink_targets(target in path_with_length(1, 400)) {
1726 roundtrip_hardlink(&target);
1727 }
1728
1729 #[test]
1733 fn test_tar_byte_roundtrip_proptest(
1734 entries in prop::collection::vec(tar_entry_strategy(), 1..8)
1735 ) {
1736 let tar_data = build_tar(&entries);
1737 roundtrip_tar_bytes(&tar_data);
1738 }
1739 }
1740 }
1741}