1use std::fmt;
8use std::pin::Pin;
9use std::task::{Context, Poll};
10
11use async_compression::tokio::bufread::{GzipDecoder, ZstdDecoder};
12use futures::StreamExt;
13use sha2::{Digest as Sha2Digest, Sha256};
14use tokio::io::{AsyncRead, AsyncReadExt, BufReader, ReadBuf};
15use tokio_tar as tar;
16
17use crate::filetree::{
18 DataSpool, DeviceNode, DirectoryNode, FileData, FileTree, FileTreeError, InodeMetadata,
19 RegularFileNode, ResourceLimits, SPOOL_THRESHOLD, SymlinkNode, TreeNode, Xattr,
20};
21
/// Filename prefix that marks a whiteout entry. `classify_whiteout` treats the
/// bytes following this prefix as the name of the entry being hidden.
const WHITEOUT_PREFIX: &[u8] = b".wh.";

/// Exact filename of the opaque-directory marker. `classify_whiteout` compares
/// against it before the generic prefix test, because it also starts with
/// `WHITEOUT_PREFIX`.
const OPAQUE_WHITEOUT: &[u8] = b".wh..wh..opq";

use crate::filetree::{OPAQUE_XATTR_NAME, OPAQUE_XATTR_VALUE, WHITEOUT_MAJOR, WHITEOUT_MINOR};

/// Leading bytes that `Compression::detect` maps to `Compression::Gzip`.
const GZIP_MAGIC: [u8; 2] = [0x1F, 0x8B];

/// Leading bytes that `Compression::detect` maps to `Compression::Zstd`.
const ZSTD_MAGIC: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];

/// `ingest_tar` yields to the Tokio scheduler whenever its entry counter is a
/// multiple of this value.
const INGEST_YIELD_EVERY_ENTRIES: u64 = 32;
42
/// Compression applied to a tar stream, as chosen by
/// `Compression::from_media_type` or `Compression::detect` and consumed by
/// `ingest_compressed_tar`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum Compression {
    /// The stream is read as a plain tar archive.
    None,
    /// The stream is decoded with `GzipDecoder` before tar parsing.
    Gzip,
    /// The stream is decoded with `ZstdDecoder` before tar parsing.
    Zstd,
}
57
/// Errors produced while ingesting a tar archive into a `FileTree`.
#[derive(Debug)]
pub enum IngestError {
    /// An underlying I/O error (archive reading, decoding, or spool access).
    Io(std::io::Error),
    /// A path contained a `..` component; carries the lossy-UTF-8 path.
    PathTraversal(String),
    /// A path exceeded `ResourceLimits::max_path_length`; carries the path.
    PathTooLong(String),
    /// A path exceeded `ResourceLimits::max_path_depth`; carries the path.
    PathTooDeep(String),
    /// A regular file's header size exceeded `ResourceLimits::max_file_size`;
    /// carries the file's path.
    FileTooLarge(String),
    /// The running sum of regular-file sizes exceeded
    /// `ResourceLimits::max_total_size`.
    TotalSizeExceeded,
    /// More entries were seen than `ResourceLimits::max_entry_count` allows.
    EntryCountExceeded,
    /// A symlink target exceeded `ResourceLimits::max_symlink_target`;
    /// carries the path of the symlink entry itself (not the target).
    SymlinkTargetTooLong(String),
    /// A hardlink target was missing, normalized to the root, or was not a
    /// regular file; carries the target path or a descriptive message.
    HardlinkTarget(String),
    /// A structurally invalid entry; carries a human-readable message.
    InvalidEntry(String),
    /// An error reported by a `FileTree` operation.
    Tree(FileTreeError),
}
84
/// Result of classifying an entry's filename with `classify_whiteout`.
enum WhiteoutKind<'a> {
    /// The filename is not a whiteout marker.
    None,
    /// The filename is exactly `OPAQUE_WHITEOUT`.
    Opaque,
    /// The filename is `WHITEOUT_PREFIX` followed by a non-empty name; the
    /// slice borrows that trailing name from the input filename.
    File(&'a [u8]),
}
94
95impl Compression {
100 pub fn from_media_type(media_type: &str) -> Self {
102 if media_type.contains("gzip") {
103 Compression::Gzip
104 } else if media_type.contains("zstd") {
105 Compression::Zstd
106 } else {
107 Compression::None
108 }
109 }
110
111 pub fn detect(magic: &[u8]) -> Self {
113 if magic.len() >= 4 && magic[..4] == ZSTD_MAGIC {
114 Compression::Zstd
115 } else if magic.len() >= 2 && magic[..2] == GZIP_MAGIC {
116 Compression::Gzip
117 } else {
118 Compression::None
119 }
120 }
121}
122
123impl fmt::Display for IngestError {
128 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
129 match self {
130 IngestError::Io(e) => write!(f, "I/O error: {e}"),
131 IngestError::PathTraversal(p) => write!(f, "path traversal in tar: \"{p}\""),
132 IngestError::PathTooLong(p) => write!(f, "path too long: \"{p}\""),
133 IngestError::PathTooDeep(p) => write!(f, "path too deep: \"{p}\""),
134 IngestError::FileTooLarge(p) => write!(f, "file too large: \"{p}\""),
135 IngestError::TotalSizeExceeded => write!(f, "total extracted size exceeded"),
136 IngestError::EntryCountExceeded => write!(f, "entry count exceeded"),
137 IngestError::SymlinkTargetTooLong(p) => {
138 write!(f, "symlink target too long: \"{p}\"")
139 }
140 IngestError::HardlinkTarget(p) => {
141 write!(f, "hardlink target not found: \"{p}\"")
142 }
143 IngestError::InvalidEntry(msg) => write!(f, "invalid tar entry: {msg}"),
144 IngestError::Tree(e) => write!(f, "file tree error: {e}"),
145 }
146 }
147}
148
149impl std::error::Error for IngestError {
150 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
151 match self {
152 IngestError::Io(e) => Some(e),
153 IngestError::Tree(e) => Some(e),
154 _ => None,
155 }
156 }
157}
158
159impl From<std::io::Error> for IngestError {
160 fn from(e: std::io::Error) -> Self {
161 IngestError::Io(e)
162 }
163}
164
165impl From<FileTreeError> for IngestError {
166 fn from(e: FileTreeError) -> Self {
167 IngestError::Tree(e)
168 }
169}
170
/// Reads an uncompressed tar stream entry by entry and builds a [`FileTree`].
///
/// * `reader` - source of the raw tar bytes.
/// * `limits` - bounds on entry count, path length/depth, per-file size,
///   total size and symlink target length.
/// * `spool` - optional spool; when present, file contents of at least
///   `SPOOL_THRESHOLD` bytes are handed to `DataSpool::write_data` instead of
///   being kept in memory.
///
/// Regular-file and symlink entries whose filename is a whiteout marker are
/// not stored under their own name: an opaque marker sets the opaque xattr on
/// the parent directory, and a `.wh.<name>` marker inserts a character device
/// with `WHITEOUT_MAJOR`/`WHITEOUT_MINOR` at `<name>`. Entry types not matched
/// below are ignored.
///
/// # Errors
///
/// Returns an [`IngestError`] when a limit is exceeded, a path contains `..`,
/// a link entry has no or an unusable target, the archive cannot be read, or a
/// tree operation fails.
pub async fn ingest_tar<R: AsyncRead + Unpin>(
    reader: R,
    limits: &ResourceLimits,
    mut spool: Option<&mut DataSpool>,
) -> Result<FileTree, IngestError> {
    let mut archive = tar::Archive::new(reader);
    let mut tree = FileTree::new();
    let mut entry_count: u64 = 0;
    let mut total_size: u64 = 0;

    let mut entries = archive.entries().map_err(IngestError::Io)?;

    while let Some(entry_result) = entries.next().await {
        let mut entry = entry_result.map_err(IngestError::Io)?;

        // Every entry counts toward the limit, including ones skipped below.
        entry_count += 1;
        if entry_count > limits.max_entry_count {
            return Err(IngestError::EntryCountExceeded);
        }

        // Work on an owned copy of the header so `entry` stays free to be
        // borrowed mutably when its contents are read later.
        let header = entry.header().clone();

        let raw_path = entry.path_bytes().map_err(IngestError::Io)?;
        let path = normalize_path(&raw_path, limits)?;

        // `None` means the path normalized to nothing (the archive root);
        // such entries are skipped. Note that `continue` also bypasses the
        // cooperative yield at the bottom of the loop.
        let path = match path {
            Some(p) => p,
            None => continue,
        };

        let entry_type = header.entry_type();

        let metadata = extract_metadata(&header);

        match entry_type {
            tar::EntryType::Link => {
                let link_target_bytes = entry
                    .link_name_bytes()
                    .map_err(IngestError::Io)?
                    .ok_or_else(|| {
                        IngestError::InvalidEntry("hardlink with no target".to_string())
                    })?;
                // The target goes through the same normalization as entry
                // paths so it can be looked up in the tree.
                let target_path = normalize_path(&link_target_bytes, limits)?;
                let target_path = match target_path {
                    Some(p) => p,
                    None => {
                        // A target that normalizes to the root cannot be a
                        // regular file.
                        return Err(IngestError::HardlinkTarget(
                            String::from_utf8_lossy(&link_target_bytes).into_owned(),
                        ));
                    }
                };

                handle_hardlink(&mut tree, &path, &target_path)?;
            }
            tar::EntryType::Directory => {
                let node = TreeNode::Directory(DirectoryNode {
                    metadata,
                    xattrs: Vec::new(),
                    entries: std::collections::BTreeMap::new(),
                });
                tree.insert(&path, node)?;
            }
            tar::EntryType::Symlink => {
                let link_target = entry
                    .link_name_bytes()
                    .map_err(IngestError::Io)?
                    .ok_or_else(|| {
                        IngestError::InvalidEntry("symlink with no target".to_string())
                    })?;

                // The error carries the symlink's own path, not its target.
                if link_target.len() > limits.max_symlink_target {
                    return Err(IngestError::SymlinkTargetTooLong(
                        String::from_utf8_lossy(&path).into_owned(),
                    ));
                }

                let file_name = path_filename(&path);
                // Symlinks named like whiteout markers get the same
                // treatment as regular-file markers.
                match classify_whiteout(file_name) {
                    WhiteoutKind::Opaque => {
                        apply_opaque_xattr(&mut tree, &path)?;
                    }
                    WhiteoutKind::File(real_name) => {
                        let whiteout_path = replace_filename(&path, real_name);
                        let node = TreeNode::CharDevice(DeviceNode {
                            metadata,
                            major: WHITEOUT_MAJOR,
                            minor: WHITEOUT_MINOR,
                        });
                        tree.insert(&whiteout_path, node)?;
                    }
                    WhiteoutKind::None => {
                        let node = TreeNode::Symlink(SymlinkNode {
                            metadata,
                            target: link_target.into_owned(),
                        });
                        tree.insert(&path, node)?;
                    }
                }
            }
            tar::EntryType::Regular | tar::EntryType::Continuous => {
                let size = header.size().map_err(IngestError::Io)?;
                // Both size limits are checked against the header-declared
                // size before any content is read, and also apply to whiteout
                // markers.
                if size > limits.max_file_size {
                    return Err(IngestError::FileTooLarge(
                        String::from_utf8_lossy(&path).into_owned(),
                    ));
                }
                total_size = total_size.saturating_add(size);
                if total_size > limits.max_total_size {
                    return Err(IngestError::TotalSizeExceeded);
                }

                let file_name = path_filename(&path);
                match classify_whiteout(file_name) {
                    WhiteoutKind::Opaque => {
                        apply_opaque_xattr(&mut tree, &path)?;
                    }
                    WhiteoutKind::File(real_name) => {
                        // The marker's own content is never read.
                        let whiteout_path = replace_filename(&path, real_name);
                        let node = TreeNode::CharDevice(DeviceNode {
                            metadata,
                            major: WHITEOUT_MAJOR,
                            minor: WHITEOUT_MINOR,
                        });
                        tree.insert(&whiteout_path, node)?;
                    }
                    WhiteoutKind::None => {
                        // `size` was already bounded by `max_file_size` above.
                        let mut buf = Vec::with_capacity(size as usize);
                        entry.read_to_end(&mut buf).await.map_err(IngestError::Io)?;

                        // Spool only when the content reaches the threshold
                        // AND a spool was supplied; otherwise keep in memory.
                        let file_data = if buf.len() as u64 >= SPOOL_THRESHOLD
                            && let Some(spool) = spool.as_mut()
                        {
                            spool.write_data(&buf).map_err(IngestError::Io)?
                        } else {
                            FileData::Memory(buf)
                        };

                        let node = TreeNode::RegularFile(RegularFileNode {
                            metadata,
                            xattrs: Vec::new(),
                            data: file_data,
                            nlink: 1,
                        });
                        tree.insert(&path, node)?;
                    }
                }
            }
            tar::EntryType::Char => {
                // Device numbers absent from the header default to 0.
                let major = header.device_major().map_err(IngestError::Io)?.unwrap_or(0);
                let minor = header.device_minor().map_err(IngestError::Io)?.unwrap_or(0);
                let node = TreeNode::CharDevice(DeviceNode {
                    metadata,
                    major,
                    minor,
                });
                tree.insert(&path, node)?;
            }
            tar::EntryType::Block => {
                let major = header.device_major().map_err(IngestError::Io)?.unwrap_or(0);
                let minor = header.device_minor().map_err(IngestError::Io)?.unwrap_or(0);
                let node = TreeNode::BlockDevice(DeviceNode {
                    metadata,
                    major,
                    minor,
                });
                tree.insert(&path, node)?;
            }
            tar::EntryType::Fifo => {
                let node = TreeNode::Fifo(metadata);
                tree.insert(&path, node)?;
            }
            tar::EntryType::Other(0o140) => {
                // Entries with raw type value 0o140 are stored as sockets.
                // NOTE(review): confirm which producers emit this type value.
                let node = TreeNode::Socket(metadata);
                tree.insert(&path, node)?;
            }
            _ => {
                // Any other entry type is ignored without error.
            }
        }

        // Yield periodically so a large archive does not monopolize the
        // executor thread.
        if entry_count.is_multiple_of(INGEST_YIELD_EVERY_ENTRIES) {
            tokio::task::yield_now().await;
        }
    }

    Ok(tree)
}
382
/// Output of [`ingest_compressed_tar`].
pub struct IngestResult {
    /// The file tree built from the archive's entries.
    pub tree: FileTree,
    /// Hex-encoded SHA-256 of every byte read from the decompressed stream,
    /// including bytes drained after tar parsing finished.
    pub uncompressed_digest: String,
}
392
393pub async fn ingest_compressed_tar<R: AsyncRead + Unpin>(
394 reader: R,
395 compression: Compression,
396 limits: &ResourceLimits,
397 spool_path: Option<&std::path::Path>,
398) -> Result<IngestResult, IngestError> {
399 let mut spool = spool_path
400 .map(DataSpool::new)
401 .transpose()
402 .map_err(IngestError::Io)?;
403
404 match compression {
405 Compression::None => {
406 let mut hashing = HashingReader::new(reader);
407 let tree = ingest_tar(&mut hashing, limits, spool.as_mut()).await?;
408 drain_reader(&mut hashing).await?;
409 Ok(IngestResult {
410 tree,
411 uncompressed_digest: hashing.hex_digest(),
412 })
413 }
414 Compression::Gzip => {
415 let decoder = GzipDecoder::new(BufReader::new(reader));
416 let mut hashing = HashingReader::new(decoder);
417 let tree = ingest_tar(&mut hashing, limits, spool.as_mut()).await?;
418 drain_reader(&mut hashing).await?;
421 Ok(IngestResult {
422 tree,
423 uncompressed_digest: hashing.hex_digest(),
424 })
425 }
426 Compression::Zstd => {
427 let decoder = ZstdDecoder::new(BufReader::new(reader));
428 let mut hashing = HashingReader::new(decoder);
429 let tree = ingest_tar(&mut hashing, limits, spool.as_mut()).await?;
430 drain_reader(&mut hashing).await?;
431 Ok(IngestResult {
432 tree,
433 uncompressed_digest: hashing.hex_digest(),
434 })
435 }
436 }
437}
438
439async fn drain_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<(), IngestError> {
443 let mut buf = [0u8; 8192];
444 loop {
445 let n = reader.read(&mut buf).await.map_err(IngestError::Io)?;
446 if n == 0 {
447 break;
448 }
449 }
450 Ok(())
451}
452
/// Reader adapter that feeds every byte read through it into a SHA-256
/// hasher (see its `AsyncRead` implementation).
struct HashingReader<R> {
    /// The wrapped reader that actually supplies the bytes.
    inner: R,
    /// Running SHA-256 state over all bytes read so far.
    hasher: Sha256,
}
458
459impl<R> HashingReader<R> {
460 fn new(inner: R) -> Self {
461 Self {
462 inner,
463 hasher: Sha256::new(),
464 }
465 }
466
467 fn hex_digest(self) -> String {
468 hex::encode(self.hasher.finalize())
469 }
470}
471
472impl<R: AsyncRead + Unpin> AsyncRead for HashingReader<R> {
473 fn poll_read(
474 mut self: Pin<&mut Self>,
475 cx: &mut Context<'_>,
476 buf: &mut ReadBuf<'_>,
477 ) -> Poll<std::io::Result<()>> {
478 let before = buf.filled().len();
479 let result = Pin::new(&mut self.inner).poll_read(cx, buf);
480 if let Poll::Ready(Ok(())) = &result {
481 let new_bytes = &buf.filled()[before..];
482 if !new_bytes.is_empty() {
483 self.hasher.update(new_bytes);
484 }
485 }
486 result
487 }
488}
489
490fn normalize_path(raw: &[u8], limits: &ResourceLimits) -> Result<Option<Vec<u8>>, IngestError> {
496 let path = strip_dot_slash(raw);
497 let path = strip_leading_slashes(path);
498
499 let path = strip_trailing_slashes(path);
501
502 if path.is_empty() {
504 return Ok(None);
505 }
506
507 let mut depth: usize = 0;
509 for component in path.split(|&b| b == b'/') {
510 if component.is_empty() {
511 continue;
512 }
513 if component == b".." {
514 return Err(IngestError::PathTraversal(
515 String::from_utf8_lossy(path).into_owned(),
516 ));
517 }
518 depth += 1;
519 }
520
521 if path.len() > limits.max_path_length {
523 return Err(IngestError::PathTooLong(
524 String::from_utf8_lossy(path).into_owned(),
525 ));
526 }
527
528 if depth > limits.max_path_depth {
530 return Err(IngestError::PathTooDeep(
531 String::from_utf8_lossy(path).into_owned(),
532 ));
533 }
534
535 Ok(Some(path.to_vec()))
536}
537
/// Removes a single leading `./` from `path`; a path that is exactly `.`
/// becomes empty. Any other input is returned unchanged.
fn strip_dot_slash(path: &[u8]) -> &[u8] {
    if let Some(rest) = path.strip_prefix(b"./") {
        return rest;
    }
    if path == b"." {
        return b"";
    }
    path
}
548
/// Returns `path` without any trailing `/` bytes.
fn strip_trailing_slashes(path: &[u8]) -> &[u8] {
    // Keep everything up to and including the last non-slash byte; a path
    // made only of slashes (or empty) collapses to the empty slice.
    let keep = path
        .iter()
        .rposition(|&byte| byte != b'/')
        .map_or(0, |last| last + 1);
    &path[..keep]
}
557
/// Returns `path` without any leading `/` bytes.
fn strip_leading_slashes(path: &[u8]) -> &[u8] {
    // Start at the first non-slash byte; if there is none, the result is
    // empty.
    let first_kept = path
        .iter()
        .position(|&byte| byte != b'/')
        .unwrap_or(path.len());
    &path[first_kept..]
}
566
567fn extract_metadata(header: &tar::Header) -> InodeMetadata {
569 let uid = header.uid().unwrap_or(0) as u32;
570 let gid = header.gid().unwrap_or(0) as u32;
571 let mode = (header.mode().unwrap_or(0o644) & 0o7777) as u16;
572 let mtime = header.mtime().unwrap_or(0);
573
574 InodeMetadata {
575 uid,
576 gid,
577 mode,
578 mtime,
579 mtime_nsec: 0,
580 }
581}
582
/// Returns the bytes after the last `/` in `path`, or the whole path when it
/// contains no `/`.
fn path_filename(path: &[u8]) -> &[u8] {
    // `rsplit` always yields at least one item, so the fallback is only a
    // formality.
    path.rsplit(|&byte| byte == b'/').next().unwrap_or(path)
}
590
/// Returns the bytes before the last `/` in `path`, or an empty slice when
/// the path contains no `/`.
fn path_parent(path: &[u8]) -> &[u8] {
    // With no separator the cut point is 0, which yields the empty slice.
    let cut = path.iter().rposition(|&byte| byte == b'/').unwrap_or(0);
    &path[..cut]
}
599
600fn replace_filename(path: &[u8], new_name: &[u8]) -> Vec<u8> {
602 let parent = path_parent(path);
603 if parent.is_empty() {
604 new_name.to_vec()
605 } else {
606 let mut result = parent.to_vec();
607 result.push(b'/');
608 result.extend_from_slice(new_name);
609 result
610 }
611}
612
613fn classify_whiteout(filename: &[u8]) -> WhiteoutKind<'_> {
624 if filename == OPAQUE_WHITEOUT {
625 WhiteoutKind::Opaque
626 } else if filename.starts_with(WHITEOUT_PREFIX) {
627 let real_name = &filename[WHITEOUT_PREFIX.len()..];
628 if real_name.is_empty() {
629 WhiteoutKind::None
630 } else {
631 WhiteoutKind::File(real_name)
632 }
633 } else {
634 WhiteoutKind::None
635 }
636}
637
638fn apply_opaque_xattr(tree: &mut FileTree, path: &[u8]) -> Result<(), IngestError> {
640 let parent = path_parent(path);
641
642 let dir = if parent.is_empty() {
644 &mut tree.root
645 } else {
646 match tree.get_mut(parent) {
648 Some(TreeNode::Directory(dir)) => dir,
649 _ => {
650 let node = TreeNode::Directory(DirectoryNode::new(InodeMetadata::default()));
652 tree.insert(parent, node)?;
653 match tree.get_mut(parent) {
654 Some(TreeNode::Directory(dir)) => dir,
655 _ => {
656 return Err(IngestError::InvalidEntry(
657 "failed to create parent for opaque whiteout".to_string(),
658 ));
659 }
660 }
661 }
662 }
663 };
664
665 let already_has = dir
667 .xattrs
668 .iter()
669 .any(|x| x.name == OPAQUE_XATTR_NAME && x.value == OPAQUE_XATTR_VALUE);
670
671 if !already_has {
672 dir.xattrs.push(Xattr {
673 name: OPAQUE_XATTR_NAME.to_vec(),
674 value: OPAQUE_XATTR_VALUE.to_vec(),
675 });
676 }
677
678 Ok(())
679}
680
681fn handle_hardlink(
683 tree: &mut FileTree,
684 link_path: &[u8],
685 target_path: &[u8],
686) -> Result<(), IngestError> {
687 let target_path_str = String::from_utf8_lossy(target_path).into_owned();
688
689 let cloned_node = match tree.get(target_path) {
691 Some(TreeNode::RegularFile(f)) => {
692 let cloned = RegularFileNode {
693 metadata: InodeMetadata {
694 uid: f.metadata.uid,
695 gid: f.metadata.gid,
696 mode: f.metadata.mode,
697 mtime: f.metadata.mtime,
698 mtime_nsec: f.metadata.mtime_nsec,
699 },
700 xattrs: f
701 .xattrs
702 .iter()
703 .map(|x| Xattr {
704 name: x.name.clone(),
705 value: x.value.clone(),
706 })
707 .collect(),
708 data: DataSpool::clone_ref(&f.data),
709 nlink: f.nlink + 1,
710 };
711 let new_nlink = cloned.nlink;
713 (TreeNode::RegularFile(cloned), new_nlink)
714 }
715 Some(_) => {
716 return Err(IngestError::HardlinkTarget(format!(
718 "hardlink target is not a regular file: \"{target_path_str}\""
719 )));
720 }
721 None => {
722 return Err(IngestError::HardlinkTarget(target_path_str));
723 }
724 };
725
726 let (node, new_nlink) = cloned_node;
727
728 if let Some(TreeNode::RegularFile(target)) = tree.get_mut(target_path) {
730 target.nlink = new_nlink;
731 }
732
733 tree.insert(link_path, node)?;
735
736 Ok(())
737}
738
/// Unit tests for path normalization, compression detection, whiteout
/// classification and the path helpers, followed by end-to-end tests that
/// build small tar archives in memory and run them through `ingest_tar`.
#[cfg(test)]
mod tests {
    use super::*;

    // ---- normalize_path ----

    // A leading "./" is removed.
    #[test]
    fn normalize_strips_dot_slash_prefix() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"./foo/bar.txt", &limits).unwrap();
        assert_eq!(result, Some(b"foo/bar.txt".to_vec()));
    }

    // A bare "." normalizes to nothing.
    #[test]
    fn normalize_strips_bare_dot() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b".", &limits).unwrap();
        assert_eq!(result, None);
    }

    // "./" normalizes to nothing as well.
    #[test]
    fn normalize_strips_dot_slash_only() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"./", &limits).unwrap();
        assert_eq!(result, None);
    }

    // Absolute paths are made relative rather than rejected.
    #[test]
    fn normalize_strips_absolute_path_prefix() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"/etc/passwd", &limits).unwrap();
        assert_eq!(result, Some(b"etc/passwd".to_vec()));
    }

    // A bare "/" normalizes to nothing.
    #[test]
    fn normalize_skips_bare_root_path() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"/", &limits).unwrap();
        assert_eq!(result, None);
    }

    // ".." in the middle of a path is a traversal error.
    #[test]
    fn normalize_rejects_dotdot() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"foo/../etc/passwd", &limits);
        assert!(matches!(result, Err(IngestError::PathTraversal(_))));
    }

    // ".." as the first component is a traversal error.
    #[test]
    fn normalize_rejects_leading_dotdot() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"../etc/passwd", &limits);
        assert!(matches!(result, Err(IngestError::PathTraversal(_))));
    }

    // Only a component that is exactly ".." is rejected; "..foo" is a
    // legitimate filename.
    #[test]
    fn normalize_allows_dotdot_in_filename() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"dir/..foo", &limits).unwrap();
        assert_eq!(result, Some(b"dir/..foo".to_vec()));
    }

    // Paths longer than max_path_length are rejected.
    #[test]
    fn normalize_enforces_path_length() {
        let limits = ResourceLimits {
            max_path_length: 10,
            ..ResourceLimits::default()
        };
        let result = normalize_path(b"a/very/long/path/here", &limits);
        assert!(matches!(result, Err(IngestError::PathTooLong(_))));
    }

    // Paths with more components than max_path_depth are rejected.
    #[test]
    fn normalize_enforces_path_depth() {
        let limits = ResourceLimits {
            max_path_depth: 2,
            ..ResourceLimits::default()
        };
        let result = normalize_path(b"a/b/c", &limits);
        assert!(matches!(result, Err(IngestError::PathTooDeep(_))));
    }

    // Trailing slashes (as on directory entries) are removed.
    #[test]
    fn normalize_strips_trailing_slash() {
        let limits = ResourceLimits::default();
        let result = normalize_path(b"./foo/bar/", &limits).unwrap();
        assert_eq!(result, Some(b"foo/bar".to_vec()));
    }

    // ---- Compression::detect / Compression::from_media_type ----

    #[test]
    fn detect_gzip_magic() {
        assert_eq!(
            Compression::detect(&[0x1F, 0x8B, 0x08, 0x00]),
            Compression::Gzip
        );
    }

    #[test]
    fn detect_zstd_magic() {
        assert_eq!(
            Compression::detect(&[0x28, 0xB5, 0x2F, 0xFD, 0x00]),
            Compression::Zstd
        );
    }

    #[test]
    fn detect_none_for_unknown() {
        assert_eq!(
            Compression::detect(&[0x00, 0x00, 0x00, 0x00]),
            Compression::None
        );
    }

    // A single byte is too short to match either magic number.
    #[test]
    fn detect_none_for_short_input() {
        assert_eq!(Compression::detect(&[0x1F]), Compression::None);
    }

    // Exactly four bytes of zstd magic are enough to detect zstd.
    #[test]
    fn detect_zstd_takes_priority_over_partial_gzip() {
        assert_eq!(
            Compression::detect(&[0x28, 0xB5, 0x2F, 0xFD]),
            Compression::Zstd
        );
    }

    #[test]
    fn from_media_type_gzip() {
        assert_eq!(
            Compression::from_media_type("application/vnd.oci.image.layer.v1.tar+gzip"),
            Compression::Gzip
        );
    }

    #[test]
    fn from_media_type_zstd() {
        assert_eq!(
            Compression::from_media_type("application/vnd.oci.image.layer.v1.tar+zstd"),
            Compression::Zstd
        );
    }

    #[test]
    fn from_media_type_plain() {
        assert_eq!(
            Compression::from_media_type("application/vnd.oci.image.layer.v1.tar"),
            Compression::None
        );
    }

    // ---- classify_whiteout ----

    #[test]
    fn classify_whiteout_opaque() {
        assert!(matches!(
            classify_whiteout(b".wh..wh..opq"),
            WhiteoutKind::Opaque
        ));
    }

    // The name carried by `File` is the part after the ".wh." prefix.
    #[test]
    fn classify_whiteout_regular() {
        match classify_whiteout(b".wh.myfile") {
            WhiteoutKind::File(name) => assert_eq!(name, b"myfile"),
            _ => panic!("expected WhiteoutKind::File"),
        }
    }

    // The bare prefix with nothing after it is not a whiteout.
    #[test]
    fn classify_whiteout_empty_name() {
        assert!(matches!(classify_whiteout(b".wh."), WhiteoutKind::None));
    }

    #[test]
    fn classify_whiteout_normal_file() {
        assert!(matches!(
            classify_whiteout(b"regular_file.txt"),
            WhiteoutKind::None
        ));
    }

    // ---- path helpers ----

    #[test]
    fn path_filename_with_parent() {
        assert_eq!(path_filename(b"a/b/c.txt"), b"c.txt");
    }

    #[test]
    fn path_filename_no_parent() {
        assert_eq!(path_filename(b"file.txt"), b"file.txt");
    }

    #[test]
    fn path_parent_with_components() {
        assert_eq!(path_parent(b"a/b/c.txt"), b"a/b");
    }

    #[test]
    fn path_parent_single_component() {
        assert_eq!(path_parent(b"file.txt"), b"");
    }

    #[test]
    fn replace_filename_with_parent() {
        assert_eq!(
            replace_filename(b"dir/.wh.myfile", b"myfile"),
            b"dir/myfile"
        );
    }

    #[test]
    fn replace_filename_no_parent() {
        assert_eq!(replace_filename(b".wh.myfile", b"myfile"), b"myfile");
    }

    // ---- end-to-end ingest tests ----
    //
    // The `tar` crate is imported as `sync_tar` to build fixture archives in
    // memory; inside this module the name `tar` (via `use super::*`) is the
    // `tokio_tar` alias used by the code under test.
    use ::tar as sync_tar;
    use tempfile::tempdir;

    /// Runs `build` against a fresh in-memory tar builder and returns the
    /// finished archive bytes.
    fn build_tar(build: impl FnOnce(&mut sync_tar::Builder<Vec<u8>>)) -> Vec<u8> {
        let mut builder = sync_tar::Builder::new(Vec::new());
        build(&mut builder);
        builder.into_inner().unwrap()
    }

    // A small regular file is kept in memory with its header metadata.
    #[tokio::test]
    async fn ingest_regular_file() {
        let data = build_tar(|b| {
            let content = b"hello world";
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("foo.txt").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_uid(1000);
            header.set_gid(1000);
            header.set_mtime(1234567890);
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"foo.txt").unwrap() {
            TreeNode::RegularFile(f) => {
                assert_eq!(f.data, FileData::Memory(b"hello world".to_vec()));
                assert_eq!(f.metadata.uid, 1000);
                assert_eq!(f.metadata.gid, 1000);
                assert_eq!(f.metadata.mode, 0o644);
                assert_eq!(f.metadata.mtime, 1234567890);
                assert_eq!(f.nlink, 1);
            }
            _ => panic!("expected regular file"),
        }
    }

    // A file one byte over SPOOL_THRESHOLD goes to the spool when one is
    // supplied, and can be read back intact.
    #[tokio::test]
    async fn ingest_large_file_spools_to_disk() {
        let content = vec![b'x'; SPOOL_THRESHOLD as usize + 1];
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("large.bin").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, content.as_slice()).unwrap();
        });

        let tempdir = tempdir().unwrap();
        let spool_path = tempdir.path().join("layer.spool");
        let mut spool = DataSpool::new(&spool_path).unwrap();
        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, Some(&mut spool))
            .await
            .unwrap();

        match tree.get(b"large.bin").unwrap() {
            TreeNode::RegularFile(f) => {
                assert!(matches!(f.data, FileData::Spool { .. }));
                assert_eq!(f.data.read_all().unwrap(), content);
            }
            _ => panic!("expected regular file"),
        }
    }

    // A directory entry ("mydir/") is stored under the slash-less path.
    #[tokio::test]
    async fn ingest_directory() {
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("mydir/").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Directory);
            header.set_mode(0o755);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"mydir").unwrap() {
            TreeNode::Directory(d) => {
                assert_eq!(d.metadata.mode, 0o755);
            }
            _ => panic!("expected directory"),
        }
    }

    // A symlink keeps its target bytes verbatim, including a leading slash.
    #[tokio::test]
    async fn ingest_symlink() {
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("link").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Symlink);
            header.set_link_name("/usr/bin/target").unwrap();
            header.set_mode(0o777);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"link").unwrap() {
            TreeNode::Symlink(s) => {
                assert_eq!(s.target, b"/usr/bin/target");
            }
            _ => panic!("expected symlink"),
        }
    }

    // A hardlink shares the target's content, and both nodes report nlink 2.
    #[tokio::test]
    async fn ingest_hardlink() {
        let data = build_tar(|b| {
            let content = b"shared data";
            // First entry: the file that will be linked to.
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("original.txt").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();

            // Second entry: the hardlink pointing at it.
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("hardlink.txt").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Link);
            header.set_link_name("original.txt").unwrap();
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"original.txt").unwrap() {
            TreeNode::RegularFile(f) => {
                assert_eq!(f.data, FileData::Memory(b"shared data".to_vec()));
                assert_eq!(f.nlink, 2);
            }
            _ => panic!("expected regular file"),
        }
        match tree.get(b"hardlink.txt").unwrap() {
            TreeNode::RegularFile(f) => {
                assert_eq!(f.data, FileData::Memory(b"shared data".to_vec()));
                assert_eq!(f.nlink, 2);
            }
            _ => panic!("expected regular file"),
        }
    }

    // A hardlink whose target was never ingested is an error.
    #[tokio::test]
    async fn ingest_hardlink_missing_target() {
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("bad_link.txt").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Link);
            header.set_link_name("nonexistent.txt").unwrap();
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let result = ingest_tar(std::io::Cursor::new(data), &limits, None).await;
        assert!(matches!(result, Err(IngestError::HardlinkTarget(_))));
    }

    // A ".wh.<name>" marker becomes a character device at "<name>", and the
    // marker path itself is not stored.
    #[tokio::test]
    async fn ingest_whiteout_file() {
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("dir/.wh.deleted_file").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"dir/deleted_file").unwrap() {
            TreeNode::CharDevice(dev) => {
                assert_eq!(dev.major, 0);
                assert_eq!(dev.minor, 0);
            }
            _ => panic!("expected char device (whiteout)"),
        }

        assert!(tree.get(b"dir/.wh.deleted_file").is_none());
    }

    // An opaque marker inside a directory sets the opaque xattr on that
    // directory.
    #[tokio::test]
    async fn ingest_opaque_whiteout() {
        let data = build_tar(|b| {
            // The directory that will be marked opaque.
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("mydir/").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Directory);
            header.set_mode(0o755);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();

            // The opaque marker inside it.
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("mydir/.wh..wh..opq").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"mydir").unwrap() {
            TreeNode::Directory(d) => {
                assert!(
                    d.xattrs
                        .iter()
                        .any(|x| x.name == OPAQUE_XATTR_NAME && x.value == OPAQUE_XATTR_VALUE)
                );
            }
            _ => panic!("expected directory"),
        }
    }

    // An absolute entry path is accepted and stored relative to the root.
    #[tokio::test]
    async fn ingest_accepts_absolute_path_in_tar() {
        let data = build_tar(|b| {
            let mut header = sync_tar::Header::new_gnu();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            let path_bytes = b"/etc/passwd";
            // The name field is written directly instead of via `set_path`
            // (presumably because `set_path` refuses absolute paths).
            let gnu = header.as_gnu_mut().unwrap();
            gnu.name[..path_bytes.len()].copy_from_slice(path_bytes);
            gnu.name[path_bytes.len()] = 0;
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();
        assert!(matches!(
            tree.get(b"etc/passwd"),
            Some(TreeNode::RegularFile(_))
        ));
    }

    // An absolute hardlink target resolves against the same relative paths
    // as absolute entry names.
    #[tokio::test]
    async fn ingest_accepts_absolute_hardlink_target() {
        let data = build_tar(|b| {
            let content = b"shared data";
            let mut header = sync_tar::Header::new_gnu();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            let path_bytes = b"/nix/store/original.txt";
            let gnu = header.as_gnu_mut().unwrap();
            gnu.name[..path_bytes.len()].copy_from_slice(path_bytes);
            gnu.name[path_bytes.len()] = 0;
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();

            let mut header = sync_tar::Header::new_gnu();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Link);
            let path_bytes = b"/nix/store/link.txt";
            let link_bytes = b"/nix/store/original.txt";
            let gnu = header.as_gnu_mut().unwrap();
            gnu.name[..path_bytes.len()].copy_from_slice(path_bytes);
            gnu.name[path_bytes.len()] = 0;
            gnu.linkname[..link_bytes.len()].copy_from_slice(link_bytes);
            gnu.linkname[link_bytes.len()] = 0;
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        match tree.get(b"nix/store/link.txt").unwrap() {
            TreeNode::RegularFile(f) => {
                assert_eq!(f.data, FileData::Memory(b"shared data".to_vec()));
                assert_eq!(f.nlink, 2);
            }
            _ => panic!("expected regular file"),
        }
    }

    // Five entries against a limit of three trips the entry-count limit.
    #[tokio::test]
    async fn ingest_entry_count_exceeded() {
        let data = build_tar(|b| {
            for i in 0..5 {
                let mut header = sync_tar::Header::new_gnu();
                header.set_path(format!("file{i}.txt")).unwrap();
                header.set_size(0);
                header.set_entry_type(sync_tar::EntryType::Regular);
                header.set_mode(0o644);
                header.set_cksum();
                b.append(&header, &[] as &[u8]).unwrap();
            }
        });

        let limits = ResourceLimits {
            max_entry_count: 3,
            ..ResourceLimits::default()
        };
        let result = ingest_tar(std::io::Cursor::new(data), &limits, None).await;
        assert!(matches!(result, Err(IngestError::EntryCountExceeded)));
    }

    // A 1024-byte file against a 512-byte per-file limit is rejected.
    #[tokio::test]
    async fn ingest_file_too_large() {
        let data = build_tar(|b| {
            let content = vec![0u8; 1024];
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("big.bin").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();
        });

        let limits = ResourceLimits {
            max_file_size: 512,
            ..ResourceLimits::default()
        };
        let result = ingest_tar(std::io::Cursor::new(data), &limits, None).await;
        assert!(matches!(result, Err(IngestError::FileTooLarge(_))));
    }

    // Entry paths written with a "./" prefix are stored without it.
    #[tokio::test]
    async fn ingest_dot_slash_prefix_stripped() {
        let data = build_tar(|b| {
            let content = b"data";
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("./foo/bar.txt").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        assert!(tree.get(b"foo/bar.txt").is_some());
    }

    // The "./" root directory entry does not create a node of its own.
    #[tokio::test]
    async fn ingest_root_entry_skipped() {
        let data = build_tar(|b| {
            // Root directory entry, expected to be skipped.
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("./").unwrap();
            header.set_size(0);
            header.set_entry_type(sync_tar::EntryType::Directory);
            header.set_mode(0o755);
            header.set_cksum();
            b.append(&header, &[] as &[u8]).unwrap();

            // A regular file that must still be ingested.
            let content = b"data";
            let mut header = sync_tar::Header::new_gnu();
            header.set_path("./file.txt").unwrap();
            header.set_size(content.len() as u64);
            header.set_entry_type(sync_tar::EntryType::Regular);
            header.set_mode(0o644);
            header.set_cksum();
            b.append(&header, &content[..]).unwrap();
        });

        let limits = ResourceLimits::default();
        let tree = ingest_tar(std::io::Cursor::new(data), &limits, None)
            .await
            .unwrap();

        assert_eq!(tree.node_count(), 1);
        assert!(tree.get(b"file.txt").is_some());
    }
}