Skip to main content

arcbox_ext4/
unpack.rs

// OCI container image layer unpacking.
//
// Streams a tar archive into the ext4 formatter, handling OCI-specific whiteout
// files (`.wh.*` and `.wh..wh..opq`) and hard-link cycle detection.
5
6use std::collections::HashMap;
7use std::io::Read;
8use std::path::{Path, PathBuf};
9
10use crate::constants::*;
11use crate::error::{FormatError, FormatResult};
12use crate::formatter::{FileTimestamps, Formatter};
13use crate::types::timestamp_now;
14
15impl Formatter {
16    /// Unpack a tar archive onto this ext4 filesystem.
17    ///
18    /// Handles:
19    /// - Regular files, directories, and symbolic links
20    /// - OCI whiteout files (`.wh.<name>` deletes `<name>`, `.wh..wh..opq`
21    ///   deletes all children of the containing directory)
22    /// - Hard links with cycle detection
23    /// - Preservation of uid/gid, permissions, and timestamps
24    pub fn unpack_tar<R: Read>(&mut self, reader: R) -> FormatResult<()> {
25        let mut archive = tar::Archive::new(reader);
26        let mut hardlinks: HashMap<PathBuf, PathBuf> = HashMap::new();
27
28        for entry_result in archive.entries().map_err(io_to_format)? {
29            let mut entry = entry_result.map_err(io_to_format)?;
30            let raw_path = entry.path().map_err(io_to_format)?.into_owned();
31
32            let path_str = preprocess_path(&raw_path);
33            let path = Path::new(&path_str);
34
35            let basename = path
36                .file_name()
37                .and_then(|n| n.to_str())
38                .unwrap_or("");
39
40            // ── OCI whiteouts ──
41            if basename.starts_with(".wh.") {
42                if basename == ".wh..wh..opq" {
43                    // Opaque whiteout: delete all children of the parent dir.
44                    let parent = parent_str(&path_str);
45                    self.unlink(parent, true)?;
46                } else {
47                    // Single-file whiteout: `.wh.<name>` deletes `<name>`.
48                    let target_name = &basename[".wh.".len()..];
49                    let parent = parent_str(&path_str);
50                    let target = if parent == "/" {
51                        format!("/{target_name}")
52                    } else {
53                        format!("{parent}/{target_name}")
54                    };
55                    self.unlink(&target, false)?;
56                }
57                continue;
58            }
59
60            // ── Hard links (deferred) ──
61            // Only treat entries whose type is explicitly `Link` (hard link).
62            // Symlinks also populate `link_name()`, but they must be handled
63            // in the entry-type dispatch below.
64            if entry.header().entry_type() == tar::EntryType::Link {
65                if let Some(link_target) = entry.link_name().map_err(io_to_format)? {
66                    let target_str = preprocess_path(&link_target.into_owned());
67                    hardlinks.insert(
68                        PathBuf::from(&path_str),
69                        PathBuf::from(target_str),
70                    );
71                    continue;
72                }
73            }
74
75            // ── Timestamps ──
76            let ts = entry_timestamps(&entry);
77
78            // ── uid / gid ──
79            let header = entry.header();
80            let uid = header.uid().ok().map(|u| u as u32);
81            let gid = header.gid().ok().map(|g| g as u32);
82            let perm = (header.mode().unwrap_or(0o644) & 0o7777) as u16;
83
84            match entry.header().entry_type() {
85                tar::EntryType::Directory => {
86                    self.create(
87                        &path_str,
88                        make_mode(file_mode::S_IFDIR, perm),
89                        None,
90                        Some(ts),
91                        None,
92                        uid,
93                        gid,
94                        None,
95                    )?;
96                }
97                tar::EntryType::Regular | tar::EntryType::Continuous => {
98                    self.create(
99                        &path_str,
100                        make_mode(file_mode::S_IFREG, perm),
101                        None,
102                        Some(ts),
103                        Some(&mut entry as &mut dyn Read),
104                        uid,
105                        gid,
106                        None,
107                    )?;
108                }
109                tar::EntryType::Symlink => {
110                    let target = entry
111                        .link_name()
112                        .map_err(io_to_format)?
113                        .map(|p| p.to_string_lossy().into_owned());
114                    self.create(
115                        &path_str,
116                        make_mode(file_mode::S_IFLNK, perm),
117                        target.as_deref(),
118                        Some(ts),
119                        None,
120                        uid,
121                        gid,
122                        None,
123                    )?;
124                }
125                // Block/char devices, FIFOs, sockets -- silently skip.
126                _ => continue,
127            }
128        }
129
130        // ── Resolve hard links ──
131        if !check_acyclic(&hardlinks) {
132            return Err(FormatError::CircularLinks);
133        }
134
135        for (link_path, _) in &hardlinks {
136            if let Some(resolved) = resolve_hardlink(link_path, &hardlinks) {
137                let link_str = link_path.to_string_lossy();
138                let target_str = resolved.to_string_lossy();
139                self.link(&link_str, &target_str)?;
140            }
141        }
142
143        Ok(())
144    }
145}
146
// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------
150
/// Normalize a tar entry path into an absolute path starting with "/".
///
/// - A single leading `./` is stripped (`./etc/passwd` -> `/etc/passwd`).
/// - If what remains is empty or a bare `.`, the result is the root `/`, so
///   the archive-root spellings `./` and `.` are treated alike.
/// - A relative remainder gets `/` prepended; an absolute one is returned
///   unchanged.
///
/// Non-UTF-8 bytes are converted lossily. No other normalization is done:
/// `..` components, repeated slashes, and trailing slashes are kept as-is.
fn preprocess_path(p: &Path) -> String {
    let lossy = p.to_string_lossy();
    let rest = lossy.strip_prefix("./").unwrap_or(&*lossy);

    match rest {
        // Root of the archive, spelled "./", ".", or "".
        "" | "." => "/".to_owned(),
        absolute if absolute.starts_with('/') => absolute.to_owned(),
        relative => format!("/{relative}"),
    }
}
167
/// Return the parent directory of a path string.
///
/// Trailing slashes are ignored, then everything before the last `/` is
/// returned. The root, top-level entries, and strings without any `/` all
/// yield "/".
fn parent_str(path: &str) -> &str {
    let trimmed = path.trim_end_matches('/');
    match trimmed.rsplit_once('/') {
        // An empty head means the last separator was the leading "/".
        Some((head, _)) if !head.is_empty() => head,
        _ => "/",
    }
}
180
181/// Build `FileTimestamps` from a tar entry's header.
182fn entry_timestamps<R: Read>(entry: &tar::Entry<'_, R>) -> FileTimestamps {
183    let (now_lo, now_hi) = timestamp_now();
184
185    let mtime = entry.header().mtime().unwrap_or(0);
186    let mtime_lo = mtime as u32;
187
188    FileTimestamps {
189        access_lo: mtime_lo,
190        access_hi: 0,
191        modification_lo: mtime_lo,
192        modification_hi: 0,
193        creation_lo: mtime_lo,
194        creation_hi: 0,
195        now_lo,
196        now_hi,
197    }
198}
199
/// Check that the hard-link map contains no cycles.
///
/// `links` maps a link path to the path it points at. Starting from every
/// target in the map, the chain of links is followed until it either leaves
/// the map (fine) or reaches a path already seen on that walk (a cycle).
///
/// Returns `true` when no walk revisits a path, `false` otherwise. An empty
/// map is acyclic; a link that points at itself is a cycle.
fn check_acyclic(links: &HashMap<PathBuf, PathBuf>) -> bool {
    links.values().all(|start| {
        // Paths are borrowed from the map, so walking allocates no `PathBuf`s.
        let mut seen = std::collections::HashSet::new();
        seen.insert(start);

        let mut cursor = start;
        while let Some(next) = links.get(cursor) {
            // `insert` returns false when `next` was already on this walk.
            if !seen.insert(next) {
                return false;
            }
            cursor = next;
        }
        true
    })
}
216
/// Resolve a hard-link chain to its final target path.
///
/// Looks up `key` in `links` and keeps following targets that are themselves
/// link paths until reaching one that is not a key of the map.
///
/// Returns `None` when `key` is not in the map, or when the chain revisits a
/// path (a cycle); otherwise returns the final target.
fn resolve_hardlink(
    key: &Path,
    links: &HashMap<PathBuf, PathBuf>,
) -> Option<PathBuf> {
    let mut current = links.get(key)?;

    // Borrow paths from the map while walking; only the result is cloned.
    let mut seen = std::collections::HashSet::new();
    seen.insert(current);

    while let Some(next) = links.get(current) {
        // `insert` returns false when `next` was already visited: a cycle.
        if !seen.insert(next) {
            return None;
        }
        current = next;
    }
    Some(current.clone())
}
235
/// Wrap a `std::io::Error` in `FormatError::Io`.
///
/// A named function rather than a closure so it can be passed directly to
/// `map_err`.
fn io_to_format(e: std::io::Error) -> FormatError {
    FormatError::Io(e)
}
239
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a hard-link map from `(link, target)` string pairs.
    fn link_map(pairs: &[(&str, &str)]) -> HashMap<PathBuf, PathBuf> {
        pairs
            .iter()
            .map(|&(link, target)| (PathBuf::from(link), PathBuf::from(target)))
            .collect()
    }

    // -- preprocess_path tests -----------------------------------------------

    #[test]
    fn test_preprocess_path_relative() {
        let normalized = preprocess_path(Path::new("etc/passwd"));
        assert_eq!(normalized, "/etc/passwd");
    }

    #[test]
    fn test_preprocess_path_dot_prefix() {
        let normalized = preprocess_path(Path::new("./etc/passwd"));
        assert_eq!(normalized, "/etc/passwd");
    }

    #[test]
    fn test_preprocess_path_absolute() {
        let normalized = preprocess_path(Path::new("/usr/bin"));
        assert_eq!(normalized, "/usr/bin");
    }

    #[test]
    fn test_preprocess_path_dot_only() {
        // Stripping "./" leaves "", which then gains the leading "/".
        let normalized = preprocess_path(Path::new("./"));
        assert_eq!(normalized, "/");
    }

    #[test]
    fn test_preprocess_path_bare_name() {
        let normalized = preprocess_path(Path::new("file.txt"));
        assert_eq!(normalized, "/file.txt");
    }

    // -- parent_str tests ----------------------------------------------------

    #[test]
    fn test_parent_str_root() {
        assert_eq!(parent_str("/"), "/");
    }

    #[test]
    fn test_parent_str_top_level() {
        assert_eq!(parent_str("/etc"), "/");
    }

    #[test]
    fn test_parent_str_nested() {
        assert_eq!(parent_str("/etc/passwd"), "/etc");
    }

    #[test]
    fn test_parent_str_deep() {
        assert_eq!(parent_str("/a/b/c/d"), "/a/b/c");
    }

    #[test]
    fn test_parent_str_trailing_slash() {
        // The trailing slash is dropped first, so this behaves like "/etc".
        assert_eq!(parent_str("/etc/"), "/");
    }

    // -- check_acyclic tests -------------------------------------------------

    #[test]
    fn test_check_acyclic_empty() {
        assert!(check_acyclic(&HashMap::new()));
    }

    #[test]
    fn test_check_acyclic_simple_chain() {
        let links = link_map(&[("/b", "/a"), ("/c", "/b")]);
        assert!(check_acyclic(&links));
    }

    #[test]
    fn test_check_acyclic_cycle() {
        let links = link_map(&[("/a", "/b"), ("/b", "/a")]);
        assert!(!check_acyclic(&links));
    }

    #[test]
    fn test_check_acyclic_three_node_cycle() {
        let links = link_map(&[("/a", "/b"), ("/b", "/c"), ("/c", "/a")]);
        assert!(!check_acyclic(&links));
    }

    // -- resolve_hardlink tests ----------------------------------------------

    #[test]
    fn test_resolve_hardlink_direct() {
        // "/target" is not itself a link, so resolution stops right there.
        let links = link_map(&[("/link", "/target")]);
        assert_eq!(
            resolve_hardlink(Path::new("/link"), &links),
            Some(PathBuf::from("/target"))
        );
    }

    #[test]
    fn test_resolve_hardlink_chain() {
        // "/a" is not a key, so the chain "/c" -> "/b" -> "/a" ends at "/a".
        let links = link_map(&[("/c", "/b"), ("/b", "/a")]);
        assert_eq!(
            resolve_hardlink(Path::new("/c"), &links),
            Some(PathBuf::from("/a"))
        );
    }

    #[test]
    fn test_resolve_hardlink_not_found() {
        let links = HashMap::new();
        assert_eq!(resolve_hardlink(Path::new("/nonexistent"), &links), None);
    }

    #[test]
    fn test_resolve_hardlink_cycle_returns_none() {
        let links = link_map(&[("/a", "/b"), ("/b", "/a")]);
        assert_eq!(resolve_hardlink(Path::new("/a"), &links), None);
    }
}