Skip to main content

initramfs_builder/image/
layer.rs

1use anyhow::{Context, Result};
2use flate2::read::GzDecoder;
3use std::collections::HashSet;
4use std::fs;
5use std::path::{Path, PathBuf};
6use tar::Archive;
7use tracing::debug;
8
9pub struct LayerExtractor {
10    exclude_patterns: Vec<glob::Pattern>,
11    whiteouts: HashSet<PathBuf>,
12    opaque_dirs: HashSet<PathBuf>,
13}
14
15impl LayerExtractor {
16    pub fn new() -> Self {
17        Self {
18            exclude_patterns: Vec::new(),
19            whiteouts: HashSet::new(),
20            opaque_dirs: HashSet::new(),
21        }
22    }
23
24    pub fn with_excludes(mut self, patterns: &[&str]) -> Result<Self> {
25        for pattern in patterns {
26            let compiled = glob::Pattern::new(pattern)
27                .with_context(|| format!("Invalid glob pattern: {}", pattern))?;
28            self.exclude_patterns.push(compiled);
29        }
30        Ok(self)
31    }
32
33    fn should_exclude(&self, path: &Path) -> bool {
34        let path_str = path.to_string_lossy();
35        self.exclude_patterns
36            .iter()
37            .any(|p| p.matches(&path_str) || p.matches_path(path))
38    }
39
40    /// Extract a single layer (gzipped tar) to the target directory
41    pub fn extract_layer(&mut self, layer_data: &[u8], target_dir: &Path) -> Result<()> {
42        // First pass: collect whiteouts
43        let decoder = GzDecoder::new(layer_data);
44        let mut archive = Archive::new(decoder);
45
46        for entry in archive.entries()? {
47            let entry = entry?;
48            let path = entry.path()?;
49
50            if let Some(name) = path.file_name() {
51                let name_str = name.to_string_lossy();
52
53                if name_str == ".wh..wh..opq" {
54                    if let Some(parent) = path.parent() {
55                        debug!("Opaque whiteout for directory: {:?}", parent);
56                        self.opaque_dirs.insert(parent.to_path_buf());
57
58                        // Remove existing directory contents
59                        let full_path = target_dir.join(parent);
60                        if full_path.exists() {
61                            fs::remove_dir_all(&full_path).ok();
62                            fs::create_dir_all(&full_path)?;
63                        }
64                    }
65                } else if name_str.starts_with(".wh.") {
66                    let deleted_name = name_str.strip_prefix(".wh.").unwrap();
67                    let deleted_path = path
68                        .parent()
69                        .map_or_else(|| PathBuf::from(deleted_name), |p| p.join(deleted_name));
70                    debug!("Whiteout for file: {:?}", deleted_path);
71                    self.whiteouts.insert(deleted_path.to_path_buf());
72
73                    let full_path = target_dir.join(&deleted_path);
74                    if full_path.exists() {
75                        if full_path.is_dir() {
76                            fs::remove_dir_all(&full_path).ok();
77                        } else {
78                            fs::remove_file(&full_path).ok();
79                        }
80                    }
81                }
82            }
83        }
84
85        // Second pass: extract files with proper handling
86        let decoder2 = GzDecoder::new(layer_data);
87        let mut archive2 = Archive::new(decoder2);
88        archive2.set_preserve_permissions(true);
89        archive2.set_preserve_mtime(true);
90        // Don't preserve ownership on extraction (we're not root)
91        archive2.set_unpack_xattrs(false);
92
93        for entry in archive2.entries()? {
94            let mut entry = entry?;
95            let path = entry.path()?;
96            let path_owned = path.to_path_buf();
97
98            // Skip whiteout marker files
99            if let Some(name) = path.file_name() {
100                let name_str = name.to_string_lossy();
101                if name_str.starts_with(".wh.") {
102                    continue;
103                }
104            }
105
106            // Skip excluded paths
107            if self.should_exclude(&path_owned) {
108                debug!("Excluding: {:?}", path_owned);
109                continue;
110            }
111
112            let target_path = target_dir.join(&path_owned);
113
114            // Ensure parent directory exists
115            if let Some(parent) = target_path.parent() {
116                fs::create_dir_all(parent)?;
117            }
118
119            // Handle different entry types
120            let entry_type = entry.header().entry_type();
121
122            match entry_type {
123                tar::EntryType::Link => {
124                    // Hard link - get the link target and copy instead
125                    if let Ok(Some(link_target)) = entry.link_name() {
126                        let source_path = target_dir.join(link_target.as_ref());
127                        if source_path.exists() {
128                            // Try hard link first, fall back to copy
129                            if fs::hard_link(&source_path, &target_path).is_err() {
130                                fs::copy(&source_path, &target_path).ok();
131                            }
132                        }
133                    }
134                }
135                tar::EntryType::Symlink => {
136                    // Symlink - create it
137                    if let Ok(Some(link_target)) = entry.link_name() {
138                        // Remove existing file if any
139                        if target_path.exists() || target_path.is_symlink() {
140                            fs::remove_file(&target_path).ok();
141                        }
142                        #[cfg(unix)]
143                        std::os::unix::fs::symlink(link_target.as_ref(), &target_path).ok();
144                    }
145                }
146                _ => {
147                    // Regular file or directory - use normal unpack
148                    entry
149                        .unpack(&target_path)
150                        .with_context(|| format!("Failed to extract {:?}", path_owned))?;
151                }
152            }
153        }
154
155        Ok(())
156    }
157
158    /// Extract all layers in order to build the final rootfs
159    pub fn extract_all_layers(&mut self, layers: &[Vec<u8>], target_dir: &Path) -> Result<()> {
160        fs::create_dir_all(target_dir)?;
161
162        for (idx, layer_data) in layers.iter().enumerate() {
163            debug!("Extracting layer {}/{}", idx + 1, layers.len());
164            self.extract_layer(layer_data, target_dir)?;
165        }
166
167        Ok(())
168    }
169}
170
171impl Default for LayerExtractor {
172    fn default() -> Self {
173        Self::new()
174    }
175}
176
#[cfg(test)]
mod tests {
    use super::*;

    /// Paths matching a configured glob are excluded; other paths are kept.
    #[test]
    fn test_exclude_patterns() {
        let patterns = ["/usr/share/doc/*", "*.pyc"];
        let extractor = LayerExtractor::new().with_excludes(&patterns).unwrap();

        // (path, whether it is expected to be excluded)
        let cases = [
            ("/usr/share/doc/readme.txt", true),
            ("module.pyc", true),
            ("/usr/bin/python", false),
        ];

        for (candidate, expected) in cases.iter() {
            assert_eq!(extractor.should_exclude(Path::new(candidate)), *expected);
        }
    }
}