docker_image_pusher/
tar_utils.rs

1//! Shared tar processing utilities to eliminate duplication
2//!
3//! This module provides [`TarUtils`] for extracting layer data and handling tarball offsets.
4//! It ensures that layer data is extracted in the correct format (gzip or uncompressed) for digest validation and upload.
5
6use crate::error::{PusherError, Result};
7use std::fs::File;
8use std::io::Read;
9use std::path::Path;
10use tar::Archive;
11
/// Namespace type for tar processing helpers (layer extraction, entry listing,
/// offset lookup, and archive validation).
///
/// The type carries no state; every helper is an associated function that takes
/// the tarball path explicitly.
#[derive(Debug, Clone, Copy, Default)]
pub struct TarUtils;
14
15impl TarUtils {
16    /// Extract layer data from tar archive
17    ///
18    /// 注意:Docker 镜像层的 digest 必须基于 gzip 压缩后的 tar 字节流计算。
19    /// 本方法会自动检测数据是否已经是 gzip 格式(通过检查 gzip 魔数 0x1f 0x8b),
20    /// 如果不是则进行 gzip 压缩,保证返回的数据始终为 gzip 字节流,
21    /// 便于后续 digest 校验和上传。
22    ///
23    /// 参数 layer_path 应为 manifest.json 中的层路径(如 xxx/layer.tar 或 blobs/sha256/xxx)。
24    ///
25    /// 重要:Docker/Podman 中的层 digest 是基于 gzip 压缩后的 tar 内容计算的,
26    /// 而不是基于原始的 tar 内容,因此必须确保正确的压缩格式。
27    pub fn extract_layer_data(tar_path: &Path, layer_path: &str) -> Result<Vec<u8>> {
28        let file = File::open(tar_path)
29            .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
30        let mut archive = Archive::new(file);
31        archive.set_ignore_zeros(true);
32
33        // First look for the original layer file path (from manifest)
34        for entry_result in archive
35            .entries()
36            .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?
37        {
38            let mut entry = entry_result.map_err(|e| {
39                PusherError::ImageParsing(format!("Failed to read tar entry: {}", e))
40            })?;
41
42            let path = entry
43                .path()
44                .map_err(|e| {
45                    PusherError::ImageParsing(format!("Failed to read entry path: {}", e))
46                })?
47                .to_string_lossy()
48                .to_string();
49
50            if path == layer_path {
51                // Found the exact layer path in the tar archive
52                let mut data = Vec::new();
53                entry.read_to_end(&mut data).map_err(|e| {
54                    PusherError::ImageParsing(format!("Failed to read layer data: {}", e))
55                })?;
56
57                // Keep the original format of the layer file as it appears in the Docker tar archive
58                // This preserves the digest calculation exactly as Docker expects it
59                // DO NOT modify or compress the data here - return it exactly as stored in the archive
60                return Ok(data);
61            }
62        }
63
64        // If we didn't find the exact path, try a more flexible approach
65        // Second pass: look for any file that matches the digest in the path
66        let digest_part = if layer_path.contains("sha256:") {
67            layer_path.split("sha256:").nth(1).unwrap_or("")
68        } else {
69            // Extract digest from filename like "abc123def.tar.gz"
70            layer_path.split('.').next().unwrap_or("")
71        };
72
73        if digest_part.len() >= 8 {
74            // Try to find a file containing this digest part
75            let file = File::open(tar_path).map_err(|e| {
76                PusherError::Io(format!("Failed to open tar file (second pass): {}", e))
77            })?;
78            let mut archive = Archive::new(file);
79            archive.set_ignore_zeros(true);
80
81            for entry_result in archive.entries().map_err(|e| {
82                PusherError::ImageParsing(format!(
83                    "Failed to read tar entries (second pass): {}",
84                    e
85                ))
86            })? {
87                let mut entry = entry_result.map_err(|e| {
88                    PusherError::ImageParsing(format!(
89                        "Failed to read tar entry (second pass): {}",
90                        e
91                    ))
92                })?;
93
94                let path = entry
95                    .path()
96                    .map_err(|e| {
97                        PusherError::ImageParsing(format!(
98                            "Failed to read entry path (second pass): {}",
99                            e
100                        ))
101                    })?
102                    .to_string_lossy()
103                    .to_string();
104
105                if path.contains(digest_part) {
106                    let mut data = Vec::new();
107                    entry.read_to_end(&mut data).map_err(|e| {
108                        PusherError::ImageParsing(format!(
109                            "Failed to read layer data (second pass): {}",
110                            e
111                        ))
112                    })?;
113                    // Keep the original format
114                    return Ok(data);
115                }
116            }
117        }
118
119        // Last resort: try to find any layer tar file in the archive
120        let file = File::open(tar_path).map_err(|e| {
121            PusherError::Io(format!("Failed to open tar file (last resort): {}", e))
122        })?;
123        let mut archive = Archive::new(file);
124        archive.set_ignore_zeros(true);
125
126        for entry_result in archive.entries().map_err(|e| {
127            PusherError::ImageParsing(format!("Failed to read tar entries (last resort): {}", e))
128        })? {
129            let mut entry = entry_result.map_err(|e| {
130                PusherError::ImageParsing(format!("Failed to read tar entry (last resort): {}", e))
131            })?;
132
133            let path = entry
134                .path()
135                .map_err(|e| {
136                    PusherError::ImageParsing(format!(
137                        "Failed to read entry path (last resort): {}",
138                        e
139                    ))
140                })?
141                .to_string_lossy()
142                .to_string();
143
144            if (path.ends_with(".tar") || path.ends_with(".tar.gz"))
145                && (path.contains("layer") || path.contains("blob"))
146            {
147                let mut data = Vec::new();
148                entry.read_to_end(&mut data).map_err(|e| {
149                    PusherError::ImageParsing(format!(
150                        "Failed to read layer data (last resort): {}",
151                        e
152                    ))
153                })?;
154
155                return Ok(data);
156            }
157        }
158
159        Err(PusherError::ImageParsing(format!(
160            "Layer '{}' not found in tar archive",
161            layer_path
162        )))
163    }
164
165    /// Find the offset of a layer within the tar archive
166    pub fn find_layer_offset(tar_path: &Path, layer_path: &str) -> Result<u64> {
167        let file = File::open(tar_path)
168            .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
169        let mut archive = Archive::new(file);
170        archive.set_ignore_zeros(true);
171
172        let mut current_offset = 0u64;
173
174        for entry_result in archive
175            .entries()
176            .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?
177        {
178            let entry = entry_result.map_err(|e| {
179                PusherError::ImageParsing(format!("Failed to read tar entry: {}", e))
180            })?;
181
182            let path = entry
183                .path()
184                .map_err(|e| {
185                    PusherError::ImageParsing(format!("Failed to read entry path: {}", e))
186                })?
187                .to_string_lossy()
188                .to_string();
189
190            if path == layer_path {
191                return Ok(current_offset);
192            }
193
194            // Calculate entry size including headers (simplified calculation)
195            let size = entry.header().size().map_err(|e| {
196                PusherError::ImageParsing(format!("Failed to read entry size: {}", e))
197            })?;
198
199            current_offset += size + 512; // 512 bytes for TAR header (simplified)
200        }
201
202        Err(PusherError::ImageParsing(format!(
203            "Layer '{}' not found for offset calculation",
204            layer_path
205        )))
206    }
207
208    /// Get a list of all entries in the tar archive with their sizes
209    pub fn list_tar_entries(tar_path: &Path) -> Result<Vec<(String, u64)>> {
210        let file = File::open(tar_path)
211            .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
212        let mut archive = Archive::new(file);
213        archive.set_ignore_zeros(true);
214
215        let mut entries = Vec::new();
216
217        for entry_result in archive
218            .entries()
219            .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?
220        {
221            let entry = entry_result.map_err(|e| {
222                PusherError::ImageParsing(format!("Failed to read tar entry: {}", e))
223            })?;
224
225            let path = entry
226                .path()
227                .map_err(|e| {
228                    PusherError::ImageParsing(format!("Failed to read entry path: {}", e))
229                })?
230                .to_string_lossy()
231                .to_string();
232
233            let size = entry.header().size().map_err(|e| {
234                PusherError::ImageParsing(format!("Failed to read entry size: {}", e))
235            })?;
236
237            entries.push((path, size));
238        }
239
240        Ok(entries)
241    }
242
243    /// Validate that a tar archive is readable and properly formatted
244    pub fn validate_tar_archive(tar_path: &Path) -> Result<()> {
245        let file = File::open(tar_path)
246            .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
247        let mut archive = Archive::new(file);
248        archive.set_ignore_zeros(true);
249
250        // Try to read the first few entries to validate format
251        let mut entry_count = 0;
252        for entry_result in archive
253            .entries()
254            .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?
255        {
256            let entry = entry_result.map_err(|e| {
257                PusherError::ImageParsing(format!("Failed to read tar entry: {}", e))
258            })?;
259
260            // Validate that we can read the path
261            let _ = entry.path().map_err(|e| {
262                PusherError::ImageParsing(format!("Failed to read entry path: {}", e))
263            })?;
264
265            entry_count += 1;
266
267            // Only validate the first 10 entries for performance
268            if entry_count >= 10 {
269                break;
270            }
271        }
272
273        if entry_count == 0 {
274            return Err(PusherError::ImageParsing(
275                "Tar archive appears to be empty".to_string(),
276            ));
277        }
278
279        Ok(())
280    }
281
282    /// Check if data is in gzip format by examining the gzip magic number (0x1f 0x8b)
283    pub fn is_gzipped(data: &[u8]) -> bool {
284        data.len() >= 2 && data[0] == 0x1f && data[1] == 0x8b
285    }
286}