docker_image_pusher/registry/
tar_utils.rs1use crate::error::{RegistryError, Result};
7use std::fs::File;
8use std::io::Read;
9use std::path::Path;
10use tar::Archive;
11
/// Namespace type for stateless helpers that read image tar archives.
///
/// The type holds no data; the helpers in this file are associated functions
/// that take the archive path as an argument.
#[derive(Debug, Clone, Copy, Default)]
pub struct TarUtils;
14
15impl TarUtils {
16 pub fn extract_layer_data(tar_path: &Path, layer_path: &str) -> Result<Vec<u8>> {
21 let file = File::open(tar_path)
22 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
23 let mut archive = Archive::new(file);
24 archive.set_ignore_zeros(true);
25
26 for entry_result in archive.entries().map_err(|e| {
27 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
28 })? {
29 let mut entry = entry_result.map_err(|e| {
30 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
31 })?;
32
33 let path = entry
34 .path()
35 .map_err(|e| {
36 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
37 })?
38 .to_string_lossy()
39 .to_string();
40
41 if path == layer_path {
42 let mut data = Vec::new();
43 entry.read_to_end(&mut data).map_err(|e| {
44 RegistryError::ImageParsing(format!("Failed to read layer data: {}", e))
45 })?;
46
47 return Ok(data);
50 }
51 }
52
53 Err(RegistryError::ImageParsing(format!(
54 "Layer '{}' not found in tar archive",
55 layer_path
56 )))
57 }
58
59 pub fn find_layer_offset(tar_path: &Path, layer_path: &str) -> Result<u64> {
61 let file = File::open(tar_path)
62 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
63 let mut archive = Archive::new(file);
64 archive.set_ignore_zeros(true);
65
66 let mut current_offset = 0u64;
67
68 for entry_result in archive.entries().map_err(|e| {
69 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
70 })? {
71 let entry = entry_result.map_err(|e| {
72 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
73 })?;
74
75 let path = entry
76 .path()
77 .map_err(|e| {
78 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
79 })?
80 .to_string_lossy()
81 .to_string();
82
83 if path == layer_path {
84 return Ok(current_offset);
85 }
86
87 let size = entry.header().size().map_err(|e| {
89 RegistryError::ImageParsing(format!("Failed to read entry size: {}", e))
90 })?;
91
92 current_offset += size + 512; }
94
95 Err(RegistryError::ImageParsing(format!(
96 "Layer '{}' not found for offset calculation",
97 layer_path
98 )))
99 }
100
101 pub fn list_tar_entries(tar_path: &Path) -> Result<Vec<(String, u64)>> {
103 let file = File::open(tar_path)
104 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
105 let mut archive = Archive::new(file);
106 archive.set_ignore_zeros(true);
107
108 let mut entries = Vec::new();
109
110 for entry_result in archive.entries().map_err(|e| {
111 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
112 })? {
113 let entry = entry_result.map_err(|e| {
114 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
115 })?;
116
117 let path = entry
118 .path()
119 .map_err(|e| {
120 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
121 })?
122 .to_string_lossy()
123 .to_string();
124
125 let size = entry.header().size().map_err(|e| {
126 RegistryError::ImageParsing(format!("Failed to read entry size: {}", e))
127 })?;
128
129 entries.push((path, size));
130 }
131
132 Ok(entries)
133 }
134
135 pub fn validate_tar_archive(tar_path: &Path) -> Result<()> {
137 let file = File::open(tar_path)
138 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
139 let mut archive = Archive::new(file);
140 archive.set_ignore_zeros(true);
141
142 let mut entry_count = 0;
144 for entry_result in archive.entries().map_err(|e| {
145 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
146 })? {
147 let entry = entry_result.map_err(|e| {
148 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
149 })?;
150
151 let _ = entry.path().map_err(|e| {
153 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
154 })?;
155
156 entry_count += 1;
157
158 if entry_count >= 10 {
160 break;
161 }
162 }
163
164 if entry_count == 0 {
165 return Err(RegistryError::ImageParsing(
166 "Tar archive appears to be empty".to_string(),
167 ));
168 }
169
170 Ok(())
171 }
172
173 pub fn extract_manifest(tar_path: &Path) -> Result<String> {
177 let file = File::open(tar_path)
178 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
179 let mut archive = Archive::new(file);
180
181 for entry_result in archive.entries().map_err(|e| {
182 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
183 })? {
184 let mut entry = entry_result.map_err(|e| {
185 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
186 })?;
187
188 let path = entry.path().map_err(|e| {
189 RegistryError::ImageParsing(format!("Failed to get entry path: {}", e))
190 })?;
191
192 if path.to_string_lossy() == "manifest.json" {
193 let mut content = String::new();
194 entry
195 .read_to_string(&mut content)
196 .map_err(|e| RegistryError::Io(format!("Failed to read manifest: {}", e)))?;
197
198 return Ok(content);
199 }
200 }
201
202 Err(RegistryError::ImageParsing(
203 "manifest.json not found in tar file".to_string(),
204 ))
205 }
206
207 pub fn extract_config(tar_path: &Path, config_path: &str) -> Result<String> {
211 let file = File::open(tar_path)
212 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
213 let mut archive = Archive::new(file);
214
215 for entry_result in archive.entries().map_err(|e| {
216 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
217 })? {
218 let mut entry = entry_result.map_err(|e| {
219 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
220 })?;
221
222 let path = entry.path().map_err(|e| {
223 RegistryError::ImageParsing(format!("Failed to get entry path: {}", e))
224 })?;
225
226 if path.to_string_lossy() == config_path {
227 let mut content = String::new();
228 entry
229 .read_to_string(&mut content)
230 .map_err(|e| RegistryError::Io(format!("Failed to read config: {}", e)))?;
231
232 return Ok(content);
233 }
234 }
235
236 Err(RegistryError::ImageParsing(format!(
237 "Config file {} not found in tar file",
238 config_path
239 )))
240 }
241
242 pub fn extract_config_data(tar_path: &Path, config_digest: &str) -> Result<Vec<u8>> {
244 let digest_hash = config_digest.replace("sha256:", "");
245
246 let possible_paths = vec![
248 format!("{}.json", digest_hash), format!("blobs/sha256/{}", digest_hash), format!("{}/json", digest_hash), ];
252
253 let file = File::open(tar_path)
254 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
255 let mut archive = Archive::new(file);
256
257 for entry_result in archive.entries().map_err(|e| {
258 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
259 })? {
260 let mut entry = entry_result.map_err(|e| {
261 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
262 })?;
263
264 let path = entry
265 .path()
266 .map_err(|e| {
267 RegistryError::ImageParsing(format!("Failed to get entry path: {}", e))
268 })?
269 .to_string_lossy()
270 .to_string();
271
272 for possible_path in &possible_paths {
274 if path == *possible_path || path.ends_with(possible_path) {
275 let mut data = Vec::new();
276 entry.read_to_end(&mut data).map_err(|e| {
277 RegistryError::Io(format!("Failed to read config data: {}", e))
278 })?;
279
280 return Ok(data);
281 }
282 }
283 }
284
285 Err(RegistryError::ImageParsing(format!(
286 "Config file for digest {} not found in tar file. Tried paths: {:?}",
287 config_digest, possible_paths
288 )))
289 }
290
291 pub fn parse_image_info(tar_path: &Path) -> Result<crate::image::parser::ImageInfo> {
293 let manifest_content = Self::extract_manifest(tar_path)?;
294 let manifest: Vec<serde_json::Value> = serde_json::from_str(&manifest_content)?;
295
296 let image_manifest = manifest
297 .first()
298 .ok_or_else(|| RegistryError::ImageParsing("Empty manifest array".to_string()))?;
299
300 let config_file = image_manifest
302 .get("Config")
303 .and_then(|c| c.as_str())
304 .ok_or_else(|| RegistryError::ImageParsing("Config field not found".to_string()))?;
305
306 let config_digest = if config_file.starts_with("blobs/sha256/") {
307 format!("sha256:{}", config_file.replace("blobs/sha256/", ""))
309 } else if config_file.contains("/") && config_file.ends_with(".json") {
310 let digest_part = config_file.split('/').next().unwrap_or("");
312 format!("sha256:{}", digest_part)
313 } else {
314 format!("sha256:{}", config_file.replace(".json", ""))
316 };
317
318 let layers_array = image_manifest
320 .get("Layers")
321 .and_then(|l| l.as_array())
322 .ok_or_else(|| RegistryError::ImageParsing("Layers field not found".to_string()))?;
323
324 let mut layers = Vec::new();
325 for layer_file in layers_array {
326 let layer_path = layer_file
327 .as_str()
328 .ok_or_else(|| RegistryError::ImageParsing("Invalid layer path".to_string()))?;
329
330 let (digest, size) = Self::get_layer_info_from_tar(tar_path, layer_path)?;
331
332 layers.push(crate::image::parser::LayerInfo {
333 digest,
334 size,
335 tar_path: layer_path.to_string(),
336 media_type: "application/vnd.docker.image.rootfs.diff.tar.gzip".to_string(),
337 compressed_size: Some(size),
338 offset: None,
339 });
340 }
341
342 let config_size = Self::get_config_size_from_tar(tar_path, &config_digest)?;
343 let total_size = layers.iter().map(|l| l.size).sum();
344
345 Ok(crate::image::parser::ImageInfo {
346 config_digest,
347 config_size,
348 layers,
349 total_size,
350 })
351 }
352
353 fn get_layer_info_from_tar(tar_path: &Path, layer_path: &str) -> Result<(String, u64)> {
354 let file = File::open(tar_path)?;
355 let mut archive = Archive::new(file);
356
357 for entry_result in archive.entries()? {
358 let mut entry = entry_result?;
359 let path = entry.path()?.to_string_lossy().to_string();
360
361 if path == layer_path {
362 let size = entry.size();
363 let mut data = Vec::new();
364 entry.read_to_end(&mut data)?;
365
366 let digest = format!(
367 "sha256:{}",
368 hex::encode(crate::image::digest::DigestUtils::compute_sha256(&data))
369 );
370 return Ok((digest, size));
371 }
372 }
373
374 Err(RegistryError::ImageParsing(format!(
375 "Layer {} not found",
376 layer_path
377 )))
378 }
379
380 fn get_config_size_from_tar(tar_path: &Path, config_digest: &str) -> Result<u64> {
381 let file = File::open(tar_path)?;
382 let mut archive = Archive::new(file);
383
384 let possible_paths = vec![
386 format!("blobs/sha256/{}", config_digest.replace("sha256:", "")),
388 format!("{}.json", config_digest.replace("sha256:", "")),
390 format!("{}/json", config_digest.replace("sha256:", "")),
392 ];
393
394 for entry_result in archive.entries()? {
395 let entry = entry_result?;
396 let path = entry.path()?.to_string_lossy().to_string();
397
398 for possible_path in &possible_paths {
399 if path == *possible_path {
400 return Ok(entry.size());
401 }
402 }
403 }
404
405 Err(RegistryError::ImageParsing(format!(
406 "Config file not found for digest {}",
407 config_digest
408 )))
409 }
410
411 pub async fn extract_layer_data_streaming(
415 tar_path: &Path,
416 layer_path: &str,
417 ) -> Result<Vec<u8>> {
418 use tokio::task;
419
420 let tar_path = tar_path.to_path_buf();
421 let layer_path = layer_path.to_string();
422
423 task::spawn_blocking(move || {
425 let file = File::open(&tar_path)
426 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
427
428 let mut archive = Archive::new(file);
429 archive.set_ignore_zeros(true);
430
431 for entry_result in archive.entries().map_err(|e| {
432 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
433 })? {
434 let mut entry = entry_result.map_err(|e| {
435 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
436 })?;
437
438 let path = entry
439 .path()
440 .map_err(|e| {
441 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
442 })?
443 .to_string_lossy()
444 .to_string();
445
446 if path == layer_path {
447 let mut data = Vec::new();
449 const CHUNK_SIZE: usize = 64 * 1024; let mut buffer = vec![0u8; CHUNK_SIZE];
451
452 loop {
453 let bytes_read = entry.read(&mut buffer).map_err(|e| {
454 RegistryError::ImageParsing(format!(
455 "Failed to read layer chunk: {}",
456 e
457 ))
458 })?;
459
460 if bytes_read == 0 {
461 break;
462 }
463
464 data.extend_from_slice(&buffer[..bytes_read]);
465 }
466
467 return Ok(data);
468 }
469 }
470
471 Err(RegistryError::ImageParsing(format!(
472 "Layer '{}' not found in tar archive",
473 layer_path
474 )))
475 })
476 .await
477 .map_err(|e| RegistryError::Upload(format!("Streaming extraction task failed: {}", e)))?
478 }
479
480 pub fn extract_layer_data_limited(
482 tar_path: &Path,
483 layer_path: &str,
484 max_size: u64,
485 ) -> Result<Vec<u8>> {
486 let file = File::open(tar_path)
487 .map_err(|e| RegistryError::Io(format!("Failed to open tar file: {}", e)))?;
488 let mut archive = Archive::new(file);
489 archive.set_ignore_zeros(true);
490
491 for entry_result in archive.entries().map_err(|e| {
492 RegistryError::ImageParsing(format!("Failed to read tar entries: {}", e))
493 })? {
494 let mut entry = entry_result.map_err(|e| {
495 RegistryError::ImageParsing(format!("Failed to read tar entry: {}", e))
496 })?;
497
498 let path = entry
499 .path()
500 .map_err(|e| {
501 RegistryError::ImageParsing(format!("Failed to read entry path: {}", e))
502 })?
503 .to_string_lossy()
504 .to_string();
505
506 if path == layer_path {
507 let size = entry.header().size().map_err(|e| {
508 RegistryError::ImageParsing(format!("Failed to read entry size: {}", e))
509 })?;
510
511 if size > max_size {
512 return Err(RegistryError::Validation(format!(
513 "Layer size {} exceeds limit {}",
514 size, max_size
515 )));
516 }
517
518 let mut data = Vec::with_capacity(size as usize);
519 entry.read_to_end(&mut data).map_err(|e| {
520 RegistryError::ImageParsing(format!("Failed to read layer data: {}", e))
521 })?;
522
523 return Ok(data);
524 }
525 }
526
527 Err(RegistryError::ImageParsing(format!(
528 "Layer '{}' not found in tar archive",
529 layer_path
530 )))
531 }
532}