1use crate::digest::DigestUtils;
7use crate::error::{PusherError, Result};
8use crate::output::OutputManager;
9use crate::tar_utils::TarUtils;
10use serde::{Deserialize, Serialize};
11use std::fs::File;
12use std::io::Read;
13use std::path::Path;
14use std::time::Instant;
15use tar::Archive;
16
17#[derive(Debug, Deserialize, Serialize, Clone)]
18pub struct LayerInfo {
19 pub digest: String,
20 pub size: u64,
21 pub media_type: String,
22 pub tar_path: String,
23 pub compressed_size: Option<u64>,
24 pub offset: Option<u64>,
25}
26
27#[derive(Debug, Deserialize, Serialize)]
28pub struct ImageConfig {
29 pub architecture: Option<String>,
30 pub os: Option<String>,
31 pub config: Option<serde_json::Value>,
32 pub rootfs: Option<serde_json::Value>,
33 pub history: Option<Vec<serde_json::Value>>,
34 pub created: Option<String>,
35 pub author: Option<String>,
36}
37
38#[derive(Debug, Deserialize, Serialize)]
39pub struct ImageInfo {
40 pub repository: String,
41 pub tag: String,
42 pub layers: Vec<LayerInfo>,
43 pub config: ImageConfig,
44 pub config_digest: String,
45 pub total_size: u64,
46 pub layer_count: usize,
47 pub large_layers_count: usize,
48}
49
50pub struct ImageParser {
51 output: OutputManager,
52 large_layer_threshold: u64,
53}
54
55impl ImageParser {
56 pub fn new(output: OutputManager) -> Self {
57 Self {
58 output,
59 large_layer_threshold: 100 * 1024 * 1024, }
61 }
62
63 pub fn set_large_layer_threshold(&mut self, threshold: u64) {
64 self.large_layer_threshold = threshold;
65 self.output.detail(&format!(
66 "Large layer threshold set to {}",
67 self.output.format_size(threshold)
68 ));
69 }
70
71 pub async fn parse_tar_file(&mut self, tar_path: &Path) -> Result<ImageInfo> {
72 let start_time = Instant::now();
73 self.output.section("Parsing Docker Image");
74 self.output.info(&format!("Source: {}", tar_path.display()));
75
76 let file_size = std::fs::metadata(tar_path)
77 .map_err(|e| PusherError::Io(format!("Failed to read file metadata: {}", e)))?
78 .len();
79
80 self.output.info(&format!(
81 "Archive size: {}",
82 self.output.format_size(file_size)
83 ));
84
85 let parse_result = self.parse_tar_contents(tar_path).await;
86 match parse_result {
87 Ok(mut image_info) => {
88 let elapsed = start_time.elapsed();
89 image_info.total_size = image_info.layers.iter().map(|l| l.size).sum();
90 image_info.layer_count = image_info.layers.len();
91 image_info.large_layers_count = image_info
92 .layers
93 .iter()
94 .filter(|l| l.size > self.large_layer_threshold)
95 .count();
96
97 self.output.success(&format!(
98 "Parsing completed in {} - {} layers, total size: {}",
99 self.output.format_duration(elapsed),
100 image_info.layer_count,
101 self.output.format_size(image_info.total_size)
102 ));
103
104 if self.output.verbose {
105 self.print_image_summary(&image_info);
106 } else {
107 self.print_podman_format_digests(&image_info);
109 }
110 Ok(image_info)
111 }
112 Err(e) => {
113 self.output.error(&format!(
114 "Parsing failed after {}: {}",
115 self.output.format_duration(start_time.elapsed()),
116 e
117 ));
118 Err(e)
119 }
120 }
121 }
122
123 fn detect_media_type(&self, layer_path: &str) -> String {
130 if layer_path.ends_with(".tar.gz") || layer_path.contains("gzip") {
131 "application/vnd.docker.image.rootfs.diff.tar.gzip".to_string()
132 } else if layer_path.ends_with(".tar") {
133 "application/vnd.docker.image.rootfs.diff.tar".to_string()
134 } else {
135 "application/vnd.docker.image.rootfs.diff.tar".to_string()
137 }
138 }
139
140 fn print_image_summary(&self, image_info: &ImageInfo) {
141 let empty_layers_count = image_info.layers.iter().filter(|l| l.size == 0).count();
142
143 let items = vec![
144 ("Layers", image_info.layer_count.to_string()),
145 ("Empty Layers", empty_layers_count.to_string()),
146 (
147 "Large Layers",
148 format!(
149 "{} (>{})",
150 image_info.large_layers_count,
151 self.output.format_size(self.large_layer_threshold)
152 ),
153 ),
154 ("Total Size", self.output.format_size(image_info.total_size)),
155 (
156 "Architecture",
157 image_info
158 .config
159 .architecture
160 .clone()
161 .unwrap_or_else(|| "unknown".to_string()),
162 ),
163 (
164 "OS",
165 image_info
166 .config
167 .os
168 .clone()
169 .unwrap_or_else(|| "unknown".to_string()),
170 ),
171 (
172 "Config Digest",
173 format!("{}...", &image_info.config_digest[..23]),
174 ),
175 ];
176
177 self.output.summary_kv("Image Information", &items);
179
180 if self.output.verbose {
181 self.output.subsection("Layer Details");
182 for (i, layer) in image_info.layers.iter().enumerate() {
183 let layer_type = if layer.size == 0 {
184 " (EMPTY)"
185 } else if layer.size > self.large_layer_threshold {
186 " (LARGE)"
187 } else {
188 ""
189 };
190
191 self.output.detail(&format!(
192 "Layer {}: {}... ({}){}",
193 i + 1,
194 &layer.digest[..23],
195 self.output.format_size(layer.size),
196 layer_type
197 ));
198 }
199 }
200 }
201
202 async fn parse_tar_contents(&mut self, tar_path: &Path) -> Result<ImageInfo> {
203 let mut manifest_data = None;
204 let mut config_data = None;
205 let mut layers = Vec::new();
206
207 self.output.subsection("Scanning archive entries");
208
209 let file = File::open(tar_path)
210 .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
211 let mut archive = Archive::new(file);
212
213 archive.set_ignore_zeros(true);
214
215 let entries = archive
216 .entries()
217 .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?;
218
219 let mut entry_count = 0;
220 let mut layer_count = 0;
221
222 for entry_result in entries {
223 let mut entry = entry_result.map_err(|e| {
224 PusherError::ImageParsing(format!("Failed to read tar entry: {}", e))
225 })?;
226
227 let path = entry
228 .path()
229 .map_err(|e| {
230 PusherError::ImageParsing(format!("Failed to read entry path: {}", e))
231 })?
232 .to_string_lossy()
233 .to_string();
234
235 let size = entry.header().size().map_err(|e| {
236 PusherError::ImageParsing(format!("Failed to read entry size: {}", e))
237 })?;
238
239 entry_count += 1;
240
241 if path.ends_with(".tar")
242 || path.ends_with(".tar.gz")
243 || path.ends_with(".json")
244 || path == "manifest.json"
245 {
246 if size == 0 {
247 self.output
248 .detail(&format!("Entry {}: {} (EMPTY)", entry_count, path));
249 } else {
250 self.output.detail(&format!(
251 "Entry {}: {} ({})",
252 entry_count,
253 path,
254 self.output.format_size(size)
255 ));
256 }
257 }
258
259 match self
260 .process_tar_entry(&mut entry, &path, size, tar_path)
261 .await?
262 {
263 EntryType::Manifest(data) => manifest_data = Some(data),
264 EntryType::Config(data) => config_data = Some(data),
265 EntryType::Layer(layer_info) => {
266 layers.push(layer_info);
267 layer_count += 1;
268 }
269 EntryType::Other => {}
270 }
271 }
272
273 self.output
274 .info(&format!("Processed {} entries total", entry_count));
275 self.output
276 .info(&format!("Found {} layer entries", layer_count));
277
278 let image_info = self
280 .build_image_info_with_manifest_digests(manifest_data, config_data, layers)
281 .await?;
282 Ok(image_info)
283 }
284
285 fn extract_digest_from_layer_path(&self, layer_path: &str) -> Option<String> {
287 self.output.detail(&format!(
288 "Extracting digest from layer path: {}",
289 layer_path
290 ));
291
292 if let Some(digest) = DigestUtils::extract_digest_from_layer_path(layer_path) {
293 self.output
294 .detail(&format!(" ✅ Found digest: {}...", &digest[..16]));
295 Some(digest)
296 } else {
297 self.output
298 .detail(" ❌ No valid digest found in layer path");
299 None
300 }
301 }
302
303 fn is_valid_sha256_hex(&self, s: &str) -> bool {
305 DigestUtils::is_valid_sha256_hex(s)
306 }
307
308 async fn process_layer(
310 &mut self,
311 _tar_path: &Path,
312 layer_path: &str,
313 size: u64,
314 ) -> Result<LayerInfo> {
315 if size == 0 {
317 self.output.detail("Processing empty layer (0 bytes)");
318 let empty_digest = DigestUtils::empty_layer_digest();
319
320 return Ok(LayerInfo {
321 digest: empty_digest,
322 size: 0,
323 media_type: self.detect_media_type(layer_path),
324 tar_path: layer_path.to_string(),
325 compressed_size: Some(0),
326 offset: None,
327 });
328 }
329
330 let digest = if let Some(extracted_digest) = self.extract_digest_from_layer_path(layer_path)
332 {
333 format!("sha256:{}", extracted_digest)
334 } else {
335 let digest = DigestUtils::generate_path_based_digest(layer_path);
337 self.output.warning(&format!(
338 "Cannot extract digest from path '{}', using path hash: {}...",
339 layer_path,
340 &digest[..23]
341 ));
342 digest
343 };
344
345 self.output.detail(&format!(
346 "Processing layer: {} ({}) -> {}",
347 layer_path,
348 self.output.format_size(size),
349 &digest[..23]
350 ));
351
352 Ok(LayerInfo {
353 digest,
354 size,
355 media_type: self.detect_media_type(layer_path),
356 tar_path: layer_path.to_string(),
357 compressed_size: Some(size),
358 offset: None,
359 })
360 }
361
362 async fn build_image_info_with_manifest_digests(
363 &self,
364 manifest_data: Option<String>,
365 config_data: Option<(String, String)>,
366 mut layers: Vec<LayerInfo>,
367 ) -> Result<ImageInfo> {
368 self.output.subsection("Building image metadata");
369
370 let manifest_str = manifest_data.ok_or_else(|| {
371 PusherError::ImageParsing("No manifest.json found in archive".to_string())
372 })?;
373
374 self.output.detail("=== MANIFEST.JSON CONTENT ===");
376 self.output.detail(&manifest_str);
377 self.output.detail("=== END MANIFEST.JSON ===");
378
379 let manifest: Vec<serde_json::Value> = serde_json::from_str(&manifest_str)
380 .map_err(|e| PusherError::Parse(format!("Failed to parse manifest.json: {}", e)))?;
381
382 let image_manifest = manifest
383 .first()
384 .ok_or_else(|| PusherError::ImageParsing("Empty manifest array".to_string()))?;
385
386 self.output.detail("Available manifest keys:");
387 if let Some(obj) = image_manifest.as_object() {
388 for (key, value) in obj.iter() {
389 let value_preview = if value.to_string().len() > 100 {
390 format!("{}...", &value.to_string()[..100])
391 } else {
392 value.to_string()
393 };
394 self.output
395 .detail(&format!(" - {}: {}", key, value_preview));
396 }
397 }
398
399 let mut found_layer_digests = false;
401 let mut ordered_layers = Vec::new();
402 if let Some(layer_digests) = image_manifest.get("Layers").and_then(|l| l.as_array()) {
404 self.output.info(&format!(
405 "✅ Found {} layer paths in 'Layers' field",
406 layer_digests.len()
407 ));
408 found_layer_digests = true;
409
410 for (manifest_index, layer_digest_value) in layer_digests.iter().enumerate() {
412 if let Some(layer_file) = layer_digest_value.as_str() {
413 self.output.detail(&format!(
414 "Manifest Layer {}: {}",
415 manifest_index + 1,
416 layer_file
417 ));
418
419 let extracted_digest = self.extract_digest_from_layer_path(layer_file);
421
422 if let Some(digest) = extracted_digest {
423 let full_digest = format!("sha256:{}", digest);
424
425 let mut matched_layer = None;
427 for (i, layer) in layers.iter().enumerate() {
428 if layer.tar_path == layer_file {
430 matched_layer = Some(layers.remove(i));
431 self.output
432 .detail(&format!(" ✅ Exact path match: {}", layer_file));
433 break;
434 }
435 } if matched_layer.is_none() {
437 let mut match_index = None;
439 let mut match_tar_path = String::new();
440
441 for (i, layer) in layers.iter().enumerate() {
442 if layer.digest.contains(&digest)
444 || layer.tar_path.contains(&digest)
445 {
446 if digest.len() >= 12 {
448 match_index = Some(i);
450 match_tar_path = layer.tar_path.clone();
451 break;
452 }
453 }
454 }
455
456 if let Some(i) = match_index {
458 matched_layer = Some(layers.remove(i));
459 self.output.detail(&format!(
460 " ⚠️ Digest-based match: {} -> {}",
461 match_tar_path, layer_file
462 ));
463 }
464 }
465
466 if let Some(mut layer) = matched_layer {
467 layer.digest = full_digest.clone();
469 layer.tar_path = layer_file.to_string(); self.output.success(&format!(
471 "✅ Matched layer {}: {} -> {}...",
472 manifest_index + 1,
473 layer_file,
474 &full_digest[..23]
475 ));
476 ordered_layers.push(layer);
477 } else {
478 let is_empty = digest
480 == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
481
482 let size = if is_empty {
484 0
485 } else {
486 layers
487 .iter()
488 .find(|l| {
489 l.tar_path.contains(&digest) || l.digest.contains(&digest)
490 })
491 .map(|l| l.size)
492 .unwrap_or(0)
493 };
494
495 self.output.warning(&format!(
496 "⚠️ Creating new layer entry {}: {} ({} bytes)",
497 manifest_index + 1,
498 layer_file,
499 size
500 ));
501
502 ordered_layers.push(LayerInfo {
503 digest: full_digest,
504 size,
505 media_type: self.detect_media_type(layer_file),
506 tar_path: layer_file.to_string(),
507 compressed_size: Some(size),
508 offset: None,
509 });
510 }
511 } else {
512 return Err(PusherError::ImageParsing(format!(
513 "Could not extract valid SHA256 digest from layer path: {}",
514 layer_file
515 )));
516 }
517 }
518 }
519
520 layers = ordered_layers;
522 }
523
524 if !found_layer_digests {
526 self.output
527 .warning("No 'Layers' field found in manifest, using filenames as fallback");
528 for (i, layer) in layers.iter_mut().enumerate() {
529 if let Some(extracted_digest) = self.extract_digest_from_layer_path(&layer.tar_path)
530 {
531 layer.digest = format!("sha256:{}", extracted_digest);
532 self.output.detail(&format!(
533 "Layer {}: Extracted digest from filename: {}...",
534 i + 1,
535 &layer.digest[..23]
536 ));
537 } else {
538 self.output.warning(&format!(
539 "Layer {}: Could not extract digest from path: {}",
540 i + 1,
541 layer.tar_path
542 ));
543 }
544 }
545 }
546
547 for (i, layer) in layers.iter().enumerate() {
549 if !layer.digest.starts_with("sha256:") || layer.digest.len() != 71 {
550 return Err(PusherError::ImageParsing(format!(
551 "Layer {} has invalid SHA256 digest format: {}",
552 i + 1,
553 layer.digest
554 )));
555 }
556
557 let hex_part = &layer.digest[7..]; if !self.is_valid_sha256_hex(hex_part) {
560 return Err(PusherError::ImageParsing(format!(
561 "Layer {} has invalid SHA256 hex digest: {}",
562 i + 1,
563 layer.digest
564 )));
565 }
566 }
567
568 let (_, config_str) = config_data.ok_or_else(|| {
569 PusherError::ImageParsing("No config file found in archive".to_string())
570 })?;
571
572 let config: ImageConfig = serde_json::from_str(&config_str)
573 .map_err(|e| PusherError::Parse(format!("Failed to parse image config: {}", e)))?;
574 let config_digest = DigestUtils::compute_docker_digest_str(&config_str);
576
577 self.output.step(&format!("Found {} layers", layers.len()));
578 self.output
579 .step(&format!("Config digest: {}...", &config_digest[..23]));
580
581 self.output.subsection("Layer Digest Summary");
583 for (i, layer) in layers.iter().enumerate() {
584 let source = if found_layer_digests {
585 "manifest"
586 } else {
587 "filename"
588 };
589 let size_info = if layer.size > 0 {
590 format!(" ({})", self.output.format_size(layer.size))
591 } else {
592 " (EMPTY)".to_string()
593 };
594 self.output.detail(&format!(
595 "Layer {}: {}{} (from {})",
596 i + 1,
597 &layer.digest[..23],
598 size_info,
599 source
600 ));
601 }
602
603 if found_layer_digests {
604 self.output
605 .success("✅ Using real digests from Docker manifest");
606 } else {
607 self.output
608 .warning("⚠️ Using filename-based digests (may cause upload issues)");
609 }
610
611 self.output
612 .success("✅ All layer digests validated as proper SHA256 format");
613
614 Ok(ImageInfo {
615 repository: "unknown".to_string(),
616 tag: "latest".to_string(),
617 layers,
618 config,
619 config_digest,
620 total_size: 0,
621 layer_count: 0,
622 large_layers_count: 0,
623 })
624 }
625
626 async fn process_tar_entry(
628 &mut self,
629 entry: &mut tar::Entry<'_, std::fs::File>,
630 path: &str,
631 size: u64,
632 tar_path: &Path,
633 ) -> Result<EntryType> {
634 if path == "manifest.json" {
635 let mut content = String::new();
636 entry
637 .read_to_string(&mut content)
638 .map_err(|e| PusherError::Io(format!("Failed to read manifest: {}", e)))?;
639 return Ok(EntryType::Manifest(content));
640 }
641
642 if path.ends_with(".json") && !path.contains("/") {
643 let mut content = String::new();
645 entry
646 .read_to_string(&mut content)
647 .map_err(|e| PusherError::Io(format!("Failed to read config: {}", e)))?;
648 return Ok(EntryType::Config((path.to_string(), content)));
649 }
650
651 if path.ends_with(".tar") || path.ends_with("layer.tar") || path.contains("/layer") {
652 let layer_info = self.process_layer(tar_path, path, size).await?;
654 return Ok(EntryType::Layer(layer_info));
655 }
656
657 Ok(EntryType::Other)
658 }
659 pub async fn validate_layer_data(&self, tar_path: &Path, layer: &LayerInfo) -> Result<bool> {
661 self.output
662 .detail(&format!("Validating layer data: {}", &layer.digest[..23]));
663
664 match TarUtils::extract_layer_data(tar_path, &layer.tar_path) {
666 Ok(data) => {
667 let computed_digest = DigestUtils::compute_docker_digest(&data);
668 let matches = computed_digest == layer.digest;
669
670 if matches {
671 self.output.success(&format!(
672 "✅ Layer data integrity verified: {} bytes",
673 data.len()
674 ));
675 } else {
676 self.output
677 .error(&format!("❌ Layer data integrity failed!"));
678 self.output.detail(&format!(" Expected: {}", layer.digest));
679 self.output
680 .detail(&format!(" Computed: {}", computed_digest));
681 self.output
682 .detail(&format!(" Data size: {} bytes", data.len()));
683 }
684
685 Ok(matches)
686 }
687 Err(e) => {
688 self.output.error(&format!(
689 "Failed to extract layer data for validation: {}",
690 e
691 ));
692 Ok(false)
693 }
694 }
695 }
696
697 pub fn debug_layer_mapping(&self, manifest_layers: &[String], parsed_layers: &[LayerInfo]) {
699 self.output.subsection("Layer-to-Digest Mapping Analysis");
700
701 self.output.detail(&format!(
702 "Manifest contains {} layer entries:",
703 manifest_layers.len()
704 ));
705 for (i, layer_path) in manifest_layers.iter().enumerate() {
706 let extracted_digest = self.extract_digest_from_layer_path(layer_path);
707 self.output.detail(&format!(
708 " {}: {} -> {:?}",
709 i + 1,
710 layer_path,
711 extracted_digest
712 .as_ref()
713 .map(|d| &d[..12])
714 .unwrap_or("INVALID")
715 ));
716 }
717
718 self.output.detail(&format!(
719 "Parsed tar contains {} layer entries:",
720 parsed_layers.len()
721 ));
722 for (i, layer) in parsed_layers.iter().enumerate() {
723 self.output.detail(&format!(
724 " {}: {} ({} bytes) -> {}",
725 i + 1,
726 layer.tar_path,
727 layer.size,
728 &layer.digest[..23]
729 ));
730 }
731 }
732
733 pub fn print_podman_format_digests(&self, image_info: &ImageInfo) {
735 for layer in &image_info.layers {
736 println!("{}", layer.digest);
737 }
738 }
739}
740
741enum EntryType {
743 Manifest(String),
744 Config((String, String)),
745 Layer(LayerInfo),
746 Other,
747}