use std::fs::File;
use std::io::Read;
use std::path::Path;
use tar::Archive;
use crate::error::{Result, PusherError};
use crate::output::OutputManager;
use crate::digest::DigestUtils;
use serde::{Deserialize, Serialize};
use std::time::Instant;

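/// Metadata for a single layer discovered in the image archive.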
#[derive(Debug, Deserialize, Serialize, Clone)]
pub struct LayerInfo {
    pub digest: String,
    pub size: u64,
    pub media_type: String,
    pub tar_path: String,
    pub compressed_size: Option<u64>,
    pub offset: Option<u64>,
}

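/// Image configuration parsed from the config JSON inside the archive; fields
/// that are not needed individually are kept as raw `serde_json::Value`s.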
#[derive(Debug, Deserialize, Serialize)]
pub struct ImageConfig {
    pub architecture: Option<String>,
    pub os: Option<String>,
    pub config: Option<serde_json::Value>,
    pub rootfs: Option<serde_json::Value>,
    pub history: Option<Vec<serde_json::Value>>,
    pub created: Option<String>,
    pub author: Option<String>,
}

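/// Aggregated view of a parsed image: its layers, configuration, config
/// digest, and size statistics.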
#[derive(Debug, Deserialize, Serialize)]
pub struct ImageInfo {
    pub repository: String,
    pub tag: String,
    pub layers: Vec<LayerInfo>,
    pub config: ImageConfig,
    pub config_digest: String,
    pub total_size: u64,
    pub layer_count: usize,
    pub large_layers_count: usize,
}

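/// Parses a Docker image tar archive (e.g. the output of `docker save`) and
/// reports progress through an [`OutputManager`].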
pub struct ImageParser {
    output: OutputManager,
    large_layer_threshold: u64,
}

impl ImageParser {
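    /// Creates a parser with the default large-layer threshold of 100 MiB.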
    pub fn new(output: OutputManager) -> Self {
        Self {
            output,
            large_layer_threshold: 100 * 1024 * 1024,
        }
    }

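    /// Overrides the size threshold above which a layer is reported as "large".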
    pub fn set_large_layer_threshold(&mut self, threshold: u64) {
        self.large_layer_threshold = threshold;
        self.output.detail(&format!("Large layer threshold set to {}",
            self.output.format_size(threshold)));
    }

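    /// Parses the archive at `tar_path` and returns the image metadata,
    /// including per-layer digests and aggregate size statistics.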
    pub async fn parse_tar_file(&mut self, tar_path: &Path) -> Result<ImageInfo> {
        let start_time = Instant::now();
        self.output.section("Parsing Docker Image");
        self.output.info(&format!("Source: {}", tar_path.display()));

        let file_size = std::fs::metadata(tar_path)
            .map_err(|e| PusherError::Io(format!("Failed to read file metadata: {}", e)))?
            .len();

        self.output.info(&format!("Archive size: {}", self.output.format_size(file_size)));

        let parse_result = self.parse_tar_contents(tar_path).await;

        match parse_result {
            Ok(mut image_info) => {
                let elapsed = start_time.elapsed();
                image_info.total_size = image_info.layers.iter().map(|l| l.size).sum();
                image_info.layer_count = image_info.layers.len();
                image_info.large_layers_count = image_info.layers.iter()
                    .filter(|l| l.size > self.large_layer_threshold)
                    .count();

                self.output.success(&format!(
                    "Parsing completed in {} - {} layers, total size: {}",
                    self.output.format_duration(elapsed),
                    image_info.layer_count,
                    self.output.format_size(image_info.total_size)
                ));

                self.print_image_summary(&image_info);
                Ok(image_info)
            }
            Err(e) => {
                self.output.error(&format!("Parsing failed after {}: {}",
                    self.output.format_duration(start_time.elapsed()), e));
                Err(e)
            }
        }
    }

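    /// Picks a Docker layer media type from the layer path: gzip-compressed
    /// layers get the `.tar.gzip` media type, everything else the plain `.tar` one.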
    fn detect_media_type(&self, layer_path: &str) -> String {
        if layer_path.ends_with(".tar.gz") || layer_path.contains("gzip") {
            "application/vnd.docker.image.rootfs.diff.tar.gzip".to_string()
        } else {
            // Plain .tar layers and anything unrecognized use the uncompressed tar media type.
            "application/vnd.docker.image.rootfs.diff.tar".to_string()
        }
    }

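    /// Prints a key/value summary of the parsed image and, in verbose mode,
    /// a per-layer breakdown.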
    fn print_image_summary(&self, image_info: &ImageInfo) {
        let empty_layers_count = image_info.layers.iter()
            .filter(|l| l.size == 0)
            .count();

        let items = vec![
            ("Layers", image_info.layer_count.to_string()),
            ("Empty Layers", empty_layers_count.to_string()),
            ("Large Layers", format!("{} (>{})",
                image_info.large_layers_count,
                self.output.format_size(self.large_layer_threshold))),
            ("Total Size", self.output.format_size(image_info.total_size)),
            ("Architecture", image_info.config.architecture.clone().unwrap_or_else(|| "unknown".to_string())),
            ("OS", image_info.config.os.clone().unwrap_or_else(|| "unknown".to_string())),
            ("Config Digest", format!("{}...", &image_info.config_digest[..23])),
        ];

        self.output.summary_kv("Image Information", &items);

        if self.output.verbose {
            self.output.subsection("Layer Details");
            for (i, layer) in image_info.layers.iter().enumerate() {
                let layer_type = if layer.size == 0 {
                    " (EMPTY)"
                } else if layer.size > self.large_layer_threshold {
                    " (LARGE)"
                } else {
                    ""
                };

                self.output.detail(&format!("Layer {}: {}... ({}){}",
                    i + 1,
                    &layer.digest[..23],
                    self.output.format_size(layer.size),
                    layer_type));
            }
        }
    }

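    /// Walks every entry in the tar archive, collecting the manifest, the
    /// image config, and a [`LayerInfo`] for each layer entry.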
    async fn parse_tar_contents(&mut self, tar_path: &Path) -> Result<ImageInfo> {
        let mut manifest_data = None;
        let mut config_data = None;
        let mut layers = Vec::new();

        self.output.subsection("Scanning archive entries");

        let file = File::open(tar_path)
            .map_err(|e| PusherError::Io(format!("Failed to open tar file: {}", e)))?;
        let mut archive = Archive::new(file);

        archive.set_ignore_zeros(true);

        let entries = archive.entries()
            .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entries: {}", e)))?;

        let mut entry_count = 0;
        let mut layer_count = 0;

        for entry_result in entries {
            let mut entry = entry_result
                .map_err(|e| PusherError::ImageParsing(format!("Failed to read tar entry: {}", e)))?;

            let path = entry.path()
                .map_err(|e| PusherError::ImageParsing(format!("Failed to read entry path: {}", e)))?
                .to_string_lossy()
                .to_string();

            let size = entry.header().size()
                .map_err(|e| PusherError::ImageParsing(format!("Failed to read entry size: {}", e)))?;

            entry_count += 1;

            if path.ends_with(".tar") || path.ends_with(".tar.gz") || path.ends_with(".json") || path == "manifest.json" {
                if size == 0 {
                    self.output.detail(&format!("Entry {}: {} (EMPTY)", entry_count, path));
                } else {
                    self.output.detail(&format!("Entry {}: {} ({})", entry_count, path, self.output.format_size(size)));
                }
            }

            match self.process_tar_entry(&mut entry, &path, size, tar_path).await? {
                EntryType::Manifest(data) => manifest_data = Some(data),
                EntryType::Config(data) => config_data = Some(data),
                EntryType::Layer(layer_info) => {
                    layers.push(layer_info);
                    layer_count += 1;
                },
                EntryType::Other => {}
            }
        }

        self.output.info(&format!("Processed {} entries total", entry_count));
        self.output.info(&format!("Found {} layer entries", layer_count));

        let image_info = self.build_image_info_with_manifest_digests(manifest_data, config_data, layers).await?;
        Ok(image_info)
    }

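    /// Tries to recover a SHA256 hex digest from a layer path inside the
    /// archive, logging whether the extraction succeeded.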
    fn extract_digest_from_layer_path(&self, layer_path: &str) -> Option<String> {
        self.output.detail(&format!("Extracting digest from layer path: {}", layer_path));

        if let Some(digest) = DigestUtils::extract_digest_from_layer_path(layer_path) {
            self.output.detail(&format!(" ✅ Found digest: {}...", &digest[..16]));
            Some(digest)
        } else {
            self.output.detail(" ❌ No valid digest found in layer path");
            None
        }
    }

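    /// Thin wrapper around [`DigestUtils::is_valid_sha256_hex`].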
    fn is_valid_sha256_hex(&self, s: &str) -> bool {
        DigestUtils::is_valid_sha256_hex(s)
    }

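    /// Builds a [`LayerInfo`] for a single layer entry. Empty layers get the
    /// well-known empty-layer digest; otherwise the digest is taken from the
    /// layer path, with a path-based hash as a last resort.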
    async fn process_layer(&mut self, _tar_path: &Path, layer_path: &str, size: u64) -> Result<LayerInfo> {
        if size == 0 {
            self.output.detail("Processing empty layer (0 bytes)");
            let empty_digest = DigestUtils::empty_layer_digest();

            return Ok(LayerInfo {
                digest: empty_digest,
                size: 0,
                media_type: self.detect_media_type(layer_path),
                tar_path: layer_path.to_string(),
                compressed_size: Some(0),
                offset: None,
            });
        }

        let digest = if let Some(extracted_digest) = self.extract_digest_from_layer_path(layer_path) {
            format!("sha256:{}", extracted_digest)
        } else {
            let digest = DigestUtils::generate_path_based_digest(layer_path);
            self.output.warning(&format!("Cannot extract digest from path '{}', using path hash: {}...",
                layer_path, &digest[..23]));
            digest
        };

        self.output.detail(&format!("Processing layer: {} ({}) -> {}",
            layer_path, self.output.format_size(size), &digest[..23]));

        Ok(LayerInfo {
            digest,
            size,
            media_type: self.detect_media_type(layer_path),
            tar_path: layer_path.to_string(),
            compressed_size: Some(size),
            offset: None,
        })
    }

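    /// Assembles the final [`ImageInfo`] from the manifest, config, and the
    /// layers found while scanning. When the manifest carries a `Layers` array
    /// the layers are reordered and re-digested to match it; otherwise digests
    /// derived from the file names are used as a fallback.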
    async fn build_image_info_with_manifest_digests(
        &self,
        manifest_data: Option<String>,
        config_data: Option<(String, String)>,
        mut layers: Vec<LayerInfo>,
    ) -> Result<ImageInfo> {
        self.output.subsection("Building image metadata");

        let manifest_str = manifest_data
            .ok_or_else(|| PusherError::ImageParsing("No manifest.json found in archive".to_string()))?;

        self.output.detail("=== MANIFEST.JSON CONTENT ===");
        self.output.detail(&manifest_str);
        self.output.detail("=== END MANIFEST.JSON ===");

        let manifest: Vec<serde_json::Value> = serde_json::from_str(&manifest_str)
            .map_err(|e| PusherError::Parse(format!("Failed to parse manifest.json: {}", e)))?;

        let image_manifest = manifest.first()
            .ok_or_else(|| PusherError::ImageParsing("Empty manifest array".to_string()))?;

        self.output.detail("Available manifest keys:");
        if let Some(obj) = image_manifest.as_object() {
            for (key, value) in obj.iter() {
                let value_preview = if value.to_string().len() > 100 {
                    format!("{}...", &value.to_string()[..100])
                } else {
                    value.to_string()
                };
                self.output.detail(&format!(" - {}: {}", key, value_preview));
            }
        }

        let mut found_layer_digests = false;
        let mut ordered_layers = Vec::new();

        if let Some(layer_digests) = image_manifest.get("Layers").and_then(|l| l.as_array()) {
            self.output.info(&format!("✅ Found {} layer paths in 'Layers' field", layer_digests.len()));
            found_layer_digests = true;

            for (manifest_index, layer_digest_value) in layer_digests.iter().enumerate() {
                if let Some(layer_file) = layer_digest_value.as_str() {
                    self.output.detail(&format!("Manifest Layer {}: {}", manifest_index + 1, layer_file));

                    let extracted_digest = self.extract_digest_from_layer_path(layer_file);

                    if let Some(digest) = extracted_digest {
                        let full_digest = format!("sha256:{}", digest);

                        // Match the manifest's layer path against the layers found while scanning.
                        let mut matched_layer = None;
                        for (i, layer) in layers.iter().enumerate() {
                            if layer.tar_path == layer_file ||
                                layer.digest.ends_with(&digest) ||
                                layer.tar_path.contains(&digest) {
                                matched_layer = Some(layers.remove(i));
                                break;
                            }
                        }

                        if let Some(mut layer) = matched_layer {
                            layer.digest = full_digest.clone();
                            self.output.success(&format!("✅ Matched layer {}: {} -> {}...",
                                manifest_index + 1, layer.tar_path, &full_digest[..23]));
                            ordered_layers.push(layer);
                        } else {
                            let is_empty = digest == "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

                            self.output.warning(&format!("⚠️ Creating placeholder for layer {}: {} ({})",
                                manifest_index + 1, layer_file, if is_empty { "EMPTY" } else { "UNKNOWN SIZE" }));

                            ordered_layers.push(LayerInfo {
                                digest: full_digest,
                                size: if is_empty { 0 } else {
                                    layers.iter()
                                        .find(|l| l.tar_path.contains(&digest))
                                        .map(|l| l.size)
                                        .unwrap_or(0)
                                },
                                media_type: self.detect_media_type(layer_file),
                                tar_path: layer_file.to_string(),
                                compressed_size: Some(0),
                                offset: None,
                            });
                        }
                    } else {
                        return Err(PusherError::ImageParsing(format!(
                            "Could not extract valid SHA256 digest from layer path: {}", layer_file
                        )));
                    }
                }
            }

            layers = ordered_layers;
        }

        if !found_layer_digests {
            self.output.warning("No 'Layers' field found in manifest, using filenames as fallback");
            for (i, layer) in layers.iter_mut().enumerate() {
                if let Some(extracted_digest) = self.extract_digest_from_layer_path(&layer.tar_path) {
                    layer.digest = format!("sha256:{}", extracted_digest);
                    self.output.detail(&format!("Layer {}: Extracted digest from filename: {}...",
                        i + 1, &layer.digest[..23]));
                } else {
                    self.output.warning(&format!("Layer {}: Could not extract digest from path: {}",
                        i + 1, layer.tar_path));
                }
            }
        }

        // Every digest must be "sha256:" followed by 64 hex characters before we go on.
        for (i, layer) in layers.iter().enumerate() {
            if !layer.digest.starts_with("sha256:") || layer.digest.len() != 71 {
                return Err(PusherError::ImageParsing(format!(
                    "Layer {} has invalid SHA256 digest format: {}", i + 1, layer.digest
                )));
            }

            let hex_part = &layer.digest[7..];
            if !self.is_valid_sha256_hex(hex_part) {
                return Err(PusherError::ImageParsing(format!(
                    "Layer {} has invalid SHA256 hex digest: {}", i + 1, layer.digest
                )));
            }
        }

        let (_, config_str) = config_data
            .ok_or_else(|| PusherError::ImageParsing("No config file found in archive".to_string()))?;

        let config: ImageConfig = serde_json::from_str(&config_str)
            .map_err(|e| PusherError::Parse(format!("Failed to parse image config: {}", e)))?;
        let config_digest = DigestUtils::compute_docker_digest_str(&config_str);

        self.output.step(&format!("Found {} layers", layers.len()));
        self.output.step(&format!("Config digest: {}...", &config_digest[..23]));

        self.output.subsection("Layer Digest Summary");
        for (i, layer) in layers.iter().enumerate() {
            let source = if found_layer_digests { "manifest" } else { "filename" };
            let size_info = if layer.size > 0 {
                format!(" ({})", self.output.format_size(layer.size))
            } else {
                " (EMPTY)".to_string()
            };
            self.output.detail(&format!("Layer {}: {}{} (from {})",
                i + 1, &layer.digest[..23], size_info, source));
        }

        if found_layer_digests {
            self.output.success("✅ Using real digests from Docker manifest");
        } else {
            self.output.warning("⚠️ Using filename-based digests (may cause upload issues)");
        }

        self.output.success("✅ All layer digests validated as proper SHA256 format");

        // repository and tag are placeholders here; the aggregate counters are
        // recomputed by parse_tar_file once parsing succeeds.
        Ok(ImageInfo {
            repository: "unknown".to_string(),
            tag: "latest".to_string(),
            layers,
            config,
            config_digest,
            total_size: 0,
            layer_count: 0,
            large_layers_count: 0,
        })
    }

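    /// Classifies a single tar entry as the manifest, a config JSON, a layer,
    /// or something to ignore, reading its contents where needed.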
    async fn process_tar_entry(
        &mut self,
        entry: &mut tar::Entry<'_, std::fs::File>,
        path: &str,
        size: u64,
        tar_path: &Path,
    ) -> Result<EntryType> {
        if path == "manifest.json" {
            let mut content = String::new();
            entry.read_to_string(&mut content)
                .map_err(|e| PusherError::Io(format!("Failed to read manifest: {}", e)))?;
            return Ok(EntryType::Manifest(content));
        }

        // Any other top-level *.json entry is treated as the image config.
        if path.ends_with(".json") && !path.contains("/") {
            let mut content = String::new();
            entry.read_to_string(&mut content)
                .map_err(|e| PusherError::Io(format!("Failed to read config: {}", e)))?;
            return Ok(EntryType::Config((path.to_string(), content)));
        }

        // Anything that looks like a layer tarball.
        if path.ends_with(".tar") || path.ends_with("layer.tar") || path.contains("/layer") {
            let layer_info = self.process_layer(tar_path, path, size).await?;
            return Ok(EntryType::Layer(layer_info));
        }

        Ok(EntryType::Other)
    }
}

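/// Classification of a tar entry produced by `process_tar_entry`.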
enum EntryType {
    Manifest(String),
    Config((String, String)),
    Layer(LayerInfo),
    Other,
}