docker_image_pusher/image/
parser.rs

// This file contains the `ImageParser` struct, which extracts image layers and metadata from an image tar archive.
2
3use std::fs::File;
4use std::io::Read;
5use std::path::Path;
6use tar::Archive;
7use crate::error::{Result, PusherError};
8use serde::{Deserialize, Serialize};
9use std::collections::HashMap;
10use sha2::{Sha256, Digest};
11
12#[derive(Debug, Deserialize, Serialize, Clone)]
13pub struct LayerInfo {
14    pub digest: String,
15    pub size: u64,
16    pub media_type: String,
17    pub tar_path: String,
18}
19
20#[derive(Debug, Deserialize, Serialize)]
21pub struct ImageConfig {
22    pub architecture: Option<String>,
23    pub os: Option<String>,
24    pub config: Option<serde_json::Value>,
25    pub rootfs: Option<serde_json::Value>,
26    pub history: Option<Vec<serde_json::Value>>,
27}
28
29#[derive(Debug, Deserialize, Serialize)]
30pub struct ImageInfo {
31    pub repository: String,
32    pub tag: String,
33    pub layers: Vec<LayerInfo>,
34    pub config_digest: String,
35    pub config: ImageConfig,
36}
37
38#[derive(Debug, Deserialize)]
39struct ManifestEntry {
40    #[serde(rename = "Config")]
41    config: String,
42    #[serde(rename = "RepoTags")]
43    repo_tags: Vec<String>,
44    #[serde(rename = "Layers")]
45    layers: Vec<String>,
46}
47
/// Stateless parser for image tar archives.
///
/// `Default` is derived so that `ImageParser::default()` is available next to
/// the zero-argument `new()` constructor; `Debug`, `Clone` and `Copy` are free
/// for a unit struct.
#[derive(Debug, Default, Clone, Copy)]
pub struct ImageParser;
49
50impl ImageParser {
51    pub fn new() -> Self {
52        Self
53    }
54
55    pub async fn parse_tar_file(&self, tar_path: &Path) -> Result<ImageInfo> {
56        println!("Opening tar file: {}", tar_path.display());
57        
58        if !tar_path.exists() {
59            return Err(PusherError::ImageParsing(format!("Tar file does not exist: {}", tar_path.display())));
60        }
61
62        // Scan tar file to collect information
63        let (manifest_entry, config_data, layer_files) = self.scan_tar_file(tar_path)?;
64        
65        // Parse config
66        let config: ImageConfig = serde_json::from_str(&config_data)
67            .map_err(|e| PusherError::ImageParsing(format!("Failed to parse config: {}", e)))?;
68        
69        // Build layer info
70        let layers = self.build_layer_info(&manifest_entry, &layer_files)?;
71        
72        // Parse repository and tag
73        let (repository, tag) = self.parse_repo_tag(&manifest_entry.repo_tags)?;
74        
75        // Calculate config digest
76        let config_digest = self.calculate_config_digest(&config_data)?;
77        
78        println!("Image info:");
79        println!("  Repository: {}", repository);
80        println!("  Tag: {}", tag);
81        println!("  Layers: {} found", layers.len());
82        for (i, layer) in layers.iter().enumerate() {
83            println!("    Layer {}: {} ({})", i + 1, layer.digest, layer.size);
84        }
85        
86        Ok(ImageInfo {
87            repository,
88            tag,
89            layers,
90            config_digest,
91            config,
92        })
93    }
94
95    fn scan_tar_file(&self, tar_path: &Path) -> Result<(ManifestEntry, String, HashMap<String, u64>)> {
96        let file = File::open(tar_path)
97            .map_err(|e| PusherError::Io(e))?;
98        let mut archive = Archive::new(file);
99        
100        let mut manifest_data = None;
101        let mut config_data = None;
102        let mut layer_files = HashMap::new();
103        
104        println!("Scanning tar entries...");
105        
106        for entry in archive.entries().map_err(PusherError::Io)? {
107            let mut entry = entry.map_err(PusherError::Io)?;
108            let path = entry.path().map_err(PusherError::Io)?.to_string_lossy().to_string();
109            let size = entry.header().size().map_err(PusherError::Io)?;
110            
111            println!("  Found: {}", path);
112            
113            if path == "manifest.json" {
114                let mut contents = String::new();
115                entry.read_to_string(&mut contents).map_err(PusherError::Io)?;
116                manifest_data = Some(contents);
117                println!("    -> Manifest file found");
118            } else if path.ends_with(".json") && !path.contains("manifest") {
119                let mut contents = String::new();
120                entry.read_to_string(&mut contents).map_err(PusherError::Io)?;
121                config_data = Some(contents);
122                println!("    -> Config file found: {}", path);
123            } else if path.ends_with(".tar") || path.contains("layer") {
124                layer_files.insert(path.clone(), size);
125                println!("    -> Layer file found: {} ({} bytes)", path, size);
126            }
127        }
128        
129        let manifest_str = manifest_data
130            .ok_or_else(|| PusherError::ImageParsing("No manifest.json found in tar".to_string()))?;
131        
132        let manifest_array: Vec<ManifestEntry> = serde_json::from_str(&manifest_str)
133            .map_err(|e| PusherError::ImageParsing(format!("Failed to parse manifest: {}", e)))?;
134        
135        let manifest_entry = manifest_array.into_iter().next()
136            .ok_or_else(|| PusherError::ImageParsing("Empty manifest".to_string()))?;
137        
138        let config = config_data
139            .ok_or_else(|| PusherError::ImageParsing("No config file found in tar".to_string()))?;
140        
141        println!("Parsed manifest:");
142        println!("  Config: {}", manifest_entry.config);
143        println!("  RepoTags: {:?}", manifest_entry.repo_tags);
144        println!("  Layers: {} entries", manifest_entry.layers.len());
145        
146        Ok((manifest_entry, config, layer_files))
147    }
148
149    fn build_layer_info(&self, manifest_entry: &ManifestEntry, layer_files: &HashMap<String, u64>) -> Result<Vec<LayerInfo>> {
150        let mut layers = Vec::new();
151        
152        for layer_path in &manifest_entry.layers {
153            let size = layer_files.get(layer_path)
154                .copied()
155                .unwrap_or(0);
156            
157            // Extract digest from layer path or calculate it
158            let digest = self.extract_digest_from_path(layer_path)?;
159            
160            layers.push(LayerInfo {
161                digest,
162                size,
163                media_type: "application/vnd.docker.image.rootfs.diff.tar.gzip".to_string(),
164                tar_path: layer_path.clone(),
165            });
166        }
167        
168        Ok(layers)
169    }
170
171    fn extract_digest_from_path(&self, layer_path: &str) -> Result<String> {
172        // Try to extract digest from path like "sha256:abc123.tar" or "abc123/layer.tar"
173        if layer_path.contains("sha256:") {
174            if let Some(start) = layer_path.find("sha256:") {
175                let digest_part = &layer_path[start..];
176                if let Some(end) = digest_part.find('.') {
177                    return Ok(digest_part[..end].to_string());
178                } else {
179                    return Ok(digest_part.to_string());
180                }
181            }
182        }
183        
184        // For paths like "abc123.tar", assume abc123 is the short digest
185        if let Some(file_name) = layer_path.split('/').last() {
186            if let Some(name_without_ext) = file_name.strip_suffix(".tar") {
187                if name_without_ext.len() == 64 {
188                    return Ok(format!("sha256:{}", name_without_ext));
189                }
190            }
191        }
192        
193        // For paths like "abc123/layer.tar", use the directory name
194        if let Some(dir_name) = layer_path.split('/').next() {
195            if dir_name.len() == 64 {
196                return Ok(format!("sha256:{}", dir_name));
197            }
198        }
199        
200        Err(PusherError::ImageParsing(format!("Could not extract digest from layer path: {}", layer_path)))
201    }
202
203    fn parse_repo_tag(&self, repo_tags: &[String]) -> Result<(String, String)> {
204        let repo_tag = repo_tags.first()
205            .ok_or_else(|| PusherError::ImageParsing("No repository tags found".to_string()))?;
206            
207        if let Some((repo, tag)) = repo_tag.rsplit_once(':') {
208            Ok((repo.to_string(), tag.to_string()))
209        } else {
210            Ok((repo_tag.clone(), "latest".to_string()))
211        }
212    }
213
214    fn calculate_config_digest(&self, config_data: &str) -> Result<String> {
215        let mut hasher = Sha256::new();
216        hasher.update(config_data.as_bytes());
217        Ok(format!("sha256:{:x}", hasher.finalize()))
218    }
219}