use crate::metadata::{self, ImageMetadata};
use crate::notifier::Notifier;
use crate::tar_extractor;
use anyhow::{anyhow, Context, Result};
use chrono::{DateTime, Utc};
use std::fs;
use std::path::{Path, PathBuf};
#[derive(Debug, Clone)]
pub struct Layer {
pub id: String,
pub command: String,
pub created_at: DateTime<Utc>,
pub is_empty: bool,
pub tarball_path: Option<std::path::PathBuf>, pub digest: String, pub comment: Option<String>, }
/// An image tarball unpacked into a temporary directory, together with the
/// metadata and layer list read from it at construction time.
pub struct ExtractedImage {
    /// Directory the image tarball was unpacked into (a sub-directory of the
    /// temporary directory held below).
    extract_dir: PathBuf,
    /// Handle to the temporary directory. It is never read; it is stored so
    /// the directory stays on disk for as long as this value is alive
    /// (`tempfile::TempDir` removes the directory when dropped).
    _temp_dir: tempfile::TempDir,
    /// Image metadata loaded once during construction.
    metadata: ImageMetadata,
    /// Layers in history order, loaded once during construction.
    layers: Vec<Layer>,
}
impl ExtractedImage {
pub fn from_tarball<P: AsRef<Path>>(tarball_path: P, notifier: &Notifier) -> Result<Self> {
let tarball_path = tarball_path.as_ref();
notifier.debug(&format!("Extracting image tarball: {tarball_path:?}"));
let temp_dir = tempfile::tempdir().context("Failed to create temporary directory")?;
let extract_dir = temp_dir.path().join("extracted");
fs::create_dir_all(&extract_dir)?;
Self::extract_tar_file(tarball_path, &extract_dir)?;
let manifest_path = extract_dir.join("manifest.json");
if !manifest_path.exists() {
return Err(anyhow!(
"Invalid image tarball: manifest.json not found. This does not appear to be a valid OCI/Docker image tarball."
));
}
notifier.debug("Loading image metadata...");
let metadata = Self::load_metadata_from_dir(&extract_dir, "temp")?;
notifier.debug("Loading image layers...");
let layers = Self::load_layers_from_dir(&extract_dir)?;
notifier.info(&format!("Successfully loaded {} layers", layers.len()));
Ok(ExtractedImage {
extract_dir,
_temp_dir: temp_dir,
metadata,
layers,
})
}
/// Returns a copy of the metadata that was loaded when the image was
/// extracted.
///
/// `_image_name` is accepted but not used. This method always returns `Ok`.
pub fn metadata(&self, _image_name: &str) -> Result<ImageMetadata> {
    let snapshot = self.metadata.clone();
    Ok(snapshot)
}
/// Returns the `os` field of the image metadata.
///
/// `image_name` is forwarded to [`Self::metadata`]; any error from that call
/// is propagated.
pub fn os(&self, image_name: &str) -> Result<String> {
    self.metadata(image_name).map(|meta| meta.os)
}
/// Returns the `architecture` field of the image metadata.
///
/// `image_name` is forwarded to [`Self::metadata`]; any error from that call
/// is propagated.
pub fn architecture(&self, image_name: &str) -> Result<String> {
    self.metadata(image_name).map(|meta| meta.architecture)
}
/// Returns a cloned list of the layers loaded when the image was extracted.
///
/// This method always returns `Ok`.
pub fn layers(&self) -> Result<Vec<Layer>> {
    let copied = self.layers.to_vec();
    Ok(copied)
}
pub fn extract_layer_to<P: AsRef<Path>>(
&self,
layer_tarball: &Path,
output_dir: P,
) -> Result<()> {
let output_dir = output_dir.as_ref();
fs::create_dir_all(output_dir)?;
Self::extract_tar_file(layer_tarball, output_dir)
}
/// Returns the directory the image tarball was unpacked into.
pub fn extract_dir(&self) -> &Path {
    self.extract_dir.as_path()
}
/// Extracts `tar_path` into `extract_dir` via `tar_extractor::extract_tar`,
/// attaching the tarball path to any error.
fn extract_tar_file(tar_path: &Path, extract_dir: &Path) -> Result<()> {
    let outcome = tar_extractor::extract_tar(tar_path, extract_dir);
    // The message is only built when extraction actually failed.
    outcome.with_context(|| format!("Failed to extract tar file: {tar_path:?}"))
}
fn load_metadata_from_dir(extract_dir: &Path, image_name: &str) -> Result<ImageMetadata> {
let manifest_path = extract_dir.join("manifest.json");
let manifest_content =
fs::read_to_string(&manifest_path).context("Failed to read manifest.json")?;
let manifest: Vec<serde_json::Value> =
serde_json::from_str(&manifest_content).context("Failed to parse manifest.json")?;
if manifest.is_empty() {
return Err(anyhow!("Empty manifest.json"));
}
let config_file = manifest[0]["Config"]
.as_str()
.ok_or_else(|| anyhow!("Invalid manifest format - missing Config"))?;
let config_path = extract_dir.join(config_file);
let config_content = fs::read_to_string(&config_path)
.context(format!("Failed to read config file: {config_file}"))?;
let config: oci_spec::image::ImageConfiguration =
serde_json::from_str(&config_content).context("Failed to parse image configuration")?;
let mut metadata = metadata::from_oci_config(&config);
let index_path = extract_dir.join("index.json");
if index_path.exists() {
let index_content =
fs::read_to_string(&index_path).context("Failed to read index.json")?;
let index: serde_json::Value =
serde_json::from_str(&index_content).context("Failed to parse index.json")?;
if let Some(manifests) = index["manifests"].as_array() {
if let Some(first_manifest) = manifests.first() {
if let Some(digest) = first_manifest["digest"].as_str() {
metadata.id = digest.to_string();
}
}
}
}
if metadata.id.is_empty() {
if let Some(digest_hash) = config_file.strip_prefix("blobs/sha256/") {
metadata.id = format!("sha256:{digest_hash}");
} else if let Some(digest_hash) = config_file.strip_suffix(".json") {
metadata.id = format!("sha256:{digest_hash}");
}
}
if let Some(tags) = manifest[0]["RepoTags"].as_array() {
metadata.repo_tags = tags
.iter()
.filter_map(|t| t.as_str().map(|s| s.to_string()))
.collect();
}
if metadata.repo_tags.is_empty() {
let path = PathBuf::from(image_name);
if let Some(filename) = path.file_stem() {
if let Some(name) = filename.to_str() {
metadata.repo_tags.push(format!("{name}:latest"));
}
}
}
Ok(metadata)
}
fn load_layers_from_dir(extract_dir: &Path) -> Result<Vec<Layer>> {
let manifest_path = extract_dir.join("manifest.json");
let manifest_content =
fs::read_to_string(&manifest_path).context("Failed to read manifest.json")?;
let manifest: Vec<serde_json::Value> =
serde_json::from_str(&manifest_content).context("Failed to parse manifest.json")?;
if manifest.is_empty() {
return Err(anyhow!("Empty manifest.json"));
}
let config_file = manifest[0]["Config"]
.as_str()
.ok_or_else(|| anyhow!("Invalid manifest format - missing Config"))?;
let config_path = extract_dir.join(config_file);
let config_content = fs::read_to_string(&config_path)
.context(format!("Failed to read config file: {config_file}"))?;
let config: serde_json::Value =
serde_json::from_str(&config_content).context("Failed to parse image configuration")?;
let history = config["history"]
.as_array()
.ok_or_else(|| anyhow!("No history found in image configuration"))?;
let layers_list = manifest[0]["Layers"]
.as_array()
.ok_or_else(|| anyhow!("Invalid manifest format - missing Layers array"))?;
let mut layer_tarballs = Vec::new();
for layer_ref in layers_list {
let layer_path = layer_ref
.as_str()
.ok_or_else(|| anyhow!("Invalid layer reference"))?;
let full_path = extract_dir.join(layer_path);
layer_tarballs.push(full_path);
}
let mut current_tarball_idx = layer_tarballs.len();
let mut layers = Vec::new();
for (i, hist_entry) in history.iter().enumerate().rev() {
let created_at_str = hist_entry["created"]
.as_str()
.unwrap_or("1970-01-01T00:00:00Z");
let created_at = DateTime::parse_from_rfc3339(created_at_str)
.map(|dt| dt.with_timezone(&Utc))
.unwrap_or_else(|_| Utc::now());
let created_by = hist_entry["created_by"].as_str().unwrap_or("");
let command = if created_by.contains("/bin/sh -c #(nop) ") {
created_by
.replace("/bin/sh -c #(nop) ", "")
.trim_start()
.to_string()
} else if created_by.contains("/bin/sh -c ") {
created_by
.replace("/bin/sh -c ", "")
.trim_start()
.to_string()
} else {
created_by.to_string()
};
let is_empty = hist_entry["empty_layer"].as_bool().unwrap_or(false);
let comment = hist_entry["comment"].as_str().map(|s| s.to_string());
let (id, tarball_path, digest) = if !is_empty && current_tarball_idx > 0 {
current_tarball_idx -= 1;
let tarball = &layer_tarballs[current_tarball_idx];
let id = tarball
.file_name()
.map(|name| name.to_string_lossy().to_string())
.unwrap_or_else(|| format!("layer-{i}"));
let digest =
crate::digest_tracker::DigestTracker::extract_digest_from_tarball_path(tarball);
(id, Some(tarball.clone()), digest)
} else {
let id = format!("<empty-layer-{i}>");
let digest = if is_empty {
"empty".to_string()
} else {
"no-tarball".to_string()
};
(id, None, digest)
};
layers.push(Layer {
id,
command,
created_at,
is_empty,
tarball_path,
digest,
comment,
});
}
layers.reverse();
Ok(layers)
}
}