docker_image_pusher/image/
digest.rs

1//! SHA256 digest utilities for Docker image processing
2//!
3//! This module provides centralized functionality for computing, validating, and formatting SHA256 digests used throughout the Docker image pusher.
4//! It ensures that digests are calculated in accordance with Docker/OCI standards, especially for gzip-compressed layers.
5
6use crate::error::{RegistryError, Result};
7use sha2::Digest;
8
/// Lowercase hex SHA256 of zero-length input, without the `sha256:` prefix.
///
/// This is the value treated as the digest of an empty file/layer.
pub const EMPTY_LAYER_DIGEST: &str =
    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

/// The empty-input SHA256 in full Docker digest form, i.e. the same 64 hex
/// characters preceded by the `sha256:` prefix.
pub const EMPTY_LAYER_DIGEST_FULL: &str =
    "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
16
/// Namespace type grouping the SHA256 digest helpers used in the Docker context.
///
/// The struct has no fields; its helpers are associated functions called as
/// `DigestUtils::function_name(..)`.
pub struct DigestUtils;
19
20impl DigestUtils {
21    /// Compute SHA256 digest from byte data
22    ///
23    /// Note: Docker layer digests must be calculated from gzip-compressed tar streams.
24    /// The input data must be a gzip byte stream, otherwise digest verification will fail.
25    pub fn compute_sha256(data: &[u8]) -> String {
26        let mut hasher = sha2::Sha256::new();
27        hasher.update(data);
28        format!("{:x}", hasher.finalize())
29    }
30
31    /// Compute SHA256 digest from string data
32    pub fn compute_sha256_str(data: &str) -> String {
33        Self::compute_sha256(data.as_bytes())
34    }
35
36    /// Compute full Docker digest (with sha256: prefix) from byte data
37    ///
38    /// Note: Docker layer digests must be calculated from gzip-compressed tar streams.
39    /// The input data must be a gzip byte stream, otherwise digest verification will fail.
40    pub fn compute_docker_digest(data: &[u8]) -> String {
41        format!("sha256:{}", Self::compute_sha256(data))
42    }
43
44    /// Compute full Docker digest (with sha256: prefix) from string data
45    pub fn compute_docker_digest_str(data: &str) -> String {
46        format!("sha256:{}", Self::compute_sha256_str(data))
47    }
48
49    /// Validate SHA256 hex string (64 characters, all hex)
50    pub fn is_valid_sha256_hex(digest: &str) -> bool {
51        digest.len() == 64 && digest.chars().all(|c| c.is_ascii_hexdigit())
52    }
53
54    /// Validate full Docker digest format (sha256:xxxxx)
55    pub fn is_valid_docker_digest(digest: &str) -> bool {
56        if let Some(hex_part) = digest.strip_prefix("sha256:") {
57            Self::is_valid_sha256_hex(hex_part)
58        } else {
59            false
60        }
61    }
62
63    /// Normalize digest to full Docker format (add sha256: prefix if missing)
64    pub fn normalize_digest(digest: &str) -> Result<String> {
65        if digest.starts_with("sha256:") {
66            // Validate existing format
67            if digest.len() != 71 {
68                return Err(RegistryError::Validation(format!(
69                    "Invalid SHA256 digest length: expected 71 characters, got {}",
70                    digest.len()
71                )));
72            }
73            let hex_part = &digest[7..];
74            if !Self::is_valid_sha256_hex(hex_part) {
75                return Err(RegistryError::Validation(format!(
76                    "Invalid SHA256 digest format: contains non-hex characters"
77                )));
78            }
79            Ok(digest.to_string())
80        } else {
81            // Add prefix and validate
82            if !Self::is_valid_sha256_hex(digest) {
83                return Err(RegistryError::Validation(format!(
84                    "Invalid SHA256 digest: expected 64 hex characters, got '{}'",
85                    digest
86                )));
87            }
88            Ok(format!("sha256:{}", digest))
89        }
90    }
91
92    /// Extract SHA256 hex part from full Docker digest
93    pub fn extract_hex_part(digest: &str) -> Result<&str> {
94        if let Some(hex_part) = digest.strip_prefix("sha256:") {
95            if Self::is_valid_sha256_hex(hex_part) {
96                Ok(hex_part)
97            } else {
98                Err(RegistryError::Validation(format!(
99                    "Invalid SHA256 hex part in digest: {}",
100                    digest
101                )))
102            }
103        } else {
104            Err(RegistryError::Validation(format!(
105                "Digest missing sha256: prefix: {}",
106                digest
107            )))
108        }
109    }
110
111    /// Check if a digest represents an empty layer
112    pub fn is_empty_layer_digest(digest: &str) -> bool {
113        digest == EMPTY_LAYER_DIGEST_FULL || digest == EMPTY_LAYER_DIGEST
114    }
115
116    /// Get the standard empty layer digest with full Docker format
117    pub fn empty_layer_digest() -> String {
118        EMPTY_LAYER_DIGEST_FULL.to_string()
119    }
120
121    /// Verify data matches expected digest
122    ///
123    /// Note: Docker layer digest verification must be based on gzip-compressed tar streams.
124    /// The input data must be a gzip byte stream, otherwise verification will fail.
125    pub fn verify_data_integrity(data: &[u8], expected_digest: &str) -> Result<()> {
126        // Calculate SHA256
127        let computed = Self::compute_sha256(data);
128        let expected_hex = Self::extract_hex_part(expected_digest)?;
129
130        if computed != expected_hex {
131            return Err(RegistryError::Validation(format!(
132                "Data integrity check failed: expected {}, computed sha256:{}",
133                expected_digest, computed
134            )));
135        }
136
137        Ok(())
138    }
139
140    /// Verify data integrity by computing digest on the fly from a stream
141    ///
142    /// Note: Docker layer digest verification must be based on gzip-compressed tar streams.
143    /// The input reader must output a gzip byte stream, otherwise verification will fail.
144    pub async fn verify_stream_integrity<R>(mut reader: R, expected_digest: &str) -> Result<Vec<u8>>
145    where
146        R: tokio::io::AsyncRead + Unpin,
147    {
148        use sha2::Digest;
149        use tokio::io::AsyncReadExt;
150
151        let mut hasher = sha2::Sha256::new();
152        let mut buffer = Vec::new();
153        let mut chunk = [0u8; 8192];
154
155        loop {
156            let n = reader
157                .read(&mut chunk)
158                .await
159                .map_err(|e| RegistryError::Io(format!("Failed to read stream: {}", e)))?;
160
161            if n == 0 {
162                break;
163            }
164
165            hasher.update(&chunk[..n]);
166            buffer.extend_from_slice(&chunk[..n]);
167        }
168
169        let computed = format!("{:x}", hasher.finalize());
170        let expected_hex = Self::extract_hex_part(expected_digest)?;
171
172        if computed != expected_hex {
173            return Err(RegistryError::Validation(format!(
174                "Stream integrity check failed: expected {}, computed sha256:{}. Data size: {} bytes",
175                expected_digest,
176                computed,
177                buffer.len()
178            )));
179        }
180
181        Ok(buffer)
182    }
183
184    /// Extract digest from Docker layer path (various formats)
185    pub fn extract_digest_from_layer_path(layer_path: &str) -> Option<String> {
186        // Docker tar文件中的层路径通常是这样的格式:
187        // "abc123def456.../layer.tar"
188        // "blobs/sha256/abc123def456..."
189        // "abc123def456.tar"
190
191        // 首先尝试目录名格式 (最常见的格式)
192        if let Some(slash_pos) = layer_path.find('/') {
193            let digest_part = &layer_path[..slash_pos];
194            if Self::is_valid_sha256_hex(digest_part) {
195                return Some(digest_part.to_string());
196            }
197        }
198        // 尝试blobs格式
199        if layer_path.contains("blobs/sha256/") {
200            if let Some(start) = layer_path.find("blobs/sha256/") {
201                let after_prefix = &layer_path[start + 13..];
202                let end = after_prefix.find('/').unwrap_or(after_prefix.len());
203                let digest_part = &after_prefix[..end];
204                if Self::is_valid_sha256_hex(digest_part) {
205                    return Some(digest_part.to_string());
206                }
207            }
208        }
209
210        // 尝试文件名格式
211        if let Some(dot_pos) = layer_path.rfind('.') {
212            let digest_part = &layer_path[..dot_pos];
213            if Self::is_valid_sha256_hex(digest_part) {
214                return Some(digest_part.to_string());
215            }
216        }
217
218        // 尝试完整路径作为digest (某些特殊情况)
219        if Self::is_valid_sha256_hex(layer_path) {
220            return Some(layer_path.to_string());
221        }
222
223        None
224    }
225
226    /// Generate a fallback digest from path when real digest cannot be extracted
227    pub fn generate_path_based_digest(layer_path: &str) -> String {
228        let mut hasher = sha2::Sha256::new();
229        hasher.update(layer_path.as_bytes());
230        format!("sha256:{:x}", hasher.finalize())
231    }
232
233    /// Format digest for display (truncated for readability)
234    pub fn format_digest_short(digest: &str) -> String {
235        if digest.len() > 23 {
236            format!("{}...", &digest[..23])
237        } else {
238            digest.to_string()
239        }
240    }
241
242    /// Batch validate multiple digests
243    pub fn validate_digests(digests: &[&str]) -> Result<()> {
244        for (i, digest) in digests.iter().enumerate() {
245            if !Self::is_valid_docker_digest(digest) {
246                return Err(RegistryError::Validation(format!(
247                    "Invalid digest format at index {}: {}",
248                    i, digest
249                )));
250            }
251        }
252        Ok(())
253    }
254}
255
#[cfg(test)]
mod tests {
    use super::*;

    /// Standard SHA256 test vector for the ASCII string "abc".
    const ABC_SHA256: &str = "ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad";

    /// Pins the hashing helpers to independently known values, so a regression in
    /// the hash or in the output formatting is actually detected.
    #[test]
    fn test_known_answer_vectors() {
        assert_eq!(DigestUtils::compute_sha256(b""), EMPTY_LAYER_DIGEST);
        assert_eq!(DigestUtils::compute_docker_digest(b""), EMPTY_LAYER_DIGEST_FULL);
        assert_eq!(DigestUtils::compute_sha256_str("abc"), ABC_SHA256);
        assert_eq!(
            DigestUtils::compute_docker_digest_str("abc"),
            format!("sha256:{}", ABC_SHA256)
        );
    }

    #[test]
    fn test_gzip_digest_matches_docker_standard() {
        use flate2::Compression;
        use flate2::write::GzEncoder;
        use std::io::Write;

        // Simulate tar layer content for testing
        let tar_data = b"dummy tar layer content for test";
        // Gzip compression
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(tar_data).unwrap();
        let gzipped = encoder.finish().unwrap();

        // Calculate digest
        let digest = DigestUtils::compute_docker_digest(&gzipped);
        // Manually calculate sha256
        let expected = format!("sha256:{}", DigestUtils::compute_sha256(&gzipped));
        assert_eq!(digest, expected);

        // The digest is well-formed and verifies against the gzip stream it was
        // computed from, but not against the uncompressed content.
        assert!(DigestUtils::is_valid_docker_digest(&digest));
        assert!(DigestUtils::verify_data_integrity(&gzipped, &digest).is_ok());
        assert!(DigestUtils::verify_data_integrity(tar_data, &digest).is_err());
    }
}
279}