docker_image_pusher/
digest.rs

1//! SHA256 digest utilities for Docker image processing
2//!
3//! This module provides centralized functionality for computing, validating, and formatting SHA256 digests used throughout the Docker image pusher.
4//! It ensures that digests are calculated in accordance with Docker/OCI standards, especially for gzip-compressed layers.
5
6use crate::error::{PusherError, Result};
7use sha2::Digest;
8
/// Hex-encoded SHA256 digest of zero-length input (no `sha256:` prefix),
/// used as the standard digest for empty files/layers.
pub const EMPTY_LAYER_DIGEST: &str =
    "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";

/// The empty-layer digest in full Docker form, i.e. the same hex value as
/// [`EMPTY_LAYER_DIGEST`] carrying the `sha256:` prefix.
pub const EMPTY_LAYER_DIGEST_FULL: &str =
    "sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
16
/// Utilities for working with SHA256 digests in Docker context.
///
/// This is a field-less unit struct: it holds no state and only serves as a
/// namespace for the associated functions defined in its `impl` block.
pub struct DigestUtils;
19
20impl DigestUtils {
21    /// Compute SHA256 digest from byte data
22    ///
23    /// 注意:Docker 镜像层的 digest 必须基于 gzip 压缩后的 tar 字节流计算。
24    /// 传入 data 必须是 gzip 字节流,否则会导致 digest 校验失败。
25    pub fn compute_sha256(data: &[u8]) -> String {
26        let mut hasher = sha2::Sha256::new();
27        hasher.update(data);
28        format!("{:x}", hasher.finalize())
29    }
30
31    /// Compute SHA256 digest from string data
32    pub fn compute_sha256_str(data: &str) -> String {
33        Self::compute_sha256(data.as_bytes())
34    }
35
36    /// Compute full Docker digest (with sha256: prefix) from byte data
37    ///
38    /// 注意:Docker 镜像层的 digest 必须基于 gzip 压缩后的 tar 字节流计算。
39    /// 传入 data 必须是 gzip 字节流,否则会导致 digest 校验失败。
40    pub fn compute_docker_digest(data: &[u8]) -> String {
41        format!("sha256:{}", Self::compute_sha256(data))
42    }
43
44    /// Compute full Docker digest (with sha256: prefix) from string data
45    pub fn compute_docker_digest_str(data: &str) -> String {
46        format!("sha256:{}", Self::compute_sha256_str(data))
47    }
48
49    /// Validate SHA256 hex string (64 characters, all hex)
50    pub fn is_valid_sha256_hex(digest: &str) -> bool {
51        digest.len() == 64 && digest.chars().all(|c| c.is_ascii_hexdigit())
52    }
53
54    /// Validate full Docker digest format (sha256:xxxxx)
55    pub fn is_valid_docker_digest(digest: &str) -> bool {
56        if let Some(hex_part) = digest.strip_prefix("sha256:") {
57            Self::is_valid_sha256_hex(hex_part)
58        } else {
59            false
60        }
61    }
62
63    /// Normalize digest to full Docker format (add sha256: prefix if missing)
64    pub fn normalize_digest(digest: &str) -> Result<String> {
65        if digest.starts_with("sha256:") {
66            // Validate existing format
67            if digest.len() != 71 {
68                return Err(PusherError::Validation(format!(
69                    "Invalid SHA256 digest length: expected 71 characters, got {}",
70                    digest.len()
71                )));
72            }
73            let hex_part = &digest[7..];
74            if !Self::is_valid_sha256_hex(hex_part) {
75                return Err(PusherError::Validation(format!(
76                    "Invalid SHA256 digest format: contains non-hex characters"
77                )));
78            }
79            Ok(digest.to_string())
80        } else {
81            // Add prefix and validate
82            if !Self::is_valid_sha256_hex(digest) {
83                return Err(PusherError::Validation(format!(
84                    "Invalid SHA256 digest: expected 64 hex characters, got '{}'",
85                    digest
86                )));
87            }
88            Ok(format!("sha256:{}", digest))
89        }
90    }
91
92    /// Extract SHA256 hex part from full Docker digest
93    pub fn extract_hex_part(digest: &str) -> Result<&str> {
94        if let Some(hex_part) = digest.strip_prefix("sha256:") {
95            if Self::is_valid_sha256_hex(hex_part) {
96                Ok(hex_part)
97            } else {
98                Err(PusherError::Validation(format!(
99                    "Invalid SHA256 hex part in digest: {}",
100                    digest
101                )))
102            }
103        } else {
104            Err(PusherError::Validation(format!(
105                "Digest missing sha256: prefix: {}",
106                digest
107            )))
108        }
109    }
110
111    /// Check if a digest represents an empty layer
112    pub fn is_empty_layer_digest(digest: &str) -> bool {
113        digest == EMPTY_LAYER_DIGEST_FULL || digest == EMPTY_LAYER_DIGEST
114    }
115
116    /// Get the standard empty layer digest with full Docker format
117    pub fn empty_layer_digest() -> String {
118        EMPTY_LAYER_DIGEST_FULL.to_string()
119    }
120
121    /// Verify data matches expected digest
122    ///
123    /// 注意:Docker 镜像层的 digest 校验必须基于 gzip 压缩后的 tar 字节流。
124    /// 传入 data 必须是 gzip 字节流,否则会导致校验失败。
125    pub fn verify_data_integrity(data: &[u8], expected_digest: &str) -> Result<()> {
126        // 检查数据是否为 gzip 格式(通过魔数 0x1f 0x8b)
127        let is_gzipped = data.len() >= 2 && data[0] == 0x1f && data[1] == 0x8b;
128
129        // 计算 SHA256
130        let computed = Self::compute_sha256(data);
131        let expected_hex = Self::extract_hex_part(expected_digest)?;
132
133        if computed != expected_hex {
134            // 添加额外的调试信息
135            let data_head = if data.len() >= 20 {
136                format!(
137                    "{:02x} {:02x} {:02x} {:02x} {:02x} ...",
138                    data[0], data[1], data[2], data[3], data[4]
139                )
140            } else if !data.is_empty() {
141                format!("{:02x} ...", data[0])
142            } else {
143                "empty".to_string()
144            };
145
146            return Err(PusherError::Validation(format!(
147                "Data integrity check failed: expected {}, computed sha256:{}, data is gzipped: {}, data head: {}",
148                expected_digest, computed, is_gzipped, data_head
149            )));
150        }
151
152        Ok(())
153    }
154
155    /// Verify data integrity by computing digest on the fly from a stream
156    ///
157    /// 注意:Docker 镜像层的 digest 校验必须基于 gzip 压缩后的 tar 字节流。
158    /// 传入 reader 必须输出 gzip 字节流,否则会导致校验失败。
159    pub async fn verify_stream_integrity<R>(mut reader: R, expected_digest: &str) -> Result<Vec<u8>>
160    where
161        R: tokio::io::AsyncRead + Unpin,
162    {
163        use sha2::Digest;
164        use tokio::io::AsyncReadExt;
165
166        let mut hasher = sha2::Sha256::new();
167        let mut buffer = Vec::new();
168        let mut chunk = [0u8; 8192];
169
170        loop {
171            let n = reader
172                .read(&mut chunk)
173                .await
174                .map_err(|e| PusherError::Io(format!("Failed to read stream: {}", e)))?;
175
176            if n == 0 {
177                break;
178            }
179
180            hasher.update(&chunk[..n]);
181            buffer.extend_from_slice(&chunk[..n]);
182        }
183
184        let computed = format!("{:x}", hasher.finalize());
185        let expected_hex = Self::extract_hex_part(expected_digest)?;
186
187        if computed != expected_hex {
188            return Err(PusherError::Validation(format!(
189                "Stream integrity check failed: expected {}, computed sha256:{}. Data size: {} bytes",
190                expected_digest,
191                computed,
192                buffer.len()
193            )));
194        }
195
196        Ok(buffer)
197    }
198
199    /// Extract digest from Docker layer path (various formats)
200    pub fn extract_digest_from_layer_path(layer_path: &str) -> Option<String> {
201        // Docker tar文件中的层路径通常是这样的格式:
202        // "abc123def456.../layer.tar"
203        // "blobs/sha256/abc123def456..."
204        // "abc123def456.tar"
205
206        // 首先尝试目录名格式 (最常见的格式)
207        if let Some(slash_pos) = layer_path.find('/') {
208            let digest_part = &layer_path[..slash_pos];
209            if Self::is_valid_sha256_hex(digest_part) {
210                return Some(digest_part.to_string());
211            }
212        }
213        // 尝试blobs格式
214        if layer_path.contains("blobs/sha256/") {
215            if let Some(start) = layer_path.find("blobs/sha256/") {
216                let after_prefix = &layer_path[start + 13..];
217                let end = after_prefix.find('/').unwrap_or(after_prefix.len());
218                let digest_part = &after_prefix[..end];
219                if Self::is_valid_sha256_hex(digest_part) {
220                    return Some(digest_part.to_string());
221                }
222            }
223        }
224
225        // 尝试文件名格式
226        if let Some(dot_pos) = layer_path.rfind('.') {
227            let digest_part = &layer_path[..dot_pos];
228            if Self::is_valid_sha256_hex(digest_part) {
229                return Some(digest_part.to_string());
230            }
231        }
232
233        // 尝试完整路径作为digest (某些特殊情况)
234        if Self::is_valid_sha256_hex(layer_path) {
235            return Some(layer_path.to_string());
236        }
237
238        None
239    }
240
241    /// Generate a fallback digest from path when real digest cannot be extracted
242    pub fn generate_path_based_digest(layer_path: &str) -> String {
243        let mut hasher = sha2::Sha256::new();
244        hasher.update(layer_path.as_bytes());
245        format!("sha256:{:x}", hasher.finalize())
246    }
247
248    /// Format digest for display (truncated for readability)
249    pub fn format_digest_short(digest: &str) -> String {
250        if digest.len() > 23 {
251            format!("{}...", &digest[..23])
252        } else {
253            digest.to_string()
254        }
255    }
256
257    /// Batch validate multiple digests
258    pub fn validate_digests(digests: &[&str]) -> Result<()> {
259        for (i, digest) in digests.iter().enumerate() {
260            if !Self::is_valid_docker_digest(digest) {
261                return Err(PusherError::Validation(format!(
262                    "Invalid digest format at index {}: {}",
263                    i, digest
264                )));
265            }
266        }
267        Ok(())
268    }
269}
270
/// Unit tests for [`DigestUtils`]: hashing, validation, normalization,
/// path-based digest extraction, and integrity verification.
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare hex digest of a known input.
    #[test]
    fn test_compute_sha256() {
        let data = b"hello world";
        let digest = DigestUtils::compute_sha256(data);
        assert_eq!(
            digest,
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    /// Full Docker digest (with `sha256:` prefix) of a known input.
    #[test]
    fn test_compute_docker_digest() {
        let data = b"hello world";
        let digest = DigestUtils::compute_docker_digest(data);
        assert_eq!(
            digest,
            "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    /// Hashing zero bytes must yield the `EMPTY_LAYER_DIGEST` constant.
    #[test]
    fn test_empty_layer_digest() {
        let empty_data = b"";
        let computed = DigestUtils::compute_sha256(empty_data);
        assert_eq!(computed, EMPTY_LAYER_DIGEST);
    }

    /// Full-form validation: accepts a prefixed digest, rejects a too-short
    /// hex part and a digest without the prefix.
    #[test]
    fn test_validate_digest() {
        assert!(DigestUtils::is_valid_docker_digest(
            "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        ));
        assert!(!DigestUtils::is_valid_docker_digest("sha256:invalid"));
        assert!(!DigestUtils::is_valid_docker_digest(
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        ));
    }

    /// Bare hex input gets the `sha256:` prefix added.
    #[test]
    fn test_normalize_digest() {
        let hex_only = "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";
        let normalized = DigestUtils::normalize_digest(hex_only).unwrap();
        assert_eq!(
            normalized,
            "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
        );
    }

    /// Every supported layer path layout yields the same bare hex digest.
    #[test]
    fn test_extract_digest_from_layer_path() {
        let paths = vec![
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9/layer.tar",
            "blobs/sha256/b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9",
            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9.tar",
        ];

        for path in paths {
            let digest = DigestUtils::extract_digest_from_layer_path(path);
            assert_eq!(
                digest,
                Some(
                    "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9".to_string()
                )
            );
        }
    }

    /// Integrity verification succeeds for the matching digest and fails for
    /// a different one.
    #[test]
    fn test_verify_data_integrity() {
        let data = b"hello world";
        let digest = "sha256:b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9";
        assert!(DigestUtils::verify_data_integrity(data, digest).is_ok());

        let wrong_digest =
            "sha256:0000000000000000000000000000000000000000000000000000000000000000";
        assert!(DigestUtils::verify_data_integrity(data, wrong_digest).is_err());
    }

    /// The Docker digest of gzip data equals `sha256:` followed by the bare
    /// hex digest of the same bytes.
    #[test]
    fn test_gzip_digest_matches_docker_standard() {
        use flate2::Compression;
        use flate2::write::GzEncoder;
        use std::io::Write;

        // Simulate the content of a tar layer
        let tar_data = b"dummy tar layer content for test";
        // Compress it with gzip
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(tar_data).unwrap();
        let gzipped = encoder.finish().unwrap();

        // Compute the digest
        let digest = DigestUtils::compute_docker_digest(&gzipped);
        // Compute the sha256 manually
        let expected = format!("sha256:{}", DigestUtils::compute_sha256(&gzipped));
        assert_eq!(digest, expected);
    }

    /// The digest of the raw bytes must differ from the digest of their
    /// gzip-compressed form.
    #[test]
    fn test_digest_differs_for_raw_and_gzip() {
        use flate2::Compression;
        use flate2::write::GzEncoder;
        use std::io::Write;
        let tar_data = b"dummy tar layer content for test2";
        let mut encoder = GzEncoder::new(Vec::new(), Compression::default());
        encoder.write_all(tar_data).unwrap();
        let gzipped = encoder.finish().unwrap();
        let digest_gzip = DigestUtils::compute_docker_digest(&gzipped);
        let digest_raw = DigestUtils::compute_docker_digest(tar_data);
        assert_ne!(
            digest_gzip, digest_raw,
            "gzip 和 raw tar 的 digest 必须不同"
        );
    }
}