Skip to main content

pulith_fetch/codec/
verify.rs

1//! Stream verification functionality.
2//!
3//! This module provides stream transformation for verifying
4//! downloaded content integrity using various checksum algorithms.
5
6use crate::error::{Error, Result};
7use pulith_verify::{Hasher, Sha256Hasher};
8
/// Hash algorithms a checksum may be expressed in.
///
/// Every variant can be named and parsed, but the variants marked
/// "not yet implemented" cannot currently be used to hash data.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HashAlgorithm {
    /// SHA-256 algorithm
    Sha256,
    /// SHA-384 algorithm (not yet implemented)
    Sha384,
    /// SHA-512 algorithm (not yet implemented)
    Sha512,
    /// SHA-1 algorithm (not yet implemented)
    Sha1,
    /// MD5 algorithm (not yet implemented)
    Md5,
}

impl HashAlgorithm {
    /// Size in bytes of the raw digest this algorithm produces.
    ///
    /// A hex-encoded digest is twice this many characters long.
    pub fn digest_length(&self) -> usize {
        match *self {
            Self::Md5 => 16,
            Self::Sha1 => 20,
            Self::Sha256 => 32,
            Self::Sha384 => 48,
            Self::Sha512 => 64,
        }
    }

    /// Lower-case name of the algorithm, as used in `"<algorithm>:<hex>"` strings.
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Md5 => "md5",
            Self::Sha1 => "sha1",
            Self::Sha256 => "sha256",
            Self::Sha384 => "sha384",
            Self::Sha512 => "sha512",
        }
    }
}
47
48/// Checksum verification configuration.
49#[derive(Debug, Clone)]
50pub struct ChecksumConfig {
51    /// The hash algorithm to use
52    pub algorithm: HashAlgorithm,
53    /// Expected checksum value (hex-encoded)
54    pub expected: String,
55}
56
57impl ChecksumConfig {
58    /// Create a new checksum configuration.
59    pub fn new(algorithm: HashAlgorithm, expected: &str) -> Self {
60        Self {
61            algorithm,
62            expected: expected.to_lowercase(),
63        }
64    }
65
66    /// Parse checksum from string with algorithm prefix (e.g., "sha256:abc123").
67    pub fn from_string(checksum_str: &str) -> Result<Self> {
68        if let Some((algo, hash)) = checksum_str.split_once(':') {
69            let algorithm = match algo.to_lowercase().as_str() {
70                "sha256" => HashAlgorithm::Sha256,
71                "sha384" => HashAlgorithm::Sha384,
72                "sha512" => HashAlgorithm::Sha512,
73                "sha1" => HashAlgorithm::Sha1,
74                "md5" => HashAlgorithm::Md5,
75                _ => {
76                    return Err(Error::InvalidState(format!(
77                        "Unsupported hash algorithm: {}",
78                        algo
79                    )));
80                }
81            };
82
83            if hash.len() != algorithm.digest_length() * 2 {
84                return Err(Error::InvalidState(format!(
85                    "Invalid checksum length for {}: expected {}, got {}",
86                    algo,
87                    algorithm.digest_length() * 2,
88                    hash.len()
89                )));
90            }
91
92            Ok(Self::new(algorithm, hash))
93        } else {
94            // Default to SHA256 if no algorithm specified
95            if checksum_str.len() != 64 {
96                return Err(Error::InvalidState(
97                    "Invalid checksum length for SHA256: expected 64 characters".to_string(),
98                ));
99            }
100            Ok(Self::new(HashAlgorithm::Sha256, checksum_str))
101        }
102    }
103}
104
105/// Stream verifier for checksum verification.
106pub struct StreamVerifier<H: Hasher> {
107    hasher: Option<H>,
108    config: ChecksumConfig,
109    bytes_processed: usize,
110    finalized: bool,
111}
112
113impl StreamVerifier<Sha256Hasher> {
114    /// Create a new stream verifier with the given configuration.
115    pub fn new(config: ChecksumConfig) -> Result<Self> {
116        let hasher = match config.algorithm {
117            HashAlgorithm::Sha256 => Some(Sha256Hasher::new()),
118            _ => {
119                return Err(Error::InvalidState(format!(
120                    "Hash algorithm {:?} not yet implemented",
121                    config.algorithm
122                )));
123            }
124        };
125
126        Ok(Self {
127            hasher,
128            config,
129            bytes_processed: 0,
130            finalized: false,
131        })
132    }
133}
134
135impl<H: Hasher> StreamVerifier<H> {
136    /// Update the verifier with new data.
137    pub fn update(&mut self, data: &[u8]) -> Result<()> {
138        if self.finalized {
139            return Err(Error::InvalidState(
140                "Verifier already finalized".to_string(),
141            ));
142        }
143
144        if let Some(ref mut hasher) = self.hasher {
145            hasher.update(data);
146        }
147        self.bytes_processed += data.len();
148        Ok(())
149    }
150
151    /// Finalize verification and check if the checksum matches.
152    pub fn finalize(&mut self) -> Result<bool> {
153        if self.finalized {
154            return Err(Error::InvalidState(
155                "Verifier already finalized".to_string(),
156            ));
157        }
158
159        if let Some(hasher) = self.hasher.take() {
160            let actual = hasher.finalize();
161            let actual_hex = hex::encode(actual);
162            self.finalized = true;
163            Ok(actual_hex == self.config.expected)
164        } else {
165            Err(Error::InvalidState("No hasher available".to_string()))
166        }
167    }
168
169    /// Get the number of bytes processed so far.
170    pub fn bytes_processed(&self) -> usize {
171        self.bytes_processed
172    }
173
174    /// Get the configured algorithm.
175    pub fn algorithm(&self) -> HashAlgorithm {
176        self.config.algorithm
177    }
178
179    /// Get the expected checksum.
180    pub fn expected_checksum(&self) -> &str {
181        &self.config.expected
182    }
183
184    /// Check if the verifier has been finalized.
185    pub fn is_finalized(&self) -> bool {
186        self.finalized
187    }
188}
189
190/// Multiple checksum verifier for verifying against multiple algorithms.
191pub struct MultiVerifier {
192    verifiers: Vec<StreamVerifier<Sha256Hasher>>,
193    require_all: bool,
194}
195
196impl MultiVerifier {
197    /// Create a new multi-verifier.
198    ///
199    /// If `require_all` is true, all checksums must match.
200    /// If false, at least one checksum must match.
201    pub fn new(configs: Vec<ChecksumConfig>, require_all: bool) -> Result<Self> {
202        let verifiers: Result<Vec<_>> = configs.into_iter().map(StreamVerifier::new).collect();
203
204        Ok(Self {
205            verifiers: verifiers?,
206            require_all,
207        })
208    }
209
210    /// Update all verifiers with new data.
211    pub fn update(&mut self, data: &[u8]) -> Result<()> {
212        for verifier in &mut self.verifiers {
213            verifier.update(data)?;
214        }
215        Ok(())
216    }
217
218    /// Finalize verification and check if checksums match.
219    pub fn finalize(&mut self) -> Result<bool> {
220        let mut results = Vec::new();
221        for verifier in &mut self.verifiers {
222            results.push(verifier.finalize()?);
223        }
224
225        if self.require_all {
226            Ok(results.iter().all(|&r| r))
227        } else {
228            Ok(results.iter().any(|&r| r))
229        }
230    }
231
232    /// Get the number of verifiers.
233    pub fn verifier_count(&self) -> usize {
234        self.verifiers.len()
235    }
236}
237
238/// Convenience function to verify data in one go.
239pub fn verify_checksum(data: &[u8], config: &ChecksumConfig) -> Result<bool> {
240    let mut verifier = StreamVerifier::new(config.clone())?;
241    verifier.update(data)?;
242    verifier.finalize()
243}
244
245/// Convenience function to verify data with multiple checksums.
246pub fn verify_multiple_checksums(
247    data: &[u8],
248    configs: Vec<ChecksumConfig>,
249    require_all: bool,
250) -> Result<bool> {
251    let mut verifier = MultiVerifier::new(configs, require_all)?;
252    verifier.update(data)?;
253    verifier.finalize()
254}
255
256/// Parse multiple checksums from a string.
257///
258/// Supports formats like:
259/// - "sha256:abc123 sha512:def456"
260/// - "sha256:abc123\nsha512:def456"
261pub fn parse_multiple_checksums(checksums_str: &str) -> Result<Vec<ChecksumConfig>> {
262    let mut configs = Vec::new();
263
264    for line in checksums_str.lines() {
265        let line = line.trim();
266        if line.is_empty() || line.starts_with('#') {
267            continue;
268        }
269
270        // Split by whitespace to handle multiple checksums on one line
271        for checksum_str in line.split_whitespace() {
272            configs.push(ChecksumConfig::from_string(checksum_str)?);
273        }
274    }
275
276    Ok(configs)
277}
278
#[cfg(test)]
mod tests {
    use super::*;

    /// Input shared by the hashing tests.
    const HELLO: &[u8] = b"Hello, World!";
    /// SHA-256 digest of [`HELLO`], hex-encoded.
    const HELLO_SHA256: &str = "dffd6021bb2bd5b0af676290809ec3a53191dd81c7f70a4b28688a362182986f";
    /// Arbitrary 64-character hex string used for parsing tests.
    const SAMPLE_HEX_64: &str = "abc123def456abc123def456abc123def456abc123def456abc123def4567890";

    /// Configuration whose checksum matches [`HELLO`].
    fn matching_config() -> ChecksumConfig {
        ChecksumConfig::new(HashAlgorithm::Sha256, HELLO_SHA256)
    }

    /// Well-formed configuration whose checksum does not match [`HELLO`].
    fn mismatching_config() -> ChecksumConfig {
        ChecksumConfig::new(HashAlgorithm::Sha256, &"0".repeat(64))
    }

    #[test]
    fn test_checksum_config_creation() {
        let config = ChecksumConfig::new(HashAlgorithm::Sha256, "abc123");
        assert_eq!(config.algorithm, HashAlgorithm::Sha256);
        assert_eq!(config.expected, "abc123");
    }

    #[test]
    fn test_checksum_config_from_string() {
        // With algorithm prefix - need 64 chars for SHA256
        let prefixed = format!("sha256:{}", SAMPLE_HEX_64);
        let config = ChecksumConfig::from_string(&prefixed).unwrap();
        assert_eq!(config.algorithm, HashAlgorithm::Sha256);
        assert_eq!(config.expected, SAMPLE_HEX_64);

        // Without algorithm prefix (defaults to SHA256) - need 64 chars
        let config = ChecksumConfig::from_string(SAMPLE_HEX_64).unwrap();
        assert_eq!(config.algorithm, HashAlgorithm::Sha256);
        assert_eq!(config.expected, SAMPLE_HEX_64);
    }

    #[test]
    fn test_checksum_config_invalid_algorithm() {
        let result = ChecksumConfig::from_string("invalid:abc123");
        assert!(result.is_err());
    }

    #[test]
    fn test_checksum_config_invalid_length() {
        // Invalid SHA256 length
        let result = ChecksumConfig::from_string("sha256:abc");
        assert!(result.is_err());

        // Invalid default length
        let result = ChecksumConfig::from_string("abc");
        assert!(result.is_err());
    }

    #[test]
    fn test_stream_verifier() {
        let mut verifier = StreamVerifier::new(matching_config()).unwrap();
        verifier.update(HELLO).unwrap();
        let result = verifier.finalize().unwrap();

        assert!(result);
        assert_eq!(verifier.bytes_processed(), HELLO.len());
    }

    #[test]
    fn test_stream_verifier_wrong_checksum() {
        let config = ChecksumConfig::new(HashAlgorithm::Sha256, "wrong_checksum");

        let mut verifier = StreamVerifier::new(config).unwrap();
        verifier.update(HELLO).unwrap();
        let result = verifier.finalize().unwrap();

        assert!(!result);
    }

    #[test]
    fn test_stream_verifier_partial_updates() {
        let mut verifier = StreamVerifier::new(matching_config()).unwrap();

        // Update in chunks
        verifier.update(&HELLO[..5]).unwrap();
        verifier.update(&HELLO[5..]).unwrap();

        let result = verifier.finalize().unwrap();
        assert!(result);
    }

    #[test]
    fn test_multi_verifier_all_required() {
        // Every checksum matches.
        let mut verifier = MultiVerifier::new(vec![matching_config()], true).unwrap();
        verifier.update(HELLO).unwrap();
        assert!(verifier.finalize().unwrap());

        // A single mismatch must fail the verification when all are required;
        // without this case the test would pass even if `require_all` were
        // ignored.
        let configs = vec![matching_config(), mismatching_config()];
        let mut verifier = MultiVerifier::new(configs, true).unwrap();
        assert_eq!(verifier.verifier_count(), 2);
        verifier.update(HELLO).unwrap();
        assert!(!verifier.finalize().unwrap());
    }

    #[test]
    fn test_multi_verifier_any_required() {
        // One matching checksum is enough, even next to a mismatching one.
        let configs = vec![mismatching_config(), matching_config()];
        let mut verifier = MultiVerifier::new(configs, false).unwrap();
        verifier.update(HELLO).unwrap();
        assert!(verifier.finalize().unwrap());

        // No matching checksum at all must still fail.
        let mut verifier = MultiVerifier::new(vec![mismatching_config()], false).unwrap();
        verifier.update(HELLO).unwrap();
        assert!(!verifier.finalize().unwrap());
    }

    #[test]
    fn test_convenience_functions() {
        let config = matching_config();

        // Test single checksum verification
        let result = verify_checksum(HELLO, &config).unwrap();
        assert!(result);

        // Test multiple checksum verification
        let configs = vec![config.clone()];
        let result = verify_multiple_checksums(HELLO, configs, true).unwrap();
        assert!(result);
    }

    #[test]
    fn test_parse_multiple_checksums() {
        let input = format!("sha256:{}", SAMPLE_HEX_64);

        let configs = parse_multiple_checksums(&input).unwrap();
        assert_eq!(configs.len(), 1);
        assert_eq!(configs[0].algorithm, HashAlgorithm::Sha256);
        assert_eq!(configs[0].expected, SAMPLE_HEX_64);
    }
}
439
/// Hashing zero bytes must yield the well-known SHA-256 digest of empty input.
#[test]
fn test_empty_data() {
    let empty_digest = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855";
    let mut checker =
        StreamVerifier::new(ChecksumConfig::new(HashAlgorithm::Sha256, empty_digest)).unwrap();

    checker.update(b"").unwrap();

    assert!(checker.finalize().unwrap());
}
454
/// A 10 000-byte payload verifies against a digest computed independently
/// with a bare `Sha256Hasher`.
#[test]
fn test_large_data() {
    // Bytes 0, 1, ..., 255 repeated until 10 000 bytes have been produced.
    let payload: Vec<u8> = (0..=u8::MAX).cycle().take(10000).collect();

    // Reference digest, computed without going through the verifier.
    let mut reference = Sha256Hasher::new();
    reference.update(&payload);
    let digest_hex = hex::encode(reference.finalize());

    let mut checker =
        StreamVerifier::new(ChecksumConfig::new(HashAlgorithm::Sha256, &digest_hex)).unwrap();
    checker.update(&payload).unwrap();

    assert!(checker.finalize().unwrap());
}
472
/// Constructing a verifier for an algorithm without an implementation fails.
#[test]
fn test_unsupported_algorithm() {
    let sha512_config = ChecksumConfig::new(HashAlgorithm::Sha512, "abc123");
    assert!(StreamVerifier::new(sha512_config).is_err());
}