Skip to main content

streaming_crypto/core_api/crypto/
digest.rs

1use std::fmt;
2use std::convert::TryFrom;
3use num_enum::TryFromPrimitive;
4
5use sha2::{Digest as _, Sha256, Sha512};
6use sha3::{Sha3_256, Sha3_512};
7
8use crate::{constants::digest_ids, utils::{enum_name_or_hex, to_hex}};
9
/// Errors raised while encoding, decoding, computing or verifying digests.
#[derive(Clone, Debug)]
pub enum DigestError {
    /// The `u16` algorithm identifier `raw` does not map to a supported
    /// [`DigestAlg`].
    UnknownAlgorithm {
        raw: u16,
    },
    /// The input is too short to hold a digest frame header.
    InvalidFormat,
    /// A length check failed: `have` bytes were present, `need` were required.
    InvalidLength {
        have: usize,
        need: usize,
    },
    /// The computed digest (`have`) differs from the expected one (`need`).
    DigestMismatch {
        have: Vec<u8>,
        need: Vec<u8>,
    },
    /// Finalization was requested on a digest that is already finalized.
    AlreadyFinalized,
}
19impl fmt::Display for DigestError {
20    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
21        use DigestError::*;
22        match self {
23            UnknownAlgorithm { raw } =>
24                write!(f, "unknown algorithm: {}",
25                    enum_name_or_hex::<DigestAlg>(*raw)),
26            InvalidFormat => write!(f, "invalid header: {}", "Invalid frame header"),
27            InvalidLength { have, need } =>
28                write!(f, "digest buffer too short: {} < {}", have, need),
29            DigestMismatch { have, need } =>
30                write!(f, "digest mismatch: {}, expected: {}", to_hex(have), to_hex(need)),
31
32            AlreadyFinalized => write!(f, "digest verified once: {}", "Invalid digest for frame"),
33        }
34    }
35}
36/// Supported digest algorithms (extensible).
37#[repr(u16)]
38#[derive(Copy, Clone, Debug, PartialEq, Eq, TryFromPrimitive)]
39pub enum DigestAlg {
40    // Sha224   = 0x0001,
41    Sha256   = digest_ids::SHA256,
42    // Sha384   = 0x0003,
43    Sha512   = digest_ids::SHA512,
44    // Sha3_224 = 0x0101,
45    Sha3_256 = digest_ids::SHA3_256,
46    // Sha3_384 = 0x0103,
47    Sha3_512 = digest_ids::SHA3_512,
48    Blake3   = digest_ids::BLAKE3K, // UN-KEYED Blake3
49}
50
51impl DigestAlg {
52    /// Returns digest output length in bytes
53    pub const fn out_len(&self) -> usize {
54        match self {
55            DigestAlg::Sha256    => 32,
56            DigestAlg::Sha512    => 64,
57            DigestAlg::Sha3_256  => 32,
58            DigestAlg::Sha3_512  => 64,
59            DigestAlg::Blake3    => 32, // default output size
60        }
61    }
62
63    /// Returns full wire length for digest frame
64    /// (header + digest output)
65    pub const fn wire_len(&self, overhead: usize) -> usize {
66        self.out_len() + overhead
67    }
68
69    pub fn can_resume(&self) -> bool {
70        match self {
71            DigestAlg::Blake3 => false,
72            _ => true,
73        }
74    }
75}
76
77impl fmt::Display for DigestAlg {
78    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
79        let name = match self {
80            DigestAlg::Sha256       => "Sha256",
81            DigestAlg::Sha512       => "Sha512",
82            DigestAlg::Sha3_256     => "Sha3_256",
83            DigestAlg::Sha3_512     => "Sha3_512",
84            DigestAlg::Blake3       => "Blake3",
85        };
86        f.write_str(name)
87    }
88}
89
90/// Internal hashing state.
91#[derive(Debug, Clone)]
92pub enum DigestState {
93    // Sha224(Sha224),
94    Sha256(Sha256),
95    // Sha384(Sha384),
96    Sha512(Sha512),
97    // Sha3_224(Sha3_224),
98    Sha3_256(Sha3_256),
99    // Sha3_384(Sha3_384),
100    Sha3_512(Sha3_512),
101    Blake3(blake3::Hasher),
102}
103
104impl DigestState {
105    /// Create a new digest state.
106    #[inline]
107    pub fn new(alg: DigestAlg) -> Self {
108        match alg {
109            DigestAlg::Sha256   => DigestState::Sha256(Sha256::new()),
110            DigestAlg::Sha512   => DigestState::Sha512(Sha512::new()),
111            DigestAlg::Sha3_256 => DigestState::Sha3_256(Sha3_256::new()),
112            DigestAlg::Sha3_512 => DigestState::Sha3_512(Sha3_512::new()),
113            DigestAlg::Blake3   => DigestState::Blake3(blake3::Hasher::new()),
114        }
115    }
116
117    /// Helper to get the algorithm type from an existing state
118    pub fn alg(&self) -> DigestAlg {
119        match self {
120            DigestState::Sha256(_)   => DigestAlg::Sha256,
121            DigestState::Sha512(_)   => DigestAlg::Sha512,
122            DigestState::Sha3_256(_) => DigestAlg::Sha3_256,
123            DigestState::Sha3_512(_) => DigestAlg::Sha3_512,
124            DigestState::Blake3(_)   => DigestAlg::Blake3,
125        }
126    }
127
128    #[inline]
129    fn update(&mut self, data: &[u8]) {
130        match self {
131            // DigestState::Sha224(h)   => h.update(data),
132            DigestState::Sha256(h)   => h.update(data),
133            // DigestState::Sha384(h)   => h.update(data),
134            DigestState::Sha512(h)   => h.update(data),
135            // DigestState::Sha3_224(h) => h.update(data),
136            DigestState::Sha3_256(h) => h.update(data),
137            // DigestState::Sha3_384(h) => h.update(data),
138            DigestState::Sha3_512(h) => h.update(data),
139            DigestState::Blake3(h)   => { h.update(data); },
140        }
141    }
142
143    #[inline]
144    fn finalize(self) -> Vec<u8> {
145        match self {
146            // DigestState::Sha224(h)   => h.finalize().to_vec(),
147            DigestState::Sha256(h)   => h.finalize().to_vec(),
148            // DigestState::Sha384(h)   => h.finalize().to_vec(),
149            DigestState::Sha512(h)   => h.finalize().to_vec(),
150            // DigestState::Sha3_224(h) => h.finalize().to_vec(),
151            DigestState::Sha3_256(h) => h.finalize().to_vec(),
152            // DigestState::Sha3_384(h) => h.finalize().to_vec(),
153            DigestState::Sha3_512(h) => h.finalize().to_vec(),
154            DigestState::Blake3(h)   => h.finalize().as_bytes().to_vec(),
155        }
156    }
157}
158
159/// Digest frame decoded from plaintext.
160#[derive(Debug)]
161pub struct DigestFrame {
162    pub algorithm: DigestAlg,
163    pub digest: Vec<u8>,
164}
165
166
167/// [ alg_id: u16 BE ][ digest_len: u16 BE ][ digest bytes ]
168impl DigestFrame {
169    #[inline]
170    pub fn new(alg: DigestAlg, digest: Vec<u8>) -> Self {
171        Self {
172            algorithm: alg,
173            digest
174        }
175    }
176    /// Encode into wire format (plaintext):
177    /// [ alg_id: u16 BE ][ digest_len: u16 BE ][ digest bytes ]
178    pub fn encode(&self) -> Vec<u8> {
179        let mut out = Vec::with_capacity(4 + self.digest.len());
180
181        // algorithm ID
182        let alg_id: u16 = self.algorithm as u16;
183        out.extend_from_slice(&alg_id.to_be_bytes());
184
185        // digest length
186        let len: u16 = self.digest.len() as u16;
187        out.extend_from_slice(&len.to_be_bytes());
188
189        // digest bytes
190        out.extend_from_slice(&self.digest);
191
192        out
193    }
194    /// Wire format (plaintext):
195    /// [ alg_id: u16 BE ][ digest_len: u16 BE ][ digest bytes ]
196    pub fn decode(plaintext: &[u8]) -> Result<Self, DigestError> {
197        if plaintext.len() < 4 {
198            return Err(DigestError::InvalidFormat);
199        }
200
201        let alg_id = u16::from_be_bytes([plaintext[0], plaintext[1]]);
202        let algorithm = match DigestAlg::try_from(alg_id) {
203            Ok(r) => r,
204            Err(_) => {
205                return Err(DigestError::UnknownAlgorithm { raw: alg_id })
206            }
207        };
208
209        let length = u16::from_be_bytes([plaintext[2], plaintext[3]]) as usize;
210        let actual = plaintext.len() - 4;
211
212        if length != actual {
213            return Err(DigestError::InvalidLength {
214                need: length,
215                have: actual,
216            });
217        }
218
219        Ok(Self {
220            algorithm,
221            digest: plaintext[4..].to_vec(),
222        })
223    }
224}
225
226// ✔ extensible
227// ✔ version-safe
228// ✔ consistent with headers
229
230/// Incremental segment digest builder.
231///
232/// This builder hashes **canonical digest input bytes**,
233/// not plaintext and not wire bytes.
234///
235/// Digest input format (canonical):
236///
237/// ```text
238/// segment_index   (u32 LE)
239/// frame_count     (u32 LE)
240/// for each DATA frame, ordered by frame_index:
241///   frame_index   (u32 LE)
242///   ciphertext_len(u32 LE)
243///   ciphertext    (N bytes)
244/// ```
245pub struct SegmentDigestBuilder {
246    pub alg: DigestAlg,
247    pub state: DigestState,
248    pub segment_index: u32,
249    pub frame_count: u32,
250    pub finalized: bool,
251}
252impl SegmentDigestBuilder {
253    /// Create a new digest builder.
254    #[inline]
255    pub fn new(
256        alg: DigestAlg, 
257        segment_index: u32, 
258        frame_count: u32
259    ) -> Self {
260        let mut state = DigestState::new(alg);
261
262        // Feed segment header: MUST be done for a fresh segment
263        state.update(&segment_index.to_le_bytes());
264        state.update(&frame_count.to_le_bytes());
265
266        Self {
267            alg,
268            state,
269            segment_index,
270            frame_count,
271            finalized: false,
272        }
273    }
274
275    /// Create a verifier by resuming from an existing hydrated state.
276    /// Used for frame-level resume within a single segment.
277    pub fn with_state(
278        state: DigestState,
279        segment_index: u32,
280        frame_count: u32,
281    ) -> Self {
282        // FIX: Extract the algorithm from the existing state
283        let alg = state.alg();
284
285        // FIX: We do NOT update the state with segment_index/frame_count here.
286        // If we are resuming, those bytes were already hashed before the 
287        // state was checkpointed. Re-hashing them would cause a digest mismatch.
288
289        Self {
290            alg,
291            state,
292            segment_index,
293            frame_count,
294            finalized: false,
295        }
296    }
297    /// Returns a clone of the current internal state for checkpointing.
298    pub fn state(&self) -> DigestState {
299        self.state.clone() // DigestState must implement Clone
300    }
301
302    #[inline]
303    fn update(&mut self, data: &[u8]) {
304        debug_assert!(!self.finalized);
305        self.state.update(data);
306    }
307
308    /// Feed one DATA frame (strictly ascending `frame_index`).
309    #[inline]
310    pub fn update_frame(&mut self, frame_index: u32, ciphertext: &[u8]) {
311        self.update(&frame_index.to_le_bytes());
312        self.update(&(ciphertext.len() as u32).to_le_bytes());
313        self.update(ciphertext);
314        // println!("builder input: seg={} frame_count={} frame_index={} ct_len={}",
315        //     self.segment_index, self.frame_count, frame_index, ciphertext.len());
316    }
317
318    /// Finalize and return digest bytes.
319    ///
320    /// Can be called only once.
321    #[inline]
322    pub fn finalize(mut self) -> Result<Vec<u8>, DigestError> {
323        if self.finalized {
324            return Err(DigestError::AlreadyFinalized);
325        }
326        self.finalized = true;
327        let actual = self.state.finalize();
328        Ok(actual)
329    }
330
331}
332
333/// Streaming verifier (bit-exact with `DigestBuilder`).
334pub struct SegmentDigestVerifier {
335    _alg: DigestAlg,
336    state: DigestState,
337    actual: Vec<u8>,
338    _segment_index: u32,
339    _frame_count: u32,
340    finalized: bool,
341}
342
343impl SegmentDigestVerifier {
344    /// Create a fresh verifier for a new segment.
345    /// This hashes the segment header (index and frame count) immediately.
346    pub fn new(
347        alg: DigestAlg,
348        segment_index: u32,
349        frame_count: u32,
350    ) -> Self {
351        let mut state = DigestState::new(alg);
352
353        // Feed segment header: MUST be done for a fresh segment
354        state.update(&segment_index.to_le_bytes());
355        state.update(&frame_count.to_le_bytes());
356
357        Self {
358            _alg: alg,
359            state,
360            actual: vec![],
361            _segment_index: segment_index,
362            _frame_count: frame_count,
363            finalized: false,
364        }
365    }
366
367    /// Create a verifier by resuming from an existing hydrated state.
368    /// Used for frame-level resume within a single segment.
369    pub fn with_state(
370        state: DigestState,
371        segment_index: u32,
372        frame_count: u32,
373        actual: Vec<u8>,
374    ) -> Self {
375        // FIX: Extract the algorithm from the existing state
376        let alg = state.alg();
377
378        // FIX: We do NOT update the state with segment_index/frame_count here.
379        // If we are resuming, those bytes were already hashed before the 
380        // state was checkpointed. Re-hashing them would cause a digest mismatch.
381
382        Self {
383            _alg: alg,
384            state,
385            actual,
386            _segment_index: segment_index,
387            _frame_count: frame_count,
388            finalized: false,
389        }
390    }
391    /// Returns a clone of the current internal state for checkpointing.
392    pub fn state(&self) -> DigestState {
393        self.state.clone() // DigestState must implement Clone
394    }
395
396    #[inline]
397    fn update(&mut self, data: &[u8]) {
398        debug_assert!(!self.finalized);
399        self.state.update(data);
400    }
401
402    /// Feed one DATA frame (strictly ascending `frame_index`).
403    #[inline]
404    pub fn update_frame(&mut self, frame_index: u32, ciphertext: &[u8]) {
405        self.update(&frame_index.to_le_bytes());
406        self.update(&(ciphertext.len() as u32).to_le_bytes());
407        self.update(ciphertext);
408        // println!("verifier input: seg={} frame_count={} frame_index={} ct_len={}",
409        //     self.segment_index, self.frame_count, frame_index, ciphertext.len());
410    }
411    
412    #[inline]
413    /// Finalize and store the actual digest after all frames are processed.
414    pub fn finalize(mut self) -> Result<Vec<u8>, DigestError> {
415        if self.finalized {
416            return Err(DigestError::AlreadyFinalized);
417        }
418        self.finalized = true;
419        self.actual = self.state.finalize();
420        Ok(self.actual)
421    }
422
423    /// Compare a previously finalized digest against the expected one.
424    #[inline]
425    pub fn verify(actual: Vec<u8>, expected: Vec<u8>) -> Result<(), DigestError> {
426        if actual == expected {
427            Ok(())
428        } else {
429            Err(DigestError::DigestMismatch { have: actual, need: expected })
430        }
431    }
432}
433