dsfb-gpu-debug-core 0.1.1

Deterministic CPU reference, hash chain, and semantic authority for dsfb-gpu-debug.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
//! Hand-rolled SHA-256.
//!
//! Why hand-rolled: the `dsfb-gpu-debug-core` crate is `no_std` and
//! dependency-free by policy. The case file's hash chain is the load-bearing
//! audit artifact; if it depended on an external SHA-256 crate, the chain
//! would inherit that crate's compatibility and supply-chain surface. A
//! ~150-line in-tree implementation eliminates both.
//!
//! Correctness is validated against the published FIPS 180-4 test vectors
//! plus a small handful of additional edge cases (empty input, one block,
//! exactly 56 bytes, exactly 64 bytes, and a multi-block input).
//!
//! API shape: a streaming `Sha256` builder for incremental hashing of
//! pipeline buffers, and a `sha256(bytes)` one-shot wrapper for the cases
//! where the input is already in memory. Both produce a `[u8; 32]` digest.

#![allow(clippy::module_name_repetitions)]

/// Length in bytes of a SHA-256 digest (256 bits).
///
/// Used as the array length of the digests returned by [`Sha256::finalize`]
/// and [`sha256`], and of the input accepted by [`format_digest`].
pub const DIGEST_BYTES: usize = 32;

/// SHA-256 round constants (FIPS 180-4 §4.2.2). First 32 bits of the
/// fractional parts of the cube roots of the first 64 primes.
///
/// Indexed by round number inside `Sha256::compress`: round `i` of the
/// 64-round loop adds `K[i]` into the `t1` term.
const K: [u32; 64] = [
    0x428a_2f98,
    0x7137_4491,
    0xb5c0_fbcf,
    0xe9b5_dba5,
    0x3956_c25b,
    0x59f1_11f1,
    0x923f_82a4,
    0xab1c_5ed5,
    0xd807_aa98,
    0x1283_5b01,
    0x2431_85be,
    0x550c_7dc3,
    0x72be_5d74,
    0x80de_b1fe,
    0x9bdc_06a7,
    0xc19b_f174,
    0xe49b_69c1,
    0xefbe_4786,
    0x0fc1_9dc6,
    0x240c_a1cc,
    0x2de9_2c6f,
    0x4a74_84aa,
    0x5cb0_a9dc,
    0x76f9_88da,
    0x983e_5152,
    0xa831_c66d,
    0xb003_27c8,
    0xbf59_7fc7,
    0xc6e0_0bf3,
    0xd5a7_9147,
    0x06ca_6351,
    0x1429_2967,
    0x27b7_0a85,
    0x2e1b_2138,
    0x4d2c_6dfc,
    0x5338_0d13,
    0x650a_7354,
    0x766a_0abb,
    0x81c2_c92e,
    0x9272_2c85,
    0xa2bf_e8a1,
    0xa81a_664b,
    0xc24b_8b70,
    0xc76c_51a3,
    0xd192_e819,
    0xd699_0624,
    0xf40e_3585,
    0x106a_a070,
    0x19a4_c116,
    0x1e37_6c08,
    0x2748_774c,
    0x34b0_bcb5,
    0x391c_0cb3,
    0x4ed8_aa4a,
    0x5b9c_ca4f,
    0x682e_6ff3,
    0x748f_82ee,
    0x78a5_636f,
    0x84c8_7814,
    0x8cc7_0208,
    0x90be_fffa,
    0xa450_6ceb,
    0xbef9_a3f7,
    0xc671_78f2,
];

/// Initial hash values (FIPS 180-4 §5.3.3). First 32 bits of the fractional
/// parts of the square roots of the first 8 primes.
///
/// `Sha256::new` copies this array into the hasher's `state`, so every
/// fresh hasher starts from the same chaining value.
const H_INIT: [u32; 8] = [
    0x6a09_e667,
    0xbb67_ae85,
    0x3c6e_f372,
    0xa54f_f53a,
    0x510e_527f,
    0x9b05_688c,
    0x1f83_d9ab,
    0x5be0_cd19,
];

/// Streaming SHA-256 state.
///
/// Buffers up to 64 bytes (one compression-function block) before processing.
/// `len_bits` tracks the total message length so the padding step can write
/// the canonical 64-bit big-endian length suffix even when the caller
/// supplies bytes in many small chunks.
///
/// `Clone` lets a caller fork a partially-fed hasher; `Debug` is derived so
/// that types embedding a hasher can themselves derive `Debug` and so the
/// state can be inspected in test failures.
#[derive(Clone, Debug)]
pub struct Sha256 {
    /// Running chaining value: eight 32-bit words, seeded from `H_INIT` and
    /// folded forward by each call to the compression function.
    state: [u32; 8],
    /// Holding area for a partial block that has not been compressed yet.
    /// Only the first `buffer_len` bytes are meaningful.
    buffer: [u8; 64],
    /// Number of valid bytes currently held in `buffer`. `update` compresses
    /// and resets this to 0 as soon as it reaches 64, so it is below 64
    /// whenever `update` returns.
    buffer_len: usize,
    /// Total message length absorbed so far, in bits (wrapping on overflow).
    len_bits: u64,
}

/// `Default` produces the same fresh hasher as [`Sha256::new`].
impl Default for Sha256 {
    fn default() -> Self {
        // Delegate so the initial state is defined in exactly one place.
        Self::new()
    }
}

impl Sha256 {
    /// Create a hasher in its starting configuration: the chaining value is
    /// `H_INIT`, the block buffer is empty, and no message bits have been
    /// counted yet.
    #[must_use]
    pub const fn new() -> Self {
        Self {
            len_bits: 0,
            buffer_len: 0,
            buffer: [0u8; 64],
            state: H_INIT,
        }
    }

    /// Feed `data` into the hash. May be called any number of times with
    /// chunks of any size (including empty ones); bytes that do not complete
    /// a 64-byte block are held back until a later call or `finalize`.
    pub fn update(&mut self, data: &[u8]) {
        // The bit counter is bumped up front so `finalize` can emit the
        // 64-bit length suffix regardless of how the message was chunked.
        let added_bits = (data.len() as u64) * 8;
        self.len_bits = self.len_bits.wrapping_add(added_bits);

        let mut pending = data;

        // Top up a previously stashed partial block before touching any
        // whole blocks of the new input.
        let filled = self.buffer_len;
        if filled != 0 {
            let (head, tail) = pending.split_at(pending.len().min(64 - filled));
            let end = filled + head.len();
            self.buffer[filled..end].copy_from_slice(head);
            self.buffer_len = end;
            pending = tail;
            if end == 64 {
                // Copy the block out so `state` can be borrowed mutably.
                let full_block = self.buffer;
                Self::compress(&mut self.state, &full_block);
                self.buffer_len = 0;
            }
        }

        // Compress complete blocks directly from the caller's slice.
        // `split_first_chunk` hands back a `&[u8; 64]` without a fallible
        // conversion, so this loop adds no panic path.
        while let Some((block, tail)) = pending.split_first_chunk::<64>() {
            Self::compress(&mut self.state, block);
            pending = tail;
        }

        // Whatever is left (fewer than 64 bytes) waits in the buffer.
        let leftover = pending.len();
        if leftover != 0 {
            self.buffer[..leftover].copy_from_slice(pending);
            self.buffer_len = leftover;
        }
    }

    /// Apply the SHA-2 padding, run the final compression(s), and return the
    /// 32-byte digest. Takes the hasher by value, so it cannot be reused.
    #[must_use]
    pub fn finalize(mut self) -> [u8; DIGEST_BYTES] {
        // Padding layout: a single 0x80 byte, zero bytes up to 8 short of a
        // block boundary, then the message length in bits as big-endian u64.
        let bit_len = self.len_bits;

        let mut block = self.buffer;
        let mut used = self.buffer_len;
        block[used] = 0x80;
        used += 1;

        if used > 56 {
            // The length suffix no longer fits: zero the rest of this block,
            // compress it, and continue in an all-zero block.
            block[used..].fill(0);
            Self::compress(&mut self.state, &block);
            block = [0u8; 64];
            used = 0;
        }

        // Stale bytes from earlier partial blocks may sit past `used`.
        block[used..56].fill(0);
        block[56..].copy_from_slice(&bit_len.to_be_bytes());
        Self::compress(&mut self.state, &block);

        // Serialize the eight state words big-endian, in order.
        let mut digest = [0u8; 32];
        for (out, word) in digest.chunks_exact_mut(4).zip(self.state.iter()) {
            out.copy_from_slice(&word.to_be_bytes());
        }
        digest
    }

    /// SHA-256 compression function (FIPS 180-4 §6.2.2): folds one 64-byte
    /// block into `state`.
    ///
    /// The working variables keep the specification's `a..h` names so each
    /// line can be checked against the standard; hence the lint allowance.
    #[allow(clippy::many_single_char_names)]
    fn compress(state: &mut [u32; 8], block: &[u8; 64]) {
        // Message schedule: the first 16 words are the block read as
        // big-endian u32s, the remaining 48 are derived from earlier words.
        let mut w = [0u32; 64];
        for (slot, quad) in w.iter_mut().zip(block.chunks_exact(4)) {
            *slot = u32::from_be_bytes([quad[0], quad[1], quad[2], quad[3]]);
        }
        for t in 16..64 {
            let x = w[t - 15];
            let y = w[t - 2];
            let small_sigma0 = x.rotate_right(7) ^ x.rotate_right(18) ^ (x >> 3);
            let small_sigma1 = y.rotate_right(17) ^ y.rotate_right(19) ^ (y >> 10);
            w[t] = w[t - 16]
                .wrapping_add(small_sigma0)
                .wrapping_add(w[t - 7])
                .wrapping_add(small_sigma1);
        }

        let [mut a, mut b, mut c, mut d, mut e, mut f, mut g, mut h] = *state;

        // 64 rounds, each consuming one round constant and one schedule word.
        for (&k_t, &w_t) in K.iter().zip(w.iter()) {
            let big_sigma1 = e.rotate_right(6) ^ e.rotate_right(11) ^ e.rotate_right(25);
            let ch = (e & f) ^ ((!e) & g);
            let t1 = h
                .wrapping_add(big_sigma1)
                .wrapping_add(ch)
                .wrapping_add(k_t)
                .wrapping_add(w_t);
            let big_sigma0 = a.rotate_right(2) ^ a.rotate_right(13) ^ a.rotate_right(22);
            let maj = (a & b) ^ (a & c) ^ (b & c);
            let t2 = big_sigma0.wrapping_add(maj);

            h = g;
            g = f;
            f = e;
            e = d.wrapping_add(t1);
            d = c;
            c = b;
            b = a;
            a = t1.wrapping_add(t2);
        }

        // Feed-forward: add the working variables back into the state.
        for (slot, value) in state.iter_mut().zip([a, b, c, d, e, f, g, h]) {
            *slot = slot.wrapping_add(value);
        }
    }
}

/// Hash a complete in-memory byte slice in one call.
///
/// Equivalent to creating a [`Sha256`], passing `bytes` to a single
/// `update`, and calling `finalize`. Convenient when a stage's canonical
/// byte representation has already been assembled.
#[must_use]
pub fn sha256(bytes: &[u8]) -> [u8; DIGEST_BYTES] {
    let mut state = Sha256::new();
    state.update(bytes);
    state.finalize()
}

/// Render a digest as ASCII: the literal prefix `sha256:` followed by 64
/// lowercase hexadecimal characters (two per digest byte, high nibble
/// first). This is the form the contract format expects and is used by the
/// case-file serializer.
///
/// The result is a fixed 71-byte array (7 prefix bytes + 64 hex bytes), so
/// no allocation is needed.
#[must_use]
pub fn format_digest(digest: &[u8; DIGEST_BYTES]) -> [u8; 71] {
    const PREFIX: &[u8; 7] = b"sha256:";
    const NIBBLES: &[u8; 16] = b"0123456789abcdef";

    let mut text = [0u8; 71];
    let (head, hex) = text.split_at_mut(PREFIX.len());
    head.copy_from_slice(PREFIX);

    // Each digest byte becomes one two-character cell of the hex area.
    for (pair, &byte) in hex.chunks_exact_mut(2).zip(digest) {
        pair[0] = NIBBLES[usize::from(byte >> 4)];
        pair[1] = NIBBLES[usize::from(byte & 0x0F)];
    }
    text
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Known-vector test from FIPS 180-4 / NIST CSRC: empty input.
    /// Expected digest: e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
    #[test]
    fn sha256_empty_string() {
        let digest = sha256(b"");
        let expected = [
            0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14, 0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f,
            0xb9, 0x24, 0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c, 0xa4, 0x95, 0x99, 0x1b,
            0x78, 0x52, 0xb8, 0x55,
        ];
        assert_eq!(digest, expected);
    }

    /// Known-vector test from FIPS 180-4: "abc".
    /// Expected digest: ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
    #[test]
    fn sha256_abc() {
        let digest = sha256(b"abc");
        let expected = [
            0xba, 0x78, 0x16, 0xbf, 0x8f, 0x01, 0xcf, 0xea, 0x41, 0x41, 0x40, 0xde, 0x5d, 0xae,
            0x22, 0x23, 0xb0, 0x03, 0x61, 0xa3, 0x96, 0x17, 0x7a, 0x9c, 0xb4, 0x10, 0xff, 0x61,
            0xf2, 0x00, 0x15, 0xad,
        ];
        assert_eq!(digest, expected);
    }

    /// Known-vector test from FIPS 180-4: the 56-byte boundary input.
    /// Exercises the multi-block padding path: after the 0x80 marker there
    /// is no room left for the 8-byte length suffix, so `finalize` must emit
    /// a second block.
    /// "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
    /// Expected digest: 248d6a61d20638b8e5c026930c3e6039a33ce45964ff2167f6ecedd419db06c1
    #[test]
    fn sha256_56_byte_input_exercises_padding() {
        let msg: &[u8] = b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
        assert_eq!(msg.len(), 56);
        let digest = sha256(msg);
        let expected = [
            0x24, 0x8d, 0x6a, 0x61, 0xd2, 0x06, 0x38, 0xb8, 0xe5, 0xc0, 0x26, 0x93, 0x0c, 0x3e,
            0x60, 0x39, 0xa3, 0x3c, 0xe4, 0x59, 0x64, 0xff, 0x21, 0x67, 0xf6, 0xec, 0xed, 0xd4,
            0x19, 0xdb, 0x06, 0xc1,
        ];
        assert_eq!(digest, expected);
    }

    /// Million-a vector compressed to a smaller version for unit-test runtime:
    /// 1000 bytes of 'a'. Validated against the openssl/coreutils reference
    /// implementation.
    #[test]
    fn sha256_thousand_a() {
        let mut data = [0u8; 1000];
        data.fill(b'a');
        let digest = sha256(&data);
        // sha256(1000 * 'a') — verified out-of-band against a reference impl.
        let expected = [
            0x41, 0xed, 0xec, 0xe4, 0x2d, 0x63, 0xe8, 0xd9, 0xbf, 0x51, 0x5a, 0x9b, 0xa6, 0x93,
            0x2e, 0x1c, 0x20, 0xcb, 0xc9, 0xf5, 0xa5, 0xd1, 0x34, 0x64, 0x5a, 0xdb, 0x5d, 0xb1,
            0xb9, 0x73, 0x7e, 0xa3,
        ];
        assert_eq!(digest, expected);
    }

    /// Streaming-mode equivalence: hashing a single buffer one byte at a time
    /// must produce the same digest as hashing it in one call.
    #[test]
    fn streaming_matches_one_shot() {
        let msg: &[u8] = b"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq";
        let one_shot = sha256(msg);
        let mut h = Sha256::new();
        for byte in msg {
            h.update(core::slice::from_ref(byte));
        }
        let streamed = h.finalize();
        assert_eq!(streamed, one_shot);
    }

    /// Streaming with mid-block boundaries equals one-shot.
    #[test]
    fn streaming_with_split_chunks_matches() {
        let msg: &[u8] = b"the quick brown fox jumps over the lazy dog the quick brown fox jumps over the lazy dog";
        let one_shot = sha256(msg);
        let mut h = Sha256::new();
        h.update(&msg[..3]);
        h.update(&msg[3..30]);
        h.update(&msg[30..64]);
        h.update(&msg[64..]);
        assert_eq!(h.finalize(), one_shot);
    }

    /// Block-boundary coverage. For message lengths on either side of the
    /// padding threshold (55/56/57), the block size (63/64/65) and their
    /// two-block counterparts, feeding the message as two chunks split at
    /// every possible offset (including empty first or second chunks) must
    /// give the same digest as the one-shot call.
    ///
    /// This is a self-consistency check of the buffering logic, not a
    /// known-answer test; the known-answer tests above anchor correctness.
    #[test]
    fn streaming_matches_one_shot_across_block_boundaries() {
        // Position-dependent bytes so a misplaced or dropped chunk changes
        // the digest.
        let mut data = [0u8; 129];
        for (byte, value) in data.iter_mut().zip(0u8..) {
            *byte = value;
        }

        for &len in &[55usize, 56, 57, 63, 64, 65, 119, 120, 121, 127, 128, 129] {
            let msg = &data[..len];
            let one_shot = sha256(msg);
            for split in 0..=len {
                let mut h = Sha256::new();
                h.update(&msg[..split]);
                h.update(&msg[split..]);
                assert_eq!(h.finalize(), one_shot, "len={len} split={split}");
            }
        }
    }

    /// The hex formatter prefixes "sha256:" and emits 64 lowercase hex chars.
    #[test]
    fn format_digest_emits_sha256_prefix() {
        let digest = sha256(b"");
        let formatted = format_digest(&digest);
        assert!(formatted.starts_with(b"sha256:"));
        assert_eq!(formatted.len(), 71);
        // 64 hex characters of the empty-string digest.
        assert_eq!(
            &formatted[7..],
            b"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
        );
    }
}