viadkim/
message_hash.rs

1// viadkim – implementation of the DKIM specification
2// Copyright © 2022–2024 David Bürgin <dbuergin@gluet.ch>
3//
4// This program is free software: you can redistribute it and/or modify it under
5// the terms of the GNU General Public License as published by the Free Software
6// Foundation, either version 3 of the License, or (at your option) any later
7// version.
8//
9// This program is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11// FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
12// details.
13//
14// You should have received a copy of the GNU General Public License along with
15// this program. If not, see <https://www.gnu.org/licenses/>.
16
17//! Computation of the message hashes.
18//!
//! Use [`compute_data_hash`] to compute the *data hash*, and [`BodyHasher`] to
//! compute the *body hash* (see RFC 6376, section 3.7).
21
22use crate::{
23    canonicalize::{self, BodyCanonicalizer},
24    crypto::{self, CountingHasher, HashAlgorithm, HashStatus, InsufficientInput},
25    header::{FieldName, HeaderFields},
26    signature::{CanonicalizationAlgorithm, DkimSignature},
27};
28use std::{
29    collections::{HashMap, HashSet},
30    error::Error,
31    fmt::{self, Display, Formatter},
32};
33
34/// Computes the *data hash* for the given inputs.
35pub fn compute_data_hash(
36    hash_alg: HashAlgorithm,
37    canon_alg: CanonicalizationAlgorithm,
38    headers: &HeaderFields,
39    selected_headers: &[FieldName],
40    dkim_sig_header_name: &str,
41    formatted_dkim_sig_header_value: &str,
42) -> Box<[u8]> {
43    // canonicalize selected headers
44    let mut cheaders = canonicalize::canonicalize_headers(canon_alg, headers, selected_headers);
45
46    // canonicalize DKIM-Signature header
47    canonicalize::canonicalize_header(
48        &mut cheaders,
49        canon_alg,
50        dkim_sig_header_name,
51        formatted_dkim_sig_header_value,
52    );
53
54    // produce message digest of the canonicalized value
55    crypto::digest(hash_alg, &cheaders)
56}
57
/// The stance of a body hasher regarding additional body content.
///
/// This is the value returned by `BodyHasher::hash_chunk` after digesting a
/// chunk of input.
#[must_use]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BodyHasherStance {
    // Note: only `Done` is authoritative. `Interested` may be reported even
    // though no more input is actually needed, because the stateful
    // `BodyCanonicalizer`s may already hold the final pieces.

    /// The body hasher may still need input: the client should keep feeding
    /// it body content for as long as any is available.
    Interested,

    /// The body hasher can answer all body hash requests without any further
    /// input: the client may stop feeding it, even if body content remains.
    Done,
}
77
78/// A key referencing a body hash request in a `BodyHasher`.
79pub type BodyHasherKey = (Option<usize>, HashAlgorithm, CanonicalizationAlgorithm);
80
81/// Constructs a `BodyHasherKey` from DKIM signature data.
82///
83/// # Panics
84///
85/// Panics if the DKIM signature’s `body_length` value does not fit in a
86/// `usize`.
87pub fn body_hasher_key(sig: &DkimSignature) -> BodyHasherKey {
88    let body_len = sig.body_length
89        .map(|len| len.try_into().expect("integer overflow"));
90    let hash_alg = sig.algorithm.hash_algorithm();
91    let canon_alg = sig.canonicalization.body;
92    (body_len, hash_alg, canon_alg)
93}
94
95/// A builder for body hashers.
96#[derive(Clone)]
97pub struct BodyHasherBuilder {
98    fail_on_truncate: bool,  // truncated inputs must yield InputTruncated
99    registrations: HashSet<BodyHasherKey>,
100}
101
102impl BodyHasherBuilder {
103    /// Creates a new builder for a body hasher.
104    ///
105    /// The argument controls whether the resulting body hasher will accept or
106    /// reject only partially hashed message bodies.
107    pub fn new(fail_on_partially_hashed_input: bool) -> Self {
108        Self {
109            fail_on_truncate: fail_on_partially_hashed_input,
110            registrations: HashSet::new(),
111        }
112    }
113
114    /// Registers a body hash request, ie the tuple (length limit of body
115    /// content, hash algorithm, body canonicalization algorithm).
116    pub fn register_canonicalization(
117        &mut self,
118        len: Option<usize>,
119        hash: HashAlgorithm,
120        canon: CanonicalizationAlgorithm,
121    ) {
122        self.registrations.insert((len, hash, canon));
123    }
124
125    /// Builds a body hasher that can answer all registered body hash requests.
126    pub fn build(self) -> BodyHasher {
127        use CanonicalizationAlgorithm::*;
128
129        let hashers = self.registrations.into_iter()
130            .map(|key @ (len, hash, _)| (key, (CountingHasher::new(hash, len), false)))
131            .collect();
132
133        BodyHasher {
134            fail_on_truncate: self.fail_on_truncate,
135            hashers,
136            canonicalizer_simple: BodyCanonicalizer::new(Simple),
137            canonicalizer_relaxed: BodyCanonicalizer::new(Relaxed),
138        }
139    }
140}
141
142/// A producer of *body hash* results.
143///
144/// The body hasher canonicalizes and hashes chunks of the message body, until
145/// all body hash requests can be answered. The main benefits of the body hasher
146/// design are deduplication of the canonicalization and hashing effort, and
147/// shortcutting the computation when only part of the message body is of
148/// interest.
149pub struct BodyHasher {
150    fail_on_truncate: bool,
151    // For each registration/key, map to a hasher and a flag that records
152    // whether input was truncated, ie only partially consumed.
153    hashers: HashMap<BodyHasherKey, (CountingHasher, bool)>,
154    canonicalizer_simple: BodyCanonicalizer,
155    canonicalizer_relaxed: BodyCanonicalizer,
156}
157
158impl BodyHasher {
159    /// Canonicalizes and hashes a chunk of the message body.
160    pub fn hash_chunk(&mut self, chunk: &[u8]) -> BodyHasherStance {
161        let mut canonicalized_chunk_simple = None;
162        let mut canonicalized_chunk_relaxed = None;
163
164        let mut all_done = true;
165
166        let active_hashers = self.hashers.iter_mut().filter(|(_, (hasher, truncated))| {
167            !hasher.is_done() || (self.fail_on_truncate && !truncated)
168        });
169
170        for ((_, _, canon), (hasher, truncated)) in active_hashers {
171            let canonicalized_chunk = match canon {
172                CanonicalizationAlgorithm::Simple => canonicalized_chunk_simple
173                    .get_or_insert_with(|| self.canonicalizer_simple.canonicalize_chunk(chunk)),
174                CanonicalizationAlgorithm::Relaxed => canonicalized_chunk_relaxed
175                    .get_or_insert_with(|| self.canonicalizer_relaxed.canonicalize_chunk(chunk)),
176            };
177
178            match hasher.update(canonicalized_chunk) {
179                HashStatus::AllConsumed => {
180                    if self.fail_on_truncate || !hasher.is_done() {
181                        all_done = false;
182                    }
183                }
184                HashStatus::Truncated => {
185                    *truncated = true;
186                }
187            }
188        }
189
190        if all_done {
191            BodyHasherStance::Done
192        } else {
193            BodyHasherStance::Interested
194        }
195    }
196
197    /// Finalises any body hash calculations still in progress and returns the
198    /// results.
199    pub fn finish(self) -> BodyHashResults {
200        let mut finish_canonicalization_simple = Some(|| self.canonicalizer_simple.finish());
201        let mut finish_canonicalization_relaxed = Some(|| self.canonicalizer_relaxed.finish());
202        let mut canonicalized_chunk_simple = None;
203        let mut canonicalized_chunk_relaxed = None;
204
205        let mut results = HashMap::new();
206
207        for (key @ (_, _, canon), (mut hasher, mut truncated)) in self.hashers {
208            if !hasher.is_done() || (self.fail_on_truncate && !truncated) {
209                let canonicalized_chunk = match canon {
210                    CanonicalizationAlgorithm::Simple => {
211                        match finish_canonicalization_simple.take() {
212                            Some(f) => canonicalized_chunk_simple.insert(f()),
213                            None => canonicalized_chunk_simple.as_ref().unwrap(),
214                        }
215                    }
216                    CanonicalizationAlgorithm::Relaxed => {
217                        match finish_canonicalization_relaxed.take() {
218                            Some(f) => canonicalized_chunk_relaxed.insert(f()),
219                            None => canonicalized_chunk_relaxed.as_ref().unwrap(),
220                        }
221                    }
222                };
223
224                if let HashStatus::Truncated = hasher.update(canonicalized_chunk) {
225                    truncated = true;
226                }
227            }
228
229            let res = if self.fail_on_truncate && truncated {
230                Err(BodyHashError::InputTruncated)
231            } else {
232                hasher.finish().map_err(|InsufficientInput| BodyHashError::InsufficientInput)
233            };
234
235            results.insert(key, res);
236        }
237
238        BodyHashResults { results }
239    }
240}
241
/// An error that occurs when computing the *body hash*.
///
/// These errors can only occur when the *l=* tag is used.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum BodyHashError {
    /// The body hasher was fed less input data than expected.
    InsufficientInput,
    /// The body hasher consumed only part of the input.
    InputTruncated,
}

impl Display for BodyHashError {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let message = match self {
            Self::InsufficientInput => "insufficient input data",
            Self::InputTruncated => "input not digested entirely",
        };
        f.write_str(message)
    }
}

impl Error for BodyHashError {}
263
264/// A result answering a body hash request.
265///
266/// The result contains the final body hash (digest) and the number of bytes
267/// digested.
268pub type BodyHashResult = Result<(Box<[u8]>, usize), BodyHashError>;
269
270/// Results produced by a body hasher.
271#[derive(Clone, Debug, Eq, PartialEq)]
272pub struct BodyHashResults {
273    results: HashMap<BodyHasherKey, BodyHashResult>,
274}
275
276impl BodyHashResults {
277    /// Returns the result computed for a given body hash request.
278    pub fn get(&self, key: &BodyHasherKey) -> Option<&BodyHashResult> {
279        self.results.get(key)
280    }
281}
282
283#[cfg(test)]
284mod tests {
285    use super::*;
286    use crate::{signature::CanonicalizationAlgorithm::*, util};
287    use rand::{
288        distributions::{Distribution, Slice},
289        Rng,
290    };
291    use std::ops::RangeInclusive;
292
293    fn key_simple() -> BodyHasherKey {
294        (None, HashAlgorithm::Sha256, Simple)
295    }
296
297    fn limited_key_simple(n: usize) -> BodyHasherKey {
298        (Some(n), HashAlgorithm::Sha256, Simple)
299    }
300
301    fn key_relaxed() -> BodyHasherKey {
302        (None, HashAlgorithm::Sha256, Relaxed)
303    }
304
305    fn limited_key_relaxed(n: usize) -> BodyHasherKey {
306        (Some(n), HashAlgorithm::Sha256, Relaxed)
307    }
308
309    #[test]
310    fn body_hasher_simple() {
311        let key1 @ (_, _, canon_alg1) = key_simple();
312        let key2 @ (len, hash_alg, canon_alg2) = key_relaxed();
313
314        let mut hasher = BodyHasherBuilder::new(false);
315        hasher.register_canonicalization(len, hash_alg, canon_alg1);
316        hasher.register_canonicalization(len, hash_alg, canon_alg2);
317        let mut hasher = hasher.build();
318
319        assert_eq!(hasher.hash_chunk(b"abc \r\n"), BodyHasherStance::Interested);
320
321        let results = hasher.finish();
322
323        let res1 = results.get(&key1).unwrap();
324        assert_eq!(res1.as_ref().unwrap().1, 6);
325        let res2 = results.get(&key2).unwrap();
326        assert_eq!(res2.as_ref().unwrap().1, 5);
327    }
328
329    #[test]
330    fn body_hasher_fail_on_partial() {
331        let key1 @ (len, hash_alg, canon_alg1) = limited_key_relaxed(4);
332
333        let mut hasher = BodyHasherBuilder::new(true);
334        hasher.register_canonicalization(len, hash_alg, canon_alg1);
335        let mut hasher = hasher.build();
336
337        assert_eq!(hasher.hash_chunk(b"ab"), BodyHasherStance::Interested);
338        assert_eq!(hasher.hash_chunk(b"c"), BodyHasherStance::Interested);
339
340        // Now canonicalization adds a final CRLF, exceeding the limit 4:
341        let results = hasher.finish();
342
343        let res1 = results.get(&key1).unwrap();
344        assert_eq!(res1, &Err(BodyHashError::InputTruncated));
345    }
346
347    #[test]
348    fn body_hasher_hash_with_length() {
349        let key1 @ (len, hash_alg, canon_alg1) = limited_key_simple(28);
350
351        let mut hasher = BodyHasherBuilder::new(false);
352        hasher.register_canonicalization(len, hash_alg, canon_alg1);
353        let mut hasher = hasher.build();
354
355        assert_eq!(hasher.hash_chunk(b"well  hello \r\n"), BodyHasherStance::Interested);
356        assert_eq!(hasher.hash_chunk(b"\r\n what's up \r"), BodyHasherStance::Interested);
357        assert_eq!(hasher.hash_chunk(b"\n\r\n"), BodyHasherStance::Done);
358
359        let results = hasher.finish();
360
361        let res1 = results.get(&key1).unwrap();
362        assert_eq!(
363            res1.as_ref().unwrap().0,
364            sha256_digest(b"well  hello \r\n\r\n what's up \r")
365        );
366    }
367
368    #[test]
369    fn body_hasher_known_hash_sample() {
370        let key1 @ (len, hash_alg, canon_alg1) = key_relaxed();
371
372        let mut hasher = BodyHasherBuilder::new(false);
373        hasher.register_canonicalization(len, hash_alg, canon_alg1);
374        let mut hasher = hasher.build();
375
376        let body = b"\
377Hello Proff,\r\n\
378\r\n\
379Let\xe2\x80\x99s try this again, with line\r\n\
380breaks and empty lines even.\r\n\
381\r\n\
382Ciao, und bis bald\r\n\
383\r\n\
384\r\n\
385-- \r\n\
386David\r\n\
387";
388
389        assert_eq!(hasher.hash_chunk(body), BodyHasherStance::Interested);
390
391        let results = hasher.finish();
392
393        let res1 = results.get(&key1).unwrap();
394        assert_eq!(
395            util::encode_base64(&res1.as_ref().unwrap().0),
396            "RMSbeRTj/zCxWeWQXpEIbiqxH0Jqg5eYs4ORzOt3MT0="
397        );
398    }
399
400    #[cfg(feature = "pre-rfc8301")]
401    #[test]
402    fn body_hasher_reuse_canonicalized_chunk() {
403        let key1 @ (len, hash_alg1, canon_alg1) = key_relaxed();
404        let key2 @ (_, hash_alg2, canon_alg2) = (None, HashAlgorithm::Sha1, Relaxed);
405
406        let mut hasher = BodyHasherBuilder::new(false);
407        hasher.register_canonicalization(len, hash_alg1, canon_alg1);
408        hasher.register_canonicalization(len, hash_alg2, canon_alg2);
409        let mut hasher = hasher.build();
410
411        assert_eq!(hasher.hash_chunk(b"abc \r\n"), BodyHasherStance::Interested);
412
413        let results = hasher.finish();
414
415        let res1 = results.get(&key1).unwrap();
416        let res2 = results.get(&key2).unwrap();
417        assert_eq!(res1.as_ref().unwrap().1, res2.as_ref().unwrap().1);
418    }
419
420    fn sha256_digest(msg: &[u8]) -> Box<[u8]> {
421        crypto::digest(HashAlgorithm::Sha256, msg)
422    }
423
424    #[test]
425    #[ignore = "randomly generated test inputs"]
426    fn fuzz_body_hasher_plain() {
427        fuzz_body_hasher(false);
428    }
429
430    #[test]
431    #[ignore = "randomly generated test inputs"]
432    fn fuzz_body_hasher_fail_on_truncate() {
433        fuzz_body_hasher(true);
434    }
435
436    fn fuzz_body_hasher(fail_on_truncate: bool) {
437        let elems = ["x", "y", " ", "\r\n"];
438        let chunk_len = 0..=4;
439        let chunk_count = 1..=4;
440        let param_count = 1..=6;
441        let limit = 0..=13;
442
443        run_fuzz(1000, fail_on_truncate, &elems, chunk_len, chunk_count, param_count, limit);
444    }
445
446    fn run_fuzz(
447        repetitions: usize,
448        fail_on_truncate: bool,
449        elems: &[&str],
450        chunk_len: RangeInclusive<u8>,
451        chunk_count: RangeInclusive<u8>,
452        param_count: RangeInclusive<u8>,
453        limit: RangeInclusive<u8>,
454    ) {
455        let elems = Slice::new(elems).unwrap();
456
457        let hashes = Slice::new(&[
458            HashAlgorithm::Sha256,
459            #[cfg(feature = "pre-rfc8301")]
460            HashAlgorithm::Sha1,
461        ])
462        .unwrap();
463
464        let canons = Slice::new(&[Simple, Relaxed]).unwrap();
465
466        let mut rng = rand::thread_rng();
467
468        for _ in 0..repetitions {
469            // generate body chunks
470            let mut chunks = vec![];
471            for _ in 0..rng.gen_range(chunk_count.clone()) {
472                let n = rng.gen_range(chunk_len.clone()).into();
473                let s: String = elems.sample_iter(&mut rng).copied().take(n).collect();
474                chunks.push(s);
475            }
476            let chunks: Vec<_> = chunks.iter().map(|s| s.as_str()).collect();
477
478            // generate hasher keys/params
479            let mut params = vec![];
480            for _ in 0..rng.gen_range(param_count.clone()) {
481                let l = if rng.gen_bool(1.0 / 4.0) {
482                    None
483                } else {
484                    Some(rng.gen_range(limit.clone()).into())
485                };
486                let h = hashes.sample(&mut rng);
487                let c = canons.sample(&mut rng);
488                params.push((l, *h, *c));
489            }
490
491            compare_impls(fail_on_truncate, &chunks, &params);
492        }
493    }
494
495    fn compare_impls(fail_on_truncate: bool, chunks: &[&str], params: &[BodyHasherKey]) {
496        // implementation based on BodyHasher
497        let mut hasher = BodyHasherBuilder::new(fail_on_truncate);
498        for &(l, h, c) in params {
499            hasher.register_canonicalization(l, h, c);
500        }
501        let mut hasher = hasher.build();
502
503        for ch in chunks {
504            if let BodyHasherStance::Done = hasher.hash_chunk(ch.as_bytes()) {
505                break;
506            }
507        }
508
509        let results = hasher.finish();
510
511        // alternative implementation
512        let s: String = chunks.iter().copied().collect();
513        let alt_impl = move |(l, h, c)| {
514            let mut bc = BodyCanonicalizer::new(c);
515            let mut result = bc.canonicalize_chunk(s.as_bytes()).into_owned();
516            result.extend(bc.finish().into_owned());
517
518            if let Some(n) = l {
519                if n > result.len() {
520                    return Err(BodyHashError::InsufficientInput);
521                }
522                if fail_on_truncate && n < result.len() {
523                    return Err(BodyHashError::InputTruncated);
524                }
525                result.truncate(n);
526            }
527
528            let hash = crypto::digest(h, &result);
529
530            Ok((hash, result.len()))
531        };
532
533        // compare results
534        for &key in params {
535            let r1 = results.get(&key).unwrap();
536            let r2 = alt_impl(key);
537
538            assert_eq!(
539                r1, &r2,
540                "divergent results for inputs {chunks:?} and {key:?}",
541            );
542        }
543    }
544}