mail_auth/dkim/
canonicalize.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use super::{Canonicalization, Signature};
8use crate::common::{
9    crypto::HashContext,
10    headers::{HeaderStream, Writable, Writer},
11};
12
13/// Incremental body hasher for streaming DKIM signing.
14///
15/// This struct allows body content to be fed in chunks while maintaining
16/// the canonicalization state between calls.
17pub struct BodyHasher<H> {
18    hasher: H,
19    canonicalization: Canonicalization,
20    body_length_limit: u64,
21    bytes_hashed: u64,
22    // Canonicalization state
23    crlf_seq: usize,
24    last_ch: u8,
25    is_empty: bool,
26    done: bool,
27}
28
29impl<H: Writer> BodyHasher<H> {
30    /// Creates a new incremental body hasher.
31    ///
32    /// # Arguments
33    /// * `hasher` - The hash context to write canonicalized body to
34    /// * `canonicalization` - The body canonicalization algorithm to use
35    /// * `body_length_limit` - Maximum bytes to hash (0 = unlimited)
36    pub fn new(hasher: H, canonicalization: Canonicalization, body_length_limit: u64) -> Self {
37        Self {
38            hasher,
39            canonicalization,
40            body_length_limit,
41            bytes_hashed: 0,
42            crlf_seq: 0,
43            last_ch: 0,
44            is_empty: true,
45            done: false,
46        }
47    }
48
49    /// Feed a chunk of body data to the hasher.
50    ///
51    /// Data is canonicalized according to the configured algorithm and
52    /// written to the underlying hash context.
53    pub fn write(&mut self, chunk: &[u8]) {
54        if self.done {
55            return;
56        }
57
58        // Apply body length limit if set
59        let chunk = if self.body_length_limit > 0 {
60            let remaining = self.body_length_limit.saturating_sub(self.bytes_hashed);
61            if remaining == 0 {
62                return;
63            }
64            let limit = std::cmp::min(remaining as usize, chunk.len());
65            &chunk[..limit]
66        } else {
67            chunk
68        };
69
70        self.bytes_hashed += chunk.len() as u64;
71
72        match self.canonicalization {
73            Canonicalization::Relaxed => {
74                for &ch in chunk {
75                    match ch {
76                        b' ' | b'\t' => {
77                            while self.crlf_seq > 0 {
78                                self.hasher.write(b"\r\n");
79                                self.crlf_seq -= 1;
80                            }
81                            self.is_empty = false;
82                        }
83                        b'\n' => {
84                            self.crlf_seq += 1;
85                        }
86                        b'\r' => {}
87                        _ => {
88                            while self.crlf_seq > 0 {
89                                self.hasher.write(b"\r\n");
90                                self.crlf_seq -= 1;
91                            }
92
93                            if self.last_ch == b' ' || self.last_ch == b'\t' {
94                                self.hasher.write(b" ");
95                            }
96
97                            self.hasher.write(&[ch]);
98                            self.is_empty = false;
99                        }
100                    }
101                    self.last_ch = ch;
102                }
103            }
104            Canonicalization::Simple => {
105                for &ch in chunk {
106                    match ch {
107                        b'\n' => {
108                            self.crlf_seq += 1;
109                        }
110                        b'\r' => {}
111                        _ => {
112                            while self.crlf_seq > 0 {
113                                self.hasher.write(b"\r\n");
114                                self.crlf_seq -= 1;
115                            }
116                            self.hasher.write(&[ch]);
117                            self.is_empty = false;
118                        }
119                    }
120                }
121            }
122        }
123    }
124
125    /// Finalize the body hash.
126    ///
127    /// Applies the final canonicalization rules (trailing CRLF handling)
128    /// and returns the completed hash context along with the number of
129    /// body bytes that were processed.
130    pub fn finish(mut self) -> (H, u64)
131    where
132        H: HashContext,
133    {
134        if !self.done {
135            self.done = true;
136            match self.canonicalization {
137                Canonicalization::Relaxed => {
138                    if !self.is_empty {
139                        self.hasher.write(b"\r\n");
140                    }
141                }
142                Canonicalization::Simple => {
143                    self.hasher.write(b"\r\n");
144                }
145            }
146        }
147        (self.hasher, self.bytes_hashed)
148    }
149}
150
151pub struct CanonicalBody<'a> {
152    canonicalization: Canonicalization,
153    body: &'a [u8],
154}
155
156impl Writable for CanonicalBody<'_> {
157    fn write(self, hasher: &mut impl Writer) {
158        let mut crlf_seq = 0;
159
160        match self.canonicalization {
161            Canonicalization::Relaxed => {
162                let mut last_ch = 0;
163                let mut is_empty = true;
164
165                for &ch in self.body {
166                    match ch {
167                        b' ' | b'\t' => {
168                            while crlf_seq > 0 {
169                                hasher.write(b"\r\n");
170                                crlf_seq -= 1;
171                            }
172                            is_empty = false;
173                        }
174                        b'\n' => {
175                            crlf_seq += 1;
176                        }
177                        b'\r' => {}
178                        _ => {
179                            while crlf_seq > 0 {
180                                hasher.write(b"\r\n");
181                                crlf_seq -= 1;
182                            }
183
184                            if last_ch == b' ' || last_ch == b'\t' {
185                                hasher.write(b" ");
186                            }
187
188                            hasher.write(&[ch]);
189                            is_empty = false;
190                        }
191                    }
192
193                    last_ch = ch;
194                }
195
196                if !is_empty {
197                    hasher.write(b"\r\n");
198                }
199            }
200            Canonicalization::Simple => {
201                for &ch in self.body {
202                    match ch {
203                        b'\n' => {
204                            crlf_seq += 1;
205                        }
206                        b'\r' => {}
207                        _ => {
208                            while crlf_seq > 0 {
209                                hasher.write(b"\r\n");
210                                crlf_seq -= 1;
211                            }
212                            hasher.write(&[ch]);
213                        }
214                    }
215                }
216
217                hasher.write(b"\r\n");
218            }
219        }
220    }
221}
222
223impl Canonicalization {
224    pub fn canonicalize_headers<'a>(
225        &self,
226        headers: impl Iterator<Item = (&'a [u8], &'a [u8])>,
227        hasher: &mut impl Writer,
228    ) {
229        match self {
230            Canonicalization::Relaxed => {
231                for (name, value) in headers {
232                    for &ch in name {
233                        if !ch.is_ascii_whitespace() {
234                            hasher.write(&[ch.to_ascii_lowercase()]);
235                        }
236                    }
237
238                    hasher.write(b":");
239                    let mut bw = 0;
240                    let mut last_ch = 0;
241
242                    for &ch in value {
243                        if !ch.is_ascii_whitespace() {
244                            if [b' ', b'\t'].contains(&last_ch) && bw > 0 {
245                                hasher.write_len(b" ", &mut bw);
246                            }
247                            hasher.write_len(&[ch], &mut bw);
248                        }
249                        last_ch = ch;
250                    }
251
252                    if last_ch == b'\n' {
253                        hasher.write(b"\r\n");
254                    }
255                }
256            }
257            Canonicalization::Simple => {
258                for (name, value) in headers {
259                    hasher.write(name);
260                    hasher.write(b":");
261                    hasher.write(value);
262                }
263            }
264        }
265    }
266
267    pub fn canonical_headers<'a>(
268        &self,
269        headers: Vec<(&'a [u8], &'a [u8])>,
270    ) -> CanonicalHeaders<'a> {
271        CanonicalHeaders {
272            canonicalization: *self,
273            headers,
274        }
275    }
276
277    pub fn canonical_body<'a>(&self, body: &'a [u8], l: u64) -> CanonicalBody<'a> {
278        CanonicalBody {
279            canonicalization: *self,
280            body: if l == 0 || body.is_empty() {
281                body
282            } else {
283                &body[..std::cmp::min(l as usize, body.len())]
284            },
285        }
286    }
287
288    pub fn serialize_name(&self, writer: &mut impl Writer) {
289        writer.write(match self {
290            Canonicalization::Relaxed => b"relaxed",
291            Canonicalization::Simple => b"simple",
292        });
293    }
294}
295
296impl Signature {
297    pub fn canonicalize<'x>(
298        &self,
299        mut message: impl HeaderStream<'x>,
300    ) -> (usize, CanonicalHeaders<'x>, Vec<String>, CanonicalBody<'x>) {
301        let mut headers = Vec::with_capacity(self.h.len());
302        let mut found_headers = vec![false; self.h.len()];
303        let mut signed_headers = Vec::with_capacity(self.h.len());
304
305        while let Some((name, value)) = message.next_header() {
306            if let Some(pos) = self
307                .h
308                .iter()
309                .position(|header| name.eq_ignore_ascii_case(header.as_bytes()))
310            {
311                headers.push((name, value));
312                found_headers[pos] = true;
313                signed_headers.push(std::str::from_utf8(name).unwrap().into());
314            }
315        }
316
317        let body = message.body();
318        let body_len = body.len();
319        let canonical_headers = self.ch.canonical_headers(headers);
320        let canonical_body = self.ch.canonical_body(body, u64::MAX);
321
322        // Add any missing headers
323        signed_headers.reverse();
324        for (header, found) in self.h.iter().zip(found_headers) {
325            if !found {
326                signed_headers.push(header.to_string());
327            }
328        }
329
330        (body_len, canonical_headers, signed_headers, canonical_body)
331    }
332}
333
334pub struct CanonicalHeaders<'a> {
335    canonicalization: Canonicalization,
336    headers: Vec<(&'a [u8], &'a [u8])>,
337}
338
339impl Writable for CanonicalHeaders<'_> {
340    fn write(self, writer: &mut impl Writer) {
341        self.canonicalization
342            .canonicalize_headers(self.headers.into_iter().rev(), writer)
343    }
344}
345
346#[cfg(test)]
347mod test {
348    use mail_builder::encoders::base64::base64_encode;
349
350    use super::{BodyHasher, CanonicalBody, CanonicalHeaders};
351    use crate::{
352        common::{
353            crypto::{HashContext, HashImpl, Sha256},
354            headers::{HeaderIterator, Writable},
355        },
356        dkim::Canonicalization,
357    };
358
359    #[test]
360    #[allow(clippy::needless_collect)]
361    fn dkim_canonicalize() {
362        for (message, (relaxed_headers, relaxed_body), (simple_headers, simple_body)) in [
363            (
364                concat!(
365                    "A: X\r\n",
366                    "B : Y\t\r\n",
367                    "\tZ  \r\n",
368                    "\r\n",
369                    " C \r\n",
370                    "D \t E\r\n"
371                ),
372                (
373                    concat!("a:X\r\n", "b:Y Z\r\n",),
374                    concat!(" C\r\n", "D E\r\n"),
375                ),
376                ("A: X\r\nB : Y\t\r\n\tZ  \r\n", " C \r\nD \t E\r\n"),
377            ),
378            (
379                concat!(
380                    "  From : John\tdoe <jdoe@domain.com>\t\r\n",
381                    "SUB JECT:\ttest  \t  \r\n\r\n",
382                    " body \t   \r\n",
383                    "\r\n",
384                    "\r\n",
385                ),
386                (
387                    concat!("from:John doe <jdoe@domain.com>\r\n", "subject:test\r\n"),
388                    " body\r\n",
389                ),
390                (
391                    concat!(
392                        "  From : John\tdoe <jdoe@domain.com>\t\r\n",
393                        "SUB JECT:\ttest  \t  \r\n"
394                    ),
395                    " body \t   \r\n",
396                ),
397            ),
398            (
399                "H: value\t\r\n\r\n",
400                ("h:value\r\n", ""),
401                ("H: value\t\r\n", "\r\n"),
402            ),
403            (
404                "\tx\t: \t\t\tz\r\n\r\nabc",
405                ("x:z\r\n", "abc\r\n"),
406                ("\tx\t: \t\t\tz\r\n", "abc\r\n"),
407            ),
408            (
409                "Subject: hello\r\n\r\n\r\n",
410                ("subject:hello\r\n", ""),
411                ("Subject: hello\r\n", "\r\n"),
412            ),
413        ] {
414            let mut header_iterator = HeaderIterator::new(message.as_bytes());
415            let parsed_headers = (&mut header_iterator).collect::<Vec<_>>();
416            let raw_body = header_iterator
417                .body_offset()
418                .map(|pos| &message.as_bytes()[pos..])
419                .unwrap_or_default();
420
421            for (canonicalization, expected_headers, expected_body) in [
422                (Canonicalization::Relaxed, relaxed_headers, relaxed_body),
423                (Canonicalization::Simple, simple_headers, simple_body),
424            ] {
425                let mut headers = Vec::new();
426                CanonicalHeaders {
427                    canonicalization,
428                    headers: parsed_headers.iter().cloned().rev().collect(),
429                }
430                .write(&mut headers);
431                assert_eq!(expected_headers, String::from_utf8(headers).unwrap());
432
433                let mut body = Vec::new();
434                CanonicalBody {
435                    canonicalization,
436                    body: raw_body,
437                }
438                .write(&mut body);
439                assert_eq!(expected_body, String::from_utf8(body).unwrap());
440            }
441        }
442
443        // Test empty body hashes
444        for (canonicalization, hash) in [
445            (
446                Canonicalization::Relaxed,
447                "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=",
448            ),
449            (
450                Canonicalization::Simple,
451                "frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN/XKdLCPjaYaY=",
452            ),
453        ] {
454            for body in ["\r\n", ""] {
455                let mut hasher = Sha256::hasher();
456                CanonicalBody {
457                    canonicalization,
458                    body: body.as_bytes(),
459                }
460                .write(&mut hasher);
461
462                #[cfg(feature = "sha1")]
463                {
464                    use sha1::Digest;
465                    assert_eq!(
466                        String::from_utf8(base64_encode(hasher.finalize().as_ref()).unwrap())
467                            .unwrap(),
468                        hash,
469                    );
470                }
471
472                #[cfg(all(feature = "ring", not(feature = "sha1")))]
473                assert_eq!(
474                    String::from_utf8(base64_encode(hasher.finish().as_ref()).unwrap()).unwrap(),
475                    hash,
476                );
477            }
478        }
479    }
480
481    #[test]
482    fn body_hasher_matches_canonical_body() {
483        // Test that BodyHasher produces identical results to CanonicalBody
484        for (body, canonicalization) in [
485            (" C \r\nD \t E\r\n", Canonicalization::Relaxed),
486            (" C \r\nD \t E\r\n", Canonicalization::Simple),
487            (" body \t   \r\n\r\n\r\n", Canonicalization::Relaxed),
488            (" body \t   \r\n\r\n\r\n", Canonicalization::Simple),
489            ("", Canonicalization::Relaxed),
490            ("", Canonicalization::Simple),
491            ("\r\n", Canonicalization::Relaxed),
492            ("\r\n", Canonicalization::Simple),
493            ("abc", Canonicalization::Relaxed),
494            ("abc", Canonicalization::Simple),
495            ("hello world\r\n", Canonicalization::Relaxed),
496            ("hello world\r\n", Canonicalization::Simple),
497        ] {
498            // Hash using CanonicalBody
499            let mut expected_hasher = Sha256::hasher();
500            CanonicalBody {
501                canonicalization,
502                body: body.as_bytes(),
503            }
504            .write(&mut expected_hasher);
505            let expected_hash = expected_hasher.complete();
506
507            // Hash using BodyHasher (single chunk)
508            let mut body_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
509            body_hasher.write(body.as_bytes());
510            let (actual_hasher, _) = body_hasher.finish();
511            let actual_hash = actual_hasher.complete();
512
513            assert_eq!(
514                expected_hash.as_ref(),
515                actual_hash.as_ref(),
516                "BodyHasher (single chunk) mismatch for body {:?} with {:?} canonicalization",
517                body,
518                canonicalization
519            );
520        }
521    }
522
523    #[test]
524    fn body_hasher_chunked_matches_single() {
525        // Test that chunked input produces same result as single input
526        let body = " C \r\nD \t E\r\nMore content here\r\n\r\n";
527
528        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
529            // Single chunk
530            let mut single_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
531            single_hasher.write(body.as_bytes());
532            let (single_result, single_len) = single_hasher.finish();
533            let single_hash = single_result.complete();
534
535            // Multiple chunks - split at various points
536            for chunk_size in [1, 2, 3, 5, 7, 10] {
537                let mut chunked_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
538                for chunk in body.as_bytes().chunks(chunk_size) {
539                    chunked_hasher.write(chunk);
540                }
541                let (chunked_result, chunked_len) = chunked_hasher.finish();
542                let chunked_hash = chunked_result.complete();
543
544                assert_eq!(
545                    single_hash.as_ref(),
546                    chunked_hash.as_ref(),
547                    "Chunked (size {}) mismatch for {:?} canonicalization",
548                    chunk_size,
549                    canonicalization
550                );
551                assert_eq!(single_len, chunked_len);
552            }
553        }
554    }
555
556    #[test]
557    fn body_hasher_length_limit() {
558        let body = "Hello World! This is a test body.\r\n";
559
560        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
561            // Hash with limit of 10 bytes
562            let mut limited_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 10);
563            limited_hasher.write(body.as_bytes());
564            let (limited_result, limited_len) = limited_hasher.finish();
565            let limited_hash = limited_result.complete();
566
567            // Hash the first 10 bytes using CanonicalBody
568            let mut expected_hasher = Sha256::hasher();
569            CanonicalBody {
570                canonicalization,
571                body: &body.as_bytes()[..10],
572            }
573            .write(&mut expected_hasher);
574            let expected_hash = expected_hasher.complete();
575
576            assert_eq!(
577                expected_hash.as_ref(),
578                limited_hash.as_ref(),
579                "Body length limit mismatch for {:?} canonicalization",
580                canonicalization
581            );
582            assert_eq!(limited_len, 10);
583        }
584    }
585
586    #[test]
587    fn body_hasher_split_crlf() {
588        // Test that CRLF split across chunks is handled correctly
589        let body = "Line1\r\nLine2\r\n";
590
591        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
592            // Single chunk reference
593            let mut single_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
594            single_hasher.write(body.as_bytes());
595            let (single_result, _) = single_hasher.finish();
596            let single_hash = single_result.complete();
597
598            // Split right in the middle of \r\n
599            let mut split_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
600            split_hasher.write(b"Line1\r");
601            split_hasher.write(b"\nLine2\r");
602            split_hasher.write(b"\n");
603            let (split_result, _) = split_hasher.finish();
604            let split_hash = split_result.complete();
605
606            assert_eq!(
607                single_hash.as_ref(),
608                split_hash.as_ref(),
609                "Split CRLF mismatch for {:?} canonicalization",
610                canonicalization
611            );
612        }
613    }
614}