// mail_auth/dkim/canonicalize.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use super::{Canonicalization, Signature};
8use crate::common::{
9    crypto::HashContext,
10    headers::{HeaderStream, Writable, Writer},
11};
12
13/// Incremental body hasher for streaming DKIM signing.
14///
15/// This struct allows body content to be fed in chunks while maintaining
16/// the canonicalization state between calls.
17pub struct BodyHasher<H> {
18    hasher: H,
19    canonicalization: Canonicalization,
20    body_length_limit: u64,
21    bytes_hashed: u64,
22    // Canonicalization state
23    crlf_seq: usize,
24    last_ch: u8,
25    is_empty: bool,
26    done: bool,
27}
28
29impl<H: Writer> BodyHasher<H> {
30    /// Creates a new incremental body hasher.
31    ///
32    /// # Arguments
33    /// * `hasher` - The hash context to write canonicalized body to
34    /// * `canonicalization` - The body canonicalization algorithm to use
35    /// * `body_length_limit` - Maximum bytes to hash (0 = unlimited)
36    pub fn new(hasher: H, canonicalization: Canonicalization, body_length_limit: u64) -> Self {
37        Self {
38            hasher,
39            canonicalization,
40            body_length_limit,
41            bytes_hashed: 0,
42            crlf_seq: 0,
43            last_ch: 0,
44            is_empty: true,
45            done: false,
46        }
47    }
48
49    /// Feed a chunk of body data to the hasher.
50    ///
51    /// Data is canonicalized according to the configured algorithm and
52    /// written to the underlying hash context.
53    pub fn write(&mut self, chunk: &[u8]) {
54        if self.done {
55            return;
56        }
57
58        // Apply body length limit if set
59        let chunk = if self.body_length_limit > 0 {
60            let remaining = self.body_length_limit.saturating_sub(self.bytes_hashed);
61            if remaining == 0 {
62                return;
63            }
64            let limit = std::cmp::min(remaining as usize, chunk.len());
65            &chunk[..limit]
66        } else {
67            chunk
68        };
69
70        self.bytes_hashed += chunk.len() as u64;
71
72        match self.canonicalization {
73            Canonicalization::Relaxed => {
74                for &ch in chunk {
75                    match ch {
76                        b' ' | b'\t' => {
77                            while self.crlf_seq > 0 {
78                                self.hasher.write(b"\r\n");
79                                self.crlf_seq -= 1;
80                            }
81                            self.is_empty = false;
82                        }
83                        b'\n' => {
84                            self.crlf_seq += 1;
85                        }
86                        b'\r' => {}
87                        _ => {
88                            while self.crlf_seq > 0 {
89                                self.hasher.write(b"\r\n");
90                                self.crlf_seq -= 1;
91                            }
92
93                            if self.last_ch == b' ' || self.last_ch == b'\t' {
94                                self.hasher.write(b" ");
95                            }
96
97                            self.hasher.write(&[ch]);
98                            self.is_empty = false;
99                        }
100                    }
101                    self.last_ch = ch;
102                }
103            }
104            Canonicalization::Simple => {
105                for &ch in chunk {
106                    match ch {
107                        b'\n' => {
108                            self.crlf_seq += 1;
109                        }
110                        b'\r' => {}
111                        _ => {
112                            while self.crlf_seq > 0 {
113                                self.hasher.write(b"\r\n");
114                                self.crlf_seq -= 1;
115                            }
116                            self.hasher.write(&[ch]);
117                            self.is_empty = false;
118                        }
119                    }
120                }
121            }
122        }
123    }
124
125    /// Finalize the body hash.
126    ///
127    /// Applies the final canonicalization rules (trailing CRLF handling)
128    /// and returns the completed hash context along with the number of
129    /// body bytes that were processed.
130    pub fn finish(mut self) -> (H, u64)
131    where
132        H: HashContext,
133    {
134        if !self.done {
135            self.done = true;
136            match self.canonicalization {
137                Canonicalization::Relaxed => {
138                    if !self.is_empty {
139                        self.hasher.write(b"\r\n");
140                    }
141                }
142                Canonicalization::Simple => {
143                    self.hasher.write(b"\r\n");
144                }
145            }
146        }
147        (self.hasher, self.bytes_hashed)
148    }
149}
150
151pub struct CanonicalBody<'a> {
152    canonicalization: Canonicalization,
153    body: &'a [u8],
154}
155
156impl Writable for CanonicalBody<'_> {
157    fn write(self, hasher: &mut impl Writer) {
158        let mut crlf_seq = 0;
159
160        match self.canonicalization {
161            Canonicalization::Relaxed => {
162                let mut last_ch = 0;
163                let mut is_empty = true;
164
165                for &ch in self.body {
166                    match ch {
167                        b' ' | b'\t' => {
168                            while crlf_seq > 0 {
169                                hasher.write(b"\r\n");
170                                crlf_seq -= 1;
171                            }
172                            is_empty = false;
173                        }
174                        b'\n' => {
175                            crlf_seq += 1;
176                        }
177                        b'\r' => {}
178                        _ => {
179                            while crlf_seq > 0 {
180                                hasher.write(b"\r\n");
181                                crlf_seq -= 1;
182                            }
183
184                            if last_ch == b' ' || last_ch == b'\t' {
185                                hasher.write(b" ");
186                            }
187
188                            hasher.write(&[ch]);
189                            is_empty = false;
190                        }
191                    }
192
193                    last_ch = ch;
194                }
195
196                if !is_empty {
197                    hasher.write(b"\r\n");
198                }
199            }
200            Canonicalization::Simple => {
201                for &ch in self.body {
202                    match ch {
203                        b'\n' => {
204                            crlf_seq += 1;
205                        }
206                        b'\r' => {}
207                        _ => {
208                            while crlf_seq > 0 {
209                                hasher.write(b"\r\n");
210                                crlf_seq -= 1;
211                            }
212                            hasher.write(&[ch]);
213                        }
214                    }
215                }
216
217                hasher.write(b"\r\n");
218            }
219        }
220    }
221}
222
223impl Canonicalization {
224    pub fn canonicalize_headers<'a>(
225        &self,
226        headers: impl Iterator<Item = (&'a [u8], &'a [u8])>,
227        hasher: &mut impl Writer,
228    ) {
229        match self {
230            Canonicalization::Relaxed => {
231                for (name, value) in headers {
232                    for &ch in name {
233                        if !ch.is_ascii_whitespace() {
234                            hasher.write(&[ch.to_ascii_lowercase()]);
235                        }
236                    }
237
238                    hasher.write(b":");
239                    let mut bw = 0;
240                    let mut last_ch = 0;
241
242                    for &ch in value {
243                        if !ch.is_ascii_whitespace() {
244                            if [b' ', b'\t'].contains(&last_ch) && bw > 0 {
245                                hasher.write_len(b" ", &mut bw);
246                            }
247                            hasher.write_len(&[ch], &mut bw);
248                        }
249                        last_ch = ch;
250                    }
251
252                    if last_ch == b'\n' {
253                        hasher.write(b"\r\n");
254                    }
255                }
256            }
257            Canonicalization::Simple => {
258                for (name, value) in headers {
259                    hasher.write(name);
260                    hasher.write(b":");
261                    hasher.write(value);
262                }
263            }
264        }
265    }
266
267    pub fn canonical_headers<'a>(
268        &self,
269        headers: Vec<(&'a [u8], &'a [u8])>,
270    ) -> CanonicalHeaders<'a> {
271        CanonicalHeaders {
272            canonicalization: *self,
273            headers,
274        }
275    }
276
277    pub fn canonical_body<'a>(&self, body: &'a [u8], l: u64) -> CanonicalBody<'a> {
278        CanonicalBody {
279            canonicalization: *self,
280            body: if l == 0 || body.is_empty() {
281                body
282            } else {
283                &body[..std::cmp::min(l as usize, body.len())]
284            },
285        }
286    }
287
288    pub fn serialize_name(&self, writer: &mut impl Writer) {
289        writer.write(match self {
290            Canonicalization::Relaxed => b"relaxed",
291            Canonicalization::Simple => b"simple",
292        });
293    }
294}
295
296impl Signature {
297    pub fn canonicalize<'x>(
298        &self,
299        mut message: impl HeaderStream<'x>,
300    ) -> (usize, CanonicalHeaders<'x>, Vec<String>, CanonicalBody<'x>) {
301        let mut headers = Vec::with_capacity(self.h.len());
302        let mut found_headers = vec![false; self.h.len()];
303        let mut signed_headers = Vec::with_capacity(self.h.len());
304
305        while let Some((name, value)) = message.next_header() {
306            if let Some(pos) = self
307                .h
308                .iter()
309                .position(|header| name.eq_ignore_ascii_case(header.as_bytes()))
310            {
311                headers.push((name, value));
312                found_headers[pos] = true;
313                signed_headers.push(std::str::from_utf8(name).unwrap().into());
314            }
315        }
316
317        let body = message.body();
318        let body_len = body.len();
319        let canonical_headers = self.ch.canonical_headers(headers);
320        let canonical_body = self.ch.canonical_body(body, u64::MAX);
321
322        // Add any missing headers
323        signed_headers.reverse();
324        for (header, found) in self.h.iter().zip(found_headers) {
325            if !found {
326                signed_headers.push(header.to_string());
327            }
328        }
329
330        (body_len, canonical_headers, signed_headers, canonical_body)
331    }
332}
333
334pub struct CanonicalHeaders<'a> {
335    canonicalization: Canonicalization,
336    headers: Vec<(&'a [u8], &'a [u8])>,
337}
338
339impl Writable for CanonicalHeaders<'_> {
340    fn write(self, writer: &mut impl Writer) {
341        self.canonicalization
342            .canonicalize_headers(self.headers.into_iter().rev(), writer)
343    }
344}
345
#[cfg(test)]
mod test {
    use mail_builder::encoders::base64::base64_encode;

    use super::{BodyHasher, CanonicalBody, CanonicalHeaders};
    use crate::{
        common::{
            crypto::{HashContext, HashImpl, Sha256},
            headers::{HeaderIterator, Writable},
        },
        dkim::Canonicalization,
    };

    /// Checks relaxed and simple canonicalization of headers and bodies
    /// against fixed expected outputs, then checks the hashes produced for
    /// empty bodies.
    #[test]
    // The headers are collected up front because the iterator is queried
    // for the body offset afterwards.
    #[allow(clippy::needless_collect)]
    fn dkim_canonicalize() {
        for (message, (relaxed_headers, relaxed_body), (simple_headers, simple_body)) in [
            (
                concat!(
                    "A: X\r\n",
                    "B : Y\t\r\n",
                    "\tZ  \r\n",
                    "\r\n",
                    " C \r\n",
                    "D \t E\r\n"
                ),
                (
                    concat!("a:X\r\n", "b:Y Z\r\n",),
                    concat!(" C\r\n", "D E\r\n"),
                ),
                ("A: X\r\nB : Y\t\r\n\tZ  \r\n", " C \r\nD \t E\r\n"),
            ),
            (
                concat!(
                    "  From : John\tdoe <jdoe@domain.com>\t\r\n",
                    "SUB JECT:\ttest  \t  \r\n\r\n",
                    " body \t   \r\n",
                    "\r\n",
                    "\r\n",
                ),
                (
                    concat!("from:John doe <jdoe@domain.com>\r\n", "subject:test\r\n"),
                    " body\r\n",
                ),
                (
                    concat!(
                        "  From : John\tdoe <jdoe@domain.com>\t\r\n",
                        "SUB JECT:\ttest  \t  \r\n"
                    ),
                    " body \t   \r\n",
                ),
            ),
            (
                "H: value\t\r\n\r\n",
                ("h:value\r\n", ""),
                ("H: value\t\r\n", "\r\n"),
            ),
            (
                "\tx\t: \t\t\tz\r\n\r\nabc",
                ("x:z\r\n", "abc\r\n"),
                ("\tx\t: \t\t\tz\r\n", "abc\r\n"),
            ),
            (
                "Subject: hello\r\n\r\n\r\n",
                ("subject:hello\r\n", ""),
                ("Subject: hello\r\n", "\r\n"),
            ),
        ] {
            let mut header_iterator = HeaderIterator::new(message.as_bytes());
            let parsed_headers = (&mut header_iterator).collect::<Vec<_>>();
            let raw_body = header_iterator
                .body_offset()
                .map(|pos| &message.as_bytes()[pos..])
                .unwrap_or_default();

            for (canonicalization, expected_headers, expected_body) in [
                (Canonicalization::Relaxed, relaxed_headers, relaxed_body),
                (Canonicalization::Simple, simple_headers, simple_body),
            ] {
                // `CanonicalHeaders::write` walks its list back to front, so
                // the parsed headers are stored reversed to come out in
                // message order.
                let mut headers = Vec::new();
                CanonicalHeaders {
                    canonicalization,
                    headers: parsed_headers.iter().cloned().rev().collect(),
                }
                .write(&mut headers);
                assert_eq!(expected_headers, String::from_utf8(headers).unwrap());

                let mut body = Vec::new();
                CanonicalBody {
                    canonicalization,
                    body: raw_body,
                }
                .write(&mut body);
                assert_eq!(expected_body, String::from_utf8(body).unwrap());
            }
        }

        // Test empty body hashes
        for (canonicalization, hash) in [
            (
                Canonicalization::Relaxed,
                "47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=",
            ),
            (
                Canonicalization::Simple,
                "frcCV1k9oG9oKj3dpUqdJg1PxRT2RSN/XKdLCPjaYaY=",
            ),
        ] {
            for body in ["\r\n", ""] {
                let mut hasher = Sha256::hasher();
                CanonicalBody {
                    canonicalization,
                    body: body.as_bytes(),
                }
                .write(&mut hasher);

                // Finalize through `HashContext::complete`, like every other
                // test in this module.
                assert_eq!(
                    String::from_utf8(base64_encode(hasher.complete().as_ref()).unwrap()).unwrap(),
                    hash,
                );
            }
        }
    }

    /// `BodyHasher` fed a whole body in one call must hash to the same
    /// value as `CanonicalBody`.
    #[test]
    fn body_hasher_matches_canonical_body() {
        // Test that BodyHasher produces identical results to CanonicalBody
        for (body, canonicalization) in [
            (" C \r\nD \t E\r\n", Canonicalization::Relaxed),
            (" C \r\nD \t E\r\n", Canonicalization::Simple),
            (" body \t   \r\n\r\n\r\n", Canonicalization::Relaxed),
            (" body \t   \r\n\r\n\r\n", Canonicalization::Simple),
            ("", Canonicalization::Relaxed),
            ("", Canonicalization::Simple),
            ("\r\n", Canonicalization::Relaxed),
            ("\r\n", Canonicalization::Simple),
            ("abc", Canonicalization::Relaxed),
            ("abc", Canonicalization::Simple),
            ("hello world\r\n", Canonicalization::Relaxed),
            ("hello world\r\n", Canonicalization::Simple),
        ] {
            // Hash using CanonicalBody
            let mut expected_hasher = Sha256::hasher();
            CanonicalBody {
                canonicalization,
                body: body.as_bytes(),
            }
            .write(&mut expected_hasher);
            let expected_hash = expected_hasher.complete();

            // Hash using BodyHasher (single chunk)
            let mut body_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
            body_hasher.write(body.as_bytes());
            let (actual_hasher, _) = body_hasher.finish();
            let actual_hash = actual_hasher.complete();

            assert_eq!(
                expected_hash.as_ref(),
                actual_hash.as_ref(),
                "BodyHasher (single chunk) mismatch for body {:?} with {:?} canonicalization",
                body,
                canonicalization
            );
        }
    }

    /// Splitting the input into fixed-size chunks must not change the hash
    /// or the reported byte count.
    #[test]
    fn body_hasher_chunked_matches_single() {
        // Test that chunked input produces same result as single input
        let body = " C \r\nD \t E\r\nMore content here\r\n\r\n";

        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
            // Single chunk
            let mut single_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
            single_hasher.write(body.as_bytes());
            let (single_result, single_len) = single_hasher.finish();
            let single_hash = single_result.complete();

            // Multiple chunks - split at various points
            for chunk_size in [1, 2, 3, 5, 7, 10] {
                let mut chunked_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
                for chunk in body.as_bytes().chunks(chunk_size) {
                    chunked_hasher.write(chunk);
                }
                let (chunked_result, chunked_len) = chunked_hasher.finish();
                let chunked_hash = chunked_result.complete();

                assert_eq!(
                    single_hash.as_ref(),
                    chunked_hash.as_ref(),
                    "Chunked (size {}) mismatch for {:?} canonicalization",
                    chunk_size,
                    canonicalization
                );
                assert_eq!(single_len, chunked_len);
            }
        }
    }

    /// A 10-byte length limit must give the same hash as canonicalizing
    /// only the first 10 bytes of the body.
    #[test]
    fn body_hasher_length_limit() {
        let body = "Hello World! This is a test body.\r\n";

        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
            // Hash with limit of 10 bytes
            let mut limited_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 10);
            limited_hasher.write(body.as_bytes());
            let (limited_result, limited_len) = limited_hasher.finish();
            let limited_hash = limited_result.complete();

            // Hash the first 10 bytes using CanonicalBody
            let mut expected_hasher = Sha256::hasher();
            CanonicalBody {
                canonicalization,
                body: &body.as_bytes()[..10],
            }
            .write(&mut expected_hasher);
            let expected_hash = expected_hasher.complete();

            assert_eq!(
                expected_hash.as_ref(),
                limited_hash.as_ref(),
                "Body length limit mismatch for {:?} canonicalization",
                canonicalization
            );
            assert_eq!(limited_len, 10);
        }
    }

    /// A CRLF pair split across two `write` calls must hash the same as an
    /// unsplit one.
    #[test]
    fn body_hasher_split_crlf() {
        // Test that CRLF split across chunks is handled correctly
        let body = "Line1\r\nLine2\r\n";

        for canonicalization in [Canonicalization::Relaxed, Canonicalization::Simple] {
            // Single chunk reference
            let mut single_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
            single_hasher.write(body.as_bytes());
            let (single_result, _) = single_hasher.finish();
            let single_hash = single_result.complete();

            // Split right in the middle of \r\n
            let mut split_hasher = BodyHasher::new(Sha256::hasher(), canonicalization, 0);
            split_hasher.write(b"Line1\r");
            split_hasher.write(b"\nLine2\r");
            split_hasher.write(b"\n");
            let (split_result, _) = split_hasher.finish();
            let split_hash = split_result.complete();

            assert_eq!(
                single_hash.as_ref(),
                split_hash.as_ref(),
                "Split CRLF mismatch for {:?} canonicalization",
                canonicalization
            );
        }
    }
}