Skip to main content

mailrs_arc/
chain.rs

1//! ARC chain extraction.
2//!
3//! Given a raw RFC 5322 message, group every `ARC-Authentication-Results`
4//! / `ARC-Message-Signature` / `ARC-Seal` header by its `i=N` instance
5//! number to form a per-instance [`ArcSet`], then validate the chain is
6//! contiguous from `i=1` upward with no gaps (RFC 8617 §5.2 step 3).
7
8use crate::error::ArcError;
9use crate::header::{ArcAuthResults, ArcMessageSignature, ArcSeal, MAX_INSTANCE};
10
11/// One ARC instance's triplet of headers.
12#[derive(Debug, Clone)]
13pub struct ArcSet {
14    /// Instance number (1..=50).
15    pub i: u32,
16    /// Parsed `ARC-Authentication-Results` header.
17    pub aar: ArcAuthResults,
18    /// Parsed `ARC-Message-Signature` header.
19    pub ams: ArcMessageSignature,
20    /// Parsed `ARC-Seal` header.
21    pub seal: ArcSeal,
22    /// Verbatim header values, in the order they appeared in the
23    /// original message. Needed by the seal verifier (it canonicalizes
24    /// the prior chain headers as input to its hash).
25    pub raw_aar: String,
26    /// see [`Self::raw_aar`].
27    pub raw_ams: String,
28    /// see [`Self::raw_aar`].
29    pub raw_seal: String,
30}
31
32/// A complete ARC chain extracted from a message.
33#[derive(Debug, Clone)]
34pub struct ArcChain {
35    /// Sets in ascending instance order: `sets[0].i == 1`, etc.
36    pub sets: Vec<ArcSet>,
37}
38
39impl ArcChain {
40    /// Walk the header block of `raw_message` and assemble every
41    /// complete ARC instance into an [`ArcSet`]. The header block ends
42    /// at the first CRLF CRLF (or LF LF) per RFC 5322 §2.1.
43    ///
44    /// Returns `Ok(None)` if there are zero ARC headers (the message
45    /// is unsigned by any forwarder — DMARC then evaluates as normal).
46    ///
47    /// Returns `Err(ArcError::IncompleteSet)` if any instance has only
48    /// 1 or 2 of the 3 required headers, and
49    /// `Err(ArcError::NonContiguousChain)` if instances don't form a
50    /// contiguous run starting at 1.
51    pub fn extract(raw_message: &[u8]) -> Result<Option<Self>, ArcError> {
52        let header_block = take_header_block(raw_message);
53        let mut by_instance: std::collections::BTreeMap<u32, PartialSet> =
54            std::collections::BTreeMap::new();
55
56        for (name, value) in unfold_headers(header_block) {
57            let name_lower = name.to_ascii_lowercase();
58            match name_lower.as_str() {
59                "arc-authentication-results" => {
60                    let aar = ArcAuthResults::parse(&value)?;
61                    let i = aar.instance;
62                    by_instance.entry(i).or_default().aar = Some((aar, value));
63                }
64                "arc-message-signature" => {
65                    let ams = ArcMessageSignature::parse(&value)?;
66                    let i = ams.instance;
67                    by_instance.entry(i).or_default().ams = Some((ams, value));
68                }
69                "arc-seal" => {
70                    let seal = ArcSeal::parse(&value)?;
71                    let i = seal.instance;
72                    by_instance.entry(i).or_default().seal = Some((seal, value));
73                }
74                _ => {}
75            }
76        }
77
78        if by_instance.is_empty() {
79            return Ok(None);
80        }
81
82        // Validate completeness + contiguity. RFC 8617 §5.1 forbids
83        // sparse chains. Walk i=1, 2, 3, … and require each set to be
84        // complete.
85        let mut sets: Vec<ArcSet> = Vec::with_capacity(by_instance.len());
86        for expected_i in 1..=MAX_INSTANCE {
87            match by_instance.remove(&expected_i) {
88                Some(partial) => sets.push(partial.complete(expected_i)?),
89                None => {
90                    if by_instance.is_empty() {
91                        // Reached the end naturally.
92                        break;
93                    }
94                    return Err(ArcError::NonContiguousChain { missing: expected_i });
95                }
96            }
97        }
98        if !by_instance.is_empty() {
99            return Err(ArcError::ChainTooLong(sets.len() + by_instance.len()));
100        }
101
102        Ok(Some(Self { sets }))
103    }
104
105    /// Highest instance number in the chain.
106    pub fn highest_instance(&self) -> u32 {
107        self.sets.last().map(|s| s.i).unwrap_or(0)
108    }
109}
110
111#[derive(Default)]
112struct PartialSet {
113    aar: Option<(ArcAuthResults, String)>,
114    ams: Option<(ArcMessageSignature, String)>,
115    seal: Option<(ArcSeal, String)>,
116}
117
118impl PartialSet {
119    fn complete(self, i: u32) -> Result<ArcSet, ArcError> {
120        let (aar, raw_aar) = self
121            .aar
122            .ok_or(ArcError::IncompleteSet { instance: i, missing: "aar" })?;
123        let (ams, raw_ams) = self
124            .ams
125            .ok_or(ArcError::IncompleteSet { instance: i, missing: "ams" })?;
126        let (seal, raw_seal) = self
127            .seal
128            .ok_or(ArcError::IncompleteSet { instance: i, missing: "seal" })?;
129        Ok(ArcSet { i, aar, ams, seal, raw_aar, raw_ams, raw_seal })
130    }
131}
132
/// Return the header block of `raw`: everything before the first empty
/// line, without the line terminator of the last header line.
///
/// The empty line may be delimited by any mix of CRLF and bare LF
/// (`\r\n\r\n`, `\n\n`, `\n\r\n`, `\r\n\n`); whichever occurs *first* in
/// the buffer wins. Preferring one spelling over the other regardless of
/// position would let body text that follows an earlier separator be
/// treated as headers.
///
/// If `raw` starts with an empty line the header block is empty. If no
/// empty line exists the whole buffer is returned (a header-only message).
fn take_header_block(raw: &[u8]) -> &[u8] {
    // A message that opens with an empty line has no header fields at all.
    if raw.starts_with(b"\n") || raw.starts_with(b"\r\n") {
        return &raw[..0];
    }

    // Every line terminator ends in LF, so an empty line is an LF that is
    // immediately followed by another terminator (LF or CRLF).
    let first_lf = match (find_subseq(raw, b"\n\n"), find_subseq(raw, b"\n\r\n")) {
        (Some(a), Some(b)) => a.min(b),
        (Some(pos), None) | (None, Some(pos)) => pos,
        (None, None) => return raw,
    };

    // Drop the CR when the last header line was CRLF-terminated so the
    // returned block never ends in a dangling `\r`.
    let head = &raw[..first_lf];
    head.strip_suffix(b"\r").unwrap_or(head)
}

/// Index of the first occurrence of `needle` in `hay`, or `None` when it
/// does not occur. An empty `needle` never matches.
fn find_subseq(hay: &[u8], needle: &[u8]) -> Option<usize> {
    if needle.is_empty() {
        // `windows(0)` would panic; an empty needle is defined as "no match".
        return None;
    }
    hay.windows(needle.len()).position(|window| window == needle)
}
159
/// Split a header block into `(name, unfolded_value)` pairs, in the order
/// the fields appear.
///
/// * Lines are separated by LF; one trailing CR per line is dropped.
/// * A field starts on a line whose first byte is not SP/TAB and which
///   contains a `:`. The name is the text before the first `:` (trimmed;
///   empty if it is not valid UTF-8), the value is the text after it with
///   leading SP/TAB removed.
/// * Following lines that begin with SP/TAB are continuations: each is
///   appended to the value as a single space plus the line with its
///   leading SP/TAB removed.
/// * Empty lines, lines without a `:`, and continuation lines that do not
///   follow a field are ignored. In particular a whitespace-led line after
///   a malformed line is never promoted to a header of its own.
/// * Values are decoded as UTF-8 lossily.
fn unfold_headers(block: &[u8]) -> Vec<(String, String)> {
    fn is_wsp(b: u8) -> bool {
        matches!(b, b' ' | b'\t')
    }

    /// `bytes` without its leading run of SP/TAB.
    fn skip_wsp(bytes: &[u8]) -> &[u8] {
        let start = bytes.iter().position(|&b| !is_wsp(b)).unwrap_or(bytes.len());
        &bytes[start..]
    }

    let mut lines = block
        .split(|&b| b == b'\n')
        .map(|line| line.strip_suffix(b"\r").unwrap_or(line))
        .peekable();

    let mut out = Vec::new();
    while let Some(line) = lines.next() {
        // Skip empty lines and orphaned continuation lines: only a line
        // that starts with a non-whitespace byte can begin a field.
        if !matches!(line.first(), Some(&b) if !is_wsp(b)) {
            continue;
        }
        let Some(colon) = line.iter().position(|&c| c == b':') else {
            // Not a header field; its continuation lines (if any) are
            // dropped by the check above on the next iterations.
            continue;
        };

        let name = std::str::from_utf8(&line[..colon])
            .unwrap_or_default()
            .trim()
            .to_string();

        // RFC 5322 writes exactly one SP after the colon, but real-world
        // mail uses any amount of whitespace there.
        let mut value = skip_wsp(&line[colon + 1..]).to_vec();

        // Fold every continuation line into the value.
        while let Some(cont) = lines.next_if(|next| matches!(next.first(), Some(&b) if is_wsp(b))) {
            value.push(b' ');
            value.extend_from_slice(skip_wsp(cont));
        }

        out.push((name, String::from_utf8_lossy(&value).into_owned()));
    }
    out
}
227
#[cfg(test)]
mod tests {
    use super::*;
    use crate::header::{ArcSealCv, Canon};

    // Instance 1: the originating hop.
    const AAR1: &str = "ARC-Authentication-Results: i=1; spf=pass smtp.mailfrom=alice@example.com\r\n";
    const AMS1: &str = "ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=example.com; s=mail; h=From:To:Subject; bh=BH1; b=SIG1\r\n";
    const AS1: &str = "ARC-Seal: i=1; a=rsa-sha256; cv=none; d=example.com; s=mail; b=SEAL1\r\n";

    // Instance 2: a forwarder.
    const AAR2: &str = "ARC-Authentication-Results: i=2; dkim=pass header.d=forwarder.example\r\n";
    const AMS2: &str = "ARC-Message-Signature: i=2; a=rsa-sha256; c=relaxed/relaxed; d=forwarder.example; s=mail; h=From:To:Subject; bh=BH2; b=SIG2\r\n";
    const AS2: &str = "ARC-Seal: i=2; a=rsa-sha256; cv=pass; d=forwarder.example; s=mail; b=SEAL2\r\n";

    /// Build a message: the given header lines, then fixed `From`/`Subject`
    /// headers, the separator and a body.
    fn message_with(headers: &[&str]) -> Vec<u8> {
        let mut msg = headers.concat().into_bytes();
        msg.extend_from_slice(b"From: alice@example.com\r\nSubject: t\r\n\r\nbody");
        msg
    }

    /// Extract a chain that is expected to exist and be valid.
    fn chain_of(raw: &[u8]) -> ArcChain {
        ArcChain::extract(raw).unwrap().unwrap()
    }

    /// Instance numbers of the chain's sets, in stored order.
    fn instances(chain: &ArcChain) -> Vec<u32> {
        chain.sets.iter().map(|set| set.i).collect()
    }

    #[test]
    fn extract_no_arc_returns_none() {
        let extracted = ArcChain::extract(b"From: a@b.c\r\nSubject: hi\r\n\r\nbody").unwrap();
        assert!(extracted.is_none());
    }

    #[test]
    fn extract_single_set_chain() {
        let chain = chain_of(&message_with(&[AAR1, AMS1, AS1]));
        assert_eq!(instances(&chain), [1]);
        assert_eq!(chain.sets[0].seal.cv, ArcSealCv::None);
    }

    #[test]
    fn extract_two_hop_chain() {
        let chain = chain_of(&message_with(&[AAR1, AMS1, AS1, AAR2, AMS2, AS2]));
        assert_eq!(instances(&chain), [1, 2]);
        assert_eq!(chain.sets[1].seal.cv, ArcSealCv::Pass);
    }

    #[test]
    fn extract_header_order_independent() {
        // The i=2 headers come first here (unusual but legal); the result
        // must still be ordered by instance.
        let chain = chain_of(&message_with(&[AAR2, AMS2, AS2, AAR1, AMS1, AS1]));
        assert_eq!(instances(&chain), [1, 2]);
    }

    #[test]
    fn extract_rejects_incomplete_set() {
        // Instance 1 has no ARC-Seal.
        let outcome = ArcChain::extract(&message_with(&[AAR1, AMS1]));
        assert!(matches!(
            outcome,
            Err(ArcError::IncompleteSet { instance: 1, missing: "seal" })
        ));
    }

    #[test]
    fn extract_rejects_non_contiguous_chain() {
        // Instances 1 and 3 are present, instance 2 is absent entirely.
        const AAR3: &str = "ARC-Authentication-Results: i=3; dkim=pass\r\n";
        const AMS3: &str = "ARC-Message-Signature: i=3; a=rsa-sha256; d=x.example; s=mail; h=From; bh=BH3; b=SIG3\r\n";
        const AS3: &str = "ARC-Seal: i=3; a=rsa-sha256; cv=pass; d=x.example; s=mail; b=SEAL3\r\n";

        let outcome = ArcChain::extract(&message_with(&[AAR1, AMS1, AS1, AAR3, AMS3, AS3]));
        assert!(matches!(outcome, Err(ArcError::NonContiguousChain { missing: 2 })));
    }

    #[test]
    fn extract_handles_folded_headers() {
        let folded =
            "ARC-Message-Signature: i=1; a=rsa-sha256;\r\n c=relaxed/relaxed;\r\n d=example.com;\r\n s=mail; h=From:To:Subject; bh=BH1; b=SIG1\r\n";
        let msg = [AAR1, folded, AS1, "From: alice@example.com\r\n\r\nbody"]
            .concat()
            .into_bytes();

        let chain = chain_of(&msg);
        assert_eq!(chain.sets.len(), 1);
        assert_eq!(chain.sets[0].ams.canon_body, Canon::Relaxed);
    }

    #[test]
    fn highest_instance_returns_last() {
        let chain = chain_of(&message_with(&[AAR1, AMS1, AS1, AAR2, AMS2, AS2]));
        assert_eq!(chain.highest_instance(), 2);
    }

    #[test]
    fn extract_preserves_raw_values() {
        let chain = chain_of(&message_with(&[AAR1, AMS1, AS1]));
        let first = &chain.sets[0];
        assert!(first.raw_aar.contains("spf=pass"));
        assert!(first.raw_ams.contains("BH1"));
        assert!(first.raw_seal.contains("SEAL1"));
    }
}
341}