// email_auth/arc/parser.rs

1use base64::Engine;
2
3use crate::dkim::types::{Algorithm, CanonicalizationMethod};
4
5use super::types::{
6    ArcAuthenticationResults, ArcMessageSignature, ArcSeal, ArcSet, ChainValidationStatus,
7};
8
/// Error produced when an ARC header value cannot be parsed.
///
/// The type implements [`std::fmt::Display`] and [`std::error::Error`], so it
/// can be propagated with `?` into `Box<dyn std::error::Error>` or wrapped by a
/// caller's own error type. `Display` prints `detail` verbatim.
#[derive(Debug)]
pub struct ArcParseError {
    /// Human-readable description of what was wrong with the header value.
    pub detail: String,
}

impl ArcParseError {
    /// Builds an error from anything convertible into a `String`.
    fn new(detail: impl Into<String>) -> Self {
        Self {
            detail: detail.into(),
        }
    }
}

impl std::fmt::Display for ArcParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.detail)
    }
}

impl std::error::Error for ArcParseError {}
22
/// Split a `tag=value; tag=value` header value into `(name, value)` pairs
/// (the same tag-list shape DKIM uses).
///
/// Segments are separated by `;`. Empty segments and segments without an `=`
/// are skipped. Each remaining segment is split at its first `=`: the name is
/// trimmed and ASCII-lowercased, the value is trimmed and otherwise untouched.
/// Pairs are returned in input order; duplicates are not removed here.
fn parse_tags(value: &str) -> Vec<(String, String)> {
    value
        .split(';')
        .map(str::trim)
        .filter(|segment| !segment.is_empty())
        .filter_map(|segment| segment.split_once('='))
        .map(|(name, val)| (name.trim().to_ascii_lowercase(), val.trim().to_string()))
        .collect()
}
39
40/// Decode base64 with whitespace removal.
41fn decode_base64(value: &str) -> Result<Vec<u8>, ArcParseError> {
42    let cleaned: String = value.chars().filter(|c| !c.is_whitespace()).collect();
43    base64::engine::general_purpose::STANDARD
44        .decode(&cleaned)
45        .map_err(|e| ArcParseError::new(format!("malformed base64: {}", e)))
46}
47
48/// Parse ARC-Authentication-Results header value.
49/// Format: `i=<N>; <authres-payload>`
50pub fn parse_aar(value: &str) -> Result<ArcAuthenticationResults, ArcParseError> {
51    let trimmed = value.trim();
52
53    // Find first semicolon — everything before is i=N
54    let semi_pos = trimmed
55        .find(';')
56        .ok_or_else(|| ArcParseError::new("AAR missing semicolon after i= tag"))?;
57
58    let i_part = trimmed[..semi_pos].trim();
59    let payload = trimmed[semi_pos + 1..].trim().to_string();
60
61    // Parse i= tag
62    let instance = parse_instance_from_part(i_part)?;
63
64    Ok(ArcAuthenticationResults {
65        instance,
66        payload,
67        raw_header: value.to_string(),
68    })
69}
70
71/// Parse i=<N> from a tag part like "i=1" or " i = 2 ".
72fn parse_instance_from_part(part: &str) -> Result<u32, ArcParseError> {
73    let trimmed = part.trim();
74    if !trimmed.to_ascii_lowercase().starts_with("i=") && !trimmed.to_ascii_lowercase().starts_with("i =") {
75        return Err(ArcParseError::new(format!(
76            "expected i= tag, got '{}'",
77            trimmed
78        )));
79    }
80    let eq_pos = trimmed
81        .find('=')
82        .ok_or_else(|| ArcParseError::new("no = in i= tag"))?;
83    let num_str = trimmed[eq_pos + 1..].trim();
84    let instance: u32 = num_str
85        .parse()
86        .map_err(|_| ArcParseError::new(format!("invalid instance number: '{}'", num_str)))?;
87    validate_instance(instance)?;
88    Ok(instance)
89}
90
91fn validate_instance(instance: u32) -> Result<(), ArcParseError> {
92    if instance < 1 || instance > 50 {
93        return Err(ArcParseError::new(format!(
94            "instance {} outside valid range 1-50",
95            instance
96        )));
97    }
98    Ok(())
99}
100
101/// Parse ARC-Message-Signature header value.
102pub fn parse_ams(value: &str) -> Result<ArcMessageSignature, ArcParseError> {
103    let tags = parse_tags(value);
104
105    // Check for duplicates
106    check_duplicate_tags(&tags)?;
107
108    let mut instance = None;
109    let mut algorithm = None;
110    let mut signature = None;
111    let mut body_hash = None;
112    let mut domain = None;
113    let mut selector = None;
114    let mut signed_headers = None;
115    let mut header_canon = CanonicalizationMethod::Relaxed;
116    let mut body_canon = CanonicalizationMethod::Relaxed;
117    let mut timestamp = None;
118    let mut body_length = None;
119
120    for (tag, val) in &tags {
121        match tag.as_str() {
122            "i" => {
123                let i: u32 = val
124                    .parse()
125                    .map_err(|_| ArcParseError::new(format!("invalid i= value: '{}'", val)))?;
126                validate_instance(i)?;
127                instance = Some(i);
128            }
129            "a" => {
130                algorithm = Some(
131                    Algorithm::parse(val)
132                        .ok_or_else(|| ArcParseError::new(format!("unknown algorithm: '{}'", val)))?,
133                );
134            }
135            "b" => {
136                signature = Some(decode_base64(val)?);
137            }
138            "bh" => {
139                body_hash = Some(decode_base64(val)?);
140            }
141            "d" => {
142                domain = Some(val.to_string());
143            }
144            "s" => {
145                selector = Some(val.to_string());
146            }
147            "h" => {
148                let hdrs: Vec<String> = val
149                    .split(':')
150                    .map(|h| h.trim().to_string())
151                    .filter(|h| !h.is_empty())
152                    .collect();
153                // RFC 8617 §5.1: h= MUST NOT include ARC-* or Authentication-Results
154                for hdr in &hdrs {
155                    let lower = hdr.to_ascii_lowercase();
156                    if lower == "arc-authentication-results"
157                        || lower == "arc-message-signature"
158                        || lower == "arc-seal"
159                        || lower == "authentication-results"
160                    {
161                        return Err(ArcParseError::new(format!(
162                            "AMS h= must not include '{}' (RFC 8617)",
163                            hdr
164                        )));
165                    }
166                }
167                signed_headers = Some(hdrs);
168            }
169            "c" => {
170                let parts: Vec<&str> = val.split('/').collect();
171                header_canon = CanonicalizationMethod::parse(parts[0])
172                    .unwrap_or(CanonicalizationMethod::Relaxed);
173                if parts.len() > 1 {
174                    body_canon = CanonicalizationMethod::parse(parts[1])
175                        .unwrap_or(CanonicalizationMethod::Relaxed);
176                }
177            }
178            "t" => {
179                timestamp = val.parse::<u64>().ok();
180            }
181            "l" => {
182                body_length = val.parse::<u64>().ok();
183            }
184            _ => {} // Unknown tags ignored
185        }
186    }
187
188    Ok(ArcMessageSignature {
189        instance: instance.ok_or_else(|| ArcParseError::new("missing required tag: i"))?,
190        algorithm: algorithm.ok_or_else(|| ArcParseError::new("missing required tag: a"))?,
191        signature: signature.ok_or_else(|| ArcParseError::new("missing required tag: b"))?,
192        body_hash: body_hash.ok_or_else(|| ArcParseError::new("missing required tag: bh"))?,
193        domain: domain.ok_or_else(|| ArcParseError::new("missing required tag: d"))?,
194        selector: selector.ok_or_else(|| ArcParseError::new("missing required tag: s"))?,
195        signed_headers: signed_headers
196            .ok_or_else(|| ArcParseError::new("missing required tag: h"))?,
197        header_canonicalization: header_canon,
198        body_canonicalization: body_canon,
199        timestamp,
200        body_length,
201        raw_header: value.to_string(),
202    })
203}
204
205/// Parse ARC-Seal header value.
206pub fn parse_seal(value: &str) -> Result<ArcSeal, ArcParseError> {
207    let tags = parse_tags(value);
208
209    check_duplicate_tags(&tags)?;
210
211    let mut instance = None;
212    let mut cv = None;
213    let mut algorithm = None;
214    let mut signature = None;
215    let mut domain = None;
216    let mut selector = None;
217    let mut timestamp = None;
218    let mut has_h_tag = false;
219
220    for (tag, val) in &tags {
221        match tag.as_str() {
222            "i" => {
223                let i: u32 = val
224                    .parse()
225                    .map_err(|_| ArcParseError::new(format!("invalid i= value: '{}'", val)))?;
226                validate_instance(i)?;
227                instance = Some(i);
228            }
229            "cv" => {
230                cv = Some(match val.to_ascii_lowercase().as_str() {
231                    "none" => ChainValidationStatus::None,
232                    "pass" => ChainValidationStatus::Pass,
233                    "fail" => ChainValidationStatus::Fail,
234                    _ => {
235                        return Err(ArcParseError::new(format!(
236                            "invalid cv= value: '{}'",
237                            val
238                        )))
239                    }
240                });
241            }
242            "a" => {
243                algorithm = Some(
244                    Algorithm::parse(val)
245                        .ok_or_else(|| ArcParseError::new(format!("unknown algorithm: '{}'", val)))?,
246                );
247            }
248            "b" => {
249                signature = Some(decode_base64(val)?);
250            }
251            "d" => {
252                domain = Some(val.to_string());
253            }
254            "s" => {
255                selector = Some(val.to_string());
256            }
257            "t" => {
258                timestamp = val.parse::<u64>().ok();
259            }
260            "h" => {
261                has_h_tag = true;
262            }
263            _ => {} // Ignore unknown
264        }
265    }
266
267    // h= tag MUST NOT be present in AS
268    if has_h_tag {
269        return Err(ArcParseError::new(
270            "ARC-Seal must not contain h= tag (RFC 8617 §4.1.3)",
271        ));
272    }
273
274    Ok(ArcSeal {
275        instance: instance.ok_or_else(|| ArcParseError::new("missing required tag: i"))?,
276        cv: cv.ok_or_else(|| ArcParseError::new("missing required tag: cv"))?,
277        algorithm: algorithm.ok_or_else(|| ArcParseError::new("missing required tag: a"))?,
278        signature: signature.ok_or_else(|| ArcParseError::new("missing required tag: b"))?,
279        domain: domain.ok_or_else(|| ArcParseError::new("missing required tag: d"))?,
280        selector: selector.ok_or_else(|| ArcParseError::new("missing required tag: s"))?,
281        timestamp,
282        raw_header: value.to_string(),
283    })
284}
285
286/// Check for duplicate tag names.
287fn check_duplicate_tags(tags: &[(String, String)]) -> Result<(), ArcParseError> {
288    for i in 0..tags.len() {
289        for j in (i + 1)..tags.len() {
290            if tags[i].0 == tags[j].0 {
291                return Err(ArcParseError::new(format!(
292                    "duplicate tag: '{}'",
293                    tags[i].0
294                )));
295            }
296        }
297    }
298    Ok(())
299}
300
301/// Collect and group ARC headers from message headers into ARC Sets.
302/// Returns ordered Vec<ArcSet> (ascending by instance) or error.
303pub fn collect_arc_sets(
304    headers: &[(&str, &str)],
305) -> Result<Vec<ArcSet>, ArcParseError> {
306    let mut aars: Vec<ArcAuthenticationResults> = Vec::new();
307    let mut amss: Vec<ArcMessageSignature> = Vec::new();
308    let mut seals: Vec<ArcSeal> = Vec::new();
309
310    for (name, value) in headers {
311        let lower = name.to_ascii_lowercase();
312        match lower.as_str() {
313            "arc-authentication-results" => {
314                aars.push(parse_aar(value)?);
315            }
316            "arc-message-signature" => {
317                amss.push(parse_ams(value)?);
318            }
319            "arc-seal" => {
320                seals.push(parse_seal(value)?);
321            }
322            _ => {}
323        }
324    }
325
326    if aars.is_empty() && amss.is_empty() && seals.is_empty() {
327        return Ok(Vec::new());
328    }
329
330    // Check max 50
331    let max_instance = aars
332        .iter()
333        .map(|a| a.instance)
334        .chain(amss.iter().map(|a| a.instance))
335        .chain(seals.iter().map(|a| a.instance))
336        .max()
337        .unwrap_or(0);
338
339    if max_instance > 50 {
340        return Err(ArcParseError::new(format!(
341            "instance {} exceeds maximum of 50",
342            max_instance
343        )));
344    }
345
346    // Group by instance
347    let mut sets: Vec<ArcSet> = Vec::new();
348    for i in 1..=max_instance {
349        let aar: Vec<_> = aars.iter().filter(|a| a.instance == i).collect();
350        let ams: Vec<_> = amss.iter().filter(|a| a.instance == i).collect();
351        let seal: Vec<_> = seals.iter().filter(|a| a.instance == i).collect();
352
353        // Each instance must have exactly one of each
354        if aar.len() != 1 || ams.len() != 1 || seal.len() != 1 {
355            if aar.is_empty() && ams.is_empty() && seal.is_empty() {
356                return Err(ArcParseError::new(format!(
357                    "gap in ARC instance sequence: missing instance {}",
358                    i
359                )));
360            }
361            if aar.len() > 1 || ams.len() > 1 || seal.len() > 1 {
362                return Err(ArcParseError::new(format!(
363                    "duplicate ARC headers for instance {}",
364                    i
365                )));
366            }
367            return Err(ArcParseError::new(format!(
368                "incomplete ARC set for instance {}: AAR={}, AMS={}, AS={}",
369                i,
370                aar.len(),
371                ams.len(),
372                seal.len()
373            )));
374        }
375
376        sets.push(ArcSet {
377            instance: i,
378            aar: aar[0].clone(),
379            ams: ams[0].clone(),
380            seal: seal[0].clone(),
381        });
382    }
383
384    Ok(sets)
385}
386
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest AMS value that carries every required tag.
    const MINIMAL_AMS: &str =
        "i=1; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==; bh=dGVzdA==; h=from";

    /// Smallest ARC-Seal value for the first hop of a chain.
    const FIRST_SEAL: &str = "i=1; cv=none; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==";

    /// Returns the error detail, panicking if the parse unexpectedly succeeded.
    fn failure_detail<T>(outcome: Result<T, ArcParseError>) -> String {
        match outcome {
            Ok(_) => panic!("expected a parse error"),
            Err(err) => err.detail,
        }
    }

    /// Builds an otherwise valid ARC-Seal value with the given `i=` and `cv=`.
    fn seal_value(instance: u32, cv: &str) -> String {
        format!(
            "i={}; cv={}; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==",
            instance, cv
        )
    }

    // CHK-875: a well-formed AAR yields its instance and payload.
    #[test]
    fn parse_valid_aar() {
        let parsed = parse_aar("i=1; spf=pass smtp.mailfrom=example.com")
            .expect("AAR should parse");
        assert_eq!(parsed.instance, 1);
        assert_eq!(parsed.payload, "spf=pass smtp.mailfrom=example.com");
    }

    // CHK-876: an AMS with all required tags exposes each of them.
    #[test]
    fn parse_valid_ams() {
        let input = concat!(
            "i=1; a=rsa-sha256; d=example.com; s=sel1; ",
            "b=dGVzdA==; bh=dGVzdA==; h=from:to:subject"
        );
        let parsed = parse_ams(input).expect("AMS should parse");
        assert_eq!(parsed.instance, 1);
        assert_eq!(parsed.algorithm, Algorithm::RsaSha256);
        assert_eq!(parsed.domain, "example.com");
        assert_eq!(parsed.selector, "sel1");
        assert_eq!(parsed.signed_headers, vec!["from", "to", "subject"]);
    }

    // CHK-877: every legal cv= value maps to its status.
    #[test]
    fn parse_seal_cv_none() {
        let parsed = parse_seal(&seal_value(1, "none")).expect("seal should parse");
        assert_eq!(parsed.cv, ChainValidationStatus::None);
    }

    #[test]
    fn parse_seal_cv_pass() {
        let parsed = parse_seal(&seal_value(2, "pass")).expect("seal should parse");
        assert_eq!(parsed.cv, ChainValidationStatus::Pass);
    }

    #[test]
    fn parse_seal_cv_fail() {
        let parsed = parse_seal(&seal_value(3, "fail")).expect("seal should parse");
        assert_eq!(parsed.cv, ChainValidationStatus::Fail);
    }

    // CHK-878: an ARC-Seal carrying h= is rejected.
    #[test]
    fn seal_with_h_tag_fails() {
        let input = format!("{}; h=from:to", FIRST_SEAL);
        assert!(failure_detail(parse_seal(&input)).contains("h= tag"));
    }

    // CHK-879: a missing required tag is named in the error.
    #[test]
    fn ams_missing_i_tag() {
        let outcome = parse_ams("a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==; bh=dGVzdA==; h=from");
        assert!(failure_detail(outcome).contains("missing required tag: i"));
    }

    #[test]
    fn seal_missing_cv_tag() {
        let outcome = parse_seal("i=1; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==");
        assert!(failure_detail(outcome).contains("missing required tag: cv"));
    }

    // CHK-880: instances outside 1-50 are rejected.
    #[test]
    fn instance_0_fails() {
        assert!(parse_aar("i=0; test=pass").is_err());
    }

    #[test]
    fn instance_51_fails() {
        assert!(parse_aar("i=51; test=pass").is_err());
    }

    // CHK-881: a repeated tag is rejected.
    #[test]
    fn ams_duplicate_tag_fails() {
        let outcome = parse_ams(
            "i=1; a=rsa-sha256; a=ed25519-sha256; d=ex.com; s=s1; b=dGVzdA==; bh=dGVzdA==; h=from",
        );
        assert!(failure_detail(outcome).contains("duplicate tag"));
    }

    // CHK-790: AMS parses without a v= tag, which is not required.
    #[test]
    fn ams_no_version_tag() {
        let parsed = parse_ams(MINIMAL_AMS).expect("AMS without v= should parse");
        assert_eq!(parsed.instance, 1);
    }

    // CHK-791: h= must not list ARC-* or Authentication-Results.
    #[test]
    fn ams_h_rejects_arc_headers() {
        let input = format!("{}:arc-seal", MINIMAL_AMS);
        assert!(failure_detail(parse_ams(&input)).contains("must not"));
    }

    #[test]
    fn ams_h_rejects_authentication_results() {
        let input = format!("{}:Authentication-Results", MINIMAL_AMS);
        assert!(parse_ams(&input).is_err());
    }

    // CHK-821: an unknown algorithm is rejected.
    #[test]
    fn unknown_algorithm_fails() {
        let outcome =
            parse_ams("i=1; a=bad-algo; d=ex.com; s=s1; b=dGVzdA==; bh=dGVzdA==; h=from");
        assert!(failure_detail(outcome).contains("unknown algorithm"));
    }

    // CHK-820: malformed base64 is rejected.
    #[test]
    fn malformed_base64_fails() {
        let outcome = parse_ams(
            "i=1; a=rsa-sha256; d=ex.com; s=s1; b=!!!not-base64!!!; bh=dGVzdA==; h=from",
        );
        assert!(failure_detail(outcome).contains("base64"));
    }

    // CHK-804-808: grouping headers into ARC Sets.
    #[test]
    fn collect_empty_headers() {
        let collected =
            collect_arc_sets(&[("from", "test@example.com")]).expect("no ARC headers is fine");
        assert!(collected.is_empty());
    }

    #[test]
    fn collect_single_valid_set() {
        let headers = [
            ("ARC-Authentication-Results", "i=1; spf=pass"),
            ("ARC-Message-Signature", MINIMAL_AMS),
            ("ARC-Seal", FIRST_SEAL),
        ];
        let collected = collect_arc_sets(&headers).expect("one complete set");
        assert_eq!(collected.len(), 1);
        assert_eq!(collected[0].instance, 1);
    }

    // CHK-824: a hole in the instance sequence is rejected.
    #[test]
    fn collect_instance_gap_fails() {
        let headers = [
            ("ARC-Authentication-Results", "i=1; spf=pass"),
            ("ARC-Message-Signature", MINIMAL_AMS),
            ("ARC-Seal", FIRST_SEAL),
            // Instance 2 is deliberately absent.
            ("ARC-Authentication-Results", "i=3; spf=pass"),
            (
                "ARC-Message-Signature",
                "i=3; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==; bh=dGVzdA==; h=from",
            ),
            (
                "ARC-Seal",
                "i=3; cv=pass; a=rsa-sha256; d=ex.com; s=s1; b=dGVzdA==",
            ),
        ];
        assert!(failure_detail(collect_arc_sets(&headers)).contains("gap"));
    }

    // CHK-823: two headers of one kind for the same instance are rejected.
    #[test]
    fn collect_duplicate_instance_fails() {
        let headers = [
            ("ARC-Authentication-Results", "i=1; spf=pass"),
            // Second AAR for instance 1.
            ("ARC-Authentication-Results", "i=1; dkim=pass"),
            ("ARC-Message-Signature", MINIMAL_AMS),
            ("ARC-Seal", FIRST_SEAL),
        ];
        assert!(failure_detail(collect_arc_sets(&headers)).contains("duplicate"));
    }

    // AMS c= tag: header and body canonicalization are read separately.
    #[test]
    fn ams_c_tag_parsing() {
        let input = format!("{}; c=relaxed/simple", MINIMAL_AMS);
        let parsed = parse_ams(&input).expect("AMS with c= should parse");
        assert_eq!(parsed.header_canonicalization, CanonicalizationMethod::Relaxed);
        assert_eq!(parsed.body_canonicalization, CanonicalizationMethod::Simple);
    }

    // AMS optional tags t= and l=.
    #[test]
    fn ams_optional_tags() {
        let input = format!("{}; t=1700000000; l=100", MINIMAL_AMS);
        let parsed = parse_ams(&input).expect("AMS with t= and l= should parse");
        assert_eq!(parsed.timestamp, Some(1700000000));
        assert_eq!(parsed.body_length, Some(100));
    }

    // ARC-Seal optional t= tag.
    #[test]
    fn seal_optional_timestamp() {
        let input = format!("{}; t=1700000000", FIRST_SEAL);
        let parsed = parse_seal(&input).expect("seal with t= should parse");
        assert_eq!(parsed.timestamp, Some(1700000000));
    }
}
651}