Skip to main content

email_message/
message_id.rs

1use std::fmt::Display;
2use std::str::FromStr;
3
4use crate::email::EmailAddressParseError;
5
/// An RFC 5322 `Message-ID` field value that has passed validation.
///
/// The wrapped string holds the complete bracketed spelling
/// (`<id-left@id-right>`). Equality and hashing are derived directly over
/// that stored string.
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct MessageId(String);

impl MessageId {
    /// Borrows the stored `Message-ID` text, angle brackets included.
    #[must_use]
    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl Display for MessageId {
    // Emits the stored text verbatim. `write_str` is used on purpose: it does
    // not apply width / fill / precision flags from the format spec.
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str(&self.0)
    }
}
23
24/// Reasons a string cannot be parsed as an RFC 5322 `Message-ID`.
25///
26/// ```rust
27/// use email_message::{MessageId, MessageIdParseError};
28///
29/// // Brackets are mandatory.
30/// assert_eq!(
31///     "abc@example.com".parse::<MessageId>().unwrap_err(),
32///     MessageIdParseError::MissingBrackets,
33/// );
34///
35/// // Local part validates against the addr-spec dot-atom grammar:
36/// // a leading dot is illegal.
37/// assert!(matches!(
38///     "<.bad@example.com>".parse::<MessageId>().unwrap_err(),
39///     MessageIdParseError::InvalidContent { .. },
40/// ));
41///
42/// // A well-formed Message-ID round-trips its bracketed form.
43/// let parsed = "<good@example.com>".parse::<MessageId>().unwrap();
44/// assert_eq!(parsed.as_str(), "<good@example.com>");
45/// ```
46#[derive(Debug, thiserror::Error)]
47#[non_exhaustive]
48pub enum MessageIdParseError {
49    #[error("Message-ID must be enclosed in angle brackets")]
50    MissingBrackets,
51    #[error("Message-ID contains whitespace")]
52    ContainsWhitespace,
53    #[error("Message-ID is missing the local part")]
54    MissingLocal,
55    #[error("Message-ID is missing the domain part")]
56    MissingDomain,
57    #[error("Message-ID local-part or domain is malformed")]
58    #[non_exhaustive]
59    InvalidContent {
60        #[source]
61        source: EmailAddressParseError,
62    },
63    #[error(
64        "Message-ID `id-left` uses the obsolete quoted-string form; the kernel commits to RFC 5322 dot-atom-text only"
65    )]
66    ObsoleteIdLeftForm,
67}
68
69impl PartialEq for MessageIdParseError {
70    fn eq(&self, other: &Self) -> bool {
71        // Pragmatic equality: variants compare by tag, ignoring the
72        // boxed `source` chain on `InvalidContent`. Sufficient for tests
73        // and avoids forcing `Eq` on the `addr_spec::ParseError` we
74        // transitively carry.
75        matches!(
76            (self, other),
77            (Self::MissingBrackets, Self::MissingBrackets)
78                | (Self::ContainsWhitespace, Self::ContainsWhitespace)
79                | (Self::MissingLocal, Self::MissingLocal)
80                | (Self::MissingDomain, Self::MissingDomain)
81                | (Self::InvalidContent { .. }, Self::InvalidContent { .. })
82                | (Self::ObsoleteIdLeftForm, Self::ObsoleteIdLeftForm)
83        )
84    }
85}
86
87impl Eq for MessageIdParseError {}
88
89impl FromStr for MessageId {
90    type Err = MessageIdParseError;
91
92    fn from_str(s: &str) -> Result<Self, Self::Err> {
93        let value = s.trim();
94        if !(value.starts_with('<') && value.ends_with('>') && value.len() >= 2) {
95            return Err(MessageIdParseError::MissingBrackets);
96        }
97
98        if value.chars().any(char::is_whitespace) {
99            return Err(MessageIdParseError::ContainsWhitespace);
100        }
101
102        let inner = &value[1..value.len() - 1];
103
104        // RFC 5322 §3.6.4 `id-left = dot-atom-text / obs-id-left`. The
105        // kernel commits to dot-atom-text; `obs-id-left` (which permits
106        // `quoted-string`) is the obsolete branch we deliberately reject
107        // so equality between canonical and quoted-string spellings
108        // doesn't drift (the type derives `Eq`/`Hash` over the stored
109        // bytes).
110        if inner.starts_with('"') {
111            return Err(MessageIdParseError::ObsoleteIdLeftForm);
112        }
113
114        // Empty local / empty domain are caught by addr-spec's normalize
115        // (it rejects `@example.com`, `abc@`, and `abc` for missing-`@`).
116        // We still distinguish the missing-local / missing-domain /
117        // no-`@` cases for ergonomic error messages: addr-spec returns a
118        // generic parse error for all three, but the kernel can be more
119        // specific on the obvious shape problems.
120        if let Some((local, domain)) = inner.split_once('@') {
121            if local.is_empty() {
122                return Err(MessageIdParseError::MissingLocal);
123            }
124            if domain.is_empty() {
125                return Err(MessageIdParseError::MissingDomain);
126            }
127        } else {
128            return Err(MessageIdParseError::MissingDomain);
129        }
130
131        // RFC 5321 §2.4: domain case-insensitive, local-part case-sensitive.
132        // RFC 5321 §4.1.3: literal-form domains keep their bytes. Mirrors the
133        // case-folding `EmailAddress::from_str` performs so two MessageIds that are
134        // RFC 5321-equivalent compare equal under derived `Eq`/`Hash`.
135        let parsed = addr_spec::AddrSpec::from_str(inner).map_err(|error| {
136            MessageIdParseError::InvalidContent {
137                source: EmailAddressParseError::from(error),
138            }
139        })?;
140        let is_literal = parsed.is_literal();
141        let (local, domain) = parsed.into_serialized_parts();
142        let normalized = if is_literal {
143            format!("<{local}@{domain}>")
144        } else {
145            format!("<{local}@{}>", domain.to_ascii_lowercase())
146        };
147
148        Ok(Self(normalized))
149    }
150}
151
152impl TryFrom<&str> for MessageId {
153    type Error = MessageIdParseError;
154
155    fn try_from(value: &str) -> Result<Self, Self::Error> {
156        Self::from_str(value)
157    }
158}
159
160impl From<MessageId> for String {
161    fn from(value: MessageId) -> Self {
162        value.0
163    }
164}
165
#[cfg(feature = "serde")]
impl serde::Serialize for MessageId {
    /// Serializes the identifier as a plain string holding the stored
    /// bracketed text.
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        let text: &str = &self.0;
        serializer.serialize_str(text)
    }
}
175
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for MessageId {
    /// Reads an owned string and validates it through the `FromStr`
    /// implementation, so deserialized values satisfy the same rules as
    /// parsed ones. A parse failure is reported as a custom deserializer
    /// error carrying the parse error's message.
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        let raw = <String as serde::Deserialize<'de>>::deserialize(deserializer)?;
        raw.parse::<Self>()
            .map_err(<D::Error as serde::de::Error>::custom)
    }
}
186
#[cfg(feature = "arbitrary")]
impl<'a> arbitrary::Arbitrary<'a> for MessageId {
    /// Builds an id of the shape `<{u64}@{u32}.test>` from fuzzer input.
    ///
    /// The value is assembled directly rather than through `FromStr`. The
    /// `u64` is drawn before the `u32`, so a given input always yields the
    /// same id.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let id_left = <u64 as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        let label = <u32 as arbitrary::Arbitrary<'a>>::arbitrary(u)?;
        let text = format!("<{id_left}@{label}.test>");
        Ok(Self(text))
    }
}
195
#[cfg(test)]
mod tests {
    use super::{MessageId, MessageIdParseError};

    /// Parses `input` and returns the resulting error, panicking if the
    /// input was unexpectedly accepted.
    #[track_caller]
    fn parse_err(input: &str) -> MessageIdParseError {
        input.parse::<MessageId>().unwrap_err()
    }

    /// Hashes `id` with the standard library's default hasher.
    fn hash_of(id: &MessageId) -> u64 {
        use std::collections::hash_map::DefaultHasher;
        use std::hash::{Hash, Hasher};

        let mut hasher = DefaultHasher::new();
        id.hash(&mut hasher);
        hasher.finish()
    }

    #[test]
    fn message_id_from_str_accepts_valid_values() {
        assert!(
            "<abc@example.com>".parse::<MessageId>().is_ok(),
            "expected valid message id"
        );
    }

    #[test]
    fn message_id_from_str_rejects_missing_brackets() {
        assert_eq!(
            parse_err("abc@example.com"),
            MessageIdParseError::MissingBrackets
        );
    }

    #[test]
    fn message_id_from_str_rejects_missing_at() {
        // With no `@` in `<abc>`, the `id-right` (domain) half is
        // structurally absent.
        assert_eq!(parse_err("<abc>"), MessageIdParseError::MissingDomain);
    }

    #[test]
    fn message_id_from_str_rejects_whitespace() {
        assert_eq!(
            parse_err("<abc @example.com>"),
            MessageIdParseError::ContainsWhitespace
        );
    }

    #[test]
    fn message_id_from_str_rejects_empty_local_part() {
        assert_eq!(
            parse_err("<@example.com>"),
            MessageIdParseError::MissingLocal
        );
    }

    #[test]
    fn message_id_from_str_rejects_empty_domain() {
        assert_eq!(parse_err("<abc@>"), MessageIdParseError::MissingDomain);
    }

    #[test]
    fn message_id_from_str_rejects_dot_atom_violations() {
        // A leading dot, a doubled dot and a trailing dot in the local part
        // all break the dot-atom grammar; these used to slip through.
        let violations = [
            "<.bad@example.com>",
            "<a..b@example.com>",
            "<a.@example.com>",
        ];
        for input in violations {
            let parsed = input.parse::<MessageId>();
            assert!(
                matches!(parsed, Err(MessageIdParseError::InvalidContent { .. })),
                "expected InvalidContent for {input}, got {parsed:?}"
            );
        }
    }

    /// RFC 5322 §3.6.4: `id-left = dot-atom-text / obs-id-left`. Only
    /// dot-atom-text is supported; `obs-id-left` (which allows
    /// `quoted-string`) is the obsolete branch. If the quoted form were
    /// accepted, a canonical and a quoted spelling of the same id would hash
    /// and compare unequal, because `MessageId` derives `Eq`/`Hash` over the
    /// stored bytes.
    #[test]
    fn message_id_from_str_rejects_quoted_string_id_left() {
        assert_eq!(
            parse_err("<\"weird\"@example.com>"),
            MessageIdParseError::ObsoleteIdLeftForm
        );
    }

    #[test]
    fn message_id_from_str_rejects_quoted_at_in_local_part() {
        assert_eq!(
            parse_err("<\"a@b\"@example.com>"),
            MessageIdParseError::ObsoleteIdLeftForm
        );
    }

    /// Message ids that are RFC 5321-equivalent and differ only in the
    /// casing of the domain must be equal and hash identically, matching the
    /// case-folding guarantee `EmailAddress` gives.
    #[test]
    fn message_id_from_str_case_folds_domain() {
        let upper = "<foo@Example.COM>"
            .parse::<MessageId>()
            .expect("upper-case domain should parse");
        let lower = "<foo@example.com>"
            .parse::<MessageId>()
            .expect("lower-case domain should parse");

        assert_eq!(upper, lower);
        assert_eq!(upper.as_str(), "<foo@example.com>");
        assert_eq!(hash_of(&upper), hash_of(&lower));
    }
}