mail_parser/
lib.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6#![doc = include_str!("../README.md")]
7#![deny(rust_2018_idioms)]
// NOTE(review): this is an *outer* attribute, so it applies only to the
// `core` module declared on the next line, not to the whole crate. If
// crate-wide enforcement is intended it would have to be written as
// `#![forbid(unsafe_code)]` — confirm the other modules contain no `unsafe`
// before changing it.
#[forbid(unsafe_code)]
pub mod core;
pub mod decoders;
pub mod mailbox;
pub mod parsers;
13
14use parsers::MessageStream;
15use std::{borrow::Cow, collections::HashMap, hash::Hash, net::IpAddr};
16
/// RFC5322/RFC822 message parser.
///
/// Stores a map from header name to parse function plus one additional
/// stand-alone parse function.
#[derive(Debug, PartialEq, Eq, Clone)]
// The struct stores plain `fn` pointers (`HdrParseFnc`) and derives
// `PartialEq`, which is the situation this lint reports; it is allowed here.
#[allow(unpredictable_function_pointer_comparisons)]
pub struct MessageParser {
    /// Parse function registered for each header name.
    pub(crate) header_map: HashMap<HeaderName<'static>, HdrParseFnc>,
    /// NOTE(review): presumably the parse function applied to headers that
    /// have no entry in `header_map` — confirm against the parser.
    pub(crate) def_hdr_parse_fnc: HdrParseFnc,
}
24
/// Signature of a header parse function: it receives a mutable
/// [`MessageStream`] and returns a [`HeaderValue`] carrying the stream's
/// lifetime `'x`.
pub(crate) type HdrParseFnc = for<'x> fn(&mut MessageStream<'x>) -> crate::HeaderValue<'x>;
26
/// An RFC5322/RFC822 message.
///
/// The body/attachment lists hold [`MessagePartId`] values rather than the
/// parts themselves; the parts are stored in `parts`.
#[derive(Debug, Default, PartialEq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Message<'x> {
    /// IDs of the parts that make up the HTML body.
    /// NOTE(review): presumably indices into `parts` — confirm.
    #[cfg_attr(feature = "serde", serde(default))]
    pub html_body: Vec<MessagePartId>,
    /// IDs of the parts that make up the plain-text body.
    #[cfg_attr(feature = "serde", serde(default))]
    pub text_body: Vec<MessagePartId>,
    /// IDs of the parts treated as attachments.
    #[cfg_attr(feature = "serde", serde(default))]
    pub attachments: Vec<MessagePartId>,

    /// The MIME parts of the message.
    #[cfg_attr(feature = "serde", serde(default))]
    pub parts: Vec<MessagePart<'x>>,

    /// Raw message bytes. Excluded from both serde and rkyv serialization
    /// (see the `skip` attributes below).
    #[cfg_attr(feature = "serde", serde(skip))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Skip))]
    pub raw_message: Cow<'x, [u8]>,
}
49
/// MIME Message Part
///
/// Bundles the part's headers, its body and a set of `u32` offsets.
#[derive(Debug, PartialEq, Default, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct MessagePart<'x> {
    /// Headers of this part.
    #[cfg_attr(feature = "serde", serde(default))]
    pub headers: Vec<Header<'x>>,
    /// NOTE(review): presumably set when the part's content could not be
    /// decoded cleanly — confirm against the decoders.
    pub is_encoding_problem: bool,
    /// Body of this part.
    #[cfg_attr(feature = "serde", serde(default))]
    //#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))]
    pub body: PartType<'x>,
    /// Encoding of the part. Not included in serde output (`serde(skip)`).
    #[cfg_attr(feature = "serde", serde(skip))]
    pub encoding: Encoding,
    /// NOTE(review): the three offsets below presumably are byte positions
    /// in the raw message (start of headers, start of body, end of part) —
    /// confirm against the parser.
    pub offset_header: u32,
    pub offset_body: u32,
    pub offset_end: u32,
}
70
/// MIME Part encoding type
///
/// Represented as a `u8` with fixed discriminants so it can be converted
/// from a raw byte via [`From<u8>`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[repr(u8)]
pub enum Encoding {
    /// No encoding (the default).
    #[default]
    None = 0,
    /// Quoted-printable encoding.
    QuotedPrintable = 1,
    /// Base64 encoding.
    Base64 = 2,
}

impl From<u8> for Encoding {
    /// Maps a raw discriminant back to an [`Encoding`].
    ///
    /// `1` and `2` yield `QuotedPrintable` and `Base64`; every other value,
    /// including `0`, yields `Encoding::None`.
    fn from(v: u8) -> Self {
        if v == Encoding::QuotedPrintable as u8 {
            Encoding::QuotedPrintable
        } else if v == Encoding::Base64 as u8 {
            Encoding::Base64
        } else {
            // Unknown values deliberately fall back to "no encoding".
            Encoding::None
        }
    }
}
95
/// Unique ID representing a MIME part within a message.
///
/// Plain alias for `u32`.
pub type MessagePartId = u32;
98
99/// A text, binary or nested e-mail MIME message part.
100///
101/// - Text: Any text/* part
102/// - Binary: Any other part type that is not text.
103/// - Message: Nested RFC5322 message.
104/// - MultiPart: Multipart part.
105///
106#[derive(Debug, PartialEq, Clone)]
107#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
108#[cfg_attr(
109    feature = "rkyv",
110    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
111)]
112#[cfg_attr(
113    feature = "rkyv",
114    rkyv(serialize_bounds(
115        __S: rkyv::ser::Writer + rkyv::ser::Allocator,
116        __S::Error: rkyv::rancor::Source,
117    ))
118)]
119#[cfg_attr(
120    feature = "rkyv",
121    rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))
122)]
123#[cfg_attr(
124    feature = "rkyv",
125    rkyv(bytecheck(
126        bounds(
127            __C: rkyv::validation::ArchiveContext,
128        )
129    ))
130)]
131pub enum PartType<'x> {
132    /// Any text/* part
133    Text(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
134
135    /// A text/html part
136    Html(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
137
138    /// Any other part type that is not text.
139    Binary(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, [u8]>),
140
141    /// Any inline binary data that.
142    InlineBinary(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, [u8]>),
143
144    /// Nested RFC5322 message.
145    Message(#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))] Message<'x>),
146
147    /// Multipart part
148    Multipart(Vec<MessagePartId>),
149}
150
151impl Default for PartType<'_> {
152    fn default() -> Self {
153        PartType::Multipart(Vec::with_capacity(0))
154    }
155}
156
/// An RFC5322 or RFC2369 internet address.
///
/// Both fields are optional, so either the display name or the address
/// itself may be absent.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Addr<'x> {
    /// The address name including comments
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub name: Option<Cow<'x, str>>,

    /// An e-mail address (RFC5322/RFC2369) or URL (RFC2369)
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub address: Option<Cow<'x, str>>,
}
175
/// An RFC5322 address group.
///
/// A group has an optional name and a list of member addresses.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Group<'x> {
    /// Group name
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub name: Option<Cow<'x, str>>,

    /// Addresses that are members of the group
    #[cfg_attr(feature = "serde", serde(default))]
    pub addresses: Vec<Addr<'x>>,
}
193
/// A message header.
///
/// Pairs the header's name with its parsed value and records three `u32`
/// offsets.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[cfg_attr(feature = "rkyv", rkyv(compare(PartialEq)))]
pub struct Header<'x> {
    /// Header field name.
    pub name: HeaderName<'x>,
    /// Parsed header value.
    pub value: HeaderValue<'x>,
    /// NOTE(review): the three offsets below presumably are byte positions
    /// in the raw message (start of the field name, start of the value, end
    /// of the header) — confirm against the parser.
    pub offset_field: u32,
    pub offset_start: u32,
    pub offset_end: u32,
}
209
/// A header field
///
/// Well-known header names are unit variants; any other name is carried as
/// a string in [`HeaderName::Other`].
///
/// `PartialEq`, `Eq` and `Hash` are not derived here although the type is
/// used as a `HashMap` key in `MessageParser`.
/// NOTE(review): presumably they are implemented by hand elsewhere in the
/// crate — confirm.
// The derived `PartialOrd`/`Ord` order variants by declaration order, so
// reordering the variants below (including the position of `Other`) changes
// comparison results.
#[derive(Debug, Clone, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[cfg_attr(feature = "rkyv", rkyv(compare(PartialEq)))]
#[non_exhaustive]
pub enum HeaderName<'x> {
    Subject,
    From,
    To,
    Cc,
    Date,
    Bcc,
    ReplyTo,
    Sender,
    Comments,
    InReplyTo,
    Keywords,
    Received,
    MessageId,
    References,
    ReturnPath,
    MimeVersion,
    ContentDescription,
    ContentId,
    ContentLanguage,
    ContentLocation,
    ContentTransferEncoding,
    ContentType,
    ContentDisposition,
    ResentTo,
    ResentFrom,
    ResentBcc,
    ResentCc,
    ResentSender,
    ResentDate,
    ResentMessageId,
    ListArchive,
    ListHelp,
    ListId,
    ListOwner,
    ListPost,
    ListSubscribe,
    ListUnsubscribe,
    /// A header name that has no dedicated variant.
    Other(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
    DkimSignature,
    ArcAuthenticationResults,
    ArcMessageSignature,
    ArcSeal,
}
264
/// Parsed header value.
///
/// Each variant corresponds to one shape a parsed header can take; `Empty`
/// is the `Default`.
#[derive(Debug, PartialEq, Eq, Clone, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum HeaderValue<'x> {
    /// Address list or group
    Address(Address<'x>),

    /// String
    Text(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),

    /// List of strings
    TextList(
        #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
        Vec<Cow<'x, str>>,
    ),

    /// Datetime
    DateTime(DateTime),

    /// Content-Type or Content-Disposition header
    ContentType(ContentType<'x>),

    /// Received header (boxed)
    Received(Box<Received<'x>>),

    /// No value; this is the default variant.
    #[default]
    Empty,
}
297
/// Value of an address header: either a flat list of addresses or a list
/// of address groups.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum Address<'x> {
    /// Address list
    List(Vec<Addr<'x>>),
    /// Group of addresses
    Group(Vec<Group<'x>>),
}
310
/// Header form
///
/// NOTE(review): presumably selects how a header value should be
/// interpreted/returned (raw, text, addresses, ...) — the consumers are not
/// visible in this file; confirm.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum HeaderForm {
    Raw,
    Text,
    Addresses,
    GroupedAddresses,
    MessageIds,
    Date,
    URLs,
}
/// An RFC2045 Content-Type or RFC2183 Content-Disposition MIME header field.
///
/// The same structure is used for both headers: a main token, an optional
/// second token and an optional list of attributes.
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct ContentType<'x> {
    /// The type token.
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
    pub c_type: Cow<'x, str>,
    /// The subtype token, if any.
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub c_subtype: Option<Cow<'x, str>>,
    /// Name/value attributes attached to the header, if any.
    #[cfg_attr(feature = "serde", serde(default))]
    pub attributes: Option<Vec<Attribute<'x>>>,
}
338
/// A single name/value attribute of a [`ContentType`].
#[derive(Debug, PartialEq, Eq, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Attribute<'x> {
    /// Attribute name.
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
    pub name: Cow<'x, str>,
    /// Attribute value.
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
    pub value: Cow<'x, str>,
}
351
/// An RFC5322 datetime.
///
/// Stored as individual calendar/clock components plus a timezone offset
/// split into a sign flag, hours and minutes.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct DateTime {
    pub year: u16,
    pub month: u8,
    pub day: u8,
    pub hour: u8,
    pub minute: u8,
    pub second: u8,
    /// NOTE(review): presumably `true` when the timezone offset is negative
    /// (i.e. behind GMT) — confirm against the date parser.
    pub tz_before_gmt: bool,
    /// Hours component of the timezone offset.
    pub tz_hour: u8,
    /// Minutes component of the timezone offset.
    pub tz_minute: u8,
}
370
/// Parsed contents of a `Received` header (carried by
/// `HeaderValue::Received`).
///
/// Every field is optional; `Default` yields a value with all fields `None`.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub struct Received<'x> {
    /// Host given in the `from` clause.
    #[cfg_attr(feature = "serde", serde(default))]
    pub from: Option<Host<'x>>,
    /// IP address associated with the `from` host.
    #[cfg_attr(feature = "serde", serde(default))]
    pub from_ip: Option<IpAddr>,
    /// NOTE(review): presumably the reverse-DNS name reported for `from_ip`
    /// — confirm against the parser.
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub from_iprev: Option<Cow<'x, str>>,
    /// Host given in the `by` clause.
    #[cfg_attr(feature = "serde", serde(default))]
    pub by: Option<Host<'x>>,
    /// Value of the `for` clause (trailing underscore because `for` is a
    /// Rust keyword).
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub for_: Option<Cow<'x, str>>,
    /// Protocol given in the `with` clause.
    #[cfg_attr(feature = "serde", serde(default))]
    pub with: Option<Protocol>,
    /// TLS version, if one was recorded.
    #[cfg_attr(feature = "serde", serde(default))]
    pub tls_version: Option<TlsVersion>,
    /// TLS cipher, if one was recorded.
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub tls_cipher: Option<Cow<'x, str>>,
    /// Value of the `id` clause. Unlike its siblings this field is omitted
    /// from serde output when it is `None`.
    #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub id: Option<Cow<'x, str>>,
    /// NOTE(review): presumably the ident string reported for the sending
    /// client — confirm against the parser.
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub ident: Option<Cow<'x, str>>,
    /// Host announced in the greeting command.
    #[cfg_attr(feature = "serde", serde(default))]
    pub helo: Option<Host<'x>>,
    /// Which greeting command was used.
    #[cfg_attr(feature = "serde", serde(default))]
    pub helo_cmd: Option<Greeting>,
    /// Value of the `via` clause.
    #[cfg_attr(feature = "serde", serde(default))]
    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
    pub via: Option<Cow<'x, str>>,
    /// Date recorded in the header.
    pub date: Option<DateTime>,
}
412
/// A host reference, given either as a name or as an IP address.
///
/// Used by the `from`, `by` and `helo` fields of [`Received`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum Host<'x> {
    /// Host identified by a textual name.
    Name(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
    /// Host identified by an IP address.
    IpAddr(IpAddr),
}
423
/// SSL/TLS/DTLS protocol version, as stored in `Received::tls_version`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum TlsVersion {
    SSLv2,
    SSLv3,
    TLSv1_0,
    TLSv1_1,
    TLSv1_2,
    TLSv1_3,
    DTLSv1_0,
    DTLSv1_2,
    DTLSv1_3,
}
441
/// Greeting command, as stored in `Received::helo_cmd`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
pub enum Greeting {
    /// `HELO`
    Helo,
    /// `EHLO`
    Ehlo,
    /// `LHLO`
    Lhlo,
}
453
/// Mail transmission type, as stored in `Received::with`.
///
/// The first group of variants are the IANA-registered types; the second
/// group are non-standard values.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
// Variant names are kept as all-caps acronyms, which clippy would
// otherwise flag.
#[allow(clippy::upper_case_acronyms)]
pub enum Protocol {
    // IANA Mail Transmission Types
    SMTP,
    ESMTP,
    ESMTPA,
    ESMTPS,
    ESMTPSA,
    LMTP,
    LMTPA,
    LMTPS,
    LMTPSA,
    MMS,
    UTF8SMTP,
    UTF8SMTPA,
    UTF8SMTPS,
    UTF8SMTPSA,
    UTF8LMTP,
    UTF8LMTPA,
    UTF8LMTPS,
    UTF8LMTPSA,

    // Non-Standard Mail Transmission Types
    HTTP,
    HTTPS,
    IMAP,
    POP3,
    Local, // includes stdin, socket, etc.
}
489
490/// MIME Header field access trait
491pub trait MimeHeaders<'x> {
492    /// Returns the Content-Description field
493    fn content_description(&self) -> Option<&str>;
494    /// Returns the Content-Disposition field
495    fn content_disposition(&self) -> Option<&ContentType<'_>>;
496    /// Returns the Content-ID field
497    fn content_id(&self) -> Option<&str>;
498    /// Returns the Content-Encoding field
499    fn content_transfer_encoding(&self) -> Option<&str>;
500    /// Returns the Content-Type field
501    fn content_type(&self) -> Option<&ContentType<'_>>;
502    /// Returns the Content-Language field
503    fn content_language(&self) -> &HeaderValue<'_>;
504    /// Returns the Content-Location field
505    fn content_location(&self) -> Option<&str>;
506    /// Returns the attachment name, if any.
507    fn attachment_name(&self) -> Option<&str> {
508        self.content_disposition()
509            .and_then(|cd| cd.attribute("filename"))
510            .or_else(|| self.content_type().and_then(|ct| ct.attribute("name")))
511    }
512    // Returns true is the content type matches
513    fn is_content_type(&self, type_: &str, subtype: &str) -> bool {
514        self.content_type().is_some_and(|ct| {
515            ct.c_type.eq_ignore_ascii_case(type_)
516                && ct
517                    .c_subtype
518                    .as_ref()
519                    .is_some_and(|st| st.eq_ignore_ascii_case(subtype))
520        })
521    }
522}
523
/// Header lookup by name.
pub trait GetHeader<'x> {
    /// Returns a reference to the value of the header called `name`, or
    /// `None`.
    /// NOTE(review): which occurrence is returned when a header repeats is
    /// decided by the implementor — confirm there.
    fn header_value(&self, name: &HeaderName<'_>) -> Option<&HeaderValue<'x>>;
    /// Returns a reference to the whole header for `name`, or `None`. The
    /// name may be anything convertible into a [`HeaderName`].
    fn header(&self, name: impl Into<HeaderName<'x>>) -> Option<&Header<'x>>;
}
528
/// Private iterator state over a list of part IDs belonging to a message.
///
/// NOTE(review): the `Iterator` impl is not in this part of the file;
/// `pos` is presumably the current position in `list` (signed, so it may
/// start below zero) — confirm.
struct BodyPartIterator<'x> {
    /// Message whose parts are being visited.
    message: &'x Message<'x>,
    /// Part IDs to visit.
    list: &'x [MessagePartId],
    pos: i32,
}
534
/// Private iterator state over a message's attachments.
///
/// NOTE(review): the `Iterator` impl is not in this part of the file;
/// `pos` is presumably the current position in `message.attachments` —
/// confirm.
struct AttachmentIterator<'x> {
    /// Message whose attachments are being visited.
    message: &'x Message<'x>,
    pos: i32,
}