mail_parser/
lib.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6#![doc = include_str!("../README.md")]
7#![deny(rust_2018_idioms)]
// NOTE(review): the attribute below is an *outer* attribute (`#[...]`, not `#![...]`), so it
// applies only to the `core` module declared right after it, not to the whole crate.
// Confirm whether a crate-wide `#![forbid(unsafe_code)]` was intended.
8#[forbid(unsafe_code)]
9pub mod core;
10pub mod decoders;
11pub mod mailbox;
12pub mod parsers;
13
14use parsers::MessageStream;
15use std::{borrow::Cow, collections::HashMap, hash::Hash, net::IpAddr};
16
17/// RFC5322/RFC822 message parser.
///
/// Holds the header parsing configuration: a table of per-header parse functions and a
/// separate default parse function.
///
/// NOTE(review): both fields store plain function pointers ([`HdrParseFnc`]), so the derived
/// `PartialEq`/`Eq` compare those pointers, not the behaviour of the functions.
18#[derive(Debug, PartialEq, Eq, Clone)]
19pub struct MessageParser {
    /// Parse function registered for each header name.
20    pub(crate) header_map: HashMap<HeaderName<'static>, HdrParseFnc>,
    /// Default parse function; presumably applied to headers that have no entry in
    /// `header_map` — confirm against the parser implementation.
21    pub(crate) def_hdr_parse_fnc: HdrParseFnc,
22}
23
/// Signature of a header value parse function: it receives a mutable [`MessageStream`] and
/// returns a [`HeaderValue`] carrying the same lifetime `'x` as the stream.
24pub(crate) type HdrParseFnc = for<'x> fn(&mut MessageStream<'x>) -> crate::HeaderValue<'x>;
25
26/// An RFC5322/RFC822 message.
///
/// `html_body`, `text_body` and `attachments` hold [`MessagePartId`]s; these presumably
/// refer to entries of `parts` (confirm against the accessor methods).
27#[derive(Debug, Default, PartialEq, Clone)]
28#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
29#[cfg_attr(
30    feature = "rkyv",
31    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
32)]
33pub struct Message<'x> {
    /// IDs of the parts that form the HTML body.
34    #[cfg_attr(feature = "serde", serde(default))]
35    pub html_body: Vec<MessagePartId>,
    /// IDs of the parts that form the plain-text body.
36    #[cfg_attr(feature = "serde", serde(default))]
37    pub text_body: Vec<MessagePartId>,
    /// IDs of the parts treated as attachments.
38    #[cfg_attr(feature = "serde", serde(default))]
39    pub attachments: Vec<MessagePartId>,
40
    /// All MIME parts of the message.
41    #[cfg_attr(feature = "serde", serde(default))]
42    pub parts: Vec<MessagePart<'x>>,
43
    /// Raw message bytes. Skipped by both serde and rkyv, so it is not part of any
    /// serialized form of the message.
44    #[cfg_attr(feature = "serde", serde(skip))]
45    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Skip))]
46    pub raw_message: Cow<'x, [u8]>,
47}
48
49/// MIME Message Part
///
/// Bundles the part's parsed headers, its body, its transfer encoding and three `u32`
/// offsets (presumably byte positions within the raw message — confirm in the parser).
50#[derive(Debug, PartialEq, Default, Clone)]
51#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52#[cfg_attr(
53    feature = "rkyv",
54    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
55)]
56pub struct MessagePart<'x> {
    /// Headers of this part.
57    #[cfg_attr(feature = "serde", serde(default))]
58    pub headers: Vec<Header<'x>>,
    /// Flag recording an encoding problem for this part; the exact conditions are decided
    /// by the parser and are not visible in this file.
59    pub is_encoding_problem: bool,
    /// Body of this part.
60    #[cfg_attr(feature = "serde", serde(default))]
61    //#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))]
62    pub body: PartType<'x>,
    /// Transfer encoding of the part. Skipped by serde, so a deserialized part gets
    /// the default (`Encoding::None`).
63    #[cfg_attr(feature = "serde", serde(skip))]
64    pub encoding: Encoding,
    /// Offset of the part's headers (presumably where they start — TODO confirm).
65    pub offset_header: u32,
    /// Offset of the part's body (presumably where it starts — TODO confirm).
66    pub offset_body: u32,
    /// Offset of the end of the part (TODO confirm whether inclusive or exclusive).
67    pub offset_end: u32,
68}
69
/// Transfer encoding of a MIME part.
///
/// The enum is `#[repr(u8)]` with explicit discriminants, so a value can be stored as a
/// single byte and recovered through the `From<u8>` implementation below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "rkyv",
    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
)]
#[repr(u8)]
pub enum Encoding {
    /// No transfer encoding; this is the default value.
    #[default]
    None = 0,
    /// Quoted-printable transfer encoding.
    QuotedPrintable = 1,
    /// Base64 transfer encoding.
    Base64 = 2,
}

impl From<u8> for Encoding {
    /// Maps a raw discriminant back to its [`Encoding`].
    ///
    /// Any byte that is not the discriminant of `QuotedPrintable` or `Base64` yields
    /// `Encoding::None`; the conversion never fails.
    fn from(v: u8) -> Self {
        if v == Self::QuotedPrintable as u8 {
            Self::QuotedPrintable
        } else if v == Self::Base64 as u8 {
            Self::Base64
        } else {
            Self::None
        }
    }
}
94
95/// Unique ID representing a MIME part within a message.
///
/// Used by `Message::html_body`, `Message::text_body`, `Message::attachments` and
/// `PartType::Multipart` to refer to parts.
96pub type MessagePartId = u32;
97
98/// A text, binary or nested e-mail MIME message part.
99///
100/// - Text: Any text/* part
/// - Html: A text/html part
101/// - Binary: Any other part type that is not text.
/// - InlineBinary: Any inline binary data.
102/// - Message: Nested RFC5322 message.
103/// - Multipart: Multipart part, listing the IDs of its child parts.
104///
/// The `Default` implementation yields an empty `Multipart`.
105#[derive(Debug, PartialEq, Clone)]
106#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
107#[cfg_attr(
108    feature = "rkyv",
109    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
110)]
// The rkyv bounds are spelled out by hand below; presumably this is required because the
// type is recursive (`PartType` -> `Message` -> `MessagePart` -> `PartType`) and the
// `Message` variant is marked `omit_bounds` — confirm against the rkyv documentation.
111#[cfg_attr(
112    feature = "rkyv",
113    rkyv(serialize_bounds(
114        __S: rkyv::ser::Writer + rkyv::ser::Allocator,
115        __S::Error: rkyv::rancor::Source,
116    ))
117)]
118#[cfg_attr(
119    feature = "rkyv",
120    rkyv(deserialize_bounds(__D::Error: rkyv::rancor::Source))
121)]
122#[cfg_attr(
123    feature = "rkyv",
124    rkyv(bytecheck(
125        bounds(
126            __C: rkyv::validation::ArchiveContext,
127        )
128    ))
129)]
130pub enum PartType<'x> {
131    /// Any text/* part
132    Text(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
133
134    /// A text/html part
135    Html(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
136
137    /// Any other part type that is not text.
138    Binary(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, [u8]>),
139
140    /// Any inline binary data.
141    InlineBinary(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, [u8]>),
142
143    /// Nested RFC5322 message.
144    Message(#[cfg_attr(feature = "rkyv", rkyv(omit_bounds))] Message<'x>),
145
146    /// Multipart part; holds the IDs of the child parts.
147    Multipart(Vec<MessagePartId>),
148}
149
150impl Default for PartType<'_> {
151    fn default() -> Self {
152        PartType::Multipart(Vec::with_capacity(0))
153    }
154}
155
156/// An RFC5322 or RFC2369 internet address.
///
/// Both fields are optional, so an address may carry only a name, only an address, or both.
157#[derive(Debug, PartialEq, Eq, Clone)]
158#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
159#[cfg_attr(
160    feature = "rkyv",
161    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
162)]
163
164pub struct Addr<'x> {
165    /// The address name including comments
166    #[cfg_attr(feature = "serde", serde(default))]
167    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
168    pub name: Option<Cow<'x, str>>,
169
170    /// An e-mail address (RFC5322/RFC2369) or URL (RFC2369)
171    #[cfg_attr(feature = "serde", serde(default))]
172    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
173    pub address: Option<Cow<'x, str>>,
174}
175
176/// An RFC5322 address group.
///
/// A group is an optional name plus the list of [`Addr`] entries that belong to it.
177#[derive(Debug, PartialEq, Eq, Clone)]
178#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
179#[cfg_attr(
180    feature = "rkyv",
181    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
182)]
183
184pub struct Group<'x> {
185    /// Group name
186    #[cfg_attr(feature = "serde", serde(default))]
187    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
188    pub name: Option<Cow<'x, str>>,
189
190    /// Addresses that are members of the group
191    #[cfg_attr(feature = "serde", serde(default))]
192    pub addresses: Vec<Addr<'x>>,
193}
194
195/// A message header.
///
/// Pairs the header name with its parsed value and records three `u32` offsets
/// (presumably byte positions within the raw message — confirm in the parser).
196#[derive(Debug, PartialEq, Eq, Clone)]
197#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
198#[cfg_attr(
199    feature = "rkyv",
200    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
201)]
202#[cfg_attr(feature = "rkyv", rkyv(compare(PartialEq)))]
203pub struct Header<'x> {
    /// Name of the header field.
204    pub name: HeaderName<'x>,
    /// Parsed value of the header field.
205    pub value: HeaderValue<'x>,
    /// Offset of the header field (presumably the start of its name — TODO confirm).
206    pub offset_field: u32,
    /// Offset where the header starts (presumably the start of its value — TODO confirm).
207    pub offset_start: u32,
    /// Offset where the header ends (TODO confirm whether inclusive or exclusive).
208    pub offset_end: u32,
209}
210
211/// A header field
///
/// Well-known header names are unit variants; `Other` carries the name as a string.
///
/// `PartialEq`, `Eq` and `Hash` are not derived here although the type is used as a
/// `HashMap` key and inside types that derive `PartialEq`; they are presumably implemented
/// by hand elsewhere in the crate.
///
/// The derived `PartialOrd`/`Ord` follow the declaration order of the variants, so
/// reordering variants changes how header names compare.
212#[derive(Debug, Clone, PartialOrd, Ord)]
213#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
214#[cfg_attr(feature = "serde", serde(rename_all = "snake_case"))]
215#[cfg_attr(
216    feature = "rkyv",
217    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
218)]
219#[cfg_attr(feature = "rkyv", rkyv(compare(PartialEq)))]
220#[non_exhaustive]
221pub enum HeaderName<'x> {
222    Subject,
223    From,
224    To,
225    Cc,
226    Date,
227    Bcc,
228    ReplyTo,
229    Sender,
230    Comments,
231    InReplyTo,
232    Keywords,
233    Received,
234    MessageId,
235    References,
236    ReturnPath,
237    MimeVersion,
238    ContentDescription,
239    ContentId,
240    ContentLanguage,
241    ContentLocation,
242    ContentTransferEncoding,
243    ContentType,
244    ContentDisposition,
245    ResentTo,
246    ResentFrom,
247    ResentBcc,
248    ResentCc,
249    ResentSender,
250    ResentDate,
251    ResentMessageId,
252    ListArchive,
253    ListHelp,
254    ListId,
255    ListOwner,
256    ListPost,
257    ListSubscribe,
258    ListUnsubscribe,
    /// A header name carried as a string (presumably any name without a dedicated variant).
259    Other(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
260    DkimSignature,
261    ArcAuthenticationResults,
262    ArcMessageSignature,
263    ArcSeal,
264}
265
266/// Parsed header value.
///
/// The default value is `Empty`.
267#[derive(Debug, PartialEq, Eq, Clone, Default)]
268#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
269#[cfg_attr(
270    feature = "rkyv",
271    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
272)]
273
274pub enum HeaderValue<'x> {
275    /// Address list or group
276    Address(Address<'x>),
277
278    /// String
279    Text(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
280
281    /// List of strings
282    TextList(
283        #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
284        Vec<Cow<'x, str>>,
285    ),
286
287    /// Datetime
288    DateTime(DateTime),
289
290    /// Content-Type or Content-Disposition header
291    ContentType(ContentType<'x>),
292
293    /// Received header (boxed)
294    Received(Box<Received<'x>>),
295
    /// No value; this is the default variant.
296    #[default]
297    Empty,
298}
299
/// Value of an address header: either a flat list of addresses or a list of address groups.
300#[derive(Debug, PartialEq, Eq, Clone)]
301#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
302#[cfg_attr(
303    feature = "rkyv",
304    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
305)]
306pub enum Address<'x> {
307    /// Address list
308    List(Vec<Addr<'x>>),
309    /// Group of addresses
310    Group(Vec<Group<'x>>),
311}
312
313/// Header form
///
/// Names the representation in which a header value is handled. Unlike most types in this
/// file it derives `Hash` and `Copy` and has no serde/rkyv derives.
///
/// NOTE(review): the variant names look like the header forms of JMAP for Mail (RFC 8621) —
/// confirm against the code that consumes this enum.
314#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
315pub enum HeaderForm {
316    Raw,
317    Text,
318    Addresses,
319    GroupedAddresses,
320    MessageIds,
321    Date,
322    URLs,
323}
324/// An RFC2045 Content-Type or RFC2183 Content-Disposition MIME header field.
///
/// The same structure is used for both headers (see `HeaderValue::ContentType`).
325#[derive(Debug, PartialEq, Eq, Clone)]
326#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
327#[cfg_attr(
328    feature = "rkyv",
329    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
330)]
331pub struct ContentType<'x> {
    /// Main type; compared case-insensitively by `MimeHeaders::is_content_type`.
332    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
333    pub c_type: Cow<'x, str>,
    /// Optional subtype; compared case-insensitively by `MimeHeaders::is_content_type`.
334    #[cfg_attr(feature = "serde", serde(default))]
335    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
336    pub c_subtype: Option<Cow<'x, str>>,
    /// Optional list of name/value attributes attached to the header.
337    #[cfg_attr(feature = "serde", serde(default))]
338    pub attributes: Option<Vec<Attribute<'x>>>,
339}
340
/// A name/value attribute of a [`ContentType`] (see `ContentType::attributes`).
341#[derive(Debug, PartialEq, Eq, Clone)]
342#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
343#[cfg_attr(
344    feature = "rkyv",
345    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
346)]
347pub struct Attribute<'x> {
    /// Attribute name.
348    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
349    pub name: Cow<'x, str>,
    /// Attribute value.
350    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))]
351    pub value: Cow<'x, str>,
352}
353
354/// An RFC5322 datetime.
///
/// Stores the calendar fields and a timezone offset as separate integers; the derived
/// `Default` sets every field to zero / `false`.
355#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)]
356#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
357#[cfg_attr(
358    feature = "rkyv",
359    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
360)]
361pub struct DateTime {
    /// Year.
362    pub year: u16,
    /// Month (presumably 1-12 — TODO confirm).
363    pub month: u8,
    /// Day of the month.
364    pub day: u8,
    /// Hour.
365    pub hour: u8,
    /// Minute.
366    pub minute: u8,
    /// Second.
367    pub second: u8,
    /// Sign of the timezone offset; presumably `true` for offsets behind GMT (a `-hhmm`
    /// zone) — confirm against the date parser.
368    pub tz_before_gmt: bool,
    /// Hours component of the timezone offset.
369    pub tz_hour: u8,
    /// Minutes component of the timezone offset.
370    pub tz_minute: u8,
371}
372
/// Parsed contents of a `Received` header (carried by `HeaderValue::Received`).
///
/// Every field is optional; the derived `Default` leaves all of them `None`.
///
/// NOTE(review): the serde attributes are not uniform — `id` uses `skip_serializing_if`
/// where its siblings use `default`, and `date` has no serde attribute at all. Confirm
/// whether that is intentional.
373#[derive(Debug, Clone, PartialEq, Eq, Default)]
374#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
375#[cfg_attr(
376    feature = "rkyv",
377    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
378)]
379pub struct Received<'x> {
    /// Host from the `from` clause.
380    #[cfg_attr(feature = "serde", serde(default))]
381    pub from: Option<Host<'x>>,
    /// IP address of the sending host.
382    #[cfg_attr(feature = "serde", serde(default))]
383    pub from_ip: Option<IpAddr>,
    /// Presumably the reverse-DNS name reported for the sending IP — TODO confirm.
384    #[cfg_attr(feature = "serde", serde(default))]
385    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
386    pub from_iprev: Option<Cow<'x, str>>,
    /// Host from the `by` clause.
387    #[cfg_attr(feature = "serde", serde(default))]
388    pub by: Option<Host<'x>>,
    /// Value of the `for` clause (trailing underscore because `for` is a Rust keyword).
389    #[cfg_attr(feature = "serde", serde(default))]
390    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
391    pub for_: Option<Cow<'x, str>>,
    /// Transmission protocol from the `with` clause.
392    #[cfg_attr(feature = "serde", serde(default))]
393    pub with: Option<Protocol>,
    /// TLS/SSL/DTLS protocol version.
394    #[cfg_attr(feature = "serde", serde(default))]
395    pub tls_version: Option<TlsVersion>,
    /// TLS cipher, kept as text.
396    #[cfg_attr(feature = "serde", serde(default))]
397    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
398    pub tls_cipher: Option<Cow<'x, str>>,
    /// Value of the `id` clause; omitted from serde output when `None`.
399    #[cfg_attr(feature = "serde", serde(skip_serializing_if = "Option::is_none"))]
400    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
401    pub id: Option<Cow<'x, str>>,
    /// Identity string (exact source within the header not visible here — TODO confirm).
402    #[cfg_attr(feature = "serde", serde(default))]
403    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
404    pub ident: Option<Cow<'x, str>>,
    /// Host given in the greeting command.
405    #[cfg_attr(feature = "serde", serde(default))]
406    pub helo: Option<Host<'x>>,
    /// Which greeting command was used.
407    #[cfg_attr(feature = "serde", serde(default))]
408    pub helo_cmd: Option<Greeting>,
    /// Value of the `via` clause.
409    #[cfg_attr(feature = "serde", serde(default))]
410    #[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::Map<rkyv::with::AsOwned>))]
411    pub via: Option<Cow<'x, str>>,
    /// Date recorded in the header.
412    pub date: Option<DateTime>,
413}
414
/// A host, given either by name or by IP address (used by the `from`, `by` and `helo`
/// fields of [`Received`]).
415#[derive(Debug, Clone, PartialEq, Eq)]
416#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
417#[cfg_attr(
418    feature = "rkyv",
419    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
420)]
421
422pub enum Host<'x> {
    /// Host identified by name.
423    Name(#[cfg_attr(feature = "rkyv", rkyv(with = rkyv::with::AsOwned))] Cow<'x, str>),
    /// Host identified by IP address.
424    IpAddr(IpAddr),
425}
426
/// SSL, TLS or DTLS protocol version (used by `Received::tls_version`).
427#[derive(Debug, Clone, Copy, PartialEq, Eq)]
428#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
429#[cfg_attr(
430    feature = "rkyv",
431    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
432)]
433
434pub enum TlsVersion {
435    SSLv2,
436    SSLv3,
437    TLSv1_0,
438    TLSv1_1,
439    TLSv1_2,
440    TLSv1_3,
441    DTLSv1_0,
442    DTLSv1_2,
443    DTLSv1_3,
444}
445
/// Greeting command recorded for a `Received` header (used by `Received::helo_cmd`).
446#[derive(Debug, Clone, Copy, PartialEq, Eq)]
447#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
448#[cfg_attr(
449    feature = "rkyv",
450    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
451)]
452
453pub enum Greeting {
454    Helo,
455    Ehlo,
456    Lhlo,
457}
458
/// Mail transmission type (used by `Received::with`).
///
/// The first group of variants is labelled as IANA mail transmission types; the second
/// group is non-standard.
459#[derive(Debug, Clone, Copy, PartialEq, Eq)]
460#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
461#[cfg_attr(
462    feature = "rkyv",
463    derive(rkyv::Serialize, rkyv::Deserialize, rkyv::Archive)
464)]
// Variant names are kept in upper case, hence the lint exemption.
465#[allow(clippy::upper_case_acronyms)]
466pub enum Protocol {
467    // IANA Mail Transmission Types
468    SMTP,
469    ESMTP,
470    ESMTPA,
471    ESMTPS,
472    ESMTPSA,
473    LMTP,
474    LMTPA,
475    LMTPS,
476    LMTPSA,
477    MMS,
478    UTF8SMTP,
479    UTF8SMTPA,
480    UTF8SMTPS,
481    UTF8SMTPSA,
482    UTF8LMTP,
483    UTF8LMTPA,
484    UTF8LMTPS,
485    UTF8LMTPSA,
486
487    // Non-Standard Mail Transmission Types
488    HTTP,
489    HTTPS,
490    IMAP,
491    POP3,
492    Local, // includes stdin, socket, etc.
493}
494
495/// MIME Header field access trait
496pub trait MimeHeaders<'x> {
497    /// Returns the Content-Description field
498    fn content_description(&self) -> Option<&str>;
499    /// Returns the Content-Disposition field
500    fn content_disposition(&self) -> Option<&ContentType<'_>>;
501    /// Returns the Content-ID field
502    fn content_id(&self) -> Option<&str>;
503    /// Returns the Content-Encoding field
504    fn content_transfer_encoding(&self) -> Option<&str>;
505    /// Returns the Content-Type field
506    fn content_type(&self) -> Option<&ContentType<'_>>;
507    /// Returns the Content-Language field
508    fn content_language(&self) -> &HeaderValue<'_>;
509    /// Returns the Content-Location field
510    fn content_location(&self) -> Option<&str>;
511    /// Returns the attachment name, if any.
512    fn attachment_name(&self) -> Option<&str> {
513        self.content_disposition()
514            .and_then(|cd| cd.attribute("filename"))
515            .or_else(|| self.content_type().and_then(|ct| ct.attribute("name")))
516    }
517    // Returns true is the content type matches
518    fn is_content_type(&self, type_: &str, subtype: &str) -> bool {
519        self.content_type().is_some_and(|ct| {
520            ct.c_type.eq_ignore_ascii_case(type_)
521                && ct
522                    .c_subtype
523                    .as_ref()
524                    .is_some_and(|st| st.eq_ignore_ascii_case(subtype))
525        })
526    }
527}
528
/// Header lookup by name.
529pub trait GetHeader<'x> {
    /// Returns the value of the header called `name`, or `None` when there is no such
    /// header. (Which occurrence is returned when a header repeats is up to the
    /// implementation — not visible here.)
530    fn header_value(&self, name: &HeaderName<'_>) -> Option<&HeaderValue<'x>>;
    /// Returns the whole [`Header`] called `name`, or `None` when there is no such header.
    /// Accepts anything convertible into a [`HeaderName`].
531    fn header(&self, name: impl Into<HeaderName<'x>>) -> Option<&Header<'x>>;
532}
533
/// Iteration state over a list of part IDs of a message.
///
/// Presumably backs an `Iterator` over the body parts named by `list` — the `Iterator`
/// implementation is not in this part of the file.
534struct BodyPartIterator<'x> {
    /// Message whose parts are being visited.
535    message: &'x Message<'x>,
    /// Part IDs to visit.
536    list: &'x [MessagePartId],
    /// Current position; signed, presumably so it can start before the first element —
    /// TODO confirm in the `Iterator` impl.
537    pos: i32,
538}
539
/// Iteration state over the attachments of a message.
///
/// Presumably backs an `Iterator` over `Message::attachments` — the `Iterator`
/// implementation is not in this part of the file.
540struct AttachmentIterator<'x> {
    /// Message whose attachments are being visited.
541    message: &'x Message<'x>,
    /// Current position; signed, presumably so it can start before the first element —
    /// TODO confirm in the `Iterator` impl.
542    pos: i32,
543}