mailparse/
msgidparse.rs

1use std::fmt;
2
3use crate::MailParseError;
4
/// A simple wrapper around `Vec<String>` holding message IDs without their
/// surrounding angle brackets.
///
/// The wrapper exists primarily so that the `Display` trait can be implemented
/// on it, which lets user code easily convert the return value of
/// `msgidparse` back into a string. It also allows us to add additional
/// methods on this type in the future.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct MessageIdList(Vec<String>);
11
12impl std::ops::Deref for MessageIdList {
13    type Target = Vec<String>;
14
15    fn deref(&self) -> &Vec<String> {
16        &self.0
17    }
18}
19
20impl std::ops::DerefMut for MessageIdList {
21    fn deref_mut(&mut self) -> &mut Vec<String> {
22        &mut self.0
23    }
24}
25
26impl fmt::Display for MessageIdList {
27    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28        let mut first = true;
29        for msgid in self.iter() {
30            if !first {
31                write!(f, " ")?;
32            }
33            write!(f, "<{}>", msgid)?;
34            first = false;
35        }
36        Ok(())
37    }
38}
39
40/// Parse an email header into a structured type holding a list of message ids.
41/// This function can be used to parse headers containing message IDs, such as
42/// `Message-ID`, `In-Reply-To`, and `References`.
43/// This function is currently mostly trivial (splits on whitespace and strips
44/// angle-brackets) but may be enhanced in the future to strip comments (which
45/// are technically allowed by the RFCs but never really used in practice).
46///
47/// # Examples
48/// ```
49///     use mailparse::{msgidparse, MessageIdList};
50///     let parsed_ids = msgidparse("<msg_one@foo.com>  <msg_two@bar.com>").unwrap();
51///     assert_eq!(parsed_ids[0], "msg_one@foo.com");
52///     assert_eq!(parsed_ids[1], "msg_two@bar.com");
53/// ```
54pub fn msgidparse(ids: &str) -> Result<MessageIdList, MailParseError> {
55    let mut msgids = Vec::new();
56
57    // The remaining section of the header, not yet chomped
58    let mut remaining = ids.trim_start();
59    // While we have some value of the header remaining
60    while !remaining.is_empty() {
61        // The next character should be the start of a Message ID
62        if !remaining.starts_with('<') {
63            return Err(MailParseError::Generic("Message IDs must start with <"));
64        }
65        // The ID ends at the next '>'
66        let end_index = remaining
67            .find('>')
68            .ok_or(MailParseError::Generic("Message IDs must end with >"))?;
69        msgids.push(remaining[1..end_index].to_string());
70
71        // Chomp the part of the string we just processed, and any trailing whitespace
72        remaining = remaining[end_index + 1..].trim_start();
73    }
74    Ok(MessageIdList(msgids))
75}
76
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the message of the `Generic` error produced for `input`,
    /// failing the test on any other outcome.
    fn generic_error_message(input: &str) -> &'static str {
        match msgidparse(input) {
            Err(MailParseError::Generic(msg)) => msg,
            other => panic!(
                "expected a Generic error for {:?}, got {:?}",
                input, other
            ),
        }
    }

    /// Well-formed inputs: empty, single, and multiple IDs with every
    /// supported separator style.
    #[test]
    fn parse_message_ids() {
        assert_eq!(
            msgidparse("").expect("Empty string"),
            MessageIdList(Vec::new())
        );
        assert_eq!(
            msgidparse("<msg_one@foo.com>").expect("Single reference"),
            MessageIdList(vec!["msg_one@foo.com".to_string()])
        );
        assert_eq!(
            msgidparse(" <msg_one@foo.com>").expect("Single reference, leading whitespace"),
            MessageIdList(vec!["msg_one@foo.com".to_string()])
        );
        assert_eq!(
            msgidparse("<msg_one@foo.com> ").expect("Single reference, trailing whitespace"),
            MessageIdList(vec!["msg_one@foo.com".to_string()])
        );
        assert_eq!(
            msgidparse("<msg_one@foo.com> <msg_two@bar.com>")
                .expect("Multiple references separated by space"),
            MessageIdList(vec![
                "msg_one@foo.com".to_string(),
                "msg_two@bar.com".to_string(),
            ])
        );
        assert_eq!(
            msgidparse("\n<msg_one@foo.com> <msg_two@bar.com>\t<msg_three@qux.com>\r ")
                .expect("Multiple references separated by various whitespace"),
            MessageIdList(vec![
                "msg_one@foo.com".to_string(),
                "msg_two@bar.com".to_string(),
                "msg_three@qux.com".to_string(),
            ])
        );

        // Non whitespace separator tests
        assert_eq!(
            msgidparse("<msg_one@foo.com><msg_two@bar.com>")
                .expect("Multiple references, no whitespace"),
            MessageIdList(vec![
                "msg_one@foo.com".to_string(),
                "msg_two@bar.com".to_string(),
            ])
        );
        assert_eq!(
            msgidparse("<msg_one@foo.com><msg_two@bar.com> <msg_three@spam.com> ")
                .expect("Mixed whitespace/non-whitespace separator"),
            MessageIdList(vec![
                "msg_one@foo.com".to_string(),
                "msg_two@bar.com".to_string(),
                "msg_three@spam.com".to_string(),
            ])
        );
    }

    /// Malformed inputs must be reported as errors rather than being
    /// silently skipped or truncated.
    #[test]
    fn reject_malformed_message_ids() {
        // Text where an opening bracket is expected.
        assert_eq!(
            generic_error_message("msg_one@foo.com"),
            "Message IDs must start with <"
        );
        assert_eq!(
            generic_error_message("<msg_one@foo.com> msg_two@bar.com"),
            "Message IDs must start with <"
        );
        // Opening bracket with no closing bracket after it.
        assert_eq!(
            generic_error_message("<msg_one@foo.com"),
            "Message IDs must end with >"
        );
        assert_eq!(
            generic_error_message("<msg_one@foo.com> <msg_two@bar.com"),
            "Message IDs must end with >"
        );
    }

    /// `Display` output is bracketed and space-separated, and parsing that
    /// output yields the same list again.
    #[test]
    fn display_round_trips_through_parse() {
        assert_eq!(MessageIdList(Vec::new()).to_string(), "");

        let parsed = msgidparse(" <msg_one@foo.com><msg_two@bar.com>\t")
            .expect("Two references, irregular spacing");
        let rendered = parsed.to_string();
        assert_eq!(rendered, "<msg_one@foo.com> <msg_two@bar.com>");
        assert_eq!(
            msgidparse(&rendered).expect("Rendered list parses again"),
            parsed
        );
    }
}