eml_codec/text/
misc_token.rs

1use nom::{
2    branch::alt,
3    bytes::complete::{tag, take_while1},
4    character::complete::space0,
5    combinator::{map, opt},
6    multi::{many0, many1, separated_list1},
7    sequence::preceded,
8    IResult,
9};
10use std::fmt;
11
12use crate::text::{
13    ascii,
14    encoding::{self, encoded_word},
15    quoted::{quoted_string, QuotedString},
16    whitespace::{fws, is_obs_no_ws_ctl},
17    words::{atom, is_vchar, mime_atom},
18};
19
20#[derive(Debug, PartialEq, Default)]
21pub struct PhraseList<'a>(pub Vec<Phrase<'a>>);
22pub fn phrase_list(input: &[u8]) -> IResult<&[u8], PhraseList> {
23    map(separated_list1(tag(","), phrase), PhraseList)(input)
24}
25
26#[derive(Debug, PartialEq, Clone)]
27pub enum MIMEWord<'a> {
28    Quoted(QuotedString<'a>),
29    Atom(&'a [u8]),
30}
31impl Default for MIMEWord<'static> {
32    fn default() -> Self {
33        Self::Atom(&[])
34    }
35}
36impl<'a> MIMEWord<'a> {
37    pub fn to_string(&self) -> String {
38        match self {
39            Self::Quoted(v) => v.to_string(),
40            Self::Atom(v) => encoding_rs::UTF_8
41                .decode_without_bom_handling(v)
42                .0
43                .to_string(),
44        }
45    }
46}
47pub fn mime_word(input: &[u8]) -> IResult<&[u8], MIMEWord> {
48    alt((
49        map(quoted_string, MIMEWord::Quoted),
50        map(mime_atom, MIMEWord::Atom),
51    ))(input)
52}
53
54#[derive(PartialEq)]
55pub enum Word<'a> {
56    Quoted(QuotedString<'a>),
57    Encoded(encoding::EncodedWord<'a>),
58    Atom(&'a [u8]),
59}
60
61impl<'a> ToString for Word<'a> {
62    fn to_string(&self) -> String {
63        match self {
64            Word::Quoted(v) => v.to_string(),
65            Word::Encoded(v) => v.to_string(),
66            Word::Atom(v) => encoding_rs::UTF_8
67                .decode_without_bom_handling(v)
68                .0
69                .to_string(),
70        }
71    }
72}
73impl<'a> fmt::Debug for Word<'a> {
74    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
75        fmt.debug_tuple("Word")
76            .field(&format_args!("\"{}\"", self.to_string()))
77            .finish()
78    }
79}
80
81/// Word
82///
83/// ```abnf
84///    word            =   atom / quoted-string
85/// ```
86pub fn word(input: &[u8]) -> IResult<&[u8], Word> {
87    alt((
88        map(quoted_string, Word::Quoted),
89        map(encoded_word, Word::Encoded),
90        map(atom, Word::Atom),
91    ))(input)
92}
93
94#[derive(PartialEq)]
95pub struct Phrase<'a>(pub Vec<Word<'a>>);
96
97impl<'a> ToString for Phrase<'a> {
98    fn to_string(&self) -> String {
99        self.0
100            .iter()
101            .map(|v| v.to_string())
102            .collect::<Vec<String>>()
103            .join(" ")
104    }
105}
106impl<'a> fmt::Debug for Phrase<'a> {
107    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
108        fmt.debug_tuple("Phrase")
109            .field(&format_args!("\"{}\"", self.to_string()))
110            .finish()
111    }
112}
113
114/// Phrase
115///
116/// ```abnf
117///    phrase          =   1*word / obs-phrase
118/// ```
119pub fn phrase(input: &[u8]) -> IResult<&[u8], Phrase> {
120    let (input, phrase) = map(many1(word), Phrase)(input)?;
121    Ok((input, phrase))
122}
123
124/// Compatible unstructured input
125///
126/// ```abnf
127/// obs-utext       =   %d0 / obs-NO-WS-CTL / VCHAR
128/// ```
129fn is_unstructured(c: u8) -> bool {
130    is_vchar(c) || is_obs_no_ws_ctl(c) || c == ascii::NULL
131}
132
133#[derive(Debug, PartialEq, Clone)]
134pub enum UnstrToken<'a> {
135    Init,
136    Encoded(encoding::EncodedWord<'a>),
137    Plain(&'a [u8]),
138}
139
140impl<'a> ToString for UnstrToken<'a> {
141    fn to_string(&self) -> String {
142        match self {
143            UnstrToken::Init => "".into(),
144            UnstrToken::Encoded(e) => e.to_string(),
145            UnstrToken::Plain(e) => encoding_rs::UTF_8
146                .decode_without_bom_handling(e)
147                .0
148                .into_owned(),
149        }
150    }
151}
152
153#[derive(PartialEq, Clone)]
154pub struct Unstructured<'a>(pub Vec<UnstrToken<'a>>);
155
156impl<'a> ToString for Unstructured<'a> {
157    fn to_string(&self) -> String {
158        self.0
159            .iter()
160            .fold(
161                (&UnstrToken::Init, String::new()),
162                |(prev_token, mut result), current_token| {
163                    match (prev_token, current_token) {
164                        (UnstrToken::Init, v) => result.push_str(v.to_string().as_ref()),
165                        (UnstrToken::Encoded(_), UnstrToken::Encoded(v)) => {
166                            result.push_str(v.to_string().as_ref())
167                        }
168                        (_, v) => {
169                            result.push(' ');
170                            result.push_str(v.to_string().as_ref())
171                        }
172                    };
173
174                    (current_token, result)
175                },
176            )
177            .1
178    }
179}
180impl<'a> fmt::Debug for Unstructured<'a> {
181    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
182        fmt.debug_tuple("Unstructured")
183            .field(&format_args!("\"{}\"", self.to_string()))
184            .finish()
185    }
186}
187
188/// Unstructured header field body
189///
190/// ```abnf
191/// unstructured    =   (*([FWS] VCHAR_SEQ) *WSP) / obs-unstruct
192/// ```
193pub fn unstructured(input: &[u8]) -> IResult<&[u8], Unstructured> {
194    let (input, r) = many0(preceded(
195        opt(fws),
196        alt((
197            map(encoded_word, UnstrToken::Encoded),
198            map(take_while1(is_unstructured), UnstrToken::Plain),
199        )),
200    ))(input)?;
201
202    let (input, _) = space0(input)?;
203    Ok((input, Unstructured(r)))
204}
205
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_phrase() {
        // Raw input paired with the text the parsed phrase must render to.
        let cases: [(&[u8], &str); 2] = [
            (b"hello world", "hello world"),
            (b"salut \"le\" monde", "salut le monde"),
        ];
        for &(raw, expected) in cases.iter() {
            let (_, parsed) = phrase(raw).unwrap();
            assert_eq!(parsed.to_string(), expected.to_string());
        }

        // A CRLF that is not followed by whitespace ends the phrase and is
        // left in the remaining input.
        let (rest, parsed) = phrase(b"fin\r\n du\r\nmonde").unwrap();
        assert_eq!(rest, &b"\r\nmonde"[..]);
        assert_eq!(parsed.to_string(), "fin du".to_string());
    }
}