mail_builder/headers/
text.rs

1/*
2 * SPDX-FileCopyrightText: 2020 Stalwart Labs LLC <hello@stalw.art>
3 *
4 * SPDX-License-Identifier: Apache-2.0 OR MIT
5 */
6
7use std::borrow::Cow;
8
9use crate::encoders::{
10    base64::base64_encode_mime,
11    encode::{get_encoding_type, EncodingType},
12    quoted_printable::quoted_printable_encode_byte,
13};
14
15use super::Header;
16
/// An unstructured (free-form) text e-mail header value.
///
/// The value is held as a [`Cow`], so it may either borrow from the caller
/// or own its string.
#[derive(Clone, Debug, Eq, Ord, PartialEq, PartialOrd)]
pub struct Text<'x> {
    /// The header text, before any folding or encoding is applied.
    pub text: Cow<'x, str>,
}
22
23impl<'x> Text<'x> {
24    /// Create a new unstructured text header
25    pub fn new(text: impl Into<Cow<'x, str>>) -> Self {
26        Self { text: text.into() }
27    }
28}
29
30impl<'x, T> From<T> for Text<'x>
31where
32    T: Into<Cow<'x, str>>,
33{
34    fn from(value: T) -> Self {
35        Self::new(value)
36    }
37}
38
39impl Header for Text<'_> {
40    fn write_header(
41        &self,
42        mut output: impl std::io::Write,
43        mut bytes_written: usize,
44    ) -> std::io::Result<usize> {
45        // If already written header name is long,
46        // wrap the first line.
47        if bytes_written >= 76 {
48            output.write_all(b"\r\n")?;
49            bytes_written = 0;
50        }
51
52        match get_encoding_type(self.text.as_bytes(), true, false) {
53            EncodingType::Base64 => {
54                let mut last_pos = 0;
55                let mut chars = self.text.as_bytes().iter().enumerate();
56
57                while let Some(pos) = chars
58                    .find(|(pos, ch)| {
59                        (**ch as i8) >= -0x40
60                            && bytes_written + 13 + (pos - last_pos).div_ceil(3) * 4 > 76
61                    })
62                    .map(|(pos, _)| pos)
63                    .or_else(|| (last_pos < self.text.len()).then_some(self.text.len()))
64                {
65                    let chunk = self.text.as_bytes().get(last_pos..pos).unwrap_or_default();
66                    if bytes_written == 0 {
67                        output.write_all(b"\t")?;
68                    } else {
69                        bytes_written = 0;
70                    }
71
72                    output.write_all(b"=?utf-8?B?")?;
73                    base64_encode_mime(chunk, &mut output, true)?;
74                    output.write_all(b"?=\r\n")?;
75                    last_pos = pos;
76                }
77            }
78            EncodingType::QuotedPrintable(is_ascii) => {
79                let prefix = if is_ascii {
80                    b"=?us-ascii?Q?".as_ref()
81                } else {
82                    b"=?utf-8?Q?".as_ref()
83                };
84
85                output.write_all(prefix)?;
86                bytes_written += prefix.len();
87
88                for (pos, &ch) in self.text.as_bytes().iter().enumerate() {
89                    // (ch as i8) >= 0x40 is an inlined
90                    // check for UTF-8 char boundary without array access
91                    // taken from private u8.is_char_boundary() implementation:
92                    // https://github.com/rust-lang/rust/blob/8708f3cd1f96d226f6420a58ebdd61aa0bc08b0a/library/core/src/str/mod.rs#L360-L383
93                    if bytes_written >= 72 && (pos == 0 || (ch as i8) >= -0x40) {
94                        output.write_all(b"?=\r\n\t")?;
95                        output.write_all(prefix)?;
96                        bytes_written = 1 + prefix.len();
97                    }
98
99                    bytes_written += quoted_printable_encode_byte(ch, &mut output)?;
100                }
101                output.write_all(b"?=\r\n")?;
102            }
103            EncodingType::None => {
104                for (pos, &ch) in self.text.as_bytes().iter().enumerate() {
105                    if bytes_written >= 76 && ch.is_ascii_whitespace() && pos < self.text.len() - 1
106                    {
107                        output.write_all(b"\r\n\t")?;
108                        bytes_written = 1;
109                    }
110                    output.write_all(&[ch])?;
111                    bytes_written += 1;
112                }
113                output.write_all(b"\r\n")?;
114            }
115        }
116        Ok(0)
117    }
118}
119
#[cfg(test)]
mod tests {
    use super::*;
    use mail_parser::MessageParser;
    use std::io::{Cursor, Write};

    /// Writes `header_name` into a fresh buffer, appends `value` through
    /// `Text::write_header` (passing `bytes_written` along) and returns
    /// everything that was written as a string.
    fn encode(header_name: &str, bytes_written: usize, value: &str) -> String {
        let mut buf = Cursor::new(Vec::new());
        buf.write_all(header_name.as_bytes()).unwrap();
        Text::new(value)
            .write_header(&mut buf, bytes_written)
            .unwrap();
        String::from_utf8(buf.into_inner()).unwrap()
    }

    /// Fails if any output line is longer than 78 characters after trimming.
    fn assert_line_lengths(output: &str) {
        for line in output.lines() {
            assert!(
                line.trim().len() <= 78,
                "Line exceeds 78 characters: {}",
                line
            );
        }
    }

    /// Parses `output` as a header block and checks that the subject decodes
    /// back to `expected`.
    fn assert_subject_roundtrip(output: &str, expected: &str) {
        let message = MessageParser::new()
            .parse_headers(output.as_bytes())
            .unwrap();
        assert_eq!(message.subject().unwrap(), expected);
    }

    /// Returns a header contents string that will be "B"-encoded.
    fn b_encoded_input() -> String {
        let deltas = "δ".repeat(600);
        format!("{deltas}x{deltas}")
    }

    /// Tests that UTF-8 "Q"-encoded words are split only at character boundaries.
    ///
    /// RFC 2047 requires the encoded-text of every encoded-word to be
    /// well-formed for the declared charset, so a single UTF-8 character must
    /// never be spread over two encoded-words.
    #[test]
    fn test_utf8_q_encoding_boundaries() {
        // A long ASCII run makes the encoder pick "Q" rather than "B".
        // It is followed by two runs of δ (encoded as "=CE=B4") that are
        // offset from each other by a single byte.
        let deltas = "δ".repeat(600);
        let input = format!("{}{deltas}x{deltas}", "x".repeat(20000));

        let output = encode("Subject: ", "Subject: ".len(), &input);

        assert_line_lengths(&output);
        assert_subject_roundtrip(&output, &input);

        // Test that "Q" encoding is used.
        assert!(output.starts_with("Subject: =?utf-8?Q?xxx"));

        // δ must never be split into "...=CE?=" and "=?utf-8?Q?=B4...".
        assert!(!output.contains("CE?="));
        assert!(!output.contains("=?utf-8?Q?=B4"));
    }

    /// Tests that UTF-8 "B"-encoded words are split only at character boundaries.
    #[test]
    fn test_utf8_b_encoding_boundaries() {
        let input = b_encoded_input();

        let output = encode("Subject: ", "Subject: ".len(), &input);

        assert_line_lengths(&output);
        assert_subject_roundtrip(&output, &input);

        // Test that "B" encoding is used.
        assert!(output.starts_with("Subject: =?utf-8?B?zrTOtM60zrTOtM60"));

        assert!(!output.contains("zg==?=")); // \xb4 at the end of the word
        assert!(!output.contains("?B?tM60zr")); // \xce at the beginning of the word

        assert!(output.ends_with("\r\n"));
    }

    /// Tests encoding of UTF-8 "B"-encoded words after a very long header name.
    /// The header should be wrapped immediately in the beginning.
    #[test]
    fn test_utf8_b_encoding_large_bytes_written() {
        let input = b_encoded_input();

        // Nothing is written ahead of the value; only the reported count is large.
        let output = encode("", 500, &input);

        assert_line_lengths(&output);

        // Output starts with a CRLF followed by a continuation tab.
        assert!(output.starts_with("\r\n\t=?utf-8?B?zrTOtM60zrTOtM60"));
    }
}