Skip to main content

eml_codec/
print.rs

1use rand::{Rng, SeedableRng};
2// Use a crypto secure RNG which is "portable" (we want the output of our tests
3// to be stable across platforms). Chacha20 provides such a RNG.
4use crate::text::ascii;
5pub use eml_codec_derives::ToStringFromPrint;
6use rand_chacha::ChaCha20Rng as RNG;
7
8// NOTE regarding line-folding and UTF-8 (RFC6532).
9//
10// Line folding (and more generally printing) implemented in this file occurs at
11// the byte level and is unaware of UTF-8 text introduced by RFC6532. This works
12// for the following reasons:
13//
14// - line folding (inserting newlines) only occurs at the ASCII whitespace
15// characters passed to `write_fws_bytes` method; UTF-8 text can only appear in
16// text passed to `write_bytes` which is *never* split by folding.
17//
18// - RFC6532 specifies that line limits should be counted in "characters" and
19// not bytes (however, "character" is not a well-defined unicode concept). The
20// current implementation enforces line limits by counting the length of text as
21// a number of bytes. This is conservative, but always correct wrt any
22// interpretation of "character", and is easier than e.g. performing unicode
23// segmentation to count text length in number of grapheme clusters.
24
25// TODO: provide streaming printing
26
/// A value that can be written to a `Formatter`.
27pub trait Print {
    /// Writes `self` to `fmt`.
28    fn print(&self, fmt: &mut impl Formatter);
29}
30
31pub fn print_seq<T, Fmt>(fmt: &mut Fmt, s: &[T], sep: impl Fn(&mut Fmt))
32where
33    T: Print,
34    Fmt: Formatter,
35{
36    if !s.is_empty() {
37        s[0].print(fmt);
38        for x in &s[1..] {
39            sep(fmt);
40            x.print(fmt);
41        }
42    }
43}
44
45impl<T: Print> Print for &T {
46    fn print(&self, fmt: &mut impl Formatter) {
47        (*self).print(fmt)
48    }
49}
50
51/// An output formatter that can perform line folding and compute multipart
52/// boundaries.
53///
54/// The `Formatter` API is (unfortunately) quite imperative and tricky to use.
55/// At a high level, the trickiness comes from two aspects: formatter modes and
56/// multipart boundaries.
57///
58/// ## Formatter modes
59///
60/// A `Formatter` can switch between two modes: a "line folding" mode
61/// (for writing email headers) or a "direct" mode (for writing email bodies).
62///
63/// Initially, a newly created `Formatter` is in "direct" mode. Switching to
64/// and out of "line folding" mode is done using the `begin_line_folding` and
65/// `end_line_folding` functions.
66///
67/// Depending on the mode, some functions of the API cannot be called or come
68/// with extra usage restrictions, including basic text-printing functions.
69/// (See the per-function documentation for more details.)
70///
71/// ## Multipart boundaries
72///
73/// When in "direct" mode, a `Formatter` can generate and output multipart
74/// boundaries. These are randomly generated to (probabilistically) ensure that
75/// they do not clash with the rest of the output.
76///
77/// A boundary can be "registered" using `push_new_boundary`, then printed to
78/// the output using `write_current_boundary`. Finally, the boundary should be
79/// discarded when the corresponding multipart body ends, with `pop_boundary`.
80///
81/// Because multipart data can be nested, it is possible to have several
82/// "active" boundaries at a given time. However, boundary-related functions
83/// must be called in a way that is "well-bracketed": conceptually, a
84/// `Formatter` maintains a stack of active boundaries, only the boundary on the
85/// top of the stack can be written to the output, and `push_new_boundary` and
86/// `pop_boundary` must be used in a well-bracketed fashion. (See the
87/// per-function documentation for more details.)
88///
89/// ## Writing to the formatter
90///
91/// Data can be written to the output using the following functions:
92/// - `write_fws_bytes` and `write_fws` output white space; in "line folding" mode,
93///   it can be used for folding;
94/// - `write_bytes` outputs text; in "line folding" mode, it cannot be used
95///   for folding;
96/// - `write_crlf` outputs a line break;
97/// - `write_current_boundary` outputs the boundary at the top of the boundary stack.
98///
99/// All other functions of the API modify the internal state of the `Formatter` but
100/// do not produce output.
101///
102/// **In "line folding" mode**, `write_` functions must obey additional requirements:
103/// - A line *must never start* with whitespace. This applies to whitespace
104///   written using either `write_bytes` or `write_fws`.
105/// - Text written with `write_bytes` *must never contain CRLF*. /!\ Successive
106///   calls to `write_bytes` that result in a CRLF when concatenated are also
107///   forbidden! /!\
108///
109/// In exchange, in line folding mode, a `Formatter` provides the following guarantees:
110/// - does not output "folds" that contain only folding whitespace;
111/// - maximizes the length of folds within the line limit;
112/// - keeps folds under the line limit, unless there is no space to fold on;
113///   in that case, fold as soon as possible after the line limit.
114///
115/// Note that the line limit (if any) is determined by each Formatter
116/// implementation.
117pub trait Formatter {
118    // XXX could we provide more safety to ensure that callers of a Formatter
119    // obey the requirements above, instead of panicking or being silently
120    // incorrect?
121
122    /// Switches the `Formatter` mode to "line folding". The `Formatter`
123    /// must be currently in "direct" mode.
    ///
    /// The `Fmt` implementation panics if it is already in "line folding" mode.
124    fn begin_line_folding(&mut self);
125
126    /// Switches the `Formatter` mode to "direct". The `Formatter` must
127    /// be currently in "line folding" mode.
    ///
    /// The `Fmt` implementation panics if it is in "direct" mode.
128    fn end_line_folding(&mut self);
129
130    /// Registers a new boundary.
131    /// This pushes the boundary on top of the internal "boundary stack".
132    fn push_new_boundary(&mut self);
133
134    /// Writes the currently declared boundary to the output (the one on top of the
135    /// internal boundary stack). The `Formatter` can be either in "direct" or
136    /// "line folding" mode.
137    ///
138    /// A boundary must have been registered previously.
    /// The `Fmt` implementation panics if the boundary stack is empty.
139    fn write_current_boundary(&mut self);
140
141    /// Pops the current boundary from the top of the "boundary stack".
142    fn pop_boundary(&mut self);
143
144    /// Write bytes from `buf`; they cannot be used for line folding.
145    ///
146    /// In line folding mode, `buf` must not contain CRLF and consecutive calls
147    /// to `write_bytes` must not result in CRLF being emitted in the output
148    /// (e.g. `fmt.write_bytes(b"\r"); fmt.write_bytes(b"\n")`).
149    ///
150    /// It is fine for `buf` to include whitespace characters.
151    fn write_bytes(&mut self, buf: &[u8]);
152
153    /// Write whitespace bytes from `buf`. In "line folding" mode, they can be
154    /// used for line folding.
155    ///
156    /// `buf` *must only* contain whitespace characters ' ' and '\t'.
157    fn write_fws_bytes(&mut self, buf: &[u8]);
158
159    /// Terminate the current line, writing CRLF ("\r\n").
160    fn write_crlf(&mut self);
161
162    /// Write a single folding white space character.
    ///
    /// The default implementation forwards a single space (`b" "`) to
    /// `write_fws_bytes`.
163    fn write_fws(&mut self) {
164        self.write_fws_bytes(b" ")
165    }
166
167    /// Consumes the `Formatter` and returns the data that was printed to it.
    ///
    /// The `Fmt` implementation asserts that the boundary stack is empty and
    /// flushes any pending line-folding buffers before returning.
168    fn flush(self) -> Vec<u8>;
169}
170
/// Current output mode of a `Fmt`.
171enum FormatterMode {
    /// Written bytes are appended to the output buffer as-is.
172    Direct,
    /// Written bytes go through the contained `LineFolder`, which performs
    /// line folding before they reach the output buffer.
173    Folding(LineFolder),
174}
175
176/// `Fmt` implements `Formatter`.
///
/// Output is accumulated in an in-memory buffer which is returned by `flush`.
177pub struct Fmt {
    // line limit handed to each `LineFolder` created by `begin_line_folding`
178    line_limit: Option<usize>,
    // current mode; starts as `FormatterMode::Direct`
179    mode: FormatterMode,
    // stack of active multipart boundaries and the RNG that generates them
180    boundaries: Boundaries,
    // everything that has been committed to the output so far
181    buf: Vec<u8>,
182}
183
/// Configuration passed when initializing a `Fmt`.
///
/// `line_limit` defines the maximum line length allowed before trying to split.
/// If set to `None`, there is no maximum line limit.
///
/// `seed` is used to seed the internal RNG which generates multipart
/// boundaries. If set to `None`, the RNG is seeded using randomness from
/// the operating system.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FmtConfig {
    seed: Option<u64>,
    line_limit: Option<usize>,
}

/// Default configuration: RNG seeded from the operating system, lines folded
/// at 78 bytes.
pub const FMT_DEFAULT: FmtConfig = FmtConfig {
    seed: None,           // defaults to system RNG
    line_limit: Some(78), // RFC recommended line limit for emails
};

/// Same as [`FMT_DEFAULT`], but without any line limit.
pub const FMT_NOFOLD: FmtConfig = FMT_DEFAULT.with_line_limit(None);

impl FmtConfig {
    /// Returns this configuration with its RNG seed replaced by `seed`.
    ///
    /// `Some(_)` makes the generated multipart boundaries reproducible;
    /// `None` seeds the RNG from the operating system.
    pub const fn with_seed(self, seed: Option<u64>) -> Self {
        Self { seed, ..self }
    }

    /// Returns this configuration with its line limit replaced by
    /// `line_limit` (`None` means "no limit").
    pub const fn with_line_limit(self, line_limit: Option<usize>) -> Self {
        Self { line_limit, ..self }
    }
}

impl Default for FmtConfig {
    /// Returns [`FMT_DEFAULT`], so that the constant and the `Default` impl can
    /// never disagree.
    fn default() -> Self {
        FMT_DEFAULT
    }
}
222
223impl Fmt {
224    pub fn new(cfg: FmtConfig) -> Self {
225        let rand = cfg
226            .seed
227            .map(RNG::seed_from_u64)
228            .unwrap_or_else(RNG::from_os_rng);
229        Self {
230            line_limit: cfg.line_limit,
231            mode: FormatterMode::Direct,
232            boundaries: Boundaries::new(rand),
233            buf: Vec::new(),
234        }
235    }
236}
237
238impl Formatter for Fmt {
239    fn begin_line_folding(&mut self) {
240        match self.mode {
241            FormatterMode::Direct => {
242                self.mode = FormatterMode::Folding(LineFolder::new(self.line_limit))
243            }
244            FormatterMode::Folding(_) => {
245                panic!("Formatter::begin_line_folding: already in folding mode")
246            }
247        }
248    }
249
250    fn end_line_folding(&mut self) {
251        match self.mode {
252            FormatterMode::Folding(ref mut folder) => {
253                folder.flush(&mut self.buf);
254                self.mode = FormatterMode::Direct
255            }
256            FormatterMode::Direct => {
257                panic!("Formatter::end_line_folding: not in folding mode")
258            }
259        }
260    }
261
262    fn push_new_boundary(&mut self) {
263        self.boundaries.push_new_boundary()
264    }
265
266    fn write_current_boundary(&mut self) {
267        let b = self.boundaries.current_boundary();
268        // inline write_bytes to avoid cloning `b`
269        match self.mode {
270            FormatterMode::Direct => self.buf.extend_from_slice(b),
271            FormatterMode::Folding(ref mut folder) => folder.write_bytes(b, &mut self.buf),
272        }
273    }
274
275    fn pop_boundary(&mut self) {
276        self.boundaries.pop_boundary()
277    }
278
279    fn write_bytes(&mut self, buf: &[u8]) {
280        match self.mode {
281            FormatterMode::Direct => self.buf.extend_from_slice(buf),
282            FormatterMode::Folding(ref mut folder) => folder.write_bytes(buf, &mut self.buf),
283        }
284    }
285
286    fn write_fws_bytes(&mut self, buf: &[u8]) {
287        match self.mode {
288            FormatterMode::Direct => self.buf.extend_from_slice(buf),
289            FormatterMode::Folding(ref mut folder) => folder.write_fws_bytes(buf, &mut self.buf),
290        }
291    }
292
293    fn write_crlf(&mut self) {
294        match self.mode {
295            FormatterMode::Direct => self.buf.extend_from_slice(ascii::CRLF),
296            FormatterMode::Folding(ref mut folder) => folder.write_crlf(&mut self.buf),
297        }
298    }
299
300    fn flush(mut self) -> Vec<u8> {
301        self.boundaries.assert_empty();
302        if let FormatterMode::Folding(mut folder) = self.mode {
303            folder.flush(&mut self.buf)
304        }
305        self.buf
306    }
307}
308
309// Line folding ----------------------------------------------------------------
310
311/// `LineFolder` holds buffers and state used to perform line folding.
312///
313/// The owner of `LineFolder` MUST call its `flush` method after it is done
314/// writing. Flushing must only happen after all writing has been done; once
315/// a `LineFolder` has been flushed it cannot be written to again.
316struct LineFolder {
317    line_limit: LineLimit,
318    // Edge case: at the end of the file, if the remaining data of the final
319    // fold is only spaces, we must not put it on its own fold (as per the RFC).
320    // Instead, we should add it to the previous fold.
321    // To account for that edge case, we buffer both the current and the
322    // previous fold of the current line.
323    prev_fold: Option<Vec<u8>>,
324    // invariant: prev_fold.is_some() ==> !cur_fold.is_empty()
325    cur_fold: Vec<u8>,
326    cur_fold_is_only_fws: bool,
327    last_cut_candidate: Option<usize>,
328    // We only handle flushing once at the end. Once the LineFolder has been
329    // flushed, attempting to write or flush will panic.
330    is_flushed: bool,
331}
332
333impl LineFolder {
334    /// The line limit must not include the final CRLF and must not be zero.
335    /// For emails, this means line_limit=78.
336    fn new(line_limit: Option<usize>) -> Self {
337        Self {
338            line_limit: LineLimit::from(line_limit),
339            prev_fold: None,
340            cur_fold: Vec::new(),
341            cur_fold_is_only_fws: true,
342            last_cut_candidate: None,
343            is_flushed: false,
344        }
345    }
346
347    // NOTE: flushing is only allowed as the last operation on the LineFolder
348    // XXX if flushing fails, calling it again will do nothing; data in buffers is lost.
349    fn flush(&mut self, inner: &mut Vec<u8>) {
350        if self.is_flushed {
351            return;
352        }
353        self.is_flushed = true;
354        self.flush_line(inner)
355    }
356
357    // NOTE: `buf` must not contain line breaks (CRLF).
358    // To output line breaks, use `write_crlf`.
359    // XXX what are the guarantees in case the underlying writer fails?
360    fn write_bytes(&mut self, buf: &[u8], inner: &mut Vec<u8>) {
361        assert!(!self.is_flushed);
362
363        // A line must never start with whitespace
364        // (otherwise it would be indistinguishable from FWS)
365        if self.cur_fold.is_empty() && !buf.is_empty() {
366            // XXX turn this into a debug_assert?
367            assert!(!ascii::WS.contains(&buf[0]))
368        }
369
370        if self.cur_fold.len() + buf.len() <= self.line_limit || self.last_cut_candidate.is_none() {
371            // write `buf`
372            self.cur_fold.extend_from_slice(buf);
373            if !buf.is_empty() {
374                self.cur_fold_is_only_fws = false;
375            }
376        } else {
377            // fold at `last_cut_candidate`
378            self.fold(inner);
379            // recursive call to actually handle `buf`
380            self.write_bytes(buf, inner)
381        }
382    }
383
384    fn write_fws_bytes(&mut self, buf: &[u8], inner: &mut Vec<u8>) {
385        assert!(!self.is_flushed);
386        if buf.is_empty() {
387            return;
388        }
389
390        // A line must never begin with whitespace.
391        // XXX: turn this into debug_assert?
392        assert!(!self.cur_fold.is_empty());
393
394        // add buf[0] to `cur_fold`
395
396        if !self.cur_fold_is_only_fws {
397            self.last_cut_candidate = Some(self.cur_fold.len());
398        }
399        self.cur_fold.push(buf[0]);
400
401        // if we are past the line limit, we should fold if we can
402        // (possibly on the character we just added)
403        if self.cur_fold.len() > self.line_limit && self.last_cut_candidate.is_some() {
404            self.fold(inner)
405        }
406
407        // recursive call to handle the rest of the buffer
408        self.write_fws_bytes(&buf[1..], inner)
409    }
410
411    fn write_crlf(&mut self, inner: &mut Vec<u8>) {
412        assert!(!self.is_flushed);
413        // flush the buffers for the current line
414        self.flush_line(inner);
415        inner.extend_from_slice(ascii::CRLF)
416    }
417
418    // internal helpers
419
420    // NOTE: requires `self.last_cut_candidate.is_some()`
421    // folds at `last_cut_candidate`
422    fn fold(&mut self, inner: &mut Vec<u8>) {
423        // flush any existing `prev_fold`
424        if let Some(prev_fold) = &self.prev_fold {
425            // commit `prev_fold` before we split
426            inner.extend_from_slice(prev_fold);
427            inner.extend_from_slice(ascii::CRLF);
428            self.prev_fold = None;
429        }
430        let cut_pos = self.last_cut_candidate.unwrap();
431        // cur_fold  = |aaaaaa bbb|
432        //                    ^ cut_pos
433        //   becomes
434        // prev_fold = |aaaaaa|
435        // cur_fold  = | bbb|
436        {
437            let mut prev_fold = self.cur_fold.split_off(cut_pos);
438            std::mem::swap(&mut self.cur_fold, &mut prev_fold);
439            self.prev_fold = Some(prev_fold);
440        }
441        self.last_cut_candidate = None;
442        // - if `cur_fold` is of size one, it only contains the
443        // character on which we folded, which is FWS.
444        // - otherwise, `cur_fold` is of size > 1, and contains
445        // non-FWS characters since `cut_pos` is the *last*
446        // cut candidate
447        self.cur_fold_is_only_fws = self.cur_fold.len() == 1
448    }
449
450    // terminate the current line, writing its data
451    fn flush_line(&mut self, inner: &mut Vec<u8>) {
452        if let Some(prev_fold) = &self.prev_fold {
453            inner.extend_from_slice(prev_fold);
454            if self.cur_fold_is_only_fws {
455                // edge case: write `cur_fold` on the same fold
456                // as prev_fold to avoid creating a fold with only
457                // spaces.
458            } else {
459                inner.extend_from_slice(ascii::CRLF);
460            }
461        }
462        inner.extend_from_slice(&self.cur_fold);
463        // reset fold state
464        self.prev_fold = None;
465        self.cur_fold.truncate(0);
466        self.cur_fold_is_only_fws = true;
467        self.last_cut_candidate = None
468    }
469}
470
// Maximum line length used by `LineFolder`: either a finite number of bytes,
// or "infinity" when line length is not limited.
enum LineLimit {
    NoLimit,
    Limit(usize),
}

impl From<Option<usize>> for LineLimit {
    /// `None` means "no limit"; `Some(n)` is a limit of `n`.
    fn from(o: Option<usize>) -> Self {
        o.map_or(Self::NoLimit, Self::Limit)
    }
}

// The two impls below make it possible to compare a length against a limit
// directly, as in `n <= line_limit`. `NoLimit` behaves as a value strictly
// greater than every `usize`.
impl std::cmp::PartialEq<LineLimit> for usize {
    fn eq(&self, limit: &LineLimit) -> bool {
        matches!(limit, LineLimit::Limit(max) if self == max)
    }
}
impl std::cmp::PartialOrd<LineLimit> for usize {
    fn partial_cmp(&self, limit: &LineLimit) -> Option<std::cmp::Ordering> {
        let ordering = match limit {
            LineLimit::NoLimit => std::cmp::Ordering::Less,
            LineLimit::Limit(max) => Ord::cmp(self, max),
        };
        Some(ordering)
    }
}
504
505// Boundary handling for multiparts --------------------------------------------
506
507struct Boundaries {
508    active_boundaries: Vec<Vec<u8>>, // behaves as a stack
509    rand: RNG,
510}
511
512// TODO: check
513const BOUNDARY_LEN: usize = 65;
514
515impl Boundaries {
516    fn new(rand: RNG) -> Self {
517        Self {
518            active_boundaries: Vec::new(),
519            rand,
520        }
521    }
522
523    fn push_new_boundary(&mut self) {
524        let b = self.random_boundary();
525        self.active_boundaries.push(b);
526    }
527
528    fn current_boundary(&self) -> &[u8] {
529        self.active_boundaries.last().unwrap()
530    }
531
532    fn pop_boundary(&mut self) {
533        self.active_boundaries.pop();
534    }
535
536    // generate a random boundary using characters in DIGIT | ALPHA
537    fn random_boundary(&mut self) -> Vec<u8> {
538        let mut v = Vec::with_capacity(BOUNDARY_LEN);
539        for _ in 0..BOUNDARY_LEN {
540            let n = self.rand.random_range(0..(10 + 26 + 26));
541            let byte = if n < 10 {
542                ascii::N0 + n
543            } else if n - 10 < 26 {
544                ascii::LCA + (n - 10)
545            } else {
546                ascii::LSA + (n - 10 - 26)
547            };
548            v.push(byte)
549        }
550        v
551    }
552
553    fn assert_empty(&self) {
554        assert!(self.active_boundaries.is_empty());
555    }
556}
557
558// Public formatting functions -------------------------------------------------
559
560/// Creates a formatter, passes it to `f`, and returns the corresponding output
561/// as a Vec.
562pub fn print_to_vec_with<F>(cfg: FmtConfig, f: F) -> Vec<u8>
563where
564    F: for<'a> Fn(&'a mut Fmt),
565{
566    let mut fmt = Fmt::new(cfg);
567    f(&mut fmt);
568    fmt.flush()
569}
570
571/// Prints a printable value as a Vec.
572pub fn print_to_vec<T: Print>(cfg: FmtConfig, x: T) -> Vec<u8> {
573    print_to_vec_with(cfg, |fmt| x.print(fmt))
574}
575
576// Cow<'a, [u8]> is our base bytes type
577impl<'a> Print for std::borrow::Cow<'a, [u8]> {
578    fn print(&self, fmt: &mut impl Formatter) {
579        fmt.write_bytes(self)
580    }
581}
582
583#[cfg(test)]
584pub(crate) mod tests {
585    use super::*;
586
587    // in tests, fix the formatter seed and use line folding
    // (seed 0, and the line limit of `FMT_DEFAULT`)
588    pub fn print_to_vec_with(f: impl Fn(&mut Fmt)) -> Vec<u8> {
589        let cfg = FmtConfig {
590            seed: Some(0),
591            ..FMT_DEFAULT
592        };
593        super::print_to_vec_with(cfg, f)
594    }
    // same configuration as `print_to_vec_with` above, for `Print` values
595    pub fn print_to_vec<T: Print>(x: T) -> Vec<u8> {
596        let cfg = FmtConfig {
597            seed: Some(0),
598            ..FMT_DEFAULT
599        };
600        super::print_to_vec(cfg, x)
601    }
602
603    #[test]
604    fn test_folding() {
        // 72 + 1 + 9 bytes do not fit in the line limit: we fold on the
        // whitespace
605        let folded = print_to_vec_with(|f| {
606            f.begin_line_folding();
607            f.write_bytes(&[b'x'; 72]);
608            f.write_fws();
609            f.write_bytes(b"yyyyyyyyy");
610        });
611        assert_eq!(folded, [&[b'x'; 72][..], b"\r\n yyyyyyyyy",].concat());
612
        // the first 80 bytes offer no place to fold on: we fold on the first
        // whitespace that comes after the line limit
613        let folded = print_to_vec_with(|f| {
614            f.begin_line_folding();
615            f.write_bytes(&[b'x'; 80]);
616            f.write_fws();
617            f.write_bytes(b"yyyyyyyyy");
618        });
619        assert_eq!(folded, [&[b'x'; 80][..], b"\r\n yyyyyyyyy",].concat());
620
        // several places to fold on: only the last one is used, the earlier
        // whitespace is left untouched
621        let folded = print_to_vec_with(|f| {
622            f.begin_line_folding();
623            f.write_bytes(&[b'x'; 18]);
624            f.write_fws_bytes(&[b' '; 3]);
625            f.write_bytes(&[b'x'; 16]);
626            f.write_fws();
627            f.write_bytes(&[b'x'; 32]);
628            f.write_fws();
629            f.write_bytes(&[b'y'; 9]);
630        });
631        assert_eq!(
632            folded,
633            [
634                &[b'x'; 18][..],
635                &[b' '; 3][..],
636                &[b'x'; 16][..],
637                &b" "[..],
638                &[b'x'; 32][..],
639                &b"\r\n "[..],
640                &[b'y'; 9][..],
641            ]
642            .concat()
643        );
644
645        // we must not fold in this case, because doing so would create a
646        // fold containing only whitespace
647        let folded = print_to_vec_with(|f| {
648            f.begin_line_folding();
649            f.write_bytes(b"X");
650            f.write_fws_bytes(&[b' '; 82]);
651        });
652        assert_eq!(folded, [&b"X"[..], &[b' '; 82],].concat());
653    }
654}