eml_codec/print.rs
1use rand::{Rng, SeedableRng};
// Use a cryptographically secure RNG which is "portable" (we want the output of
// our tests to be stable across platforms). ChaCha20 provides such an RNG.
4use crate::text::ascii;
5pub use eml_codec_derives::ToStringFromPrint;
6use rand_chacha::ChaCha20Rng as RNG;
7
8// NOTE regarding line-folding and UTF-8 (RFC6532).
9//
10// Line folding (and more generally printing) implemented in this file occurs at
11// the byte level and is unaware of UTF-8 text introduced by RFC6532. This works
12// for the following reasons:
13//
14// - line folding (inserting newlines) only occurs at the ASCII whitespace
15// characters passed to `write_fws_bytes` method; UTF-8 text can only appear in
16// text passed to `write_bytes` which is *never* split by folding.
17//
18// - RFC6532 specifies that line limits should be counted in "characters" and
19// not bytes (however, "character" is not a well-defined unicode concept). The
20// current implementation enforces line limits by counting the length of text as
21// a number of bytes. This is conservative, but always correct wrt any
22// interpretation of "character", and is easier than e.g. performing unicode
23// segmentation to count text length in number of grapheme clusters.
24
25// TODO: provide streaming printing
26
/// A value that can write its serialized form to a [`Formatter`].
pub trait Print {
    /// Writes `self` to `fmt`.
    fn print(&self, fmt: &mut impl Formatter);
}
30
31pub fn print_seq<T, Fmt>(fmt: &mut Fmt, s: &[T], sep: impl Fn(&mut Fmt))
32where
33 T: Print,
34 Fmt: Formatter,
35{
36 if !s.is_empty() {
37 s[0].print(fmt);
38 for x in &s[1..] {
39 sep(fmt);
40 x.print(fmt);
41 }
42 }
43}
44
45impl<T: Print> Print for &T {
46 fn print(&self, fmt: &mut impl Formatter) {
47 (*self).print(fmt)
48 }
49}
50
51/// An output formatter that can perform line folding and compute multipart
52/// boundaries.
53///
54/// The `Formatter` API is (unfortunately) quite imperative and tricky to use.
55/// At a high level, the trickiness comes from two aspects: formatter modes and
56/// multipart boundaries.
57///
58/// ## Formatter modes
59///
60/// A `Formatter` can switch between two modes: a "line folding" mode
61/// (for writing email headers) or a "direct" mode (for writing email bodies).
62///
63/// Initially, a newly created `Formatter` is in "direct" mode. Switching to
64/// and out of "line folding" mode is done using the `begin_line_folding` and
65/// `end_line_folding` functions.
66///
67/// Depending on the mode, some functions of the API cannot be called or come
68/// with extra usage restrictions, including basic text-printing functions.
69/// (See the per-function documentation for more details.)
70///
71/// ## Multipart boundaries
72///
73/// When in "direct" mode, a `Formatter` can generate and output multipart
74/// boundaries. These are randomly generated to (probabilistically) ensure that
75/// they do not clash with the rest of the output.
76///
77/// A boundary can be "registered" using `push_new_boundary`, then printed to
78/// the output using `write_current_boundary`. Finally, the boundary should be
79/// discarded when the corresponding multipart body ends, with `pop_boundary`.
80///
81/// Because multipart data can be nested, it is possible to have several
82/// "active" boundaries at a given time. However, boundary-related functions
83/// must be called in a way that is "well-bracketed": conceptually, a
84/// `Formatter` maintains a stack of active boundaries, only the boundary on the
85/// top of the stack can be written to the output, and `push_new_boundary` and
86/// `pop_boundary` must be used in a well-bracketed fashion. (See the
87/// per-function documentation for more details.)
88///
89/// ## Writing to the formatter
90///
91/// Data can be written to the output using the following functions:
92/// - `write_fws_bytes` and `write_fws` output white space; in "line folding" mode,
93/// it can be used for folding;
94/// - `write_bytes` outputs text; in "line folding" mode, it cannot be used
95/// for folding;
96/// - `write_crlf` outputs a line break;
97/// - `write_current_boundary` outputs the boundary at the top of the boundary stack.
98///
99/// All other functions of the API modify the internal state of the `Formatter` but
100/// do not produce output.
101///
102/// **In "line folding" mode**, `write_` functions must obey additional requirements:
/// - A line *must never start* with whitespace. This applies to whitespace
///   written using either `write_bytes` or `write_fws`.
105/// - Text written with `write_bytes` *must never contain CRLF*. /!\ Successive
106/// calls to `write_bytes` that result in a CRLF when concatenated are also
107/// forbidden! /!\
108///
109/// In exchange, in line folding mode, a `Formatter` provides the following guarantees:
110/// - does not output "folds" that contain only folding whitespace;
111/// - maximizes the length of folds within the line limit;
112/// - keeps folds under the line limit, unless there is no space to fold on;
113/// in that case, fold as soon as possible after the line limit.
114///
115/// Note that the line limit (if any) is determined by each Formatter
116/// implementation.
pub trait Formatter {
    // XXX could we provide more safety to ensure that callers of a Formatter
    // obey the requirements above, instead of panicking or being silently
    // incorrect?

    /// Switches the `Formatter` mode to "line folding". The `Formatter`
    /// must be currently in "direct" mode.
    ///
    /// The `Fmt` implementation panics if it is already in "line folding"
    /// mode.
    fn begin_line_folding(&mut self);

    /// Switches the `Formatter` mode to "direct". The `Formatter` must
    /// be currently in "line folding" mode.
    ///
    /// The `Fmt` implementation writes out any buffered folded text before
    /// switching, and panics if it is not in "line folding" mode.
    fn end_line_folding(&mut self);

    /// Registers a new boundary.
    /// This pushes the boundary on top of the internal "boundary stack".
    fn push_new_boundary(&mut self);

    /// Write the current declared boundary to the output (the one on top of the
    /// internal boundary stack). The `Formatter` can be either in "direct" or
    /// "line folding" mode.
    ///
    /// A boundary must have been registered previously. The `Fmt`
    /// implementation panics if the boundary stack is empty.
    fn write_current_boundary(&mut self);

    /// Pop the current boundary from the top of the "boundary stack".
    fn pop_boundary(&mut self);

    /// Write bytes from `buf`; they cannot be used for line folding.
    ///
    /// In line folding mode, `buf` must not contain CRLF and consecutive calls
    /// to `write_bytes` must not result in CRLF being emitted in the output
    /// (e.g. `fmt.write_bytes(b"\r"); fmt.write_bytes(b"\n")`).
    ///
    /// It is fine for `buf` to include whitespace characters.
    fn write_bytes(&mut self, buf: &[u8]);

    /// Write whitespace bytes from `buf`. In "line folding" mode, they can be
    /// used for line folding.
    ///
    /// `buf` *must only* contain whitespace characters ' ' and '\t'.
    fn write_fws_bytes(&mut self, buf: &[u8]);

    /// Terminate the current line, writing CRLF ("\r\n").
    fn write_crlf(&mut self);

    /// Write a single folding white space character (one ASCII space).
    ///
    /// Provided method: forwards to `write_fws_bytes(b" ")`.
    fn write_fws(&mut self) {
        self.write_fws_bytes(b" ")
    }

    /// Consumes the `Formatter` and returns the data that was printed to it.
    ///
    /// The `Fmt` implementation asserts that the boundary stack is empty and,
    /// if it is still in "line folding" mode, writes out the buffered folded
    /// text before returning.
    fn flush(self) -> Vec<u8>;
}
170
/// Output mode of a `Fmt`.
enum FormatterMode {
    /// Writes are appended directly to the output buffer.
    Direct,
    /// Writes go through the contained `LineFolder`, which buffers them in
    /// order to fold lines.
    Folding(LineFolder),
}
175
/// `Fmt` implements `Formatter`.
pub struct Fmt {
    // Line limit given to every `LineFolder` created by `begin_line_folding`.
    line_limit: Option<usize>,
    // Current mode; a new `Fmt` starts in `Direct` mode.
    mode: FormatterMode,
    // Stack of active multipart boundaries and the RNG used to generate them.
    boundaries: Boundaries,
    // Output written so far; returned by `flush`.
    buf: Vec<u8>,
}
183
/// Configuration passed when initializing a `Fmt`.
///
/// `line_limit` defines the maximum line length allowed before trying to split.
/// If set to `None`, there is no maximum line limit.
///
/// `seed` is used to seed the internal RNG which generates multipart
/// boundaries. If set to `None`, the RNG is seeded using randomness from
/// the operating system.
///
/// The type is `Debug` and `Clone` so that a configuration can be logged and
/// reused for several formatters.
#[derive(Debug, Clone)]
pub struct FmtConfig {
    seed: Option<u64>,
    line_limit: Option<usize>,
}
196
/// Default configuration: the boundary RNG is seeded from the operating
/// system, and lines are folded at 78 bytes.
pub const FMT_DEFAULT: FmtConfig = FmtConfig {
    seed: None,
    line_limit: Some(78), // RFC recommended line limit for emails
};
201
/// Same as `FMT_DEFAULT`, but without any line limit.
pub const FMT_NOFOLD: FmtConfig = FMT_DEFAULT.with_line_limit(None);
203
204impl FmtConfig {
205 pub const fn with_seed(self, seed: Option<u64>) -> Self {
206 Self { seed, ..self }
207 }
208
209 pub const fn with_line_limit(self, line_limit: Option<usize>) -> Self {
210 Self { line_limit, ..self }
211 }
212}
213
214impl Default for FmtConfig {
215 fn default() -> Self {
216 Self {
217 seed: None, // defaults to system RNG
218 line_limit: Some(78), // RFC recommended line limit for emails
219 }
220 }
221}
222
223impl Fmt {
224 pub fn new(cfg: FmtConfig) -> Self {
225 let rand = cfg
226 .seed
227 .map(RNG::seed_from_u64)
228 .unwrap_or_else(RNG::from_os_rng);
229 Self {
230 line_limit: cfg.line_limit,
231 mode: FormatterMode::Direct,
232 boundaries: Boundaries::new(rand),
233 buf: Vec::new(),
234 }
235 }
236}
237
238impl Formatter for Fmt {
239 fn begin_line_folding(&mut self) {
240 match self.mode {
241 FormatterMode::Direct => {
242 self.mode = FormatterMode::Folding(LineFolder::new(self.line_limit))
243 }
244 FormatterMode::Folding(_) => {
245 panic!("Formatter::begin_line_folding: already in folding mode")
246 }
247 }
248 }
249
250 fn end_line_folding(&mut self) {
251 match self.mode {
252 FormatterMode::Folding(ref mut folder) => {
253 folder.flush(&mut self.buf);
254 self.mode = FormatterMode::Direct
255 }
256 FormatterMode::Direct => {
257 panic!("Formatter::end_line_folding: not in folding mode")
258 }
259 }
260 }
261
262 fn push_new_boundary(&mut self) {
263 self.boundaries.push_new_boundary()
264 }
265
266 fn write_current_boundary(&mut self) {
267 let b = self.boundaries.current_boundary();
268 // inline write_bytes to avoid cloning `b`
269 match self.mode {
270 FormatterMode::Direct => self.buf.extend_from_slice(b),
271 FormatterMode::Folding(ref mut folder) => folder.write_bytes(b, &mut self.buf),
272 }
273 }
274
275 fn pop_boundary(&mut self) {
276 self.boundaries.pop_boundary()
277 }
278
279 fn write_bytes(&mut self, buf: &[u8]) {
280 match self.mode {
281 FormatterMode::Direct => self.buf.extend_from_slice(buf),
282 FormatterMode::Folding(ref mut folder) => folder.write_bytes(buf, &mut self.buf),
283 }
284 }
285
286 fn write_fws_bytes(&mut self, buf: &[u8]) {
287 match self.mode {
288 FormatterMode::Direct => self.buf.extend_from_slice(buf),
289 FormatterMode::Folding(ref mut folder) => folder.write_fws_bytes(buf, &mut self.buf),
290 }
291 }
292
293 fn write_crlf(&mut self) {
294 match self.mode {
295 FormatterMode::Direct => self.buf.extend_from_slice(ascii::CRLF),
296 FormatterMode::Folding(ref mut folder) => folder.write_crlf(&mut self.buf),
297 }
298 }
299
300 fn flush(mut self) -> Vec<u8> {
301 self.boundaries.assert_empty();
302 if let FormatterMode::Folding(mut folder) = self.mode {
303 folder.flush(&mut self.buf)
304 }
305 self.buf
306 }
307}
308
309// Line folding ----------------------------------------------------------------
310
/// `LineFolder` holds buffers and state used to perform line folding.
///
/// The owner of `LineFolder` MUST call its `flush` method after it is done
/// writing. Flushing must only happen after all writing has been done; once
/// a `LineFolder` has been flushed it cannot be written to again.
struct LineFolder {
    // Maximum length of a fold (not counting the final CRLF), or no limit.
    line_limit: LineLimit,
    // Edge case: at the end of the file, if the remaining data of the final
    // fold is only spaces, we must not put it on its own fold (as per the RFC).
    // Instead, we should add it to the previous fold.
    // To account for that edge case, we buffer both the current and the
    // previous fold of the current line.
    prev_fold: Option<Vec<u8>>,
    // invariant: prev_fold.is_some() ==> !cur_fold.is_empty()
    cur_fold: Vec<u8>,
    // True while `cur_fold` holds nothing but bytes written through
    // `write_fws_bytes` (in particular, while it is empty).
    cur_fold_is_only_fws: bool,
    // Index in `cur_fold` of the most recent whitespace byte at which the
    // line may be folded, if any.
    last_cut_candidate: Option<usize>,
    // We only handle flushing once at the end. Once the LineFolder has been
    // flushed, attempting to write will panic, and flushing again does
    // nothing.
    is_flushed: bool,
}
332
333impl LineFolder {
334 /// The line limit must not include the final CRLF and must not be zero.
335 /// For emails, this means line_limit=78.
336 fn new(line_limit: Option<usize>) -> Self {
337 Self {
338 line_limit: LineLimit::from(line_limit),
339 prev_fold: None,
340 cur_fold: Vec::new(),
341 cur_fold_is_only_fws: true,
342 last_cut_candidate: None,
343 is_flushed: false,
344 }
345 }
346
347 // NOTE: flushing is only allowed as the last operation on the LineFolder
348 // XXX if flushing fails, calling it again will do nothing; data in buffers is lost.
349 fn flush(&mut self, inner: &mut Vec<u8>) {
350 if self.is_flushed {
351 return;
352 }
353 self.is_flushed = true;
354 self.flush_line(inner)
355 }
356
357 // NOTE: `buf` must not contain line breaks (CRLF).
358 // To output line breaks, use `write_crlf`.
359 // XXX what are the guarantees in case the underlying writer fails?
360 fn write_bytes(&mut self, buf: &[u8], inner: &mut Vec<u8>) {
361 assert!(!self.is_flushed);
362
363 // A line must never start with whitespace
364 // (otherwise it would be indistinguishable from FWS)
365 if self.cur_fold.is_empty() && !buf.is_empty() {
366 // XXX turn this into a debug_assert?
367 assert!(!ascii::WS.contains(&buf[0]))
368 }
369
370 if self.cur_fold.len() + buf.len() <= self.line_limit || self.last_cut_candidate.is_none() {
371 // write `buf`
372 self.cur_fold.extend_from_slice(buf);
373 if !buf.is_empty() {
374 self.cur_fold_is_only_fws = false;
375 }
376 } else {
377 // fold at `last_cut_candidate`
378 self.fold(inner);
379 // recursive call to actually handle `buf`
380 self.write_bytes(buf, inner)
381 }
382 }
383
384 fn write_fws_bytes(&mut self, buf: &[u8], inner: &mut Vec<u8>) {
385 assert!(!self.is_flushed);
386 if buf.is_empty() {
387 return;
388 }
389
390 // A line must never begin with whitespace.
391 // XXX: turn this into debug_assert?
392 assert!(!self.cur_fold.is_empty());
393
394 // add buf[0] to `cur_fold`
395
396 if !self.cur_fold_is_only_fws {
397 self.last_cut_candidate = Some(self.cur_fold.len());
398 }
399 self.cur_fold.push(buf[0]);
400
401 // if we are past the line limit, we should fold if we can
402 // (possibly on the character we just added)
403 if self.cur_fold.len() > self.line_limit && self.last_cut_candidate.is_some() {
404 self.fold(inner)
405 }
406
407 // recursive call to handle the rest of the buffer
408 self.write_fws_bytes(&buf[1..], inner)
409 }
410
411 fn write_crlf(&mut self, inner: &mut Vec<u8>) {
412 assert!(!self.is_flushed);
413 // flush the buffers for the current line
414 self.flush_line(inner);
415 inner.extend_from_slice(ascii::CRLF)
416 }
417
418 // internal helpers
419
420 // NOTE: requires `self.last_cut_candidate.is_some()`
421 // folds at `last_cut_candidate`
422 fn fold(&mut self, inner: &mut Vec<u8>) {
423 // flush any existing `prev_fold`
424 if let Some(prev_fold) = &self.prev_fold {
425 // commit `prev_fold` before we split
426 inner.extend_from_slice(prev_fold);
427 inner.extend_from_slice(ascii::CRLF);
428 self.prev_fold = None;
429 }
430 let cut_pos = self.last_cut_candidate.unwrap();
431 // cur_fold = |aaaaaa bbb|
432 // ^ cut_pos
433 // becomes
434 // prev_fold = |aaaaaa|
435 // cur_fold = | bbb|
436 {
437 let mut prev_fold = self.cur_fold.split_off(cut_pos);
438 std::mem::swap(&mut self.cur_fold, &mut prev_fold);
439 self.prev_fold = Some(prev_fold);
440 }
441 self.last_cut_candidate = None;
442 // - if `cur_fold` is of size one, it only contains the
443 // character on which we folded, which is FWS.
444 // - otherwise, `cur_fold` is of size > 1, and contains
445 // non-FWS characters since `cut_pos` is the *last*
446 // cut candidate
447 self.cur_fold_is_only_fws = self.cur_fold.len() == 1
448 }
449
450 // terminate the current line, writing its data
451 fn flush_line(&mut self, inner: &mut Vec<u8>) {
452 if let Some(prev_fold) = &self.prev_fold {
453 inner.extend_from_slice(prev_fold);
454 if self.cur_fold_is_only_fws {
455 // edge case: write `cur_fold` on the same fold
456 // as prev_fold to avoid creating a fold with only
457 // spaces.
458 } else {
459 inner.extend_from_slice(ascii::CRLF);
460 }
461 }
462 inner.extend_from_slice(&self.cur_fold);
463 // reset fold state
464 self.prev_fold = None;
465 self.cur_fold.truncate(0);
466 self.cur_fold_is_only_fws = true;
467 self.last_cut_candidate = None
468 }
469}
470
// Internal type used by the implementation of LineFolder, representing the
// maximum allowed line length: either an integer or infinity (no line limit).
enum LineLimit {
    // No maximum: every length compares as smaller than this.
    NoLimit,
    // Maximum line length, as a number of bytes.
    Limit(usize),
}
477
478// Convenience impls allowing to write comparisons of the form `n <= line_limit`.
479impl std::cmp::PartialEq<LineLimit> for usize {
480 fn eq(&self, limit: &LineLimit) -> bool {
481 match limit {
482 LineLimit::Limit(m) => self == m,
483 LineLimit::NoLimit => false,
484 }
485 }
486}
487impl std::cmp::PartialOrd<LineLimit> for usize {
488 fn partial_cmp(&self, limit: &LineLimit) -> Option<std::cmp::Ordering> {
489 match limit {
490 LineLimit::Limit(m) => self.partial_cmp(m),
491 LineLimit::NoLimit => Some(std::cmp::Ordering::Less),
492 }
493 }
494}
495
496impl From<Option<usize>> for LineLimit {
497 fn from(o: Option<usize>) -> Self {
498 match o {
499 None => Self::NoLimit,
500 Some(n) => Self::Limit(n),
501 }
502 }
503}
504
505// Boundary handling for multiparts --------------------------------------------
506
// Stack of the multipart boundaries currently in use, together with the RNG
// that generates new ones.
struct Boundaries {
    active_boundaries: Vec<Vec<u8>>, // behaves as a stack
    // Source of randomness for `random_boundary`.
    rand: RNG,
}
511
// Length, in bytes, of every generated boundary.
// TODO: check
// NOTE(review): RFC 2046 appears to cap boundaries at 70 characters, which 65
// would satisfy -- confirm against the RFC before dropping the TODO.
const BOUNDARY_LEN: usize = 65;
514
515impl Boundaries {
516 fn new(rand: RNG) -> Self {
517 Self {
518 active_boundaries: Vec::new(),
519 rand,
520 }
521 }
522
523 fn push_new_boundary(&mut self) {
524 let b = self.random_boundary();
525 self.active_boundaries.push(b);
526 }
527
528 fn current_boundary(&self) -> &[u8] {
529 self.active_boundaries.last().unwrap()
530 }
531
532 fn pop_boundary(&mut self) {
533 self.active_boundaries.pop();
534 }
535
536 // generate a random boundary using characters in DIGIT | ALPHA
537 fn random_boundary(&mut self) -> Vec<u8> {
538 let mut v = Vec::with_capacity(BOUNDARY_LEN);
539 for _ in 0..BOUNDARY_LEN {
540 let n = self.rand.random_range(0..(10 + 26 + 26));
541 let byte = if n < 10 {
542 ascii::N0 + n
543 } else if n - 10 < 26 {
544 ascii::LCA + (n - 10)
545 } else {
546 ascii::LSA + (n - 10 - 26)
547 };
548 v.push(byte)
549 }
550 v
551 }
552
553 fn assert_empty(&self) {
554 assert!(self.active_boundaries.is_empty());
555 }
556}
557
558// Public formatting functions -------------------------------------------------
559
560/// Creates a formatter, passes it to `f`, and returns the corresponding output
561/// as a Vec.
562pub fn print_to_vec_with<F>(cfg: FmtConfig, f: F) -> Vec<u8>
563where
564 F: for<'a> Fn(&'a mut Fmt),
565{
566 let mut fmt = Fmt::new(cfg);
567 f(&mut fmt);
568 fmt.flush()
569}
570
571/// Prints a printable value as a Vec.
572pub fn print_to_vec<T: Print>(cfg: FmtConfig, x: T) -> Vec<u8> {
573 print_to_vec_with(cfg, |fmt| x.print(fmt))
574}
575
576// Cow<'a, [u8]> is our base bytes type
577impl<'a> Print for std::borrow::Cow<'a, [u8]> {
578 fn print(&self, fmt: &mut impl Formatter) {
579 fmt.write_bytes(self)
580 }
581}
582
#[cfg(test)]
pub(crate) mod tests {
    use super::*;

    // Configuration shared by the helpers below: the default line folding,
    // with a fixed RNG seed.
    fn test_cfg() -> FmtConfig {
        FMT_DEFAULT.with_seed(Some(0))
    }

    // in tests, fix the formatter seed and use line folding
    pub fn print_to_vec_with(f: impl Fn(&mut Fmt)) -> Vec<u8> {
        super::print_to_vec_with(test_cfg(), f)
    }
    pub fn print_to_vec<T: Print>(x: T) -> Vec<u8> {
        super::print_to_vec(test_cfg(), x)
    }

    #[test]
    fn test_folding() {
        // text that fits on the line, followed by text that does not:
        // the line is folded at the whitespace in between
        let head = [b'x'; 72];
        let folded = print_to_vec_with(|f| {
            f.begin_line_folding();
            f.write_bytes(&head);
            f.write_fws();
            f.write_bytes(b"yyyyyyyyy");
        });
        let mut expected = head.to_vec();
        expected.extend_from_slice(b"\r\n yyyyyyyyy");
        assert_eq!(folded, expected);

        // text already past the line limit with nowhere to fold:
        // the line is folded at the first opportunity
        let head = [b'x'; 80];
        let folded = print_to_vec_with(|f| {
            f.begin_line_folding();
            f.write_bytes(&head);
            f.write_fws();
            f.write_bytes(b"yyyyyyyyy");
        });
        let mut expected = head.to_vec();
        expected.extend_from_slice(b"\r\n yyyyyyyyy");
        assert_eq!(folded, expected);

        // several places to fold: only the last one that keeps the line
        // within the limit is used
        let folded = print_to_vec_with(|f| {
            f.begin_line_folding();
            f.write_bytes(&[b'x'; 18]);
            f.write_fws_bytes(&[b' '; 3]);
            f.write_bytes(&[b'x'; 16]);
            f.write_fws();
            f.write_bytes(&[b'x'; 32]);
            f.write_fws();
            f.write_bytes(&[b'y'; 9]);
        });
        let mut expected = Vec::new();
        expected.extend_from_slice(&[b'x'; 18]);
        expected.extend_from_slice(&[b' '; 3]);
        expected.extend_from_slice(&[b'x'; 16]);
        expected.extend_from_slice(b" ");
        expected.extend_from_slice(&[b'x'; 32]);
        expected.extend_from_slice(b"\r\n ");
        expected.extend_from_slice(&[b'y'; 9]);
        assert_eq!(folded, expected);

        // we must not fold in this case, because doing so would create a
        // fold containing only whitespace
        let folded = print_to_vec_with(|f| {
            f.begin_line_folding();
            f.write_bytes(b"X");
            f.write_fws_bytes(&[b' '; 82]);
        });
        let mut expected = b"X".to_vec();
        expected.extend_from_slice(&[b' '; 82]);
        assert_eq!(folded, expected);
    }
}
654}