1use super::header::{Header, HeaderMap};
4use super::results::{ParsingError, ParsingResult};
5use super::rfc2047::decode_rfc2047;
6
/// Line-length threshold, counted in characters, used by
/// `Rfc5322Builder::emit_folded`: once the running count for the current line
/// reaches this value, a fold is inserted at the last space seen (if any).
pub const MIME_LINE_LENGTH: usize = 78;
8
/// Character classification helpers used by the RFC 5322 parser.
trait Rfc5322Character {
    /// Whether the character is treated as a "special" (a delimiter that
    /// cannot appear in an atom).
    fn is_special(&self) -> bool;

    /// Whether the character is a visible (printing) character.
    fn is_vchar(&self) -> bool;

    /// Whether the character may appear in a header field name.
    fn is_ftext(&self) -> bool;

    /// Whether the character may appear in an atom: it must be visible and
    /// must not be a special.
    fn is_atext(&self) -> bool {
        // Visibility is checked first; specials are only consulted for
        // visible characters.
        if !self.is_vchar() {
            return false;
        }
        !self.is_special()
    }
}
21
22impl Rfc5322Character for char {
23 fn is_ftext(&self) -> bool {
24 match *self {
25 '!'..='9' | ';'..='~' => true,
26 _ => false,
27 }
28 }
29
30 fn is_special(&self) -> bool {
31 match *self {
32 '(' | ')' | '<' | '>' | '[' | ']' | ':' | ';' | '@' | '\\' | ',' | '.' | '\"' | ' ' => {
33 true
34 }
35 _ => false,
36 }
37 }
38
39 fn is_vchar(&self) -> bool {
40 match *self {
41 '!'..='~' => true,
42 _ => false,
43 }
44 }
45}
46
47pub struct Rfc5322Parser<'s> {
58 s: &'s str,
59 pos: usize,
60 pos_stack: Vec<usize>,
61}
62
63impl<'s> Rfc5322Parser<'s> {
64 pub fn new(source: &'s str) -> Rfc5322Parser<'s> {
67 Rfc5322Parser {
68 s: source,
69 pos: 0,
70 pos_stack: Vec::new(),
71 }
72 }
73
74 pub fn push_position(&mut self) {
77 self.pos_stack.push(self.pos);
78 }
79
80 pub fn pop_position(&mut self) {
83 match self.pos_stack.pop() {
84 Some(pos) => {
85 self.pos = pos;
86 }
87 None => panic!("Popped position stack too far"),
88 }
89 }
90
91 pub fn consume_message(&mut self) -> Option<(HeaderMap, String)> {
102 let mut headers = HeaderMap::new();
103 while !self.eof() {
104 let header = self.consume_header();
105 if let Some(header) = header {
106 headers.insert(header);
107 } else {
108 if !self.eof() && self.peek_linebreak() {
110 assert!(self.consume_linebreak());
111 }
112
113 break;
114 }
115 }
116
117 let body = self.s[self.pos..].to_string();
119 self.pos = self.s.len();
120
121 Some((headers, body))
122 }
123
124 pub fn consume_header(&mut self) -> Option<Header> {
133 let last_pos = self.pos;
134 let field_name = self.consume_while(|c| c.is_ftext());
136 self.consume_linear_whitespace();
137 if field_name.is_empty() || self.eof() || self.peek() != ':' {
138 self.pos = last_pos;
141 None
142 } else {
143 self.consume_char();
145 self.consume_linear_whitespace();
146 let field_value = self.consume_unstructured();
147
148 if !self.consume_linebreak() {
150 return None;
151 };
152
153 Some(Header::new(field_name, field_value))
154 }
155 }
156
157 pub fn consume_unstructured(&mut self) -> String {
160 let mut result = String::new();
161 while !self.eof() {
162 if self.peek_linebreak() {
163 if !self.consume_folding_whitespace() {
166 break;
167 }
168 }
169
170 result.push_str(&self.consume_while(|c| c.is_vchar() || c == ' ' || c == '\t')[..])
171 }
172 result
173 }
174
175 pub fn consume_folding_whitespace(&mut self) -> bool {
182 let current_position = self.pos;
184 let is_fws = if !self.eof() && self.consume_linebreak() {
185 match self.consume_char() {
186 Some(' ') | Some('\t') => true,
187 _ => false,
188 }
189 } else {
190 false
191 };
192
193 if is_fws {
194 self.consume_linear_whitespace();
196 } else {
197 self.pos = current_position;
199 }
200
201 is_fws
202 }
203
204 pub fn consume_word(&mut self, allow_dot_atom: bool) -> Option<String> {
213 let p = self.peek();
214 if p == '"' {
215 self.consume_quoted_string()
217 } else {
218 self.consume_atom(allow_dot_atom)
220 }
221 }
222
223 pub fn consume_phrase(&mut self, allow_dot_atom: bool) -> Option<String> {
232 let mut phrase = String::new();
233
234 while !self.eof() {
235 self.consume_linear_whitespace();
236
237 let word = match self.consume_word(allow_dot_atom) {
238 Some(x) => x,
239 None => break, };
242
243 let w_slice = &word[..];
244 let decoded_word = if w_slice.starts_with("=?") && w_slice.ends_with("?=") {
246 match decode_rfc2047(w_slice) {
247 Some(w) => w,
248 None => w_slice.to_string(),
249 }
250 } else {
251 w_slice.to_string()
252 };
253
254 if !phrase.is_empty() {
256 phrase.push_str(" ");
257 }
258 phrase.push_str(&decoded_word[..]);
259 }
260
261 if !phrase.is_empty() {
262 Some(phrase)
263 } else {
264 None
265 }
266 }
267
268 pub fn consume_quoted_string(&mut self) -> Option<String> {
271 if self.peek() != '"' {
272 None
274 } else {
275 let mut quoted_string = String::new();
276 let mut inside_escape = false;
277 let mut terminated = false;
278 self.consume_char();
280 while !terminated && !self.eof() {
281 match self.peek() {
282 '\\' if !inside_escape => {
283 self.consume_char();
286 inside_escape = true;
287 }
288 '"' if !inside_escape => {
289 self.consume_char();
292 terminated = true;
293 }
294 _ => {
295 if let Some(c) = self.consume_char() {
297 quoted_string.push(c);
298 inside_escape = false;
300 }
301 else {
303 return None;
304 }
305 }
306 }
307 }
308
309 if inside_escape || !terminated {
310 None
312 } else {
313 Some(quoted_string)
314 }
315 }
316 }
317
318 pub fn consume_atom(&mut self, allow_dot: bool) -> Option<String> {
324 if self.eof() || !self.peek().is_atext() {
325 None
326 } else {
327 Some(self.consume_while(|c| c.is_atext() || (allow_dot && c == '.')))
328 }
329 }
330
331 pub fn consume_linear_whitespace(&mut self) {
334 self.consume_while(|c| c == '\t' || c == ' ');
335 }
336
337 #[inline]
339 pub fn consume_char(&mut self) -> Option<char> {
341 if self.eof() {
342 return None;
343 }
344 let c = self.peek();
345 self.pos += c.len_utf8();
346 Some(c)
347 }
348
349 pub fn consume_linebreak(&mut self) -> bool {
352 if self.eof() {
353 return false;
354 }
355
356 let start_pos = self.pos;
357
358 match self.consume_char() {
359 Some('\r') => {
360 if !self.eof() && self.peek() == '\n' {
362 self.consume_char();
363 }
364 true
365 }
366 Some('\n') => true,
367 _ => {
368 self.pos = start_pos;
369 false
370 }
371 }
372 }
373
374 pub fn peek_linebreak(&mut self) -> bool {
377 match self.peek() {
378 '\r' | '\n' => true,
379 _ => false,
380 }
381 }
382
383 #[inline]
391 pub fn consume_while<F: Fn(char) -> bool>(&mut self, test: F) -> String {
393 let start_pos = self.pos;
394 while !self.eof() && test(self.peek()) {
395 self.consume_char();
396 }
397 self.s[start_pos..self.pos].to_string()
398 }
399
400 #[inline]
404 pub fn peek(&self) -> char {
406 self.s[self.pos..].chars().next().unwrap()
407 }
408
409 #[inline]
411 pub fn assert_char(&self, c: char) -> ParsingResult<()> {
413 self.assert_not_eof()?;
414
415 let actual_c = self.peek();
416 if c == actual_c {
417 Ok(())
418 } else {
419 Err(ParsingError::new(format!(
420 "Expected {}, got {}",
421 c, actual_c
422 )))
423 }
424 }
425
426 #[inline]
428 pub fn assert_not_eof(&self) -> ParsingResult<()> {
430 if self.eof() {
431 Err(ParsingError::new("Reached EOF.".to_string()))
432 } else {
433 Ok(())
434 }
435 }
436
437 #[inline]
439 pub fn peek_to_end(&self) -> &str {
441 &self.s[self.pos..]
442 }
443
444 #[inline]
446 pub fn eof(&self) -> bool {
448 self.pos >= self.s.len()
449 }
450}
451
452pub struct Rfc5322Builder {
454 result: String,
455}
456
457impl Rfc5322Builder {
458 pub fn new() -> Rfc5322Builder {
460 Rfc5322Builder {
461 result: "".to_string(),
462 }
463 }
464
465 pub fn result(&self) -> &String {
466 &self.result
467 }
468
469 pub fn emit_raw(&mut self, s: &str) {
470 self.result.push_str(s);
471 }
472
473 pub fn emit_folded(&mut self, s: &str) {
474 let mut cur_len = 0;
475 let mut last_space = 0;
476 let mut last_cut = 0;
477
478 for (pos, c) in s.char_indices() {
479 match c {
480 ' ' => {
481 last_space = pos;
482 }
483 '\r' => {
484 cur_len = 0;
485 }
486 '\n' => {
487 cur_len = 0;
488 }
489 _ => {}
490 }
491
492 cur_len += 1;
493 if cur_len >= MIME_LINE_LENGTH && last_space > 0 {
495 self.emit_raw(&s[last_cut..last_space]);
498 self.emit_raw("\r\n\t");
500
501 cur_len = 0;
503 last_cut = last_space + s[last_space..].chars().next().unwrap().len_utf8();
504 last_space = 0;
505 }
506 }
507
508 self.emit_raw(&s[last_cut..]);
510 }
511}
512
513impl Default for Rfc5322Builder {
514 fn default() -> Self {
515 Rfc5322Builder::new()
516 }
517}
518
#[cfg(test)]
mod tests {
    use super::*;

    /// One `consume_phrase` scenario: `input` must parse to `output`.
    struct PhraseTestCase<'s> {
        input: &'s str,
        output: &'s str,
        name: &'s str,
    }

    /// `consume_message` must return `Some` for empty, header-only and
    /// unterminated input.
    #[test]
    fn test_parser() {
        let inputs = [
            "",
            "\r\n",
            "From: Garbage@-\r\n",
            "From: Garbage@",
            "From: Garnage@-",
        ];

        for input in inputs.iter() {
            let mut parser = Rfc5322Parser::new(input);
            assert!(parser.consume_message().is_some());
        }
    }

    #[test]
    fn test_consume_phrase() {
        let tests = [
            PhraseTestCase {
                input: "\"test phrase\"",
                output: "test phrase",
                name: "Simple quoted-string",
            },
            PhraseTestCase {
                input: "\"test \\\"phrase\\\"\"",
                output: "test \"phrase\"",
                name: "quoted-string with escape character",
            },
            PhraseTestCase {
                input: "\"=?utf-8?q?encoded=20q-string?=\"",
                output: "encoded q-string",
                name: "Encoded quoted-string",
            },
            PhraseTestCase {
                input: "atom test",
                output: "atom test",
                name: "Collection of atoms",
            },
            PhraseTestCase {
                input: "=?utf-8?q?encoded=20atom?=",
                output: "encoded atom",
                name: "Encoded atom",
            },
            PhraseTestCase {
                input: "Mix of atoms \"and quoted strings\"",
                output: "Mix of atoms and quoted strings",
                name: "Mix of atoms and quoted strings",
            },
            PhraseTestCase {
                input: "=?utf-8?q?encoded=20atoms?= mixed with \"unencoded\" \"=?utf-8?b?YW5kIGVuY29kZWQgcS1zdHJpbmdz?=\"",
                output: "encoded atoms mixed with unencoded and encoded q-strings",
                name: "Mix of atoms, q-strings of differing encodings",
            },
            PhraseTestCase {
                input: "\"John Smith\" <test@example.org>",
                output: "John Smith",
                name: "Stop consuming phrase at \"special\" character",
            },
        ];

        for t in tests.iter() {
            let mut p = Rfc5322Parser::new(t.input);
            let phrase = p.consume_phrase(false);
            // Format arguments are passed to the macro directly: a
            // pre-formatted `String` as the panic message is rejected by
            // newer editions.
            assert!(phrase.is_some(), "{} should return Some", t.name);
            assert_eq!(phrase.unwrap(), t.output, "{}", t.name);
        }
    }

    /// One `consume_message` scenario: expected headers and body for `input`.
    struct MessageTestCase<'s> {
        input: &'s str,
        headers: Vec<(&'s str, &'s str)>,
        body: &'s str,
    }

    #[test]
    fn test_consume_message() {
        let tests = vec![
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\n\r\nBody",
                headers: vec![("From", "\"Joe Blogs\" <joe@example.org>")],
                body: "Body",
            },
            MessageTestCase {
                // Bare "\n" line endings.
                input: "From: \"Joe Blogs\" <joe@example.org>\n\nBody",
                headers: vec![("From", "\"Joe Blogs\" <joe@example.org>")],
                body: "Body",
            },
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\n\r\nMultiline\r\nBody",
                headers: vec![("From", "\"Joe Blogs\" <joe@example.org>")],
                body: "Multiline\r\nBody",
            },
            MessageTestCase {
                input: "From: \"Joe Blogs\" <joe@example.org>\r\nTo: \"John Doe\" <john@example.org>\r\n\r\nMultiple headers",
                headers: vec![
                    ("From", "\"Joe Blogs\" <joe@example.org>"),
                    ("To", "\"John Doe\" <john@example.org>"),
                ],
                body: "Multiple headers",
            },
            MessageTestCase {
                input: "Folded-Header: Some content that is \r\n\t wrapped with a tab.\r\n\r\nFolding whitespace test",
                headers: vec![("Folded-Header", "Some content that is wrapped with a tab.")],
                body: "Folding whitespace test",
            },
            MessageTestCase {
                input: "Folded-Header: Some content that is \r\n  wrapped with spaces.\r\n\r\nFolding whitespace test",
                headers: vec![("Folded-Header", "Some content that is wrapped with spaces.")],
                body: "Folding whitespace test",
            },
        ];

        for test in tests.iter() {
            let mut p = Rfc5322Parser::new(test.input);
            let (headers, body) = match p.consume_message() {
                Some(message) => message,
                None => panic!("Failed to parse message"),
            };

            assert_eq!(body, test.body);
            for &(header_title, header_value) in test.headers.iter() {
                let matching_headers = headers.find(&header_title.to_string()).unwrap();
                assert!(matching_headers.iter().any(|h| {
                    let val: String = h.get_value().unwrap();
                    val == header_value
                }));
            }
        }
    }

    #[test]
    fn test_builder_folding() {
        struct BuildFoldTest<'s> {
            input: &'s str,
            expected: &'s str,
        }

        let tests = [
            BuildFoldTest {
                input: "A long line that should get folded on a space at some point around here, possibly at this point.",
                expected: "A long line that should get folded on a space at some point around here,\r\n\
                           \tpossibly at this point.",
            },
            BuildFoldTest {
                input: "A long line that should get folded on a space at some point around here, possibly at this point. And yet more content that will get folded onto another line.",
                expected: "A long line that should get folded on a space at some point around here,\r\n\
                           \tpossibly at this point. And yet more content that will get folded onto another\r\n\
                           \tline.",
            },
        ];

        for test in tests.iter() {
            // Not named `gen`: that identifier is reserved in edition 2024.
            let mut builder = Rfc5322Builder::new();
            builder.emit_folded(test.input);
            assert_eq!(builder.result(), test.expected);
        }
    }
}