samotop_parser/
data.rs

1use crate::SmtpParserPeg;
2use samotop_core::smtp::{command::MailBody, *};
3
4impl Parser<MailBody<Vec<u8>>> for SmtpParserPeg {
5    fn parse(&self, input: &[u8], state: &SmtpContext) -> ParseResult<MailBody<Vec<u8>>> {
6        let crlf = match state.session.mode {
7            Some(SmtpSession::DATA_MODE) => true,
8            Some(SmtpSession::DATA_PARTIAL_MODE) => false,
9            mode => {
10                return Err(ParseError::Mismatch(format!(
11                    "Not matching data stream in {:?} mode",
12                    mode
13                )))
14            }
15        };
16        let res = map_cmd(grammar::data(input, crlf));
17        trace!("Parsed {:?} from {:?}", res, String::from_utf8_lossy(input));
18        res
19    }
20}
21
22fn map_cmd(
23    res: std::result::Result<ParseResult<Vec<u8>>, peg::error::ParseError<usize>>,
24) -> ParseResult<MailBody<Vec<u8>>> {
25    match res {
26        Ok(Ok((i, data))) if data.is_empty() => Ok((i, MailBody::End)),
27        Ok(Ok((i, data))) => Ok((
28            i,
29            MailBody::Chunk {
30                ends_with_new_line: data.ends_with(b"\r\n"),
31                data,
32            },
33        )),
34        Ok(Err(e)) => Err(e),
35        Err(e) => Err(ParseError::Failed(format!("Peg parser failed: {}", e))),
36    }
37}
38
39fn utf8(bytes: &[u8]) -> std::result::Result<&str, &'static str> {
40    std::str::from_utf8(bytes).map_err(|_e| "Invalid UTF-8")
41}
42fn utf8s(bytes: &[u8]) -> std::result::Result<String, &'static str> {
43    utf8(bytes).map(|s| s.to_string())
44}
45
46peg::parser! {
47    /// The parser takes advantage of keeping external state of reaching CR LF
48    /// This state is passed as an argument. Caller detects CR LF end from output.
49    /// The parser treats CR LF before final dot as part of the data
50    ///    as otherwise the scheme is terribly ambiguous and complex.
51    grammar grammar() for [u8] {
52
53        pub rule data(crlf:bool) -> ParseResult< Vec<u8>>
54            = complete(crlf) / incomplete(crlf)
55
56        rule complete(crlf:bool) -> ParseResult< Vec<u8>>
57            = s:( eof(crlf) / data_part(crlf) ) p:position!() rest:$([_]*)
58            {Ok((p,s))}
59
60        rule incomplete(crlf:bool) -> ParseResult< Vec<u8>>
61            = rest:$([_]*)
62            {Err(ParseError::Incomplete)}
63
64        rule eof(crlf:bool) ->  Vec<u8>
65            =  b:$(".\r\n")
66            { if crlf {vec![]} else {b.to_vec()} }
67
68        rule data_part(crlf:bool) ->  Vec<u8>
69            = s: ( escaped(crlf) / regular() )
70            {s.into()}
71
72        rule escaped(crlf:bool) -> String    = "." r:$(regular() / ".")
73            {
74                ?match (crlf, utf8s(r)) {
75                    (_, Err(e)) => Err(e),
76                    (true, Ok(r)) => Ok(r),
77                    (false, Ok(r)) => Ok(format!(".{}",r)),
78                }
79            }
80        rule regular() -> String = s:$( ( chr() / eols() )+ ) {?utf8s(s)}
81
82        rule eols() = quiet!{ "\r"+ !("\r")&[_] / "\n" } / expected!("predictable new line chars CR LF")
83        rule chr() = quiet!{![b'\r'|b'\n'|b'.'] [_]} / expected!("any char except CR LF and .")
84    }
85}
86
#[cfg(test)]
mod without_crlf {
    //! Grammar behavior when the `crlf` flag is `false`.

    use super::*;
    use samotop_core::common::Result;

    const CRLF: bool = false;

    /// Expects `len` consumed bytes; the data is compared with `assert_eq!`.
    fn chunk_eq(input: &[u8], len: usize, data: &[u8], what: &str) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Ok((n, b)) if n == len => assert_eq!(b, data.to_vec()),
            otherwise => panic!("Expected {}, got {:?}", what, otherwise),
        }
        Ok(())
    }

    /// Expects `len` consumed bytes and exactly `data`; any difference is
    /// reported through the "Expected ..." panic.
    fn chunk_is(input: &[u8], len: usize, data: &[u8], what: &str) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Ok((n, b)) if n == len && b.as_slice() == data => {}
            otherwise => panic!("Expected {}, got {:?}", what, otherwise),
        }
        Ok(())
    }

    /// Expects the grammar to ask for more input.
    fn incomplete(input: &[u8]) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Err(ParseError::Incomplete) => {}
            otherwise => panic!("Expected incomplete, got {:?}", otherwise),
        }
        Ok(())
    }

    #[test]
    fn plain_chunk() -> Result<()> {
        chunk_eq(b"abcd", 4, b"abcd", "body chunk")
    }

    #[test]
    fn crlf_chunk() -> Result<()> {
        chunk_eq(b"abcd\r\nxyz", 9, b"abcd\r\nxyz", "body chunk")
    }

    #[test]
    fn lf_chunk() -> Result<()> {
        chunk_eq(b"abcd\nxyz", 8, b"abcd\nxyz", "body chunk")
    }

    #[test]
    fn plain_eol() -> Result<()> {
        chunk_is(b"foo\r\n", 5, b"foo\r\n", "foo")
    }

    #[test]
    fn cr_chunk() -> Result<()> {
        chunk_eq(b"abcd\rxyz", 8, b"abcd\rxyz", "body chunk")
    }

    #[test]
    fn mid_way_dot() -> Result<()> {
        chunk_eq(b".\r\n", 3, b".\r\n", "dot")
    }

    #[test]
    fn midway_dot_foo() -> Result<()> {
        chunk_is(b".foo", 4, b".foo", "dot foo")
    }

    #[test]
    fn midway_dot_foo_crlf() -> Result<()> {
        chunk_is(b".foo\r\n", 6, b".foo\r\n", "dot foo crlf")
    }

    #[test]
    fn mid_way_lflf() -> Result<()> {
        chunk_eq(b"\n\nfoo", 5, b"\n\nfoo", "chunk")
    }

    /// Walks a whole body piece by piece, feeding the CR LF state observed
    /// on each piece into the next call.
    #[test]
    fn complex() {
        let pieces: [&[u8]; 4] = [b"\r\n", b".", b"\r\nxoxo\r\n", b""];
        let mut input: &[u8] = b"\r\n..\r\nxoxo\r\n.\r\n";
        let mut crlf = CRLF;
        for want in pieces.iter() {
            let (len, b) = grammar::data(input, crlf).unwrap().unwrap();
            input = &input[len..];
            assert_eq!(b, want.to_vec());
            crlf = b.ends_with(b"\r\n");
        }
        assert!(input.is_empty());
    }

    #[test]
    fn full_dot_stop() -> Result<()> {
        chunk_eq(b"\r\n.\r\n", 2, b"\r\n", "crlf")
    }

    #[test]
    fn mid_way_dot_stop() -> Result<()> {
        chunk_eq(b".\r\n", 3, b".\r\n", "chunk")
    }

    #[test]
    fn get_crlf() -> Result<()> {
        chunk_eq(b"\r\n", 2, b"\r\n", "crlf")
    }

    #[test]
    fn get_crlf_dot() -> Result<()> {
        chunk_eq(b"\r\n.", 2, b"\r\n", "crlf")
    }

    #[test]
    fn incomplete_cr() -> Result<()> {
        incomplete(b"\r")
    }

    #[test]
    fn incomplete_empty() -> Result<()> {
        incomplete(b"")
    }
}
239
#[cfg(test)]
mod after_crlf {
    //! Grammar behavior when the `crlf` flag is `true`.

    use super::*;
    use samotop_core::common::Result;

    const CRLF: bool = true;

    /// Expects `len` consumed bytes; the data is compared with `assert_eq!`.
    fn chunk_eq(input: &[u8], len: usize, data: &[u8], what: &str) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Ok((n, b)) if n == len => assert_eq!(b, data.to_vec()),
            otherwise => panic!("Expected {}, got {:?}", what, otherwise),
        }
        Ok(())
    }

    /// Expects `len` consumed bytes and exactly `data`; any difference is
    /// reported through the "Expected ..." panic.
    fn chunk_is(input: &[u8], len: usize, data: &[u8], what: &str) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Ok((n, b)) if n == len && b.as_slice() == data => {}
            otherwise => panic!("Expected {}, got {:?}", what, otherwise),
        }
        Ok(())
    }

    /// Expects the grammar to ask for more input.
    fn incomplete(input: &[u8]) -> Result<()> {
        match grammar::data(input, CRLF)? {
            Err(ParseError::Incomplete) => {}
            otherwise => panic!("Expected incomplete, got {:?}", otherwise),
        }
        Ok(())
    }

    /// Walks a whole body piece by piece, feeding the CR LF state observed
    /// on each piece into the next call, then checks the final dot.
    #[test]
    fn complex() {
        let pieces: [&[u8]; 3] = [b"\r\n", b".", b"\r\nxoxo\r\n"];
        let mut input: &[u8] = b"\r\n..\r\nxoxo\r\n.\r\n";
        let mut crlf = CRLF;
        for want in pieces.iter() {
            let (len, b) = grammar::data(input, crlf).unwrap().unwrap();
            input = &input[len..];
            assert_eq!(b, want.to_vec());
            crlf = b.ends_with(b"\r\n");
        }
        assert_eq!(input, b".\r\n".to_vec());
        let (len, b) = grammar::data(input, true).unwrap().unwrap();
        let input = &input[len..];
        assert_eq!(b, b"".to_vec(), "input: {:?}", input);
        assert!(input.is_empty());
    }

    #[test]
    fn plain_chunk() -> Result<()> {
        chunk_eq(b"abcd", 4, b"abcd", "body chunk")
    }

    #[test]
    fn ignores_command() -> Result<()> {
        chunk_eq(b".\r\nquit\r\n\r\n", 3, b"", "end")
    }

    #[test]
    fn crlf_chunk() -> Result<()> {
        chunk_eq(b"abcd\r\nxyz", 9, b"abcd\r\nxyz", "body chunk")
    }

    #[test]
    fn lf_chunk() -> Result<()> {
        chunk_eq(b"abcd\nxyz", 8, b"abcd\nxyz", "body chunk")
    }

    #[test]
    fn plain_eol() -> Result<()> {
        chunk_is(b"foo\r\n", 5, b"foo\r\n", "foo crlf")
    }

    #[test]
    fn cr_chunk() -> Result<()> {
        chunk_eq(b"abcd\rxyz", 8, b"abcd\rxyz", "body chunk")
    }

    #[test]
    fn dot_stop() -> Result<()> {
        // Kept inline: two separate assertions are made on the same data.
        let parsed = grammar::data(b".\r\n", CRLF)?;
        match parsed {
            Ok((3, b)) => {
                assert!(b.is_empty());
                assert_eq!(b, b"");
            }
            otherwise => panic!("Expected end, got {:?}", otherwise),
        }
        Ok(())
    }

    #[test]
    fn dot_stop_full() -> Result<()> {
        chunk_eq(b"\r\n.\r\n", 2, b"\r\n", "crlf")
    }

    #[test]
    fn dot_escape() -> Result<()> {
        chunk_is(b".foo", 4, b"foo", "foo")
    }

    #[test]
    fn dot_escape_crlf() -> Result<()> {
        chunk_is(b".foo\r\n", 6, b"foo\r\n", "foo crlf")
    }

    #[test]
    fn trailing_lf() -> Result<()> {
        chunk_is(b"\n\r\n.\r\n", 3, b"\n\r\n", "lf")
    }

    #[test]
    fn trailing_cr() -> Result<()> {
        chunk_is(b"\r\r\n.\r\n", 3, b"\r\r\n", "cr")
    }

    #[test]
    fn get_crlf() -> Result<()> {
        chunk_is(b"\r\n", 2, b"\r\n", "crlf")
    }

    #[test]
    fn get_crlf_dot() -> Result<()> {
        chunk_is(b"\r\n.", 2, b"\r\n", "crlf")
    }

    #[test]
    fn incomplete_cr() -> Result<()> {
        incomplete(b"\r")
    }

    #[test]
    fn incomplete_dot() -> Result<()> {
        incomplete(b".")
    }

    #[test]
    fn incomplete_dot_cr() -> Result<()> {
        incomplete(b".\r")
    }

    #[test]
    fn incomplete_empty() -> Result<()> {
        incomplete(b"")
    }
}