samotop_parser/
smtp.rs

1use crate::SmtpParserPeg;
2use samotop_core::{common::Error, smtp::command::*, smtp::StartTls, smtp::*};
3use std::net::{Ipv4Addr, Ipv6Addr};
4use std::str::FromStr;
5
/// Access point to the parser rules generated from the `smtp_grammar` PEG
/// grammar declared further down in this file.
pub mod grammar {
    // Glob re-export of the generated rule functions; `pub(crate)` keeps them
    // visible inside this crate only.
    pub(crate) use super::smtp_grammar::*;
}
9
10impl Parser<StartTls> for SmtpParserPeg {
11    fn parse(&self, input: &[u8], state: &SmtpContext) -> ParseResult<StartTls> {
12        if input.is_empty() {
13            return Err(ParseError::Incomplete);
14        }
15        if let Some(mode) = state.session.mode {
16            return Err(ParseError::Mismatch(format!(
17                "Not parsing in {:?} mode",
18                mode
19            )));
20        }
21        let res = grammar::starttls(input);
22        trace!("Parsed {:?} from {:?}", res, String::from_utf8_lossy(input));
23        match res {
24            Err(e) => Err(ParseError::Failed(format!("Peg parser failed: {}", e))),
25            Ok((i, cmd)) => Ok((i, cmd)),
26        }
27    }
28}
29
30impl Parser<SmtpCommand> for SmtpParserPeg {
31    fn parse(&self, input: &[u8], state: &SmtpContext) -> ParseResult<SmtpCommand> {
32        if input.is_empty() {
33            return Err(ParseError::Incomplete);
34        }
35        if let Some(mode) = state.session.mode {
36            return Err(ParseError::Mismatch(format!(
37                "Not parsing in {:?} mode",
38                mode
39            )));
40        }
41        let res = grammar::command(input);
42        trace!("Parsed {:?} from {:?}", res, String::from_utf8_lossy(input));
43        match res {
44            Err(e) => Err(ParseError::Failed(format!("Peg parser failed: {}", e))),
45            Ok(Err(e)) => Err(e),
46            Ok(Ok((i, cmd))) => Ok((i, cmd)),
47        }
48    }
49}
50
51impl SmtpParserPeg {
52    pub fn forward_path(&self, input: &[u8]) -> ParseResult<SmtpPath> {
53        Self::map(input.len(), grammar::path_forward(input))
54    }
55    fn map<T, E>(len: usize, myres: std::result::Result<T, E>) -> ParseResult<T>
56    where
57        E: Into<Error>,
58    {
59        match myres {
60            Ok(item) => Ok((len, item)),
61            Err(e) => Err(ParseError::Mismatch(e.into().to_string())),
62        }
63    }
64}
65
/// Borrows `bytes` as a `&str`, or reports `"Invalid UTF-8"` when the bytes
/// are not valid UTF-8. The underlying decoding error is discarded.
fn utf8(bytes: &[u8]) -> std::result::Result<&str, &'static str> {
    match std::str::from_utf8(bytes) {
        Ok(text) => Ok(text),
        Err(_) => Err("Invalid UTF-8"),
    }
}
69fn utf8s(bytes: &[u8]) -> std::result::Result<String, &'static str> {
70    utf8(bytes).map(|s| s.to_string())
71}
72
// PEG grammar for SMTP command lines, operating directly on bytes (`[u8]`).
// The macro generates the `smtp_grammar` module; its rules are reached through
// the `grammar` module of this file. Alternatives separated by `/` are tried
// in the written order, so the order of rules in each choice matters.
peg::parser! {
    grammar smtp_grammar() for [u8] {

        // https://github.com/kevinmehall/rust-peg/issues/216
        // Case-insensitive literal: takes exactly `literal.len()` bytes and
        // compares them with `literal` ignoring ASCII case. On mismatch the
        // literal itself is reported as the expected token.
        rule i(literal: &'static str)
            = input:$([_]*<{literal.len()}>)
            {? if input.eq_ignore_ascii_case(literal.as_bytes()) { Ok(()) } else { Err(literal) } }

        // `STARTTLS` followed by CRLF. Yields the offset right after the CRLF;
        // whatever follows is consumed and ignored.
        pub rule starttls() -> (usize, StartTls)
            = i("starttls") CRLF() p:position!() rest:$([_]*)
            { (p, StartTls) }

        // Entry point for commands: a recognized command first, then a
        // terminated but unrecognized line, then any other non-empty input.
        pub rule command() -> ParseResult< SmtpCommand>
            = cmd:(valid_command() / invalid_command() / incomplete_command())
            {cmd}

        // One of the known commands. `p` is the offset right after the matched
        // command; the remaining bytes are consumed and discarded.
        pub rule valid_command() -> ParseResult<SmtpCommand>
            = cmd: (
                cmd_helo() /
                cmd_mail() /
                cmd_send() /
                cmd_soml() /
                cmd_saml() /
                cmd_rcpt() /
                cmd_data() /
                cmd_rset() /
                cmd_quit() /
                cmd_noop() /
                cmd_turn() /
                cmd_vrfy() /
                cmd_expn() /
                cmd_help()) p:position!() rest:$([_]*)
            {Ok((p, cmd))}

        // Any non-empty input that reached this alternative is reported as
        // `ParseError::Incomplete`.
        rule incomplete_command() -> ParseResult<SmtpCommand>
            = s:$(quiet!{ [_]+ } / expected!("incomplete input"))
            {Err(ParseError::Incomplete)}

        // A bare LF, or one or more non-LF bytes followed by LF, is a
        // terminated line that no command matched: reported as a mismatch.
        rule invalid_command() -> ParseResult<SmtpCommand>
            = s:$(quiet!{ "\n" / (![b'\n'][_]) + "\n" } / expected!("invalid input"))
            {ParseResult::Err(ParseError::Mismatch("PEG - unrecognized command".into()))}

        // Commands without arguments: verb (any case) directly followed by CRLF.
        pub rule cmd_quit() -> SmtpCommand
            = i("quit") CRLF()
            { SmtpCommand::Quit }

        pub rule cmd_rset() -> SmtpCommand
            = i("rset") CRLF()
            { SmtpCommand::Rset }

        pub rule cmd_data() -> SmtpCommand
            = i("data") CRLF()
            { SmtpCommand::Data }

        pub rule cmd_turn() -> SmtpCommand
            = i("turn") CRLF()
            { SmtpCommand::Turn }

        // MAIL / SEND / SOML / SAML share one shape: `<verb> from:` directly
        // followed by a reverse path, then zero or more space-prefixed
        // string parameters, then CRLF.
        pub rule cmd_mail() -> SmtpCommand
            = i("mail from:") p:path_reverse() s:strparam()* CRLF()
            { SmtpCommand::Mail(SmtpMail::Mail(p, s)) }
        pub rule cmd_send() ->SmtpCommand
            = i("send from:") p:path_reverse() s:strparam()* CRLF()
            { SmtpCommand::Mail(SmtpMail::Send(p, s)) }
        pub rule cmd_soml() -> SmtpCommand
            = i("soml from:") p:path_reverse() s:strparam()* CRLF()
            { SmtpCommand::Mail(SmtpMail::Soml(p, s)) }
        pub rule cmd_saml() -> SmtpCommand
            = i("saml from:") p:path_reverse() s:strparam()* CRLF()
            { SmtpCommand::Mail(SmtpMail::Saml(p, s)) }

        // `rcpt to:` directly followed by a forward path and optional parameters.
        pub rule cmd_rcpt() -> SmtpCommand
            = i("rcpt to:") p:path_forward() s:strparam()* CRLF()
            { SmtpCommand::Rcpt(SmtpRcpt(p, s)) }

        // HELO / EHLO / LHLO, one space, a host, CRLF. The verb is stored
        // upper-cased regardless of how it was written.
        pub rule cmd_helo() -> SmtpCommand
            = verb:$(i("helo") / i("ehlo") / i("lhlo")) _ host:host() CRLF()
            { SmtpCommand::Helo(SmtpHelo{verb:String::from_utf8_lossy(verb).to_uppercase(), host}) }

        // VRFY and EXPN take exactly one space-prefixed string parameter.
        pub rule cmd_vrfy() -> SmtpCommand
            = i("vrfy") s:strparam() CRLF()
            { SmtpCommand::Vrfy(s) }

        pub rule cmd_expn() -> SmtpCommand
            = i("expn") s:strparam() CRLF()
            { SmtpCommand::Expn(s) }

        // NOOP and HELP accept any number of space-prefixed string parameters.
        pub rule cmd_noop() -> SmtpCommand
            = i("noop") s:strparam()* CRLF()
            { SmtpCommand::Noop(s) }

        pub rule cmd_help() -> SmtpCommand
            = i("help") s:strparam()* CRLF()
            { SmtpCommand::Help(s) }

        // Forward path: a mailbox path or `<postmaster>` (any case).
        pub rule path_forward() -> SmtpPath
            = path_relay() / path_postmaster()
        // Reverse path: a mailbox path or the null path `<>`.
        pub rule path_reverse() -> SmtpPath
            = path_relay() / path_null()

        // `<` zero or more relay hosts, the mailbox name, `@`, the host, `>`.
        rule path_relay() -> SmtpPath
            = "<" relays:athost()* name:dot_string() "@" host:host() ">"
            { SmtpPath::Mailbox{name,host,relays} }

        rule path_postmaster() -> SmtpPath
            = i("<postmaster>")
            { SmtpPath::Postmaster }

        rule path_null() -> SmtpPath
            = "<>"
            { SmtpPath::Null }

        // One relay entry: `@host` followed either by `,` (accepted only when
        // the lookahead `&",@"` sees another `@` right after it) or by `:`.
        rule athost() -> SmtpHost
            = "@" h:host() (&",@" "," / ":")
            { h }

        // A single space followed by a string.
        rule strparam() -> String
            = _ s:string()
            { s }

        // Host forms, tried in this order; the first one that matches wins.
        pub rule host() -> SmtpHost
            = host_numeric() /
            host_ipv4() /
            host_ipv6() /
            host_other() /
            host_domain()

        // Dot-separated labels, returned as `SmtpHost::Domain`.
        rule host_domain() -> SmtpHost
            = s:$( label() ("." label())* )
            {? utf8s(s).map(SmtpHost::Domain) }
        // NOTE(review): `domain()` is not referenced by any other rule visible
        // in this grammar; `host_domain` spells the same pattern out itself.
        rule domain() = quiet!{label() ("." label())*} / expected!("domain name")
        // An ASCII letter or digit followed by any number of letters, digits or `-`.
        rule label() = [b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9'] [b'-' | b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9']*

        // `#` followed by decimal digits, read as a `u32` and converted to an
        // IPv4 address. Digits that do not parse as `u32` produce
        // `SmtpHost::Invalid`. The `expect("ASCII")` calls operate on bytes
        // the rule matched as ASCII digits only.
        rule host_numeric() -> SmtpHost
            = "#" s:$([b'0'..=b'9']+ / expected!("ipv4 number"))
            { match u32::from_str(utf8(s).expect("ASCII")) {
                Ok(ip) => SmtpHost::Ipv4(Ipv4Addr::from(ip)),
                Err(e) => SmtpHost::Invalid{label:"numeric".to_string(), literal:utf8s(s).expect("ASCII")},
            } }

        // Dotted-quad IPv4 literal enclosed in square brackets.
        rule host_ipv4() -> SmtpHost
            = "[" s:$(ipv4addr()) "]"
            { match Ipv4Addr::from_str(utf8(s).expect("ASCII")) {
                Ok(ip) => SmtpHost::Ipv4(ip),
                Err(e) => SmtpHost::Invalid{label:"ipv4".to_string(), literal:utf8s(s).expect("ASCII")},
            } }
        rule ipv4addr() = quiet!{ipv4part() "." ipv4part() "." ipv4part() "." ipv4part()} / expected!("ipv4 address")
        // One octet. Alternatives in order: 250-255, 200-249, two or three
        // digits starting with 0 or 1, then any one or two digits.
        rule ipv4part() = "25" [b'0'..=b'5'] /
            "2" [b'0'..=b'4'] [b'0'..=b'9'] /
            [b'0'..=b'1'] [b'0'..=b'9'] [b'0'..=b'9']? /
            [b'0'..=b'9'] [b'0'..=b'9']?

        // `IPv6` tag (any case), `:`, then a run of hex digits, colons and
        // dots which `Ipv6Addr::from_str` validates; text it rejects becomes
        // `SmtpHost::Invalid` labelled with the tag as written.
        // NOTE(review): unlike `host_ipv4`, no surrounding brackets are
        // matched here - confirm this is intended.
        rule host_ipv6() -> SmtpHost
            = l:$(i("IPv6")) ":" s:$(ipv6addr())
            { match Ipv6Addr::from_str(utf8(s).expect("ASCII")) {
                Ok(ip) => SmtpHost::Ipv6(ip),
                Err(e) => SmtpHost::Invalid{label:utf8s(l).expect("ASCII"), literal:utf8s(s).expect("ASCII")},
            } }
        rule ipv6addr() = quiet!{[b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b':' | b'.']+} / expected!("ipv6 address")

        // Generic `label:literal` host made of two strings.
        rule host_other() -> SmtpHost
            = l:string() ":" s:string()
            { SmtpHost::Other{label:l, literal:s} }

        // A quoted string, or else a plain one.
        pub rule string() -> String
            = str_quoted() / str_plain()

        // Like `string`, but the unquoted form may also contain dots.
        pub rule dot_string() -> String
            = str_quoted() / str_dot_plain()

        // Zero or more plain characters (so the empty string matches); the
        // collected bytes must be valid UTF-8 or the rule fails.
        rule str_plain() -> String
            = s:(chr()*)
            {? utf8s(&s[..]) }

        rule str_dot_plain() -> String
            = s:(chr_dot()*)
            {? utf8s(&s[..]) }

        // Double-quoted string; the quotes are dropped and the content must be
        // valid UTF-8 or the rule fails.
        rule str_quoted() -> String
            = [b'"'] s:(qchar()*) [b'"']
            {? utf8s(&s[..]) }

        rule qchar() -> u8
            = qchar_regular() / char_special()

        // Inside quotes: any single byte except `"`, `\`, CR and LF.
        rule qchar_regular() -> u8
            = b:$(quiet!{!("\"" / "\\" / "\r" / "\n") [_]} / expected!("quoted character"))
            {debug_assert!(b.len()==1); b[0]}

        rule chr() -> u8
            = char_regular() / char_special()
        rule chr_dot() -> u8
            = char_regular() / char_special() / dot()

        // Unquoted character: the listed punctuation, ASCII letters and
        // digits, and every byte in 0x80..=0xFF. The string rules validate the
        // collected bytes as UTF-8 afterwards. `b'-'` appears twice in the
        // set, which has no effect on what is matched.
        rule char_regular() -> u8
            = b:$(quiet!{[b'-' | b'!' | b'#' | b'$' | b'%' | b'&' |
                b'\'' | b'*' | b'+' | b'-' | b'`' | b'/' |
                    b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' |
                    b'=' | b'?' | b'~' | b'^' | b'_' | b'{' | b'}' | b'|' | 0x80..=0xFF
            ]} / expected!("regular character"))
            {debug_assert!(b.len()==1); b[0]}

        // Backslash escape: the backslash is dropped and the byte after it is
        // taken literally, whatever it is.
        rule char_special() -> u8
            = ignore:("\\") b:$(quiet!{[_]} / expected!("special character"))
            {debug_assert!(b.len()==1); b[0]}

        rule dot() -> u8
            = b:$(".")
            {debug_assert!(b.len()==1); b[0]}

        // Terminals: CRLF line end, a single space, and space-or-tab.
        // NOTE(review): `__()` is not referenced by any other rule visible in
        // this grammar.
        rule CRLF() = quiet!{"\r\n"} / expected!("{CRLF}")
        rule _() = quiet!{" "} / expected!("{SP}")
        rule __() = quiet!{_ / "\t"} / expected!("{WS}")
    }
}
288
#[cfg(test)]
mod tests {
    use super::grammar::*;
    use super::*;
    use samotop_core::common::Result;

    // A terminated line with an unknown verb is a mismatch, not a parser failure.
    #[test]
    fn command_parses_unknown_command() {
        let outcome = command(b"sOmE other command\r\n");
        if let Ok(Err(ParseError::Mismatch(_))) = outcome {
            return;
        }
        panic!("Expected mismatch, got {:?}", outcome);
    }

    // A plain mailbox path yields name and domain with no relays or parameters.
    #[test]
    fn cmd_parses_valid_mail_from() {
        let (_, parsed) = command(b"mail from:<here.there@everywhere.net>\r\n")
            .unwrap()
            .unwrap();
        let expected = SmtpCommand::Mail(SmtpMail::Mail(
            SmtpPath::Mailbox {
                name: "here.there".to_owned(),
                host: SmtpHost::Domain("everywhere.net".to_owned()),
                relays: vec![],
            },
            vec![],
        ));
        assert_eq!(parsed, expected);
    }

    // `label:literal` falls through to the generic host form.
    #[test]
    fn host_parses_unknown_host() {
        let expected = SmtpHost::Other {
            label: "who".to_string(),
            literal: "what".to_string(),
        };
        assert_eq!(host(b"who:what").unwrap(), expected);
    }

    // The reported offset is right after the CRLF.
    #[test]
    fn cmd_parser_starttls() {
        assert_eq!(starttls(b"STARTTLS\r\n").unwrap(), (10, StartTls));
    }

    #[test]
    fn command_parses_whitespace_line() {
        let outcome = command(b"   \r\n\t\t\r\n");
        assert!(outcome.is_err());
    }

    #[test]
    fn command_parses_helo() {
        let (_, cmd) = command(b"helo domain.com\r\n").unwrap().unwrap();
        assert_eq!(cmd.verb(), "HELO");
    }

    // Bytes following the command line do not prevent DATA from being recognized.
    #[test]
    fn command_parses_data() -> Result<()> {
        let (_, cmd) = command("DATA\r\n ěšě\r\nš\nčš".as_bytes())??;
        assert_eq!(cmd.verb(), "DATA");
        Ok(())
    }

    #[test]
    fn command_refuses_wrong_newline() {
        let outcome = command(b"QUIT\nQUIT\r\nquit\r\n");
        assert!(
            outcome.is_err(),
            "Single LF should be rejected as a command terminator in SMTP"
        );
    }

    // Only the first, complete command is reported; 6 is the offset after its CRLF.
    #[test]
    fn command_parses_incomplete_command() {
        let parsed = command(b"QUIT\r\nQUI").unwrap().unwrap();
        let expected = (6, SmtpCommand::Quit);
        assert_eq!(parsed, expected);
    }

    #[test]
    fn command_parses_valid_utf8() {
        let line = "Help \"ěščř\"\r\n";
        let parsed = command(line.as_bytes()).unwrap().unwrap();
        let expected = (17, SmtpCommand::Help(vec!["ěščř".to_owned()]));
        assert_eq!(parsed, expected);
    }

    #[test]
    fn command_parses_invalid_utf8() {
        let outcome = command(b"Help \"\x80\x80\"\r\n");
        if let Ok(Err(ParseError::Mismatch(_))) = outcome {
            return;
        }
        panic!("Expected mismatch, got {:?}", outcome);
    }

    // With several commands in the input only the first one is returned,
    // together with the offset at which the next one starts.
    #[test]
    fn command_parses_helo_mail_rcpt_quit() {
        let session = [
            "helo domain.com\r\n",
            "mail from:<me@there.net>\r\n",
            "rcpt to:<@relay.net:him@unreachable.local>\r\n",
            "quit\r\n",
        ]
        .concat();
        let parsed = command(session.as_bytes()).unwrap().unwrap();
        let expected = (
            17,
            SmtpCommand::Helo(SmtpHelo {
                verb: "HELO".to_owned(),
                host: SmtpHost::Domain("domain.com".to_owned()),
            }),
        );
        assert_eq!(parsed, expected);
    }

    #[test]
    fn string_parses_simple_ascii() {
        assert_eq!(string(b"abc").unwrap(), "abc");
    }

    #[test]
    fn string_parses_quotes_ascii() {
        assert_eq!(string(b"\"abc\"").unwrap(), "abc");
    }

    // A backslash-escaped quote inside a quoted string is kept as a plain quote.
    #[test]
    fn string_parses_quotes_ascii_with_quote() {
        assert_eq!(string(b"\"a\\\"bc\"").unwrap(), "a\"bc");
    }

    #[test]
    fn string_parses_quoted_utf8() {
        let parsed = string("\"ščřž\"".as_bytes()).unwrap();
        assert_eq!(parsed, "ščřž");
    }

    #[test]
    fn string_parses_simple_utf8() {
        let parsed = string("ščřž".as_bytes()).unwrap();
        assert_eq!(parsed, "ščřž");
    }

    #[test]
    fn string_fails_on_invalid_utf8() {
        assert!(string(b"\"\x80\x80\"").is_err());
    }
}
449        assert!(result.is_err());
450    }
451}