use crate::config::Strictness;
use crate::error::{Error, ErrorKind};
/// Maximum comment nesting depth: `parse_comment` returns an error once its `depth`
/// argument reaches this value, which bounds the recursion on nested `(`.
const MAX_RECURSION_DEPTH: usize = 128;
/// Outcome of a successful `parse` call: spans into the original input plus optional
/// cleaned copies of the local part and the domain.
#[derive(Debug, Clone)]
pub(crate) struct Parsed<'a> {
    /// The input string that every span in this struct indexes into.
    pub input: &'a str,
    /// Display name found in front of `<`, if any.
    pub display_name: Option<Span>,
    /// Local part exactly as it appears in `input`.
    pub local_part: Span,
    /// Domain exactly as it appears in `input`.
    pub domain: Span,
    /// Comment spans collected by the parser while skipping CFWS.
    #[allow(dead_code)]
    pub comments: Vec<Span>,
    /// Local part rebuilt without the CFWS found around its dots; `None` when no such
    /// rebuild was needed.
    pub local_part_clean: Option<String>,
    /// Domain rebuilt without the CFWS found around its dots; `None` when no such
    /// rebuild was needed.
    pub domain_clean: Option<String>,
}

impl<'a> Parsed<'a> {
    /// Returns the cleaned local part when one exists, otherwise the raw slice of `input`.
    pub fn local_part_str(&self) -> &str {
        match self.local_part_clean.as_deref() {
            Some(cleaned) => cleaned,
            None => self.local_part.as_str(self.input),
        }
    }

    /// Returns the cleaned domain when one exists, otherwise the raw slice of `input`.
    pub fn domain_str(&self) -> &str {
        match self.domain_clean.as_deref() {
            Some(cleaned) => cleaned,
            None => self.domain.as_str(self.input),
        }
    }
}
/// Half-open byte range `start..end` into the parser input.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct Span {
    /// Byte offset of the first byte covered by the span.
    pub start: usize,
    /// Byte offset one past the last byte covered by the span.
    pub end: usize,
}

impl Span {
    /// Builds a span covering `start..end`.
    fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Returns the slice of `input` covered by this span.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds for `input` or does not fall on character
    /// boundaries (regular string-slicing rules).
    pub fn as_str<'a>(&self, input: &'a str) -> &'a str {
        let Span { start, end } = *self;
        &input[start..end]
    }
}
struct Parser<'a> {
input: &'a str,
pos: usize,
comments: Vec<Span>,
}
impl<'a> Parser<'a> {
fn new(input: &'a str) -> Self {
Self {
input,
pos: 0,
comments: Vec::new(),
}
}
fn remaining(&self) -> &'a str {
&self.input[self.pos..]
}
fn peek(&self) -> Option<char> {
self.remaining().chars().next()
}
fn advance(&mut self) -> Option<char> {
let ch = self.peek()?;
self.pos += ch.len_utf8();
Some(ch)
}
fn eat(&mut self, expected: char) -> bool {
if self.peek() == Some(expected) {
self.pos += expected.len_utf8();
true
} else {
false
}
}
fn at_end(&self) -> bool {
self.pos >= self.input.len()
}
fn error(&self, kind: ErrorKind) -> Error {
Error::new(kind, self.pos)
}
fn save(&self) -> usize {
self.pos
}
fn restore(&mut self, pos: usize) {
self.pos = pos;
}
}
pub(crate) fn parse(
input: &str,
strictness: Strictness,
allow_display_name: bool,
allow_domain_literal: bool,
) -> Result<Parsed<'_>, Error> {
if input.is_empty() {
return Err(Error::new(ErrorKind::Empty, 0));
}
let mut parser = Parser::new(input);
let allow_obs = matches!(strictness, Strictness::Lax);
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
let display_name = if allow_display_name {
try_parse_display_name(&mut parser, allow_obs)
} else {
None
};
let is_angle = display_name.is_some() || parser.peek() == Some('<');
if is_angle {
parser.eat('<');
}
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
let (local_part, local_part_clean) = parse_local_part(&mut parser, strictness)?;
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
if !parser.eat('@') {
return Err(parser.error(ErrorKind::MissingAtSign));
}
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
let (domain, domain_clean) = parse_domain(&mut parser, strictness, allow_domain_literal)?;
if is_angle {
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
if !parser.eat('>') {
return Err(parser.error(ErrorKind::Unexpected {
ch: parser.peek().unwrap_or('\0'),
}));
}
}
if !matches!(strictness, Strictness::Strict) {
skip_cfws(&mut parser, 0);
}
if !parser.at_end() {
let ch = parser.peek().unwrap_or('\0');
return Err(parser.error(ErrorKind::Unexpected { ch }));
}
Ok(Parsed {
input,
display_name,
local_part,
domain,
comments: parser.comments,
local_part_clean,
domain_clean,
})
}
/// Attempts to parse an optional display name that precedes an angle-bracketed address.
///
/// Two forms are recognised:
///
/// * a quoted string followed by optional CFWS and `<`; the returned span covers the text
///   between the quotes;
/// * free text running up to the first `<`; the returned span has trailing whitespace
///   trimmed.
///
/// On success the cursor is left on the `<`. On failure `None` is returned and the parser
/// is put back exactly as it was on entry: both the position and the list of recorded
/// comments.
fn try_parse_display_name(parser: &mut Parser<'_>, allow_obs: bool) -> Option<Span> {
    let save = parser.save();
    // `skip_cfws` below may record comments; remember how many existed so that a failed
    // attempt does not leave them behind to be recorded a second time by the caller.
    let comments_len = parser.comments.len();

    if parser.peek() == Some('"') {
        let start = parser.pos;
        if parse_quoted_string(parser, allow_obs).is_ok() {
            let end = parser.pos;
            skip_cfws(parser, 0);
            if parser.peek() == Some('<') {
                // Drop the surrounding quotes (one ASCII byte each).
                return Some(Span::new(start + 1, end - 1));
            }
        }
        parser.restore(save);
        parser.comments.truncate(comments_len);
        return None;
    }

    let start = parser.pos;
    let mut found_content = false;
    loop {
        match parser.peek() {
            Some('<') if found_content => {
                let name = &parser.input[start..parser.pos];
                let trimmed_end = start + name.trim_end().len();
                return Some(Span::new(start, trimmed_end));
            }
            // A `<` with nothing in front of it opens the address itself. Treating it as
            // name content would let a later `<` (for example inside a quoted local part
            // such as `<"<"@example.com>`) terminate a bogus display name.
            Some('<') => {
                parser.restore(save);
                return None;
            }
            // `@` or `>` before any `<` means this is a bare address, not a name.
            Some('@' | '>') => {
                parser.restore(save);
                return None;
            }
            // Control characters other than TAB cannot appear in a display name.
            Some(ch) if ch < '\u{20}' && ch != '\t' => {
                parser.restore(save);
                return None;
            }
            Some(_) => {
                found_content = true;
                parser.advance();
            }
            None => {
                parser.restore(save);
                return None;
            }
        }
    }
}
/// Parses the local part starting at the cursor and returns its span together with an
/// optional cleaned copy.
///
/// A local part that starts with `"` is rejected under `Strictness::Strict`, parsed as a
/// single quoted string under the intermediate strictness, and handed to the dot-atom
/// parser (which accepts quoted words) under `Strictness::Lax`. Anything else goes to the
/// dot-atom parser.
///
/// # Errors
///
/// `ErrorKind::InvalidLocalPartChar { ch: '"' }` for a quoted local part in strict mode, or
/// whatever the quoted-string / dot-atom parser reports.
fn parse_local_part(
    parser: &mut Parser<'_>,
    strictness: Strictness,
) -> Result<(Span, Option<String>), Error> {
    let start = parser.pos;
    let is_strict = matches!(strictness, Strictness::Strict);
    let allow_obs = matches!(strictness, Strictness::Lax);
    let starts_quoted = parser.peek() == Some('"');

    if starts_quoted && is_strict {
        return Err(parser.error(ErrorKind::InvalidLocalPartChar { ch: '"' }));
    }

    let clean = if starts_quoted && !allow_obs {
        parse_quoted_string(parser, false)?;
        None
    } else {
        parse_dot_atom_local(parser, allow_obs)?
    };
    Ok((Span::new(start, parser.pos), clean))
}
fn parse_dot_atom_local(parser: &mut Parser<'_>, allow_obs: bool) -> Result<Option<String>, Error> {
if !allow_obs {
if !eat_atext_run(parser) {
return Err(match parser.peek() {
Some(ch) if ch != '@' => parser.error(ErrorKind::InvalidLocalPartChar { ch }),
_ => parser.error(ErrorKind::EmptyLocalPart),
});
}
loop {
let save = parser.save();
if !parser.eat('.') {
parser.restore(save);
break;
}
if !eat_atext_run(parser) {
return Err(parser.error(ErrorKind::EmptyLocalPart));
}
}
return Ok(None);
}
let mut clean: Option<String> = None;
let outer_start = parser.pos;
if !eat_atext_run(parser) && !try_quoted_string(parser, allow_obs) {
return Err(match parser.peek() {
Some(ch) if ch != '@' => parser.error(ErrorKind::InvalidLocalPartChar { ch }),
_ => parser.error(ErrorKind::EmptyLocalPart),
});
}
loop {
let last_clean_end = parser.pos;
let save = parser.save();
let comments_len = parser.comments.len();
skip_cfws(parser, 0);
let had_cfws_before_dot = parser.pos > last_clean_end;
if !parser.eat('.') {
parser.restore(save);
parser.comments.truncate(comments_len);
break;
}
if had_cfws_before_dot && clean.is_none() {
let mut s = String::with_capacity(last_clean_end - outer_start);
s.push_str(&parser.input[outer_start..last_clean_end]);
clean = Some(s);
}
skip_cfws(parser, 0);
if clean.is_none() && parser.pos > last_clean_end + 1 {
let mut s = String::with_capacity(last_clean_end - outer_start);
s.push_str(&parser.input[outer_start..last_clean_end]);
clean = Some(s);
}
let atom_start = parser.pos;
if !eat_atext_run(parser) && !try_quoted_string(parser, allow_obs) {
return Err(parser.error(ErrorKind::EmptyLocalPart));
}
if let Some(ref mut s) = clean {
s.push('.');
s.push_str(&parser.input[atom_start..parser.pos]);
}
}
Ok(clean)
}
fn eat_atext_run(parser: &mut Parser<'_>) -> bool {
let start = parser.pos;
while let Some(ch) = parser.peek() {
if is_atext(ch) {
parser.advance();
} else {
break;
}
}
parser.pos > start
}
fn parse_quoted_string(parser: &mut Parser<'_>, allow_obs: bool) -> Result<(), Error> {
if !parser.eat('"') {
return Err(parser.error(ErrorKind::UnterminatedQuotedString));
}
loop {
match parser.peek() {
Some('"') => {
parser.advance();
return Ok(());
}
Some('\\') => {
parser.advance();
match parser.advance() {
Some(ch) if is_quoted_pair_char(ch, allow_obs) => {}
_ => return Err(parser.error(ErrorKind::InvalidQuotedPair)),
}
}
Some(ch) if is_qtext(ch, allow_obs) => {
parser.advance();
}
Some(ch) if is_wsp(ch) || ch == '\r' => {
if !try_eat_fws(parser) {
return Err(parser.error(ErrorKind::InvalidLocalPartChar { ch: '\r' }));
}
}
None => return Err(parser.error(ErrorKind::UnterminatedQuotedString)),
Some(ch) => {
return Err(parser.error(ErrorKind::InvalidLocalPartChar { ch }));
}
}
}
}
/// Tries to consume a quoted string at the cursor.
///
/// Returns `true` and leaves the cursor after the closing quote on success. Returns
/// `false` with the cursor unchanged when the next character is not `"` or the quoted
/// string is malformed.
fn try_quoted_string(parser: &mut Parser<'_>, allow_obs: bool) -> bool {
    if parser.peek() != Some('"') {
        return false;
    }
    let checkpoint = parser.save();
    let parsed = parse_quoted_string(parser, allow_obs).is_ok();
    if !parsed {
        parser.restore(checkpoint);
    }
    parsed
}
/// Parses the domain starting at the cursor and returns its span together with an optional
/// cleaned copy.
///
/// A domain starting with `[` is parsed as a domain literal (no cleaned copy), provided
/// `allow_domain_literal` is set. Anything else is parsed as dot-separated labels, with
/// obsolete syntax enabled only for `Strictness::Lax`.
///
/// # Errors
///
/// `ErrorKind::InvalidDomainChar { ch: '[' }` when a literal is present but not allowed, or
/// whatever the literal / label parser reports.
fn parse_domain(
    parser: &mut Parser<'_>,
    strictness: Strictness,
    allow_domain_literal: bool,
) -> Result<(Span, Option<String>), Error> {
    let start = parser.pos;
    let clean = if parser.peek() == Some('[') {
        if !allow_domain_literal {
            return Err(parser.error(ErrorKind::InvalidDomainChar { ch: '[' }));
        }
        parse_domain_literal(parser)?;
        None
    } else {
        let allow_obs = matches!(strictness, Strictness::Lax);
        parse_dot_atom_domain(parser, allow_obs)?
    };
    Ok((Span::new(start, parser.pos), clean))
}
/// Parses dot-separated domain labels and returns a cleaned copy when one had to be built.
///
/// With `allow_obs == false` the labels must be joined by bare dots and the result is
/// always `Ok(None)`. With `allow_obs == true` CFWS may surround each dot; the first time
/// such CFWS is seen, a copy of the domain without it is started and eventually returned
/// as `Ok(Some(_))`. CFWS after the last label is not consumed.
///
/// # Errors
///
/// Propagates any error from `parse_domain_label`.
fn parse_dot_atom_domain(
    parser: &mut Parser<'_>,
    allow_obs: bool,
) -> Result<Option<String>, Error> {
    let outer_start = parser.pos;
    parse_domain_label(parser)?;

    if !allow_obs {
        while parser.eat('.') {
            parse_domain_label(parser)?;
        }
        return Ok(None);
    }

    let mut clean: Option<String> = None;
    loop {
        let label_end = parser.pos;
        let recorded_comments = parser.comments.len();

        skip_cfws(parser, 0);
        let cfws_before_dot = parser.pos > label_end;
        if !parser.eat('.') {
            // No further label: give back the CFWS (and its comments) to the caller.
            parser.restore(label_end);
            parser.comments.truncate(recorded_comments);
            return Ok(clean);
        }

        let after_dot = parser.pos;
        skip_cfws(parser, 0);
        let cfws_after_dot = parser.pos > after_dot;

        // First CFWS seen around a dot: seed the cleaned copy with everything parsed so far.
        if clean.is_none() && (cfws_before_dot || cfws_after_dot) {
            clean = Some(parser.input[outer_start..label_end].to_owned());
        }

        let label_start = parser.pos;
        parse_domain_label(parser)?;
        if let Some(buf) = clean.as_mut() {
            buf.push('.');
            buf.push_str(&parser.input[label_start..parser.pos]);
        }
    }
}
fn parse_domain_label(parser: &mut Parser<'_>) -> Result<(), Error> {
match parser.peek() {
Some(ch) if ch.is_ascii_alphanumeric() || is_utf8_non_ascii(ch) => {
parser.advance();
}
Some('-') => return Err(parser.error(ErrorKind::DomainLabelHyphen)),
_ => return Err(parser.error(ErrorKind::EmptyDomain)),
}
let mut last_was_hyphen = false;
while let Some(ch) = parser.peek() {
if ch.is_ascii_alphanumeric() || is_utf8_non_ascii(ch) {
last_was_hyphen = false;
parser.advance();
} else if ch == '-' {
last_was_hyphen = true;
parser.advance();
} else {
break;
}
}
if last_was_hyphen {
return Err(parser.error(ErrorKind::DomainLabelHyphen));
}
Ok(())
}
fn parse_domain_literal(parser: &mut Parser<'_>) -> Result<(), Error> {
let open = parser.pos;
if !parser.eat('[') {
return Err(parser.error(ErrorKind::UnterminatedDomainLiteral));
}
let content_start = parser.pos;
loop {
match parser.peek() {
Some(']') => {
let content = &parser.input[content_start..parser.pos];
parser.advance(); if is_address_literal(content) {
return Ok(());
}
return Err(Error::new(ErrorKind::InvalidAddressLiteral, open));
}
Some('\\') => {
parser.advance();
if parser.advance().is_none() {
return Err(parser.error(ErrorKind::UnterminatedDomainLiteral));
}
}
None => return Err(parser.error(ErrorKind::UnterminatedDomainLiteral)),
Some(_) => {
parser.advance();
}
}
}
}
/// Returns `true` when `content` is an IPv4 address, or an IPv6 address prefixed with the
/// tag `IPv6:` (matched case-insensitively).
fn is_address_literal(content: &str) -> bool {
    use core::net::{Ipv4Addr, Ipv6Addr};

    const IPV6_TAG: &str = "IPv6:";

    // `get` yields `None` when the input is shorter than the tag or the cut would split a
    // multi-byte character, so the slice below is always valid.
    match content.get(..IPV6_TAG.len()) {
        Some(prefix) if prefix.eq_ignore_ascii_case(IPV6_TAG) => {
            content[IPV6_TAG.len()..].parse::<Ipv6Addr>().is_ok()
        }
        _ => content.parse::<Ipv4Addr>().is_ok(),
    }
}
fn try_eat_fws(parser: &mut Parser<'_>) -> bool {
match parser.peek() {
Some(ch) if is_wsp(ch) => {
parser.advance();
while let Some(ch) = parser.peek() {
if is_wsp(ch) {
parser.advance();
} else {
break;
}
}
true
}
Some('\r') => {
let pos = parser.pos;
let bytes = parser.input.as_bytes();
if pos + 2 < bytes.len()
&& bytes[pos] == b'\r'
&& bytes[pos + 1] == b'\n'
&& (bytes[pos + 2] == b' ' || bytes[pos + 2] == b'\t')
{
parser.advance(); parser.advance(); while let Some(ch) = parser.peek() {
if is_wsp(ch) {
parser.advance();
} else {
break;
}
}
true
} else {
false
}
}
_ => false,
}
}
fn skip_cfws(parser: &mut Parser<'_>, depth: usize) {
loop {
loop {
match parser.peek() {
Some(ch) if is_wsp(ch) => {
parser.advance();
}
Some('\r') => {
let pos = parser.pos;
let bytes = parser.input.as_bytes();
if pos + 2 < bytes.len()
&& bytes[pos] == b'\r'
&& bytes[pos + 1] == b'\n'
&& (bytes[pos + 2] == b' ' || bytes[pos + 2] == b'\t')
{
parser.advance(); parser.advance(); while let Some(wch) = parser.peek() {
if is_wsp(wch) {
parser.advance();
} else {
break;
}
}
} else {
break;
}
}
Some('\n') => {
break;
}
_ => break,
}
}
if parser.peek() == Some('(') {
let comment_start = parser.pos;
match parse_comment(parser, depth) {
Ok(()) => {
parser.comments.push(Span::new(comment_start, parser.pos));
continue;
}
Err(_) => {
parser.pos = comment_start;
break;
}
}
}
break;
}
}
/// Parses one parenthesised comment, including any comments nested inside it.
///
/// `depth` is the current nesting level; nested comments are parsed recursively with
/// `depth + 1`.
///
/// # Errors
///
/// Always `UnterminatedComment`: when `depth` has reached `MAX_RECURSION_DEPTH`, when the
/// cursor is not on `(`, when the input ends before the closing `)` (including right after
/// a backslash), or when a `\r` / `\n` does not start valid folding whitespace.
fn parse_comment(parser: &mut Parser<'_>, depth: usize) -> Result<(), Error> {
    if depth >= MAX_RECURSION_DEPTH || !parser.eat('(') {
        return Err(parser.error(ErrorKind::UnterminatedComment));
    }
    loop {
        let Some(ch) = parser.peek() else {
            return Err(parser.error(ErrorKind::UnterminatedComment));
        };
        match ch {
            ')' => {
                parser.advance();
                return Ok(());
            }
            '(' => parse_comment(parser, depth + 1)?,
            '\\' => {
                // Quoted pair: skip the backslash and whatever character follows it.
                parser.advance();
                if parser.advance().is_none() {
                    return Err(parser.error(ErrorKind::UnterminatedComment));
                }
            }
            _ if is_ctext(ch) || is_wsp(ch) => {
                parser.advance();
            }
            '\r' | '\n' => {
                if !try_eat_fws(parser) {
                    return Err(parser.error(ErrorKind::UnterminatedComment));
                }
            }
            // Every remaining character is accepted as comment content as well.
            _ => {
                parser.advance();
            }
        }
    }
}
/// Returns `true` for characters allowed in an unquoted atom: ASCII letters and digits,
/// the punctuation listed below, and every non-ASCII character.
fn is_atext(ch: char) -> bool {
    match ch {
        'a'..='z' | 'A'..='Z' | '0'..='9' => true,
        '!' | '#' | '$' | '%' | '&' | '\'' | '*' | '+' | '-' | '/' | '=' | '?' | '^' | '_'
        | '`' | '{' | '|' | '}' | '~' => true,
        _ => !ch.is_ascii(),
    }
}
/// Returns `true` for characters that may appear unescaped inside a quoted string.
///
/// `"` and `\` are never allowed. Otherwise printable ASCII and non-ASCII characters are
/// accepted, plus the obsolete control characters when `allow_obs` is set.
fn is_qtext(ch: char, allow_obs: bool) -> bool {
    match ch {
        '"' | '\\' => false,
        _ => {
            is_printable_ascii(ch)
                || is_utf8_non_ascii(ch)
                || (allow_obs && is_obs_no_ws_ctl(ch))
        }
    }
}
/// Returns `true` for characters that may appear unescaped inside a comment: printable
/// ASCII other than `(`, `)` and `\`, and every non-ASCII character.
fn is_ctext(ch: char) -> bool {
    if matches!(ch, '(' | ')' | '\\') {
        return false;
    }
    // `is_ascii_graphic` covers exactly U+0021..=U+007E.
    ch.is_ascii_graphic() || !ch.is_ascii()
}
/// Returns `true` when `ch` may follow a backslash inside a quoted string.
///
/// Printable ASCII, space and tab are always allowed. With `allow_obs`, NUL, `\n`, `\r`
/// and the obsolete control characters are allowed as well.
fn is_quoted_pair_char(ch: char, allow_obs: bool) -> bool {
    let current = is_printable_ascii(ch) || is_wsp(ch);
    let obsolete = || matches!(ch, '\0' | '\n' | '\r') || is_obs_no_ws_ctl(ch);
    current || (allow_obs && obsolete())
}
/// Returns `true` for the non-whitespace control characters U+0001–U+0008, U+000B, U+000C,
/// U+000E–U+001F and U+007F.
fn is_obs_no_ws_ctl(ch: char) -> bool {
    matches!(
        ch,
        '\u{01}'..='\u{08}' | '\u{0b}' | '\u{0c}' | '\u{0e}'..='\u{1f}' | '\u{7f}'
    )
}
/// Returns `true` for the visible ASCII characters U+0021 (`!`) through U+007E (`~`);
/// space is excluded.
fn is_printable_ascii(ch: char) -> bool {
    ('\u{21}'..='\u{7e}').contains(&ch)
}
/// Returns `true` for every character outside the ASCII range (code point U+0080 and up).
fn is_utf8_non_ascii(ch: char) -> bool {
    !ch.is_ascii()
}
/// Returns `true` for a space or a horizontal tab.
fn is_wsp(ch: char) -> bool {
    matches!(ch, ' ' | '\t')
}
#[cfg(test)]
mod tests;