use crate::text::ascii;
use crate::text::encoding::{encoded_word_plain, Context};
use crate::text::quoted::quoted_pair;
use crate::text::utf8::{is_nonascii_or, space0_str, space1_str, take_utf8_while1};
#[cfg(feature = "tracing-recover")]
use crate::utils::bytes_to_trace_string;
use eml_codec_derives::instrument_input;
use nom::{
branch::alt,
bytes::complete::tag,
combinator::{map, opt, recognize},
multi::many1,
sequence::{pair, tuple},
IResult, Parser,
};
use std::borrow::Cow;
#[cfg(feature = "tracing-recover")]
use tracing::warn;
/// Parses a single line ending, accepting a few malformed variants.
///
/// `ascii::CRLF` is the regular form and is tried first. Otherwise a lone
/// `ascii::LF`, an `ascii::CRCRLF` sequence or a lone `ascii::CR` is accepted on
/// a best-effort basis; when the `tracing-recover` feature is enabled a warning
/// is logged for those. On success the matched bytes are returned as a `&str`.
pub fn obs_crlf(input: &[u8]) -> IResult<&[u8], &str> {
    let (rest, ending) = alt((
        tag(ascii::CRLF),
        // Best-effort endings; `ascii::CRCRLF` is tried before the lone `ascii::CR`.
        map(
            alt((tag(&[ascii::LF]), tag(ascii::CRCRLF), tag(&[ascii::CR]))),
            |ending: &[u8]| {
                // SAFETY: same argument as for the conversion at the end of
                // this function; `ending` comes from one of the tags above.
                #[cfg(feature = "tracing-recover")]
                warn!(input = %unsafe { str::from_utf8_unchecked(ending) },
                    "best-effort line ending");
                ending
            },
        ),
    ))(input)?;

    // SAFETY: `ending` is exactly the bytes matched by one of the `ascii`
    // line-ending tags above; this relies on those constants being plain ASCII
    // (and therefore valid UTF-8).
    Ok((rest, unsafe { str::from_utf8_unchecked(ending) }))
}
pub fn foldable_line(full_line: bool) -> impl Fn(&[u8]) -> IResult<&[u8], &[u8]> {
move |input| {
use memchr::memchr2_iter;
let mut it = memchr2_iter(b'\r', b'\n', input);
while let Some(i) = it.next() {
if i == 0 && full_line {
break; }
match (input[i], input.get(i + 1), input.get(i + 2)) {
(b'\r', Some(b'\n'), Some(b' ' | b'\t')) => {
let _ = it.next();
continue;
}
(b'\r', Some(b'\n'), _) => return Ok((&input[i + 2..], &input[0..i])),
(_b , Some(b' ' | b'\t'), _) => {
#[cfg(feature = "tracing-recover")]
warn!(input = %bytes_to_trace_string(&[_b]), "foldable: best-effort line ending");
continue;
}
_ => return Ok((&input[i + 1..], &input[0..i])),
}
}
Err(nom::Err::Error(nom::error::Error::new(
input,
nom::error::ErrorKind::Fail,
)))
}
}
#[instrument_input("tracing-recover")]
pub fn fws(input: &[u8]) -> IResult<&[u8], Vec<&str>> {
alt((
many1(fold_marker).map(|v| v.into_iter().flatten().collect()),
space1_str.map(|wsp| vec![wsp]),
))(input)
}
/// Parses a single fold: `space0_str`, then a line ending (`obs_crlf`), then
/// `space1_str`.
///
/// The line ending itself is dropped. The result holds the whitespace found
/// before the line ending (only when it is not empty) followed by the
/// whitespace found after it.
#[instrument_input("tracing-recover")]
fn fold_marker(input: &[u8]) -> IResult<&[u8], Vec<&str>> {
    let (rest, leading) = space0_str(input)?;
    let (rest, _line_ending) = obs_crlf(rest)?;
    let (rest, trailing) = space1_str(rest)?;

    let runs = if leading.is_empty() {
        vec![trailing]
    } else {
        vec![leading, trailing]
    };
    Ok((rest, runs))
}
#[instrument_input("tracing-recover")]
pub fn cfws(input: &[u8]) -> IResult<&[u8], ()> {
alt((comments, fws.map(|_| ())))(input)
}
#[instrument_input("tracing-recover")]
pub fn comments(input: &[u8]) -> IResult<&[u8], ()> {
let (input, _) = many1(tuple((opt(fws), comment)))(input)?;
let (input, _) = opt(fws)(input)?;
Ok((input, ()))
}
/// Skips a single parenthesized comment.
///
/// The opening `(` is matched here; everything up to and including the
/// matching closing parenthesis is delegated to `comment_body`.
#[instrument_input("tracing-recover")]
pub fn comment(input: &[u8]) -> IResult<&[u8], ()> {
    let (after_paren, _) = tag("(")(input)?;
    comment_body(after_paren)
}
#[instrument_input("tracing-recover")]
pub fn comment_body(input: &[u8]) -> IResult<&[u8], ()> {
let mut nesting = 1;
let mut cursor: &[u8] = input;
loop {
if let Ok((input, _)) = pair(opt(fws), tag(")"))(cursor) {
nesting -= 1;
if nesting == 0 {
return Ok((input, ()));
}
cursor = input;
continue;
}
let (input, _) = opt(fws)(cursor)?;
let (input, enter_subcomment) = alt((
tag("(").map(|_| true),
alt((
recognize(quoted_pair),
recognize(encoded_word_plain(Context::Comment)),
recognize(ctext),
))
.map(|_| false),
))(input)?;
if enter_subcomment {
nesting += 1;
}
cursor = input
}
}
/// Parses a run of comment text: characters accepted by `is_ctext`, as
/// collected by `take_utf8_while1`.
#[instrument_input("tracing-recover")]
pub fn ctext(input: &[u8]) -> IResult<&[u8], Cow<'_, str>> {
    take_utf8_while1(is_ctext).parse(input)
}
/// Tells whether `c` may appear as plain text inside a comment.
///
/// The decision is delegated to `is_nonascii_or`, given a byte predicate that
/// accepts the restricted `ctext` bytes (`is_restr_ctext`) and the obsolete
/// control characters (`is_obs_no_ws_ctl`).
pub fn is_ctext(c: char) -> bool {
    let ascii_ctext = |byte| is_restr_ctext(byte) || is_obs_no_ws_ctl(byte);
    is_nonascii_or(ascii_ctext)(c)
}
/// Tells whether `c` falls in one of the three byte ranges accepted as
/// restricted comment text:
///
/// * `ascii::EXCLAMATION..=ascii::SQUOTE`
/// * `ascii::ASTERISK..=ascii::LEFT_BRACKET`
/// * `ascii::RIGHT_BRACKET..=ascii::TILDE`
pub fn is_restr_ctext(c: u8) -> bool {
    let allowed = [
        ascii::EXCLAMATION..=ascii::SQUOTE,
        ascii::ASTERISK..=ascii::LEFT_BRACKET,
        ascii::RIGHT_BRACKET..=ascii::TILDE,
    ];
    allowed.iter().any(|range| range.contains(&c))
}
/// Tells whether `c` is one of the obsolete, non-whitespace control bytes:
/// `ascii::SOH..=ascii::BS`, `ascii::VT`, `ascii::FF`, `ascii::SO..=ascii::US`
/// or `ascii::DEL`.
pub fn is_obs_no_ws_ctl(c: u8) -> bool {
    let in_lower_block = (ascii::SOH..=ascii::BS).contains(&c);
    let in_upper_block = (ascii::SO..=ascii::US).contains(&c);
    let is_isolated = [ascii::VT, ascii::FF, ascii::DEL].contains(&c);

    in_lower_block || is_isolated || in_upper_block
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Each accepted line ending is consumed and returned verbatim.
    #[test]
    fn test_obs_crlf() {
        let cases = [
            (&b"\rworld"[..], "\r"),
            (&b"\r\nworld"[..], "\r\n"),
            (&b"\nworld"[..], "\n"),
        ];
        for (raw, ending) in cases.iter().copied() {
            assert_eq!(obs_crlf(raw), Ok((&b"world"[..], ending)), "input: {:?}", raw);
        }
    }

    /// Folding whitespace yields its whitespace runs; a line ending that is not
    /// followed by whitespace is not a fold.
    #[test]
    fn test_fws() {
        let accepted: Vec<(&[u8], Vec<&str>)> = vec![
            (&b"\r\n world"[..], vec![" "]),
            (&b" \r\n \r\n world"[..], vec![" ", " ", " "]),
            (&b" world"[..], vec![" "]),
            (&b" \t \r\n world"[..], vec![" \t ", " "]),
        ];
        for (raw, runs) in accepted {
            assert_eq!(fws(raw), Ok((&b"world"[..], runs)), "input: {:?}", raw);
        }

        assert!(fws(b"\r\nFrom: test").is_err());
    }

    /// Comments (including nested ones) and the whitespace around them are skipped.
    #[test]
    fn test_cfws() {
        let cases: Vec<(&[u8], &[u8])> = vec![
            (
                &b"(A nice \\) chap) <pete(his account)@silly.test(his host)>"[..],
                &b"<pete(his account)@silly.test(his host)>"[..],
            ),
            (&b"(Chris's host.)public.example>,"[..], &b"public.example>,"[..]),
            (&b"(double (comment) is fun) wouch"[..], &b"wouch"[..]),
            (&b"(using (256/256 bits) (2048 bits))"[..], &b""[..]),
        ];
        for (raw, rest) in cases {
            assert_eq!(cfws(raw), Ok((rest, ())), "input: {:?}", raw);
        }
    }

    /// An encoded word is accepted inside a comment.
    #[test]
    fn test_cfws_encoded_word() {
        let parsed = cfws(b"(=?US-ASCII?Q?Keith_Moore?=)");
        assert_eq!(parsed, Ok((&b""[..], ())));
    }

    /// `foldable_line` keeps folds inside the returned line and, in
    /// `full_line` mode, rejects input that starts with a line break.
    #[test]
    fn test_foldable_line() {
        // (full_line, input, expected line); nothing is left over in any case.
        let accepted: Vec<(bool, &[u8], &[u8])> = vec![
            (
                true,
                &b"abc\r\n def\r\n ghi\r\n"[..],
                &b"abc\r\n def\r\n ghi"[..],
            ),
            (false, &b"\r\n abc\r\n"[..], &b"\r\n abc"[..]),
            (
                true,
                &b"xx\r\n \r\n abc\r\n \r\n def\r\n"[..],
                &b"xx\r\n \r\n abc\r\n \r\n def"[..],
            ),
            (false, &b"\r\n"[..], &b""[..]),
            (false, &b"\n"[..], &b""[..]),
        ];
        for (full_line, raw, line) in accepted {
            assert_eq!(
                foldable_line(full_line)(raw),
                Ok((&b""[..], line)),
                "input: {:?}",
                raw
            );
        }

        // In `full_line` mode a leading line break is an error.
        let rejected: [&[u8]; 4] = [
            &b"\r\n abc\r\n"[..],
            &b"\n foo\r\n"[..],
            &b"\r\n"[..],
            &b"\n"[..],
        ];
        for raw in rejected.iter().copied() {
            assert!(foldable_line(true)(raw).is_err(), "input: {:?}", raw);
        }
    }
}