#[cfg(feature = "arbitrary")]
use arbitrary::Arbitrary;
use bounded_static::ToStatic;
use nom::{
branch::alt,
bytes::complete::{tag, take, take_while1},
combinator::{map, opt, verify},
multi::many0,
sequence::{delimited, pair, preceded},
IResult,
};
use std::borrow::Cow;
use std::fmt;
#[cfg(feature = "arbitrary")]
use std::ops::ControlFlow;
#[cfg(feature = "tracing")]
use tracing::warn;
use crate::i18n::ContainsUtf8;
use crate::print::{Formatter, Print, ToStringFromPrint};
use crate::text::ascii;
use crate::text::utf8::{is_nonascii_or, take_utf8_while1};
use crate::text::whitespace::{cfws, fws, is_obs_no_ws_ctl};
use crate::text::words::is_vchar;
#[cfg(feature = "tracing-recover")]
use crate::utils::bytes_to_trace_string;
#[cfg(feature = "arbitrary")]
use crate::{arbitrary_utils::arbitrary_string_where, fuzz_eq::FuzzEq};
use eml_codec_derives::instrument_input;
/// A quoted string, stored as an ordered list of text fragments.
///
/// The logical content is the concatenation of all fragments, in order; see
/// [`QuotedString::chars`]. Each fragment is a `Cow`, so it may either borrow
/// from data living for `'a` or own its text.
#[derive(Clone, ContainsUtf8, PartialEq, Default, ToStatic, ToStringFromPrint)]
pub struct QuotedString<'a>(pub Vec<Cow<'a, str>>);
impl<'a> fmt::Debug for QuotedString<'a> {
    /// Formats the value as a `QuotedString(..)` tuple wrapping the list of
    /// fragments.
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The fragment vector is `Debug` on its own and renders exactly like a
        // vector of references to its elements, so it is passed directly
        // instead of being collected into a temporary `Vec` first.
        fmt.debug_tuple("QuotedString").field(&self.0).finish()
    }
}
impl<'a> QuotedString<'a> {
    /// Appends a borrowed fragment at the end of the string.
    pub fn push_str(&mut self, e: &'a str) {
        self.push(Cow::Borrowed(e));
    }

    /// Appends a fragment (borrowed or owned) at the end of the string.
    pub fn push(&mut self, e: Cow<'a, str>) {
        self.0.push(e);
    }

    /// Returns an iterator over the characters of every fragment, in order,
    /// starting at the first fragment.
    pub fn chars<'b>(&'b self) -> QuotedStringChars<'a, 'b> {
        let inner = QuotedStringCharsInner::NextFragment(0);
        QuotedStringChars { q: self, inner }
    }
}
impl<'a> Print for QuotedString<'a> {
    /// Prints the string in its quoted form by handing its characters to
    /// [`print_quoted`].
    fn print(&self, fmt: &mut impl Formatter) {
        let content = self.chars();
        print_quoted(fmt, content);
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> Arbitrary<'a> for QuotedString<'a> {
    /// Builds a quoted string out of owned fragments produced by
    /// `arbitrary_string_where`, restricted to characters accepted by
    /// `is_vchar` or listed in `ascii::WS_CHAR`.
    ///
    /// The fragment loop is capped at 10 iterations.
    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
        let mut fragments = Vec::new();
        u.arbitrary_loop(None, Some(10), |u| {
            // `is_strict_quoted_pair` is exactly the "vchar or whitespace"
            // predicate needed here.
            let fragment = arbitrary_string_where(u, is_strict_quoted_pair)?;
            fragments.push(Cow::Owned(fragment));
            Ok(ControlFlow::Continue(()))
        })?;
        Ok(QuotedString(fragments))
    }
}
#[cfg(feature = "arbitrary")]
impl<'a> FuzzEq for QuotedString<'a> {
    /// Two quoted strings are fuzz-equal when they yield the same sequence of
    /// characters, however that sequence is split into fragments.
    fn fuzz_eq(&self, other: &Self) -> bool {
        // Compare the two character streams directly rather than collecting
        // each of them into a temporary `String` first.
        self.chars().eq(other.chars())
    }
}
/// Iterator over the characters of a [`QuotedString`], as returned by
/// [`QuotedString::chars`].
#[derive(Clone)]
pub struct QuotedStringChars<'a, 'b> {
// The quoted string whose fragments are being walked.
q: &'b QuotedString<'a>,
// Current position: which fragment is active and how far into it we are.
inner: QuotedStringCharsInner<'b>,
}
/// Iteration state of [`QuotedStringChars`].
#[derive(Clone)]
enum QuotedStringCharsInner<'a> {
/// The fragment at the given index is the next one to open (it may not
/// exist, in which case the iteration is over).
NextFragment(usize),
/// The fragment at the given index is open; the `Chars` holds what is left
/// of it.
FragmentChars(usize, std::str::Chars<'a>),
}
impl<'a, 'b> Iterator for QuotedStringChars<'a, 'b> {
    type Item = char;

    /// Yields the next character, moving on to the following fragments once
    /// the current one is exhausted. Returns `None` after the last fragment.
    ///
    /// This is a loop rather than a recursive call so that a long run of empty
    /// fragments cannot grow the call stack.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            match &mut self.inner {
                QuotedStringCharsInner::NextFragment(idx) => {
                    let idx = *idx;
                    // Running past the last fragment ends the iteration; the
                    // state is left untouched so later calls keep returning
                    // `None`.
                    let frag = self.q.0.get(idx)?;
                    self.inner = QuotedStringCharsInner::FragmentChars(idx, frag.chars());
                }
                QuotedStringCharsInner::FragmentChars(idx, it) => {
                    if let Some(c) = it.next() {
                        return Some(c);
                    }
                    // Current fragment is exhausted: try the next one.
                    let next_idx = *idx + 1;
                    self.inner = QuotedStringCharsInner::NextFragment(next_idx);
                }
            }
        }
    }
}
/// Parses a quoted pair: a backslash followed by exactly one ASCII byte.
///
/// On success both bytes are consumed. The output is `Some` with the escaped
/// character when it satisfies `is_strict_quoted_pair`, and `None` otherwise
/// (the pair is recognised but contributes no text).
///
/// # Errors
///
/// Fails if the input does not start with a backslash, if nothing follows the
/// backslash, or if the following byte is not ASCII.
pub fn quoted_pair(input: &[u8]) -> IResult<&[u8], Option<&str>> {
    preceded(
        tag(&[ascii::BACKSLASH]),
        map(
            verify(take(1usize), |b: &[u8]| b[0].is_ascii()),
            |s: &[u8]| {
                let b = s[0];
                if is_strict_quoted_pair(b.into()) {
                    // `verify` above only lets a single ASCII byte through,
                    // which is always valid UTF-8, so this conversion cannot
                    // fail and no `unsafe` is needed.
                    std::str::from_utf8(s).ok()
                } else {
                    let is_known_obsolete = b == ascii::NULL
                        || is_obs_no_ws_ctl(b)
                        || b == ascii::LF
                        || b == ascii::CR;
                    // Only report bytes that are not one of the tolerated
                    // obsolete escapes.
                    if !is_known_obsolete {
                        #[cfg(feature = "tracing-recover")]
                        warn!(byte = %bytes_to_trace_string(&[b]),
                            "invalid quoted pair");
                    }
                    None
                }
            },
        ),
    )(input)
}
/// Tells whether `c` is kept as text when it appears escaped in a quoted
/// pair: it must be accepted by `is_vchar` or be listed in `ascii::WS_CHAR`.
fn is_strict_quoted_pair(c: char) -> bool {
    if is_vchar(c) {
        return true;
    }
    ascii::WS_CHAR.contains(&c)
}
/// Tells whether `c` may appear unescaped inside a quoted string.
///
/// The ASCII part of the test accepts `ascii::EXCLAMATION` and the two runs
/// `ascii::NUM..=ascii::LEFT_BRACKET` and `ascii::RIGHT_BRACKET..=ascii::TILDE`;
/// it is wrapped in `is_nonascii_or`, which (going by its name — confirm
/// against its definition) additionally accepts non-ASCII characters.
fn is_strict_qtext(c: char) -> bool {
    is_nonascii_or(|c| {
        let in_low_run = (ascii::NUM..=ascii::LEFT_BRACKET).contains(&c);
        let in_high_run = (ascii::RIGHT_BRACKET..=ascii::TILDE).contains(&c);
        c == ascii::EXCLAMATION || in_low_run || in_high_run
    })(c)
}
/// Tells whether byte `c` is obsolete quoted-string text; this is exactly the
/// set of bytes accepted by `is_obs_no_ws_ctl`.
fn is_obs_qtext(c: u8) -> bool {
is_obs_no_ws_ctl(c)
}
/// Parses one piece of quoted-string content.
///
/// Alternatives are tried in this order:
/// 1. a non-empty run of characters accepted by `is_strict_qtext`, returned
///    as `Some`;
/// 2. a non-empty run of bytes accepted by `is_obs_qtext`, consumed but
///    returned as `None`;
/// 3. a quoted pair, returned as whatever [`quoted_pair`] yields.
#[instrument_input("tracing")]
fn qcontent(input: &[u8]) -> IResult<&[u8], Option<Cow<'_, str>>> {
    let strict_text = map(take_utf8_while1(is_strict_qtext), Some);
    let obsolete_text = map(take_while1(is_obs_qtext), |_| None);
    let escaped = map(quoted_pair, |qp| qp.map(Cow::Borrowed));
    alt((strict_text, obsolete_text, escaped))(input)
}
/// Parses a quoted string optionally surrounded by `cfws` on either side.
///
/// The surrounding `cfws` is consumed and discarded; the quoted part itself
/// is parsed by [`quoted_string_plain`].
#[instrument_input("tracing")]
pub fn quoted_string(input: &[u8]) -> IResult<&[u8], QuotedString<'_>> {
    let mut parser = delimited(opt(cfws), quoted_string_plain, opt(cfws));
    parser(input)
}
/// Parses a quoted string that starts and ends exactly on its double quotes
/// (no surrounding `cfws` is accepted here).
///
/// Between the quotes, any number of content pieces is read, each optionally
/// preceded by `fws`; one more optional `fws` may precede the closing quote.
/// The resulting [`QuotedString`] receives, in input order, the items yielded
/// by each `fws` and every content piece for which `qcontent` returned
/// `Some`; pieces that came back as `None` are dropped.
///
/// # Errors
///
/// Fails if the opening or the closing double quote is missing.
pub fn quoted_string_plain(input: &[u8]) -> IResult<&[u8], QuotedString<'_>> {
    let (rest, _) = tag("\"")(input)?;
    let (rest, pieces) = many0(pair(opt(fws), qcontent))(rest)?;
    let (rest, trailing_wsp) = opt(fws)(rest)?;
    let (rest, _) = tag("\"")(rest)?;

    let mut qstring = QuotedString::default();
    for (leading_wsp, fragment) in pieces {
        // Whitespace that preceded this piece comes first.
        for wsp in leading_wsp.into_iter().flatten() {
            qstring.push_str(wsp);
        }
        if let Some(fragment) = fragment {
            qstring.push(fragment);
        }
    }
    // Whitespace sitting right before the closing quote.
    for wsp in trailing_wsp.into_iter().flatten() {
        qstring.push_str(wsp);
    }
    Ok((rest, qstring))
}
/// Writes `data` to `fmt` as a quoted string, enclosed in double quotes.
///
/// Each character is handled by the first rule that matches:
/// - accepted by `is_strict_qtext`: written as is;
/// - listed in `ascii::WS_CHAR`: written through `write_fws_bytes`;
/// - accepted by `is_vchar`: written preceded by a backslash;
/// - anything else: silently left out of the output.
pub fn print_quoted<I>(fmt: &mut impl Formatter, data: I)
where
    I: IntoIterator<Item = char>,
{
    const QUOTE: &[u8] = b"\"";
    const ESCAPE: &[u8] = b"\\";

    // Scratch space for the UTF-8 encoding of one character.
    let mut utf8 = [0u8; 4];

    fmt.write_bytes(QUOTE);
    for c in data {
        let encoded = c.encode_utf8(&mut utf8).as_bytes();
        if is_strict_qtext(c) {
            fmt.write_bytes(encoded);
        } else if ascii::WS_CHAR.contains(&c) {
            fmt.write_fws_bytes(encoded);
        } else if is_vchar(c) {
            fmt.write_bytes(ESCAPE);
            fmt.write_bytes(encoded);
        }
        // Characters matching none of the rules above are not written.
    }
    fmt.write_bytes(QUOTE);
}
// Unit tests for the quoted-string parser and printer.
#[cfg(test)]
mod tests {
use super::*;
use crate::print::tests::print_to_vec_with;
// Parsing: surrounding whitespace, an escaped quote, folded whitespace and a
// lone tab inside the quotes.
#[test]
fn test_quoted_string_parser() {
// Leading/trailing spaces are skipped and `\"` contributes a bare `"`
// fragment.
assert_eq!(
quoted_string(b" \"hello\\\"world\" ").unwrap().1,
QuotedString(vec!["hello".into(), "\"".into(), "world".into(),])
);
// A CRLF followed by a space between two words comes out as a single
// " " fragment.
assert_eq!(
quoted_string(b"\"hello\r\n world\""),
Ok((
&b""[..],
QuotedString(vec!["hello".into(), " ".into(), "world".into(),])
)),
);
// Whitespace just before the closing quote is kept.
assert_eq!(
quoted_string(b"\"\t\""),
Ok((&b""[..], QuotedString(vec!["\t".into(),]))),
);
}
// Printing: a double quote in the content is escaped with a backslash.
#[test]
fn test_quoted_string_printer() {
let out = print_to_vec_with(|f| {
print_quoted(
f,
QuotedString(vec!["hello".into(), "\"".into(), " world".into()]).chars(),
);
});
assert_eq!(out, b"\"hello\\\" world\"");
}
// `to_string()` joins the fragments and wraps them in double quotes.
#[test]
fn test_quoted_string_object() {
assert_eq!(
QuotedString(vec!["hello".into(), " ".into(), "world".into(),]).to_string(),
"\"hello world\"".to_string(),
);
}
}