use charset::Charset;
use nom::{
IResult, Parser,
branch::alt,
bytes::complete::{tag, take_until, take_until1},
combinator::{map, map_opt, opt, success},
error::{ContextError, ParseError},
multi::fold_many1,
sequence::{delimited, pair, separated_pair},
};
use crate::compiler::prelude::*;
use crate::value::Value;
/// Marker type that implements `Function` for `decode_mime_q`, the function
/// that replaces RFC 2047 encoded-words in a string with their decoded text.
#[derive(Clone, Copy, Debug)]
pub struct DecodeMimeQ;
impl Function for DecodeMimeQ {
    /// Returns the function's name, `decode_mime_q`.
    fn identifier(&self) -> &'static str {
        "decode_mime_q"
    }

    /// Returns the one-sentence description of what the function does.
    fn usage(&self) -> &'static str {
        "Replaces q-encoded or base64-encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) substrings in the `value` with their original string."
    }

    /// The function is listed under the `Codec` category.
    fn category(&self) -> &'static str {
        Category::Codec.as_ref()
    }

    /// Lists the documented reasons the function can fail at runtime.
    fn internal_failure_reasons(&self) -> &'static [&'static str] {
        const REASONS: &[&str] = &[
            "`value` has invalid encoded [encoded-word](https://datatracker.ietf.org/doc/html/rfc2047#section-2) string.",
        ];
        REASONS
    }

    /// The function returns bytes.
    fn return_kind(&self) -> u16 {
        kind::BYTES
    }

    /// Declares the single required `value` parameter, which must be bytes.
    fn parameters(&self) -> &'static [Parameter] {
        const ACCEPTED: &[Parameter] = &[Parameter::required(
            "value",
            kind::BYTES,
            "The string with [encoded-words](https://datatracker.ietf.org/doc/html/rfc2047#section-2) to decode.",
        )];
        ACCEPTED
    }

    /// Builds the runtime expression from the required `value` argument.
    ///
    /// Neither the type state nor the compile context is consulted.
    fn compile(
        &self,
        _state: &state::TypeState,
        _ctx: &mut FunctionCompileContext,
        arguments: ArgumentList,
    ) -> Compiled {
        Ok(DecodeMimeQFn {
            value: arguments.required("value"),
        }
        .as_expr())
    }

    /// Usage examples: a single encoded-word, encoded-words embedded in
    /// surrounding text, and a word written without charset or delimiters.
    fn examples(&self) -> &'static [Example] {
        &[
            example! {
                title: "Decode single encoded-word",
                source: r#"decode_mime_q!("=?utf-8?b?SGVsbG8sIFdvcmxkIQ==?=")"#,
                result: Ok("Hello, World!"),
            },
            example! {
                title: "Embedded",
                source: r#"decode_mime_q!("From: =?utf-8?b?SGVsbG8sIFdvcmxkIQ==?= <=?utf-8?q?hello=5Fworld=40example=2ecom?=>")"#,
                result: Ok("From: Hello, World! <hello_world@example.com>"),
            },
            example! {
                title: "Without charset",
                source: r#"decode_mime_q!("?b?SGVsbG8sIFdvcmxkIQ==")"#,
                result: Ok("Hello, World!"),
            },
        ]
    }
}
/// Runtime expression produced by `DecodeMimeQ::compile`; resolving it
/// evaluates `value` and decodes the encoded-words found in the result.
#[derive(Clone, Debug)]
struct DecodeMimeQFn {
/// Expression bound to the function's required `value` argument.
value: Box<dyn Expression>,
}
impl FunctionExpression for DecodeMimeQFn {
    /// Evaluates the wrapped `value` expression and decodes its result.
    ///
    /// An error from evaluating `value` is returned as-is, before any
    /// decoding is attempted.
    fn resolve(&self, ctx: &mut Context) -> Resolved {
        decode_mime_q(&self.value.resolve(ctx)?)
    }

    /// The result is always bytes, and the call can fail.
    fn type_def(&self, _state: &state::TypeState) -> TypeDef {
        TypeDef::bytes().fallible()
    }
}
fn decode_mime_q(bytes: &Value) -> Resolved {
let input = bytes.try_bytes_utf8_lossy()?;
let input: &str = &input;
let (remaining, decoded) = alt((
fold_many1(
parse_delimited_q,
|| ExpressionResult::<String>::Ok(String::new()),
|result, (head, word)| {
let mut result = result?;
result.push_str(head);
result.push_str(&word.decode_word()?);
Ok(result)
},
),
alt((
map_opt(parse_internal_q, |word| word.decode_word().map(Ok).ok()),
success(Ok(String::new())),
)),
))
.parse(input)
.map_err(|e| match e {
nom::Err::Error(e) | nom::Err::Failure(e) => {
nom_language::error::convert_error(input, e)
}
nom::Err::Incomplete(_) => e.to_string(),
})?;
let mut decoded = decoded?;
decoded.push_str(remaining);
Ok(decoded.into())
}
fn parse_delimited_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, (&'a str, EncodedWord<'a>), E> {
pair(
take_until("=?"),
delimited(tag("=?"), parse_internal_q, tag("?=")),
)
.parse(input)
}
fn parse_internal_q<'a, E: ParseError<&'a str> + ContextError<&'a str>>(
input: &'a str,
) -> IResult<&'a str, EncodedWord<'a>, E> {
map(
separated_pair(
opt(take_until1("?")),
tag("?"),
separated_pair(
take_until("?"),
tag("?"),
alt((take_until("?="), |input| Ok(("", input)))),
),
),
|(charset, (encoding, input))| EncodedWord {
charset,
encoding,
input,
},
)
.parse(input)
}
/// The three parts of an encoded-word (`charset?encoding?payload`), borrowed
/// from the text they were parsed out of. Nothing is validated until
/// `decode_word` is called.
struct EncodedWord<'a> {
/// Charset label, or `None` when it was omitted (decoding then uses `utf-8`).
charset: Option<&'a str>,
/// Encoding marker; `decode_word` accepts `B`/`b` (base64) and `Q`/`q`.
encoding: &'a str,
/// The still-encoded payload.
input: &'a str,
}
impl EncodedWord<'_> {
fn decode_word(&self) -> Result<String, ExpressionError> {
let decoded = match self.encoding {
"B" | "b" => base64_simd::STANDARD
.decode_to_vec(self.input.as_bytes())
.map_err(|_| "Unable to decode base64 value")?,
"Q" | "q" => {
let to_decode = self.input.replace('_', " ");
let trimmed = to_decode.trim_end();
let mut result =
quoted_printable::decode(trimmed, quoted_printable::ParseMode::Robust);
if let Ok(ref mut d) = result
&& to_decode.len() != trimmed.len()
{
d.extend_from_slice(&to_decode.as_bytes()[trimmed.len()..]);
}
result.map_err(|_| "Unable to decode quoted_printable value")?
}
_ => return Err(format!("Invalid encoding: {:?}", self.encoding).into()),
};
let charset = self.charset.unwrap_or("utf-8");
let charset = Charset::for_label_no_replacement(charset.as_bytes())
.ok_or_else(|| format!("Unable to decode {charset:?} value"))?;
let (cow, _) = charset.decode_without_bom_handling(&decoded);
Ok(cow.into_owned())
}
}
// Unit tests: two direct checks of `parse_internal_q`, followed by
// table-driven end-to-end cases for the `decode_mime_q` function.
#[cfg(test)]
mod test {
use nom_language::error::VerboseError;
use crate::value;
use super::*;
// A word body with an explicit charset is split into its three parts.
#[test]
fn internal() {
let (remaining, word) =
parse_internal_q::<VerboseError<&str>>("utf-8?Q?hello=5Fworld=40example=2ecom")
.unwrap();
assert_eq!(remaining, "");
assert_eq!(word.charset, Some("utf-8"));
assert_eq!(word.encoding, "Q");
assert_eq!(word.input, "hello=5Fworld=40example=2ecom");
}
// A word body that starts with `?` parses with no charset.
#[test]
fn internal_no_charset() {
let (remaining, word) =
parse_internal_q::<VerboseError<&str>>("?Q?hello=5Fworld=40example=2ecom").unwrap();
assert_eq!(remaining, "");
assert_eq!(word.charset, None);
assert_eq!(word.encoding, "Q");
assert_eq!(word.input, "hello=5Fworld=40example=2ecom");
}
test_function![
decode_mime_q=> DecodeMimeQ;
// Q-encoded bytes are interpreted in the declared (non-UTF-8) charset.
non_utf8_charset {
args: func_args![value: value!("Subject: =?iso-8859-1?Q?=A1Hola,_se=F1or!?=")],
want: Ok(value!("Subject: ¡Hola, señor!")),
tdef: TypeDef::bytes().fallible(),
}
// An empty encoding field inside a delimited word is an error.
missing_encoding{
args: func_args![value: value!("Subject: =?iso-8859-1??=A1Hola,_se=F1or!?=")],
want: Err("Invalid encoding: \"\""),
tdef: TypeDef::bytes().fallible(),
}
// An unrecognised charset label inside a delimited word is an error.
unknown_charset{
args: func_args![value: value!("Subject: =?iso-9001?Q?hello=5Fworld=40example=2ecom?=")],
want: Err("Unable to decode \"iso-9001\" value"),
tdef: TypeDef::bytes().fallible(),
}
// Text without any encoded-word is returned unchanged.
no_start{
args: func_args![value: value!("Hello world.")],
want: Ok(value!("Hello world.")),
tdef: TypeDef::bytes().fallible(),
}
// `=?` and `?=` that do not enclose a well-formed word are left alone.
not_encoded{
args: func_args![value: value!("Is =? equal to ?= or not?")],
want: Ok(value!("Is =? equal to ?= or not?")),
tdef: TypeDef::bytes().fallible(),
}
// An opening `=?` with no closing `?=` is left alone.
partial{
args: func_args![value: value!("Is =? equal or not?")],
want: Ok(value!("Is =? equal or not?")),
tdef: TypeDef::bytes().fallible(),
}
// Empty input yields empty output.
empty{
args: func_args![value: value!("")],
want: Ok(value!("")),
tdef: TypeDef::bytes().fallible(),
}
];
}