// Guard helper for functions that return `Option`: evaluates the given
// boolean expression and makes the *enclosing function* return `None`
// when it is false. When it is true, execution simply continues.
macro_rules! require {
    ($predicate:expr) => {
        if !$predicate {
            return None;
        }
    };
}
pub mod forgiving_base64;
pub mod mime;
/// A `data:` URL that has been split into its header and body parts.
///
/// Built by [`DataUrl::process`]; the body is kept as a borrowed, still-encoded
/// slice of the input and is only decoded when `decode` / `decode_to_vec` is called.
pub struct DataUrl<'a> {
// MIME type produced by `parse_header` from the text between `data:` and the first comma.
mime_type: mime::Mime,
// Whether `parse_header` found a `;base64` suffix in the header.
base64: bool,
// Everything after the first comma, still encoded; may contain a `#fragment` tail.
encoded_body_plus_fragment: &'a str,
}
/// Reasons [`DataUrl::process`] can reject its input.
#[derive(Debug)]
pub enum DataUrlError {
    /// The input does not start with the `data:` scheme.
    NotADataUrl,
    /// No `,` separating the header from the body was found before the
    /// fragment (or the end of the input).
    NoComma,
}

impl std::fmt::Display for DataUrlError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            DataUrlError::NotADataUrl => "not a valid data url",
            DataUrlError::NoComma => "data url is missing comma",
        })
    }
}

// Lets callers use this type with `?` into `Box<dyn Error>` and similar
// error-handling helpers.
impl std::error::Error for DataUrlError {}
impl<'a> DataUrl<'a> {
pub fn process(input: &'a str) -> Result<Self, DataUrlError> {
use crate::DataUrlError::*;
let after_colon = pretend_parse_data_url(input).ok_or(NotADataUrl)?;
let (from_colon_to_comma, encoded_body_plus_fragment) =
find_comma_before_fragment(after_colon).ok_or(NoComma)?;
let (mime_type, base64) = parse_header(from_colon_to_comma);
Ok(DataUrl {
mime_type,
base64,
encoded_body_plus_fragment,
})
}
pub fn mime_type(&self) -> &mime::Mime {
&self.mime_type
}
pub fn decode<F, E>(
&self,
write_body_bytes: F,
) -> Result<Option<FragmentIdentifier<'a>>, forgiving_base64::DecodeError<E>>
where
F: FnMut(&[u8]) -> Result<(), E>,
{
if self.base64 {
decode_with_base64(self.encoded_body_plus_fragment, write_body_bytes)
} else {
decode_without_base64(self.encoded_body_plus_fragment, write_body_bytes)
.map_err(forgiving_base64::DecodeError::WriteError)
}
}
pub fn decode_to_vec(
&self,
) -> Result<(Vec<u8>, Option<FragmentIdentifier<'a>>), forgiving_base64::InvalidBase64> {
let mut body = Vec::new();
let fragment = self.decode(|bytes| {
body.extend_from_slice(bytes);
Ok(())
})?;
Ok((body, fragment))
}
}
/// The part of a `data:` URL that follows the first `#` in its body,
/// borrowed verbatim from the input.
pub struct FragmentIdentifier<'a>(&'a str);

impl<'a> FragmentIdentifier<'a> {
    /// Returns the fragment with tabs and newlines removed and with
    /// C0 controls, space, `"`, `<`, `>`, `` ` ``, DEL and all non-ASCII bytes
    /// percent-encoded. Every other byte is copied through unchanged.
    pub fn to_percent_encoded(&self) -> String {
        let mut encoded = String::new();
        for &octet in self.0.as_bytes() {
            // Tabs and newlines are dropped entirely rather than escaped.
            if matches!(octet, b'\t' | b'\n' | b'\r') {
                continue;
            }
            let needs_escape =
                octet <= b' ' || octet >= 0x7F || matches!(octet, b'"' | b'<' | b'>' | b'`');
            if needs_escape {
                percent_encode(octet, &mut encoded);
            } else {
                encoded.push(char::from(octet));
            }
        }
        encoded
    }
}
/// Checks that `input` starts with the `data:` scheme and returns what follows.
///
/// * Leading and trailing characters `<= ' '` (C0 controls and space) are trimmed.
/// * Tabs and newlines interleaved with the scheme letters are ignored.
/// * The scheme letters are matched ASCII case-insensitively.
///
/// Returns `None` when the input does not start with `data:`.
fn pretend_parse_data_url(input: &str) -> Option<&str> {
    let is_c0_or_space = |ch: char| ch <= ' ';
    let candidate = input.trim_start_matches(is_c0_or_space);

    let mut remaining = candidate.bytes();
    {
        // Borrow the byte iterator so that we can tell afterwards how far it advanced.
        let mut significant = remaining
            .by_ref()
            .filter(|&b| !matches!(b, b'\t' | b'\n' | b'\r'));
        for &expected in b"data" {
            require!(significant.next()?.to_ascii_lowercase() == expected);
        }
        require!(significant.next()? == b':');
    }

    // Whatever the iterator has not yielded yet is the text after the colon.
    let scheme_len = candidate.len() - remaining.len();
    Some(candidate[scheme_len..].trim_end_matches(is_c0_or_space))
}
/// Splits `after_colon` at its first comma into `(header, body)`, with the
/// comma itself dropped.
///
/// Returns `None` when a `#` is met before any comma, or when there is no
/// comma at all.
fn find_comma_before_fragment(after_colon: &str) -> Option<(&str, &str)> {
    // Whichever of ',' or '#' comes first decides the outcome.
    let stop = after_colon
        .bytes()
        .position(|b| b == b',' || b == b'#')?;
    if after_colon.as_bytes()[stop] != b',' {
        return None;
    }
    Some((&after_colon[..stop], &after_colon[stop + 1..]))
}
/// Interprets the text between `data:` and the first comma.
///
/// Returns the MIME type together with a flag telling whether the header ended
/// with a `;base64` marker. When the header cannot be parsed as a MIME type,
/// `text/plain;charset=US-ASCII` is used instead.
fn parse_header(from_colon_to_comma: &str) -> (mime::Mime, bool) {
    let header = from_colon_to_comma.trim_matches(|c| matches!(c, ' ' | '\t' | '\n' | '\r'));

    let (raw_mime, base64) = match remove_base64_suffix(header) {
        Some(prefix) => (prefix, true),
        None => (header, false),
    };

    let mut serialized = String::new();
    // A header that starts with parameters only gets the default type prepended.
    if raw_mime.starts_with(';') {
        serialized.push_str("text/plain");
    }

    let mut seen_question_mark = false;
    for &octet in raw_mime.as_bytes() {
        // Tabs and newlines are dropped entirely.
        if matches!(octet, b'\t' | b'\n' | b'\r') {
            continue;
        }
        // Control bytes, DEL and non-ASCII bytes are always escaped.
        let always_escaped = octet < 0x20 || octet >= 0x7F;
        // These are escaped only once a '?' has been seen.
        let escaped_after_question_mark =
            seen_question_mark && matches!(octet, b' ' | b'"' | b'<' | b'>');

        if always_escaped || escaped_after_question_mark {
            percent_encode(octet, &mut serialized);
        } else {
            if octet == b'?' {
                seen_question_mark = true;
            }
            serialized.push(char::from(octet));
        }
    }

    let mime_type = match serialized.parse::<mime::Mime>() {
        Ok(parsed) => parsed,
        Err(_) => mime::Mime {
            type_: String::from("text"),
            subtype: String::from("plain"),
            parameters: vec![(String::from("charset"), String::from("US-ASCII"))],
        },
    };

    (mime_type, base64)
}
/// Strips a trailing `;base64` marker from `s`.
///
/// The marker is `;`, then any number of spaces, then `base64` matched ASCII
/// case-insensitively; tabs and newlines anywhere in that tail are ignored.
/// Returns the text before the `;`, or `None` when `s` does not end with the marker.
fn remove_base64_suffix(s: &str) -> Option<&str> {
    let mut remaining = s.bytes();
    {
        // Walk backwards from the end, borrowing the iterator so that we can
        // tell afterwards how many leading bytes were left untouched.
        let mut from_end = remaining
            .by_ref()
            .rev()
            .filter(|&b| !matches!(b, b'\t' | b'\n' | b'\r'));

        // "base64" spelled backwards, since we are reading from the end.
        for &expected in b"46esab" {
            require!(from_end.next()?.to_ascii_lowercase() == expected);
        }

        // Skip any spaces between ';' and "base64"; the first other byte must be ';'.
        require!(from_end.find(|&b| b != b' ')? == b';');
    }
    Some(&s[..remaining.len()])
}
/// Appends `%XX` to `string`, where `XX` is `byte` written as two uppercase
/// hexadecimal digits.
fn percent_encode(byte: u8, string: &mut String) {
    const HEX_UPPER: &[u8; 16] = b"0123456789ABCDEF";
    string.push('%');
    // High nibble first, then low nibble.
    for &nibble in &[byte >> 4, byte & 0x0F] {
        string.push(char::from(HEX_UPPER[usize::from(nibble)]));
    }
}
/// Percent-decodes the body of a `data:` URL, streaming the result to `write_bytes`.
///
/// While scanning `encoded_body_plus_fragment`:
///
/// * `%` followed by two ASCII hex digits is replaced by the byte it encodes;
///   any other `%` is passed through literally.
/// * Tabs, line feeds and carriage returns are dropped.
/// * The first `#` ends the body; the rest of the input is returned as the
///   fragment identifier and is not written.
///
/// Runs of ordinary bytes are handed to `write_bytes` as borrowed sub-slices of
/// the input, so the callback may be invoked several times (possibly with an
/// empty slice at the end).
///
/// # Errors
///
/// Stops at, and returns, the first error produced by `write_bytes`.
fn decode_without_base64<F, E>(
    encoded_body_plus_fragment: &str,
    mut write_bytes: F,
) -> Result<Option<FragmentIdentifier<'_>>, E>
where
    F: FnMut(&[u8]) -> Result<(), E>,
{
    let bytes = encoded_body_plus_fragment.as_bytes();
    // Start of the run of bytes that has been scanned but not yet written.
    let mut slice_start = 0;
    for (i, &byte) in bytes.iter().enumerate() {
        if matches!(byte, b'%' | b'#' | b'\t' | b'\n' | b'\r') {
            // Flush the ordinary bytes accumulated before this special byte.
            if i > slice_start {
                write_bytes(&bytes[slice_start..i])?;
            }
            match byte {
                b'%' => {
                    let h = bytes.get(i + 1).and_then(|&b| (b as char).to_digit(16));
                    let l = bytes.get(i + 2).and_then(|&b| (b as char).to_digit(16));
                    if let (Some(h), Some(l)) = (h, l) {
                        // Both digits are < 16, so this cannot overflow a `u8`.
                        let one_byte = h as u8 * 0x10 + l as u8;
                        write_bytes(&[one_byte])?;
                        slice_start = i + 3;
                    } else {
                        // Not a valid escape: keep the '%' as a literal byte.
                        // The pending run was flushed just above, so the next
                        // run must begin at this '%'. Leaving `slice_start`
                        // untouched would write the flushed bytes a second time.
                        slice_start = i;
                    }
                }
                b'#' => {
                    let fragment = &encoded_body_plus_fragment[i + 1..];
                    return Ok(Some(FragmentIdentifier(fragment)));
                }
                // Tab / LF / CR: skip the byte itself.
                _ => slice_start = i + 1,
            }
        }
    }
    write_bytes(&bytes[slice_start..])?;
    Ok(None)
}
/// Decodes a base64 `data:` URL body.
///
/// The input is first percent-decoded by `decode_without_base64`; every chunk
/// it produces is fed to a `forgiving_base64::Decoder` that wraps `write_bytes`.
/// Once the whole body has been fed, the decoder is finished and the fragment
/// identifier (if any) found during percent-decoding is returned.
fn decode_with_base64<F, E>(
    encoded_body_plus_fragment: &str,
    write_bytes: F,
) -> Result<Option<FragmentIdentifier<'_>>, forgiving_base64::DecodeError<E>>
where
    F: FnMut(&[u8]) -> Result<(), E>,
{
    let mut base64_decoder = forgiving_base64::Decoder::new(write_bytes);
    let percent_decoded = decode_without_base64(encoded_body_plus_fragment, |chunk| {
        base64_decoder.feed(chunk)
    });
    let fragment = percent_decoded?;
    base64_decoder.finish()?;
    Ok(fragment)
}