use std::collections::HashMap;
use base64::{engine::Engine as _, prelude::BASE64_STANDARD};
use regex::Regex;
use std::sync::LazyLock;
use thiserror::Error;
use super::{PermissionMask, Resource, ResourceType};
/// Decoded payload of a stored resource.
#[derive(Clone)]
enum ResourceContent {
/// Content that was decoded from base64 and validated as UTF-8 text.
Text(String),
/// Content that was decoded from base64 and kept as raw bytes.
Raw(Vec<u8>),
}
impl ResourceContent {
fn text_from_base64(base64: &str) -> Result<Self, AddResourceError> {
let decoded = BASE64_STANDARD.decode(base64)?;
Ok(Self::Text(String::from_utf8(decoded)?))
}
fn raw_from_base64(base64: &str) -> Result<Self, AddResourceError> {
let decoded = BASE64_STANDARD.decode(base64)?;
Ok(Self::Raw(decoded))
}
}
/// A resource in the form handed out by storage backends, with its content already decoded.
#[derive(Clone)]
pub struct ResourceImpl {
/// Name of the resource; used to detect resources that were already collected.
name: String,
/// Type of the resource, taken over from the `Resource` it was built from.
kind: ResourceType,
/// Decoded content (text or raw bytes).
content: ResourceContent,
/// Identifiers of the resources this one depends on.
dependencies: Vec<String>,
/// Permission mask consulted before the resource is injected or used as a redirect.
permission: PermissionMask,
}
/// Resource store that delegates every lookup to a boxed [`ResourceStorageBackend`].
///
/// Unless the `single-thread` feature is enabled, the backend must also be `Sync + Send`.
pub struct ResourceStorage {
// Thread-safe flavour of the backend, used when `single-thread` is off.
#[cfg(not(feature = "single-thread"))]
backend: Box<dyn ResourceStorageBackend + Sync + Send>,
// Same field without the `Sync + Send` requirement, used when `single-thread` is on.
#[cfg(feature = "single-thread")]
backend: Box<dyn ResourceStorageBackend>,
}
impl Default for ResourceStorage {
    /// Creates a storage backed by an empty [`InMemoryResourceStorage`].
    fn default() -> Self {
        Self::from_backend(InMemoryResourceStorage::default())
    }
}
impl ResourceStorage {
    /// Creates a storage that answers lookups using `backend`.
    ///
    /// This variant is compiled when the `single-thread` feature is disabled and therefore
    /// requires the backend to be `Sync + Send`.
    #[cfg(not(feature = "single-thread"))]
    pub fn from_backend<S>(backend: S) -> Self
    where
        S: ResourceStorageBackend + 'static + Sync + Send,
    {
        let backend: Box<dyn ResourceStorageBackend + Sync + Send> = Box::new(backend);
        Self { backend }
    }

    /// Creates a storage that answers lookups using `backend`.
    ///
    /// This variant is compiled when the `single-thread` feature is enabled and places no
    /// thread-safety requirement on the backend.
    #[cfg(feature = "single-thread")]
    pub fn from_backend<S>(backend: S) -> Self
    where
        S: ResourceStorageBackend + 'static,
    {
        let backend: Box<dyn ResourceStorageBackend> = Box::new(backend);
        Self { backend }
    }

    /// Test-only shortcut: builds an in-memory backend from `resources` and wraps it.
    #[cfg(test)]
    pub fn in_memory_from_resources(resources: impl IntoIterator<Item = Resource>) -> Self {
        let backend = InMemoryResourceStorage::from_resources(resources);
        Self::from_backend(backend)
    }
}
/// Lookup interface implemented by the backends a [`ResourceStorage`] can delegate to.
pub trait ResourceStorageBackend {
/// Returns an owned copy of the resource identified by `resource_ident`, or `None` when the
/// backend has no resource for that identifier.
fn get_resource(&self, resource_ident: &str) -> Option<ResourceImpl>;
}
/// Storage backend that keeps every resource in memory.
#[derive(Default, Clone)]
pub struct InMemoryResourceStorage {
/// Stored resources, keyed by resource name.
resources: HashMap<String, ResourceImpl>,
/// Maps each alias to the name of the resource it stands for.
aliases: HashMap<String, String>,
}
impl ResourceStorageBackend for InMemoryResourceStorage {
    /// Looks `resource_ident` up as a resource name first and as an alias second, returning a
    /// clone of the matching resource.
    fn get_resource(&self, resource_ident: &str) -> Option<ResourceImpl> {
        self.resources
            .get(resource_ident)
            .or_else(|| {
                // Not a direct name: try to resolve it as an alias of a stored resource.
                self.aliases
                    .get(resource_ident)
                    .and_then(|canonical_name| self.resources.get(canonical_name))
            })
            .cloned()
    }
}
impl InMemoryResourceStorage {
    /// Builds a storage from `resources`, adding them one by one with [`Self::add_resource`].
    ///
    /// A resource that cannot be added is skipped. The failure is printed to stderr in test
    /// builds and silently ignored otherwise.
    pub fn from_resources(resources: impl IntoIterator<Item = Resource>) -> Self {
        let mut storage = Self::default();
        for resource in resources {
            if let Err(_e) = storage.add_resource(resource) {
                #[cfg(test)]
                eprintln!("Failed to add resource: {_e:?}");
            }
        }
        storage
    }

    /// Decodes `resource` and stores it under its name and all of its aliases.
    ///
    /// # Errors
    ///
    /// * [`AddResourceError::ContentTypeDoesNotSupportDependencies`] if dependencies are
    ///   declared on a MIME type that does not support them.
    /// * [`AddResourceError::InvalidBase64Content`] / [`AddResourceError::InvalidUtf8Content`]
    ///   if the content cannot be decoded.
    /// * [`AddResourceError::NameAlreadyAdded`] if the name or any alias is already in use as
    ///   a name or alias.
    ///
    /// Nothing is stored when an error is returned.
    pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> {
        // Decoding doubles as validation of the base64 (and, for text, UTF-8) payload.
        let content = if let ResourceType::Mime(content_type) = &resource.kind {
            if !resource.dependencies.is_empty() && !content_type.supports_dependencies() {
                return Err(AddResourceError::ContentTypeDoesNotSupportDependencies);
            }
            if content_type.is_textual() {
                ResourceContent::text_from_base64(&resource.content)?
            } else {
                ResourceContent::raw_from_base64(&resource.content)?
            }
        } else {
            // Every non-MIME resource kind is treated as text.
            ResourceContent::text_from_base64(&resource.content)?
        };

        // Reject the resource if its name or any alias collides with an existing identifier.
        let already_taken = std::iter::once(&resource.name)
            .chain(resource.aliases.iter())
            .any(|ident| self.resources.contains_key(ident) || self.aliases.contains_key(ident));
        if already_taken {
            return Err(AddResourceError::NameAlreadyAdded);
        }

        for alias in &resource.aliases {
            self.aliases.insert(alias.clone(), resource.name.clone());
        }

        let stored = ResourceImpl {
            name: resource.name.clone(),
            kind: resource.kind,
            content,
            dependencies: resource.dependencies,
            permission: resource.permission,
        };
        self.resources.insert(resource.name, stored);
        Ok(())
    }

    /// Removes and returns every stored resource, keyed by name.
    ///
    /// Only the resource map is emptied; the alias map is left untouched.
    pub fn take_resources(&mut self) -> HashMap<String, ResourceImpl> {
        let Self { resources, .. } = self;
        std::mem::take(resources)
    }
}
/// Escapes `arg` so that it can be embedded in a double-quoted string literal.
///
/// `"` and `\` are prefixed with a backslash, the control characters backspace, tab, newline,
/// form feed and carriage return use their short escapes (`\b`, `\t`, `\n`, `\f`, `\r`), and
/// every other byte below `0x20` becomes `\u00XX`. All other bytes are copied unchanged.
///
/// When `QUOTED` is `true` the result is additionally wrapped in double quotes.
#[inline(always)]
fn stringify_arg<const QUOTED: bool>(arg: &str) -> String {
    /// Table value for bytes that are copied through unchanged.
    const PLAIN: u8 = 0;

    /// Builds the lookup table mapping each byte to the character that follows the backslash
    /// in its escape sequence, or to `PLAIN` when the byte needs no escaping.
    const fn build_escape_table() -> [u8; 256] {
        let mut table = [PLAIN; 256];
        // Control characters default to the `\uXXXX` form...
        let mut byte = 0;
        while byte < 0x20 {
            table[byte] = b'u';
            byte += 1;
        }
        // ...except for the ones that have a dedicated short escape.
        table[0x08] = b'b';
        table[b'\t' as usize] = b't';
        table[b'\n' as usize] = b'n';
        table[0x0c] = b'f';
        table[b'\r' as usize] = b'r';
        table[b'"' as usize] = b'"';
        table[b'\\' as usize] = b'\\';
        table
    }

    static ESCAPED: [u8; 256] = build_escape_table();

    /// Slow path: copies `text` into `out`, escaping as needed. `first_special` is the index
    /// of the first byte that requires escaping; everything before it is copied verbatim.
    #[inline(never)]
    fn write_escaped(out: &mut Vec<u8>, text: &str, first_special: usize) {
        let bytes = text.as_bytes();
        out.extend_from_slice(&bytes[..first_special]);

        // Start of the run of plain bytes that has not been flushed to `out` yet.
        let mut pending = first_special;
        for (pos, &byte) in bytes.iter().enumerate().skip(first_special) {
            let code = ESCAPED[byte as usize];
            if code == PLAIN {
                continue;
            }
            out.extend_from_slice(&bytes[pending..pos]);
            out.push(b'\\');
            out.push(code);
            if code == b'u' {
                out.extend_from_slice(format!("{byte:04x}").as_bytes());
            }
            pending = pos + 1;
        }
        out.extend_from_slice(&bytes[pending..]);
    }

    let mut out = Vec::with_capacity(arg.len() + 2);
    if QUOTED {
        out.push(b'"');
    }
    match arg.bytes().position(|byte| ESCAPED[byte as usize] != PLAIN) {
        Some(first_special) => write_escaped(&mut out, arg, first_special),
        // Fast path: nothing to escape, copy the argument as-is.
        None => out.extend_from_slice(arg.as_bytes()),
    }
    if QUOTED {
        out.push(b'"');
    }
    // Only ASCII bytes are ever replaced, and only by ASCII, so the output stays valid UTF-8.
    String::from_utf8(out).unwrap()
}
/// Returns the function name if `fn_def` starts with a named function definition.
///
/// The text must begin with `function`, followed by whitespace, a name made of characters
/// other than parentheses, braces and whitespace, optional whitespace and an opening
/// parenthesis. Returns `None` when it does not have that shape.
fn extract_function_name(fn_def: &str) -> Option<&str> {
    static FUNCTION_NAME_RE: LazyLock<Regex> =
        LazyLock::new(|| Regex::new(r#"^function\s+([^\(\)\{\}\s]+)\s*\("#).unwrap());

    let captures = FUNCTION_NAME_RE.captures(fn_def)?;
    // Group 1 is not optional in the pattern, so it is present whenever the regex matched.
    let name = captures.get(1).unwrap();
    Some(name.as_str())
}
impl ResourceStorage {
    /// Builds the script text for a set of scriptlet injections.
    ///
    /// Each item pairs a raw scriptlet argument list (scriptlet name followed by its
    /// arguments) with the permission mask to check the scriptlet and its dependencies
    /// against. Injections that cannot be resolved are skipped without reporting an error.
    ///
    /// The result consists of the text of every collected dependency, each followed by a
    /// newline, and then every successful invocation wrapped in its own `try`/`catch` block.
    pub fn get_scriptlet_resources<'a>(
        &self,
        script_injections: impl IntoIterator<Item = (&'a str, PermissionMask)>,
    ) -> String {
        let mut deps = vec![];
        let mut invokations = String::new();

        for (scriptlet_args, mask) in script_injections {
            // Best effort: a failing injection must not prevent the remaining ones.
            if let Ok(invokation) = self.get_scriptlet_resource(scriptlet_args, mask, &mut deps) {
                invokations += "try {\n";
                invokations += &invokation;
                invokations += "\n} catch ( e ) { }\n";
            }
        }

        let mut result = String::new();
        for dep in &deps {
            // Raw (non-text) dependencies contribute nothing to the script.
            if let ResourceContent::Text(content) = &dep.content {
                result += content;
                result += "\n";
            }
        }
        result += &invokations;
        result
    }

    /// Appends `new_dep` and, transitively, everything it depends on to `prev_deps`.
    ///
    /// Resources already present in `prev_deps` are not added again, which also stops the
    /// recursion on dependency cycles.
    ///
    /// # Errors
    ///
    /// Propagates the error from [`Self::get_permissioned_resource`] if `new_dep` or one of
    /// its transitive dependencies is missing or not injectable with `filter_permission`.
    /// Resources pushed before the failure stay in `prev_deps`.
    fn recursive_dependencies(
        &self,
        new_dep: &str,
        prev_deps: &mut Vec<ResourceImpl>,
        filter_permission: PermissionMask,
    ) -> Result<(), ScriptletResourceError> {
        // Fast path: already collected under exactly this identifier, no lookup needed.
        if prev_deps.iter().any(|dep| dep.name == new_dep) {
            return Ok(());
        }

        let resource = self.get_permissioned_resource(new_dep, filter_permission)?;

        // A backend may resolve `new_dep` to a resource whose name differs from the requested
        // identifier (the in-memory backend does so for aliases). Comparing only against the
        // requested identifier would then add the same resource twice and, for a dependency
        // cycle that goes through such an identifier, recurse without bound. Compare against
        // the resolved name as well.
        if prev_deps.iter().any(|dep| dep.name == resource.name) {
            return Ok(());
        }

        // Cloned because `resource` is moved into `prev_deps`, which is then mutably borrowed
        // by the recursive calls.
        let deps = resource.dependencies.clone();
        prev_deps.push(resource);

        for dep in &deps {
            self.recursive_dependencies(dep, prev_deps, filter_permission)?;
        }

        Ok(())
    }

    /// Resolves one scriptlet injection and returns the script text that invokes it.
    ///
    /// `scriptlet_args` is the raw argument list: the scriptlet name (the `.js` extension is
    /// optional) followed by its arguments. The scriptlet's dependencies are appended to
    /// `required_deps`.
    ///
    /// * If the scriptlet text starts with a named function definition, the scriptlet itself
    ///   is appended to `required_deps` as well and the result is a call of that function
    ///   with the arguments passed as quoted, escaped string literals.
    /// * Otherwise the scriptlet text is treated as a template and the result is that text
    ///   with the `{{1}}`..`{{9}}` placeholders replaced by the escaped arguments.
    ///
    /// # Errors
    ///
    /// Returns an error if no scriptlet name is given, if the only argument uses object
    /// syntax (`{...}`), if the resource is missing, not injectable with
    /// `filter_permission`, or of a kind/content that cannot be injected, or if resolving a
    /// dependency fails.
    ///
    /// # Panics
    ///
    /// Panics if `scriptlet_args` cannot be parsed by `parse_scriptlet_args`.
    fn get_scriptlet_resource(
        &self,
        scriptlet_args: &str,
        filter_permission: PermissionMask,
        required_deps: &mut Vec<ResourceImpl>,
    ) -> Result<String, ScriptletResourceError> {
        // NOTE(review): presumably callers only pass argument lists that were already
        // validated with `parse_scriptlet_args` - confirm; otherwise this can panic.
        let scriptlet_args = parse_scriptlet_args(scriptlet_args).unwrap();

        if scriptlet_args.is_empty() {
            return Err(ScriptletResourceError::MissingScriptletName);
        }

        let scriptlet_name = with_js_extension(scriptlet_args[0].as_ref());
        let args = &scriptlet_args[1..];

        if args.len() == 1 && args[0].starts_with('{') && args[0].ends_with('}') {
            return Err(ScriptletResourceError::ScriptletArgObjectSyntaxUnsupported);
        }

        let resource = self.get_permissioned_resource(&scriptlet_name, filter_permission)?;

        if !resource.kind.supports_scriptlet_injection() {
            return Err(ScriptletResourceError::ContentTypeNotInjectable);
        }

        for dep in resource.dependencies.iter() {
            self.recursive_dependencies(dep, required_deps, filter_permission)?;
        }

        let template = match &resource.content {
            ResourceContent::Raw(_content) => {
                return Err(ScriptletResourceError::ContentTypeNotInjectable);
            }
            ResourceContent::Text(content) => content.clone(),
        };

        if let Some(function_name) = extract_function_name(&template) {
            // Function-style scriptlet: ship the definition once, as a dependency, and emit
            // only the call expression here.
            if !required_deps.iter().any(|dep| dep.name == resource.name) {
                required_deps.push(resource);
            }

            use itertools::Itertools as _;
            Ok(format!(
                "{}({})",
                function_name,
                args.iter().map(|arg| stringify_arg::<true>(arg)).join(", ")
            ))
        } else {
            // Template-style scriptlet: substitute the arguments into the placeholders.
            Ok(patch_template_scriptlet(
                template,
                args.iter().map(|arg| stringify_arg::<false>(arg)),
            ))
        }
    }

    /// Returns the resource identified by `resource_ident` as a base64 `data:` URL.
    ///
    /// Returns `None` if the backend has no such resource, if the resource's permission is
    /// not the default one, if its kind does not support redirects, or if its kind is not a
    /// MIME type.
    pub fn get_redirect_resource(&self, resource_ident: &str) -> Option<String> {
        let resource = self.backend.get_resource(resource_ident)?;

        if !resource.permission.is_default() {
            return None;
        }
        if !resource.kind.supports_redirect() {
            return None;
        }
        let ResourceType::Mime(mime) = &resource.kind else {
            return None;
        };

        let bytes = match &resource.content {
            ResourceContent::Raw(content) => content.as_slice(),
            ResourceContent::Text(content) => content.as_bytes(),
        };
        let encoded = BASE64_STANDARD.encode(bytes);
        Some(format!("data:{mime};base64,{encoded}"))
    }

    /// Fetches `scriptlet_name` from the backend and checks that it may be injected with
    /// `filter_permission`.
    ///
    /// # Errors
    ///
    /// * [`ScriptletResourceError::NoMatchingScriptlet`] if the backend has no such resource.
    /// * [`ScriptletResourceError::InsufficientPermissions`] if the resource's permission
    ///   does not allow injection with `filter_permission`.
    fn get_permissioned_resource(
        &self,
        scriptlet_name: &str,
        filter_permission: PermissionMask,
    ) -> Result<ResourceImpl, ScriptletResourceError> {
        let resource = self
            .backend
            .get_resource(scriptlet_name)
            .ok_or(ScriptletResourceError::NoMatchingScriptlet)?;

        if !resource.permission.is_injectable_by(filter_permission) {
            return Err(ScriptletResourceError::InsufficientPermissions);
        }

        Ok(resource)
    }
}
/// Reasons a resource can be rejected when it is added to an `InMemoryResourceStorage`.
#[derive(Debug, Error, PartialEq)]
pub enum AddResourceError {
/// The resource content could not be decoded as base64.
#[error("invalid base64 content")]
InvalidBase64Content,
/// The decoded content had to be text but is not valid UTF-8.
#[error("invalid utf-8 content")]
InvalidUtf8Content,
/// The resource name, or one of its aliases, is already in use as a name or alias.
#[error("resource name already added")]
NameAlreadyAdded,
/// Dependencies were declared on a MIME type that does not support them.
#[error("resource content type does not support dependencies")]
ContentTypeDoesNotSupportDependencies,
}
impl From<base64::DecodeError> for AddResourceError {
    /// Maps any base64 decoding failure to `InvalidBase64Content`; the error details are
    /// discarded.
    fn from(_err: base64::DecodeError) -> Self {
        Self::InvalidBase64Content
    }
}
impl From<std::string::FromUtf8Error> for AddResourceError {
fn from(_: std::string::FromUtf8Error) -> Self {
AddResourceError::InvalidUtf8Content
}
}
/// Reasons a scriptlet injection can fail to be resolved.
#[derive(Debug, Error, PartialEq)]
pub enum ScriptletResourceError {
/// The storage backend has no resource for the requested identifier.
#[error("no scriptlet has the provided name")]
NoMatchingScriptlet,
/// The scriptlet argument list was empty, so there is no name to look up.
#[error("no scriptlet name was provided")]
MissingScriptletName,
/// The only scriptlet argument starts with `{` and ends with `}`.
#[error("object syntax for scriptlet arguments is unsupported")]
ScriptletArgObjectSyntaxUnsupported,
/// Produced when a base64 decoding error or a UTF-8 validation error is converted.
#[error("scriptlet content was corrupted")]
CorruptScriptletContent,
/// The resource kind does not support scriptlet injection, or its content is raw bytes.
#[error("resource content type cannot be used for a scriptlet injection")]
ContentTypeNotInjectable,
/// The resource's permission does not allow injection with the supplied permission mask.
#[error("filter rule is not authorized to inject the intended scriptlet")]
InsufficientPermissions,
}
impl From<base64::DecodeError> for ScriptletResourceError {
fn from(_: base64::DecodeError) -> Self {
Self::CorruptScriptletContent
}
}
impl From<std::string::FromUtf8Error> for ScriptletResourceError {
fn from(_: std::string::FromUtf8Error) -> Self {
Self::CorruptScriptletContent
}
}
/// Lazily compiled regexes for the template placeholders `{{1}}` through `{{9}}`.
///
/// The regex at index `i` matches placeholder number `i + 1`; each one is compiled on first
/// use.
static TEMPLATE_ARGUMENT_RE: [LazyLock<Regex>; 9] = [
LazyLock::new(|| template_argument_regex(1)),
LazyLock::new(|| template_argument_regex(2)),
LazyLock::new(|| template_argument_regex(3)),
LazyLock::new(|| template_argument_regex(4)),
LazyLock::new(|| template_argument_regex(5)),
LazyLock::new(|| template_argument_regex(6)),
LazyLock::new(|| template_argument_regex(7)),
LazyLock::new(|| template_argument_regex(8)),
LazyLock::new(|| template_argument_regex(9)),
];
/// Compiles the regex that matches the literal placeholder `{{i}}`, e.g. `{{3}}` for `i == 3`.
fn template_argument_regex(i: usize) -> Regex {
    // Braces are doubled for `format!` and backslash-escaped for the regex engine.
    let pattern = format!(r"\{{\{{{i}\}}\}}");
    Regex::new(&pattern).unwrap()
}
fn patch_template_scriptlet(
mut template: String,
args: impl IntoIterator<Item = impl AsRef<str>>,
) -> String {
args.into_iter()
.take(TEMPLATE_ARGUMENT_RE.len())
.enumerate()
.for_each(|(i, arg)| {
template = TEMPLATE_ARGUMENT_RE[i]
.replace(&template, arg.as_ref().replace('$', "$$"))
.to_string();
});
template
}
/// Returns `scriptlet_name` with a `.js` suffix, appending one only if it is not already there.
fn with_js_extension(scriptlet_name: &str) -> String {
    const EXTENSION: &str = ".js";

    let mut full_name = String::with_capacity(scriptlet_name.len() + EXTENSION.len());
    full_name.push_str(scriptlet_name);
    if !scriptlet_name.ends_with(EXTENSION) {
        full_name.push_str(EXTENSION);
    }
    full_name
}
/// Finds the first occurrence of `separator` in `s` that is not escaped.
///
/// A separator counts as escaped when it is directly preceded by an odd number of
/// backslashes.
///
/// Returns the byte index of the unescaped separator (or `None` if there is none) together
/// with a flag that is `true` when at least one escaped separator was skipped on the way.
///
/// # Panics
///
/// Panics if `separator` is a backslash.
fn index_next_unescaped_separator(s: &str, separator: char) -> (Option<usize>, bool) {
    assert!(separator != '\\');

    let mut search_from = 0;
    let mut skipped_escaped = false;

    // Terminates: every iteration either returns or moves `search_from` forward.
    while let Some(offset) = s[search_from..].find(separator) {
        let candidate = search_from + offset;

        // Count the backslashes directly in front of the candidate. The count is limited to
        // the part of the string that has not been scanned yet.
        let backslashes = s.as_bytes()[search_from..candidate]
            .iter()
            .rev()
            .take_while(|&&byte| byte == b'\\')
            .count();

        if backslashes % 2 == 0 {
            return (Some(candidate), skipped_escaped);
        }

        skipped_escaped = true;
        search_from = candidate + 1;
    }

    (None, skipped_escaped)
}
/// Removes the backslash from every escaped `separator` in `arg`.
///
/// Backslashes are processed left to right as escape prefixes: a backslash followed by
/// `separator` yields just the separator, while a backslash followed by anything else
/// (including another backslash) is kept together with the character it precedes. A single
/// backslash at the very end of `arg` is dropped.
///
/// # Panics
///
/// Panics if `separator` is a backslash.
fn normalize_arg(arg: &str, separator: char) -> String {
    assert!(separator != '\\');

    let mut normalized = String::with_capacity(arg.len());
    // `true` while the previous character was a backslash that has not been emitted yet.
    let mut pending_backslash = false;

    for ch in arg.chars() {
        match (ch, pending_backslash) {
            ('\\', true) => {
                normalized.push_str("\\\\");
                pending_backslash = false;
            }
            ('\\', false) => pending_backslash = true,
            (_, true) => {
                if ch != separator {
                    normalized.push('\\');
                }
                normalized.push(ch);
                pending_backslash = false;
            }
            (_, false) => normalized.push(ch),
        }
    }

    normalized
}
/// Splits a raw scriptlet argument list into its individual arguments.
///
/// Arguments are separated by commas. An argument may be wrapped in `"`, `'` or backticks,
/// in which case the quotes are removed and it may contain commas; whitespace around the
/// quotes is ignored. Unquoted arguments have surrounding whitespace trimmed, and `\,` in
/// them is turned into a literal comma.
///
/// An empty or whitespace-only input yields an empty list.
///
/// Returns `None` if a quoted argument is not terminated, or if its closing quote is
/// followed by anything other than optional whitespace and then a comma or the end of the
/// input.
pub(crate) fn parse_scriptlet_args(mut args: &str) -> Option<Vec<String>> {
    let mut args_vec = vec![];
    if args.trim().is_empty() {
        return Some(args_vec);
    }

    // Terminates: each iteration returns, breaks, or consumes at least one character.
    loop {
        // Skip leading whitespace. A whitespace-only remainder is deliberately left alone so
        // that it is still parsed as one (empty) unquoted argument below.
        if let Some(i) = args.find(|c: char| !c.is_whitespace()) {
            args = &args[i..];
        }

        let (arg, needs_transform) = match args.chars().next() {
            Some(qc) if matches!(qc, '"' | '\'' | '`') => {
                let quoted = &args[1..];
                let (end, needs_transform) = index_next_unescaped_separator(quoted, qc);
                // An unterminated quoted argument makes the whole list invalid.
                let end = end?;

                // Skip whitespace after the closing quote. `trim_start` also handles a
                // remainder made up of nothing but whitespace, so a trailing quoted argument
                // followed by spaces is accepted instead of being rejected as malformed.
                let rest = quoted[end + 1..].trim_start();
                args = match rest.strip_prefix(',') {
                    Some(after_separator) => after_separator,
                    None if rest.is_empty() => rest,
                    // Anything else after the closing quote is not a valid argument list.
                    None => return None,
                };

                (&quoted[..end], needs_transform)
            }
            Some(_) => {
                let (end, needs_transform) = index_next_unescaped_separator(args, ',');
                let (raw, rest) = match end {
                    Some(i) => (&args[..i], &args[i + 1..]),
                    None => (args, ""),
                };
                args = rest;
                (raw.trim_end(), needs_transform)
            }
            // Input exhausted. A trailing comma does not produce an extra empty argument.
            None => break,
        };

        args_vec.push(if needs_transform {
            normalize_arg(arg, ',')
        } else {
            arg.to_string()
        });
    }

    Some(args_vec)
}
// Unit tests for this module are kept in a separate file and are only compiled for test builds.
#[cfg(test)]
#[path = "../../tests/unit/resources/resource_storage.rs"]
mod unit_tests;