use std::{
any::Any,
borrow::Cow,
collections::BTreeMap,
fmt::Debug,
fs::File,
io::{BufRead, BufReader, Read, Seek},
path::Path,
str::FromStr,
sync::Arc,
};
use bytes::{Buf, Bytes};
use sha2::Digest;
use shared_buffer::OwnedBuffer;
use crate::{compat::Volume, PathSegmentError, Version};
/// A handle to a parsed container, independent of the on-disk format it came from.
///
/// The concrete backend is type-erased behind [`AbstractWebc`] and stored in an
/// [`Arc`], so cloning a `Container` clones the `Arc` rather than the backend.
#[derive(Debug, Clone)]
pub struct Container {
// Type-erased backend shared between all clones of this container.
imp: Arc<dyn AbstractWebc + Send + Sync>,
}
#[allow(clippy::result_large_err)]
impl Container {
/// Loads a [`Container`] from a path on disk.
///
/// * A directory is handed to `parse_dir`.
/// * A file starting with the gzip magic bytes is parsed as a package tarball.
/// * Anything else goes through `crate::detect`; V1/V2/V3 files are passed to the
///   matching `parse_*_mmap` function, and any other detected version is read
///   fully into memory and handed to [`Container::from_bytes_and_version`].
///
/// # Errors
///
/// Returns [`ContainerError::Open`] when the file cannot be opened,
/// [`ContainerError::Detect`] when the version cannot be detected,
/// [`ContainerError::Read`] when the fallback in-memory read fails, or whatever
/// error the selected parser reports.
pub fn from_disk(path: impl AsRef<Path>) -> Result<Self, ContainerError> {
    let path = path.as_ref();

    if path.is_dir() {
        return parse_dir(path);
    }

    let mut file = match File::open(path) {
        Ok(file) => file,
        Err(error) => {
            return Err(ContainerError::Open {
                error,
                path: path.to_path_buf(),
            });
        }
    };

    if is_tarball(&mut file) {
        return parse_tarball(BufReader::new(file));
    }

    let version = crate::detect(&mut file).map_err(ContainerError::Detect)?;

    match version {
        Version::V1 => parse_v1_mmap(file),
        Version::V2 => parse_v2_mmap(file),
        Version::V3 => parse_v3_mmap(file),
        unknown => {
            // Fall back to the in-memory code path so that version handling lives
            // in exactly one place (`from_bytes_and_version`).
            let mut contents = Vec::new();
            let read = file
                .rewind()
                .and_then(|_| file.read_to_end(&mut contents));
            if let Err(error) = read {
                return Err(ContainerError::Read {
                    path: path.to_path_buf(),
                    error,
                });
            }
            Container::from_bytes_and_version(contents.into(), unknown)
        }
    }
}
/// Loads a [`Container`] from bytes that are already in memory.
///
/// Input starting with the gzip magic bytes is parsed as a package tarball;
/// everything else has its version detected and is dispatched through
/// [`Container::from_bytes_and_version`].
///
/// # Errors
///
/// Returns [`ContainerError::Detect`] when the version cannot be detected, or
/// the error reported by the selected parser.
pub fn from_bytes(bytes: impl Into<Bytes>) -> Result<Self, ContainerError> {
    let data: Bytes = bytes.into();

    if !is_tarball(std::io::Cursor::new(&data)) {
        let version = crate::detect(data.as_ref())?;
        return Container::from_bytes_and_version(data, version);
    }

    parse_tarball(data.reader())
}
#[doc(hidden)]
pub fn new(repr: impl AbstractWebc + Send + Sync + 'static) -> Self {
Container {
imp: Arc::new(repr),
}
}
fn from_bytes_and_version(bytes: Bytes, version: Version) -> Result<Self, ContainerError> {
match version {
Version::V1 => parse_v1_owned(bytes),
Version::V2 => parse_v2_owned(bytes),
Version::V3 => parse_v3_owned(bytes),
other => Err(ContainerError::UnsupportedVersion(other)),
}
}
/// Returns the [`Version`] reported by the underlying implementation.
pub fn version(&self) -> Version {
self.imp.version()
}
/// Returns a reference to the manifest held by the underlying implementation.
pub fn manifest(&self) -> &crate::metadata::Manifest {
self.imp.manifest()
}
/// Returns the 32-byte hash reported by the underlying implementation's
/// `get_webc_hash`, or `None` when the implementation has none.
pub fn webc_hash(&self) -> Option<[u8; 32]> {
self.imp.get_webc_hash()
}
/// Collects every atom in the container, keyed by name.
///
/// Names for which the backend returns no atom are skipped.
pub fn atoms(&self) -> BTreeMap<String, OwnedBuffer> {
    self.imp
        .atom_names()
        .into_iter()
        .filter_map(|name| {
            let atom = self.imp.get_atom(&name)?;
            Some((name.into_owned(), atom))
        })
        .collect()
}
/// Looks up a single atom by name, returning `None` when the underlying
/// implementation has no atom with that name.
pub fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
self.imp.get_atom(name)
}
/// Collects every volume in the container, keyed by name.
///
/// Names for which the backend returns no volume are skipped.
pub fn volumes(&self) -> BTreeMap<String, Volume> {
    self.imp
        .volume_names()
        .into_iter()
        .filter_map(|name| {
            let volume = self.imp.get_volume(&name)?;
            Some((name.into_owned(), volume))
        })
        .collect()
}
/// Looks up a single volume by name, returning `None` when the underlying
/// implementation has no volume with that name.
pub fn get_volume(&self, name: &str) -> Option<Volume> {
self.imp.get_volume(name)
}
pub fn downcast_ref<T>(&self) -> Option<&T>
where
T: 'static,
{
self.as_any().downcast_ref()
}
pub fn downcast<T>(self) -> Result<Arc<T>, Self>
where
T: 'static,
{
if self.as_any().is::<T>() {
unsafe { Ok(Arc::from_raw(Arc::into_raw(self.imp).cast())) }
} else {
Err(self)
}
}
pub fn unpack(&self, out_dir: &std::path::Path, overwrite: bool) -> Result<(), ContainerError> {
match out_dir.metadata() {
Ok(m) => {
if !m.is_dir() {
return Err(ContainerError::Open {
path: out_dir.to_path_buf(),
error: std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
"output path is not a directory",
),
});
}
let mut items = std::fs::read_dir(out_dir).map_err(|err| ContainerError::Open {
path: out_dir.to_path_buf(),
error: err,
})?;
if items.next().is_some() && !overwrite {
return Err(ContainerError::Open {
path: out_dir.to_path_buf(),
error: std::io::Error::new(
std::io::ErrorKind::AlreadyExists,
"output directory is not empty",
),
})?;
}
}
Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
std::fs::create_dir_all(out_dir).map_err(|err| ContainerError::Open {
path: out_dir.to_path_buf(),
error: err,
})?;
}
Err(err) => {
return Err(ContainerError::Open {
path: out_dir.to_path_buf(),
error: err,
});
}
};
let manifest_path = out_dir.join("manifest.json");
let manifest_data =
serde_json::to_vec(self.manifest()).expect("could not serialize manifest to JSON");
std::fs::write(&manifest_path, manifest_data).map_err(|err| ContainerError::Open {
path: manifest_path,
error: err,
})?;
for (root, volume) in self.volumes() {
let root = root.strip_prefix('/').unwrap_or(root.as_str());
let volume_dir = out_dir.join(root);
volume.unpack("/", &volume_dir)?;
}
for (name, contents) in self.atoms() {
std::fs::write(out_dir.join(name), contents)?;
}
Ok(())
}
pub fn validate(&self) -> Result<(), anyhow::Error> {
if self.version() == Version::V1 {
anyhow::bail!("v1 validation is unsupported");
}
let manifest = self.manifest();
for (name, bytes) in self.atoms().iter() {
let signature = manifest.atom_signature(name)?;
let expected = sha2::Sha256::digest(bytes);
if signature.as_bytes() != expected.as_slice() {
anyhow::bail!(format!(
"signature of atom: {name} does not match what is expected"
))
}
}
if let Some(fs) = manifest.filesystem()? {
for crate::metadata::annotations::FileSystemMapping {
volume_name,
host_path,
..
} in fs.iter()
{
let volume = self
.get_volume(volume_name)
.ok_or_else(|| anyhow::Error::msg(format!("could not find: {volume_name}")))?;
if self.version() == Version::V2 {
let host_path = host_path.clone().ok_or_else(|| {
anyhow::Error::msg("host_path is not present in fs mapping")
})?;
let host_path_segments = crate::PathSegments::from_str(&host_path)?;
volume.read_dir(host_path_segments).ok_or_else(|| {
anyhow::Error::msg(format!("could not read directory: {host_path}"))
})?;
}
}
}
for (_, volume) in self.volumes().iter() {
traverse_volume(volume, crate::PathSegments::ROOT, self.version())?;
}
Ok(())
}
}
/// Recursively walks `volume` starting at `path` and validates every file.
///
/// For each file the content length must equal the length recorded in its
/// metadata. For V3 containers the hashes returned by `read_dir` and
/// `read_file` must both be present and equal to the SHA-256 of the content.
///
/// # Errors
///
/// Fails when a directory or file cannot be read, or on the first file that
/// violates one of the checks above.
fn traverse_volume(
    volume: &crate::Volume,
    path: crate::PathSegments,
    version: crate::Version,
) -> Result<(), anyhow::Error> {
    let entries = match volume.read_dir(&path) {
        Some(entries) => entries,
        None => anyhow::bail!("failed to read path: {path}"),
    };

    for (name, dir_hash, metadata) in entries {
        let entry_path = path.join(name);

        // Directories are handled by recursion; only files fall through.
        let length = match metadata {
            crate::Metadata::Dir { .. } => {
                traverse_volume(volume, entry_path, version)?;
                continue;
            }
            crate::Metadata::File { length, .. } => length,
        };

        let (content, file_hash) = match volume.read_file(entry_path.clone()) {
            Some(found) => found,
            None => anyhow::bail!("failed to read file: {entry_path}"),
        };

        if content.len() != length {
            anyhow::bail!("File: {entry_path} length does not match with the actual content: {} != {}", length, content.len());
        }

        // Hashes are only checked for V3 containers.
        if version != crate::Version::V3 {
            continue;
        }

        let expected: [u8; 32] = sha2::Sha256::digest(&content).into();

        let dir_hash = match dir_hash {
            Some(hash) => hash,
            None => anyhow::bail!("hash of {entry_path} is not present in V3 when calling read_dir"),
        };
        let file_hash = match file_hash {
            Some(hash) => hash,
            None => anyhow::bail!("hash of {entry_path} is not present in V3 when calling read_file"),
        };

        if expected != dir_hash {
            anyhow::bail!("hash of {entry_path} does not match the expected value when calling read_dir");
        }
        if expected != file_hash {
            anyhow::bail!("hash of {entry_path} does not match the expected value when calling read_file");
        }
    }

    Ok(())
}
/// The interface a backend must provide to be wrapped in a [`Container`].
///
/// The [`AsAny`] supertrait is what lets a type-erased backend be inspected
/// through `dyn Any`; [`Debug`] is required because [`Container`] derives it.
#[doc(hidden)]
pub trait AbstractWebc: AsAny + Debug {
/// The format version this backend reports.
fn version(&self) -> Version;
/// The manifest held by this backend.
fn manifest(&self) -> &crate::metadata::Manifest;
/// The names of all atoms; entries may be borrowed from `self` or owned.
fn atom_names(&self) -> Vec<Cow<'_, str>>;
/// The atom called `name`, or `None` when there is no such atom.
fn get_atom(&self, name: &str) -> Option<OwnedBuffer>;
/// A 32-byte hash of the container, when the backend has one.
fn get_webc_hash(&self) -> Option<[u8; 32]>;
/// A 32-byte hash of the atoms, when the backend has one.
fn get_atoms_hash(&self) -> Option<[u8; 32]>;
/// The names of all volumes; entries may be borrowed from `self` or owned.
fn volume_names(&self) -> Vec<Cow<'_, str>>;
/// The volume called `name`, or `None` when it is not available.
fn get_volume(&self, name: &str) -> Option<Volume>;
}
// Defines functions whose bodies are only compiled when a cargo feature is
// enabled.
//
// Every function is always emitted, so call sites never need their own `cfg`.
// With the feature enabled the function has the body as written. With it
// disabled the arguments are discarded and the function returns
// `Err(ContainerError::FeatureNotEnabled { feature })`, so the declared return
// type has to be a `Result` whose error type is `ContainerError`.
//
// Each function must start with `#[cfg(feature = "...")]`; any further
// attributes follow it and are copied onto the generated function. The argument
// repetition has no comma separator, so as written the matcher fits functions
// with a single argument.
macro_rules! guarded_fn {
(
$(
#[cfg(feature = $feature:literal)]
$(#[$meta:meta])*
fn $name:ident($($arg:ident : $arg_ty:ty)*) $(-> $ret:ty)? $body:block
)*
) => {
$(
$(#[$meta])*
fn $name($($arg : $arg_ty)*) $(-> $ret)* {
cfg_if::cfg_if! {
if #[cfg(feature = $feature)] {
$body
} else {
// Touch every argument so the disabled variant has no unused-variable warnings.
$(
let _ = $arg;
)*
Err(ContainerError::FeatureNotEnabled {
feature: $feature,
})
}
}
}
)*
};
}
// Feature-gated parser entry points. Each one builds a backend and wraps it in a
// `Container`; when the named feature is disabled `guarded_fn!` replaces the
// body with `Err(ContainerError::FeatureNotEnabled { .. })`.
//
// NOTE: `guarded_fn!` requires `#[cfg(feature = ..)]` to be the first attribute
// of every function, so keep documentation here as `//` comments.
guarded_fn! {
    // Package tarball read from a buffered reader.
    #[cfg(feature = "package")]
    #[allow(clippy::result_large_err)]
    fn parse_tarball(reader: impl BufRead) -> Result<Container, ContainerError> {
        crate::wasmer_package::Package::from_tarball(reader)
            .map(Container::new)
            .map_err(ContainerError::WasmerPackage)
    }

    // Package directory; the manifest is expected at `<path>/wasmer.toml`.
    #[cfg(feature = "package")]
    #[allow(clippy::result_large_err)]
    fn parse_dir(path: &Path) -> Result<Container, ContainerError> {
        let manifest_path = path.join("wasmer.toml");
        let package = crate::wasmer_package::Package::from_manifest(manifest_path)?;
        Ok(Container::new(package))
    }

    // V1 container backed by a file.
    #[cfg(feature = "v1")]
    #[allow(clippy::result_large_err)]
    fn parse_v1_mmap(f: File) -> Result<Container, ContainerError> {
        let parse_options = crate::v1::ParseOptions::default();
        let mapped = crate::v1::WebCMmap::from_file(f, &parse_options)?;
        Ok(Container::new(mapped))
    }

    // V1 container backed by in-memory bytes.
    #[cfg(feature = "v1")]
    #[allow(clippy::result_large_err)]
    fn parse_v1_owned(bytes: Bytes) -> Result<Container, ContainerError> {
        let parse_options = crate::v1::ParseOptions::default();
        let owned = crate::v1::WebCOwned::parse(bytes, &parse_options)?;
        Ok(Container::new(owned))
    }

    // V2 container backed by in-memory bytes.
    #[cfg(feature = "v2")]
    #[allow(clippy::result_large_err)]
    fn parse_v2_owned(bytes: Bytes) -> Result<Container, ContainerError> {
        let owned = crate::v2::read::OwnedReader::parse(bytes)?;
        Ok(Container::new(owned))
    }

    // V2 container backed by a file.
    #[cfg(feature = "v2")]
    #[allow(clippy::result_large_err)]
    fn parse_v2_mmap(f: File) -> Result<Container, ContainerError> {
        let from_file = crate::v2::read::OwnedReader::from_file(f)?;
        Ok(Container::new(from_file))
    }

    // V3 container backed by in-memory bytes.
    #[cfg(feature = "v3")]
    #[allow(clippy::result_large_err)]
    fn parse_v3_owned(bytes: Bytes) -> Result<Container, ContainerError> {
        let owned = crate::v3::read::OwnedReader::parse(bytes)?;
        Ok(Container::new(owned))
    }

    // V3 container backed by a file.
    #[cfg(feature = "v3")]
    #[allow(clippy::result_large_err)]
    fn parse_v3_mmap(f: File) -> Result<Container, ContainerError> {
        let from_file = crate::v3::read::OwnedReader::from_file(f)?;
        Ok(Container::new(from_file))
    }
}
// `AbstractWebc` glue for the two V1 representations: `WebCMmap` (atoms are
// sliced out of `buffer`) and `WebCOwned` (atoms are sliced out of
// `backing_data`).
#[cfg(feature = "v1")]
mod v1 {
    use super::*;

    impl AbstractWebc for crate::v1::WebCMmap {
        fn version(&self) -> Version {
            Version::V1
        }

        fn manifest(&self) -> &crate::metadata::Manifest {
            &self.manifest
        }

        fn atom_names(&self) -> Vec<Cow<'_, str>> {
            self.get_all_atoms()
                .into_iter()
                .map(|(name, _)| Cow::Owned(name))
                .collect()
        }

        fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
            let all_atoms = self.get_all_atoms();
            let bytes = all_atoms.get(name)?;
            // Turn the borrowed atom back into a range of the shared buffer so
            // the result can be handed out as an owned slice.
            let span = crate::utils::subslice_offsets(&self.buffer, bytes);
            Some(self.buffer.slice(span))
        }

        fn get_webc_hash(&self) -> Option<[u8; 32]> {
            self.webc_hash()
        }

        fn get_atoms_hash(&self) -> Option<[u8; 32]> {
            None
        }

        fn volume_names(&self) -> Vec<Cow<'_, str>> {
            let mut names = Vec::with_capacity(self.volumes.len());
            for name in self.volumes.keys() {
                names.push(Cow::Borrowed(name.as_str()));
            }
            names
        }

        fn get_volume(&self, name: &str) -> Option<Volume> {
            let package_name = self.get_package_name();
            crate::v1::WebC::get_volume(self, &package_name, name).map(|volume| {
                Volume::new(crate::volume::v1::VolumeV1 {
                    volume: volume.clone(),
                    buffer: self.buffer.clone(),
                })
            })
        }
    }

    impl From<crate::v1::WebCMmap> for Container {
        fn from(value: crate::v1::WebCMmap) -> Self {
            Self::new(value)
        }
    }

    impl AbstractWebc for crate::v1::WebCOwned {
        fn version(&self) -> Version {
            Version::V1
        }

        fn manifest(&self) -> &crate::metadata::Manifest {
            &self.manifest
        }

        fn atom_names(&self) -> Vec<Cow<'_, str>> {
            self.get_all_atoms()
                .into_iter()
                .map(|(name, _)| Cow::Owned(name))
                .collect()
        }

        fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
            let all_atoms = self.get_all_atoms();
            let bytes = all_atoms.get(name)?;
            let span = crate::utils::subslice_offsets(&self.backing_data, bytes);
            Some(self.backing_data.slice(span).into())
        }

        fn get_webc_hash(&self) -> Option<[u8; 32]> {
            self.webc_hash()
        }

        fn get_atoms_hash(&self) -> Option<[u8; 32]> {
            None
        }

        fn volume_names(&self) -> Vec<Cow<'_, str>> {
            let mut names = Vec::with_capacity(self.volumes.len());
            for name in self.volumes.keys() {
                names.push(Cow::Borrowed(name.as_str()));
            }
            names
        }

        fn get_volume(&self, name: &str) -> Option<Volume> {
            let package_name = self.get_package_name();
            crate::v1::WebC::get_volume(self, &package_name, name).map(|volume| {
                Volume::new(crate::volume::v1::VolumeV1 {
                    buffer: self.backing_data.clone().into(),
                    volume: volume.clone(),
                })
            })
        }
    }

    impl From<crate::v1::WebCOwned> for Container {
        fn from(value: crate::v1::WebCOwned) -> Self {
            Self::new(value)
        }
    }
}
// `AbstractWebc` glue for the V2 reader.
//
// NOTE(review): several methods below call `self.<same name>(..)`. That only
// terminates if `OwnedReader` has inherent methods with those names (inherent
// methods win over trait methods during lookup) — confirm when renaming
// anything on the reader.
#[cfg(feature = "v2")]
mod v2 {
use super::*;
impl AbstractWebc for crate::v2::read::OwnedReader {
fn version(&self) -> Version {
Version::V2
}
fn manifest(&self) -> &crate::metadata::Manifest {
self.manifest()
}
fn atom_names(&self) -> Vec<Cow<'_, str>> {
self.atom_names().map(Cow::Borrowed).collect()
}
fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
// Clones the value returned by the reader and converts it into an `OwnedBuffer`.
self.get_atom(name).cloned().map(OwnedBuffer::from)
}
fn get_webc_hash(&self) -> Option<[u8; 32]> {
self.webc_hash()
}
// This backend reports no atoms hash.
fn get_atoms_hash(&self) -> Option<[u8; 32]> {
None
}
fn volume_names(&self) -> Vec<Cow<'_, str>> {
crate::v2::read::OwnedReader::volume_names(self)
.map(Cow::Borrowed)
.collect()
}
fn get_volume(&self, name: &str) -> Option<Volume> {
// A lookup error from the reader is collapsed into `None`.
self.get_volume(name).ok().map(Volume::new)
}
}
impl From<crate::v2::read::OwnedReader> for Container {
fn from(value: crate::v2::read::OwnedReader) -> Self {
Container::new(value)
}
}
}
// `AbstractWebc` glue for the V3 reader.
//
// NOTE(review): several methods below call `self.<same name>(..)`. That only
// terminates if `OwnedReader` has inherent methods with those names (inherent
// methods win over trait methods during lookup) — confirm when renaming
// anything on the reader.
#[cfg(feature = "v3")]
mod v3 {
use super::*;
impl AbstractWebc for crate::v3::read::OwnedReader {
fn version(&self) -> Version {
Version::V3
}
fn manifest(&self) -> &crate::metadata::Manifest {
self.manifest()
}
fn atom_names(&self) -> Vec<Cow<'_, str>> {
self.atom_names().map(Cow::Borrowed).collect()
}
fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
// The reader returns a pair; only its second element (the data) is kept.
self.get_atom(name)
.cloned()
.map(|(_, b)| OwnedBuffer::from(b))
}
fn get_webc_hash(&self) -> Option<[u8; 32]> {
self.webc_hash()
}
// The only backend in this file that reports an atoms hash.
fn get_atoms_hash(&self) -> Option<[u8; 32]> {
Some(self.atoms_hash())
}
fn volume_names(&self) -> Vec<Cow<'_, str>> {
crate::v3::read::OwnedReader::volume_names(self)
.map(Cow::Borrowed)
.collect()
}
fn get_volume(&self, name: &str) -> Option<Volume> {
// A lookup error from the reader is collapsed into `None`.
self.get_volume(name).ok().map(Volume::new)
}
}
impl From<crate::v3::read::OwnedReader> for Container {
fn from(value: crate::v3::read::OwnedReader) -> Self {
Container::new(value)
}
}
}
// `AbstractWebc` glue for an in-memory Wasmer package.
//
// NOTE(review): several methods below call `self.<same name>(..)`. That only
// terminates if `Package` has inherent methods with those names (inherent
// methods win over trait methods during lookup) — confirm when renaming
// anything on `Package`.
#[cfg(feature = "package")]
mod package {
use super::*;
impl AbstractWebc for crate::wasmer_package::Package {
// Packages are reported as V3 containers.
fn version(&self) -> Version {
Version::V3
}
fn manifest(&self) -> &crate::metadata::Manifest {
self.manifest()
}
fn atom_names(&self) -> Vec<Cow<'_, str>> {
self.atoms()
.keys()
.map(|s| Cow::Borrowed(s.as_str()))
.collect()
}
fn get_atom(&self, name: &str) -> Option<OwnedBuffer> {
self.atoms().get(name).cloned()
}
fn get_webc_hash(&self) -> Option<[u8; 32]> {
self.webc_hash()
}
// This backend reports no atoms hash.
fn get_atoms_hash(&self) -> Option<[u8; 32]> {
None
}
fn volume_names(&self) -> Vec<Cow<'_, str>> {
self.volume_names()
}
fn get_volume(&self, name: &str) -> Option<Volume> {
self.get_volume(name).map(Volume::new)
}
}
impl From<crate::wasmer_package::Package> for Container {
fn from(value: crate::wasmer_package::Package) -> Self {
Container::new(value)
}
}
}
/// Upcasts a value to [`Any`] so that its concrete type can be inspected at
/// runtime.
///
/// The blanket implementation covers *every* sized `'static` type, wrappers and
/// smart pointers included. To inspect a value behind a trait object, call
/// `as_any` on the `dyn` reference itself; calling it on the wrapper describes
/// the wrapper.
#[doc(hidden)]
pub trait AsAny {
    /// Returns `self` as a `dyn Any` reference.
    fn as_any(&self) -> &(dyn Any + 'static);
}

impl<T: Any> AsAny for T {
    fn as_any(&self) -> &(dyn Any + 'static) {
        let erased: &(dyn Any + 'static) = self;
        erased
    }
}
/// Errors that can occur while loading, unpacking or inspecting a [`Container`].
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum ContainerError {
/// The container's version could not be detected.
#[error("Unable to detect the WEBC version")]
Detect(#[from] crate::DetectError),
/// The version was detected but no parser handles it.
#[error("Unsupported WEBC version, {_0}")]
UnsupportedVersion(crate::Version),
/// The required parser was compiled out; `feature` names the cargo feature.
#[error("Unable to parse because the \"{feature}\" must be enabled")]
FeatureNotEnabled {
feature: &'static str,
},
/// An error from the V1 parser.
#[error(transparent)]
#[cfg(feature = "v1")]
V1(#[from] crate::v1::Error),
/// An error from the V2 reader.
#[error(transparent)]
#[cfg(feature = "v2")]
V2Owned(#[from] crate::v2::read::OwnedReaderError),
/// An error from the V3 reader.
#[error(transparent)]
#[cfg(feature = "v3")]
V3Owned(#[from] crate::v3::read::OwnedReaderError),
/// An error from loading a Wasmer package.
#[error(transparent)]
#[cfg(feature = "package")]
WasmerPackage(#[from] crate::wasmer_package::WasmerPackageError),
/// A path inside the container was invalid.
#[error(transparent)]
Path(#[from] PathSegmentError),
/// `path` could not be opened, created or used as an output location.
#[error("Unable to open \"{}\"", path.display())]
Open {
path: std::path::PathBuf,
#[source]
error: std::io::Error,
},
/// `path` was opened but its contents could not be read.
#[error("Unable to read \"{}\"", path.display())]
Read {
path: std::path::PathBuf,
#[source]
error: std::io::Error,
},
/// Any other I/O failure that carries no path context.
#[error("IOError: {0:?}")]
IOError(#[from] std::io::Error),
}
/// Reports whether `file` starts with the two gzip magic bytes.
///
/// The stream is rewound to its start afterwards on a best-effort basis (a
/// failed rewind is ignored). Input shorter than two bytes, or any read error,
/// yields `false`.
fn is_tarball(mut file: impl Read + Seek) -> bool {
    const TAR_GZ_MAGIC_BYTES: [u8; 2] = [0x1F, 0x8B];

    let mut magic = [0_u8; TAR_GZ_MAGIC_BYTES.len()];
    let looks_gzipped = file.read_exact(&mut magic).is_ok() && magic == TAR_GZ_MAGIC_BYTES;

    // Leave the stream where the real parser expects it, whatever the verdict.
    let _ = file.rewind();

    looks_gzipped
}
#[cfg(test)]
mod tests {
use tempfile::TempDir;
use crate::{wasmer_package::Package, Container};
// Builds a package with one module and one `[fs]` mapping, serializes it, loads
// it back with `Container::from_bytes` and checks what `unpack` writes.
#[test]
fn container_unpacks_atoms() {
let temp = TempDir::new().unwrap();
let wasmer_toml = r#"
[package]
name = "some/package"
version = "0.0.0"
description = "Test package"
[[module]]
name = "foo"
source = "foo.wasm"
abi = "wasi"
[fs]
"/bar" = "bar"
"#;
// Lay out the files the manifest refers to: the module source and the mapped directory.
let manifest = temp.path().join("wasmer.toml");
std::fs::write(&manifest, wasmer_toml).unwrap();
let atom_path = temp.path().join("foo.wasm");
std::fs::write(&atom_path, b"").unwrap();
let bar = temp.path().join("bar");
std::fs::create_dir(&bar).unwrap();
let webc = Package::from_manifest(&manifest)
.unwrap()
.serialize()
.unwrap();
let container = Container::from_bytes(webc).unwrap();
// `out` does not exist yet, so `unpack` has to create it.
let out_dir = temp.path().join("out");
container.unpack(&out_dir, false).unwrap();
// Expected top-level entries of the output directory.
let expected_entries = vec![
"bar", "metadata", "foo", "manifest.json",
];
let entries = std::fs::read_dir(&out_dir)
.unwrap()
.map(|e| e.unwrap())
.collect::<Vec<_>>();
// Same number of entries and every expected name present: nothing extra, nothing missing.
assert_eq!(expected_entries.len(), entries.len());
assert!(expected_entries.iter().all(|e| {
entries
.iter()
.any(|entry| entry.file_name().as_os_str() == *e)
}))
}
}