#![deny(rustdoc::broken_intra_doc_links, rustdoc::bare_urls, rust_2018_idioms)]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
clippy::explicit_iter_loop,
clippy::future_not_send,
clippy::use_self,
clippy::clone_on_ref_ptr
)]
#![cfg_attr(
feature = "gcp",
doc = "* [`gcp`]: [Google Cloud Storage](https://cloud.google.com/storage/) support. See [`GoogleCloudStorageBuilder`](gcp::GoogleCloudStorageBuilder)"
)]
#![cfg_attr(
feature = "aws",
doc = "* [`aws`]: [Amazon S3](https://aws.amazon.com/s3/). See [`AmazonS3Builder`](aws::AmazonS3Builder)"
)]
#![cfg_attr(
feature = "azure",
doc = "* [`azure`]: [Azure Blob Storage](https://azure.microsoft.com/en-gb/services/storage/blobs/). See [`MicrosoftAzureBuilder`](azure::MicrosoftAzureBuilder)"
)]
#![cfg_attr(
feature = "http",
doc = "* [`http`]: [HTTP/WebDAV Storage](https://datatracker.ietf.org/doc/html/rfc2518). See [`HttpBuilder`](http::HttpBuilder)"
)]
#[cfg(all(
target_arch = "wasm32",
any(feature = "gcp", feature = "aws", feature = "azure", feature = "http")
))]
compile_error!("Features 'gcp', 'aws', 'azure', 'http' are not supported on wasm.");
#[cfg(feature = "aws")]
pub mod aws;
#[cfg(feature = "azure")]
pub mod azure;
pub mod buffered;
#[cfg(not(target_arch = "wasm32"))]
pub mod chunked;
pub mod delimited;
#[cfg(feature = "gcp")]
pub mod gcp;
#[cfg(feature = "http")]
pub mod http;
pub mod limit;
#[cfg(not(target_arch = "wasm32"))]
pub mod local;
pub mod memory;
pub mod path;
pub mod prefix;
#[cfg(feature = "cloud")]
pub mod signer;
pub mod throttle;
#[cfg(feature = "cloud")]
mod client;
#[cfg(feature = "cloud")]
pub use client::{
backoff::BackoffConfig, retry::RetryConfig, ClientConfigKey, ClientOptions, CredentialProvider,
StaticCredentialProvider,
};
#[cfg(feature = "cloud")]
mod config;
mod tags;
pub use tags::TagSet;
pub mod multipart;
mod parse;
mod payload;
mod upload;
mod util;
mod attributes;
#[cfg(any(feature = "integration", test))]
pub mod integration;
pub use attributes::*;
pub use parse::{parse_url, parse_url_opts, ObjectStoreScheme};
pub use payload::*;
pub use upload::*;
pub use util::{coalesce_ranges, collect_bytes, GetRange, OBJECT_STORE_COALESCE_DEFAULT};
use crate::path::Path;
#[cfg(not(target_arch = "wasm32"))]
use crate::util::maybe_spawn_blocking;
use async_trait::async_trait;
use bytes::Bytes;
use chrono::{DateTime, Utc};
use futures::{stream::BoxStream, StreamExt, TryStreamExt};
use snafu::Snafu;
use std::fmt::{Debug, Formatter};
#[cfg(not(target_arch = "wasm32"))]
use std::io::{Read, Seek, SeekFrom};
use std::ops::Range;
use std::sync::Arc;
/// An alias for a dynamically dispatched object store implementation.
pub type DynObjectStore = dyn ObjectStore;
/// Identifier for a multipart upload, represented as a plain `String`.
pub type MultipartId = String;
/// Common asynchronous interface to an object store, with objects addressed by `Path`.
///
/// Implementors provide `put_opts`, `put_multipart_opts`, `get_opts`, `delete`,
/// `list`, `list_with_delimiter`, `copy` and `copy_if_not_exists`; every other
/// method has a default implementation built on top of those.
#[async_trait]
pub trait ObjectStore: std::fmt::Display + Send + Sync + Debug + 'static {
/// Writes `payload` to `location` using `PutOptions::default()`.
///
/// The default implementation forwards to `put_opts`.
async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
self.put_opts(location, payload, PutOptions::default())
.await
}
/// Writes `payload` to `location`; implementations are expected to honour `opts`.
async fn put_opts(
&self,
location: &Path,
payload: PutPayload,
opts: PutOptions,
) -> Result<PutResult>;
/// Starts a multipart upload to `location` using `PutMultipartOpts::default()`.
///
/// The default implementation forwards to `put_multipart_opts`.
async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
self.put_multipart_opts(location, PutMultipartOpts::default())
.await
}
/// Starts a multipart upload to `location`; implementations are expected to
/// honour `opts`. The returned `MultipartUpload` drives the upload.
async fn put_multipart_opts(
&self,
location: &Path,
opts: PutMultipartOpts,
) -> Result<Box<dyn MultipartUpload>>;
/// Fetches the object at `location` using `GetOptions::default()`.
///
/// The default implementation forwards to `get_opts`.
async fn get(&self, location: &Path) -> Result<GetResult> {
self.get_opts(location, GetOptions::default()).await
}
/// Fetches the object at `location`; implementations are expected to honour `options`.
async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult>;
/// Returns the bytes of the object at `location` that fall within `range`.
///
/// The default implementation calls `get_opts` with only `range` set and then
/// collects the whole result with `GetResult::bytes`.
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
let options = GetOptions {
range: Some(range.into()),
..Default::default()
};
self.get_opts(location, options).await?.bytes().await
}
/// Returns the bytes for each of `ranges` of the object at `location`.
///
/// The default implementation delegates to `coalesce_ranges`, passing
/// `get_range` as the fetch function and `OBJECT_STORE_COALESCE_DEFAULT`
/// as the coalescing parameter.
async fn get_ranges(&self, location: &Path, ranges: &[Range<usize>]) -> Result<Vec<Bytes>> {
coalesce_ranges(
ranges,
|range| self.get_range(location, range),
OBJECT_STORE_COALESCE_DEFAULT,
)
.await
}
/// Returns the metadata of the object at `location`.
///
/// The default implementation calls `get_opts` with `head: true` and returns
/// the `meta` field of the result, discarding the payload.
async fn head(&self, location: &Path) -> Result<ObjectMeta> {
let options = GetOptions {
head: true,
..Default::default()
};
Ok(self.get_opts(location, options).await?.meta)
}
/// Deletes the object at `location`.
async fn delete(&self, location: &Path) -> Result<()>;
/// Deletes every path produced by `locations`, yielding each path once its
/// delete has succeeded.
///
/// An `Err` item in the input, or a failed `delete`, is yielded as an `Err`
/// item; the stream itself keeps going. The default implementation keeps up
/// to 10 deletes in flight at a time via `buffered(10)`.
fn delete_stream<'a>(
&'a self,
locations: BoxStream<'a, Result<Path>>,
) -> BoxStream<'a, Result<Path>> {
locations
.map(|location| async {
// Propagate upstream errors unchanged before attempting the delete.
let location = location?;
self.delete(&location).await?;
Ok(location)
})
.buffered(10)
.boxed()
}
/// Streams the metadata of stored objects; `prefix`, when given, is expected
/// to restrict the listing to objects under that prefix.
fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>>;
/// Like `list`, but only yields objects whose location compares greater than
/// `offset`.
///
/// The default implementation filters the output of `list(prefix)`; `Err`
/// items are passed through untouched by `try_filter`.
fn list_with_offset(
&self,
prefix: Option<&Path>,
offset: &Path,
) -> BoxStream<'_, Result<ObjectMeta>> {
// Own the offset so the returned stream does not borrow the `offset` argument.
let offset = offset.clone();
self.list(prefix)
.try_filter(move |f| futures::future::ready(f.location > offset))
.boxed()
}
/// Lists objects and common prefixes in a single `ListResult`; `prefix`, when
/// given, is expected to restrict the listing.
async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult>;
/// Copies the object at `from` to `to`.
async fn copy(&self, from: &Path, to: &Path) -> Result<()>;
/// Moves the object at `from` to `to`.
///
/// The default implementation is a `copy` followed by a `delete` of `from`,
/// so it is not atomic: if the delete fails the object exists at both paths.
async fn rename(&self, from: &Path, to: &Path) -> Result<()> {
self.copy(from, to).await?;
self.delete(from).await
}
/// Copies the object at `from` to `to`; implementations are expected to fail
/// rather than replace an object that already exists at `to`.
async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()>;
/// Moves the object at `from` to `to` using `copy_if_not_exists`.
///
/// The default implementation is `copy_if_not_exists` followed by a `delete`
/// of `from`, so it is not atomic: if the delete fails the object exists at
/// both paths.
async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
self.copy_if_not_exists(from, to).await?;
self.delete(from).await
}
}
// Implements `ObjectStore` for a pointer-like `$type` by forwarding every
// trait method to the value obtained from `self.as_ref()`.
//
// Every method is forwarded explicitly, including those that have default
// bodies in the trait, so an override provided by the wrapped store is the
// one that runs rather than the trait's default.
macro_rules! as_ref_impl {
($type:ty) => {
#[async_trait]
impl ObjectStore for $type {
async fn put(&self, location: &Path, payload: PutPayload) -> Result<PutResult> {
self.as_ref().put(location, payload).await
}
async fn put_opts(
&self,
location: &Path,
payload: PutPayload,
opts: PutOptions,
) -> Result<PutResult> {
self.as_ref().put_opts(location, payload, opts).await
}
async fn put_multipart(&self, location: &Path) -> Result<Box<dyn MultipartUpload>> {
self.as_ref().put_multipart(location).await
}
async fn put_multipart_opts(
&self,
location: &Path,
opts: PutMultipartOpts,
) -> Result<Box<dyn MultipartUpload>> {
self.as_ref().put_multipart_opts(location, opts).await
}
async fn get(&self, location: &Path) -> Result<GetResult> {
self.as_ref().get(location).await
}
async fn get_opts(&self, location: &Path, options: GetOptions) -> Result<GetResult> {
self.as_ref().get_opts(location, options).await
}
async fn get_range(&self, location: &Path, range: Range<usize>) -> Result<Bytes> {
self.as_ref().get_range(location, range).await
}
async fn get_ranges(
&self,
location: &Path,
ranges: &[Range<usize>],
) -> Result<Vec<Bytes>> {
self.as_ref().get_ranges(location, ranges).await
}
async fn head(&self, location: &Path) -> Result<ObjectMeta> {
self.as_ref().head(location).await
}
async fn delete(&self, location: &Path) -> Result<()> {
self.as_ref().delete(location).await
}
// The stream-returning methods are plain (non-async) functions, so they
// forward without an `.await`.
fn delete_stream<'a>(
&'a self,
locations: BoxStream<'a, Result<Path>>,
) -> BoxStream<'a, Result<Path>> {
self.as_ref().delete_stream(locations)
}
fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, Result<ObjectMeta>> {
self.as_ref().list(prefix)
}
fn list_with_offset(
&self,
prefix: Option<&Path>,
offset: &Path,
) -> BoxStream<'_, Result<ObjectMeta>> {
self.as_ref().list_with_offset(prefix, offset)
}
async fn list_with_delimiter(&self, prefix: Option<&Path>) -> Result<ListResult> {
self.as_ref().list_with_delimiter(prefix).await
}
async fn copy(&self, from: &Path, to: &Path) -> Result<()> {
self.as_ref().copy(from, to).await
}
async fn rename(&self, from: &Path, to: &Path) -> Result<()> {
self.as_ref().rename(from, to).await
}
async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
self.as_ref().copy_if_not_exists(from, to).await
}
async fn rename_if_not_exists(&self, from: &Path, to: &Path) -> Result<()> {
self.as_ref().rename_if_not_exists(from, to).await
}
}
};
}
// Shared (`Arc`) and owned (`Box`) trait objects can be used wherever an
// `ObjectStore` is expected.
as_ref_impl!(Arc<dyn ObjectStore>);
as_ref_impl!(Box<dyn ObjectStore>);
/// Result of `ObjectStore::list_with_delimiter`.
#[derive(Debug)]
pub struct ListResult {
/// Prefixes shared by groups of objects in the listing.
pub common_prefixes: Vec<Path>,
/// Metadata of the objects returned by the listing.
pub objects: Vec<ObjectMeta>,
}
/// Metadata describing a single stored object.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ObjectMeta {
/// Path of the object within the store.
pub location: Path,
/// Time the object was last modified; compared against
/// `GetOptions::if_modified_since` and `GetOptions::if_unmodified_since`.
pub last_modified: DateTime<Utc>,
/// Size of the object (presumably in bytes; confirm against the store implementations).
pub size: usize,
/// ETag of the object, if the store reports one; compared against
/// `GetOptions::if_match` and `GetOptions::if_none_match`.
pub e_tag: Option<String>,
/// Version identifier of the object, if the store reports one.
pub version: Option<String>,
}
/// Options for a get request: conditional-request preconditions, an optional
/// byte range, an optional version and a metadata-only flag.
#[derive(Debug, Default)]
pub struct GetOptions {
/// Comma-separated list of ETags, or `*`.
///
/// `check_preconditions` fails with `Error::Precondition` when this is not
/// `*` and none of the listed tags equals the object's ETag.
pub if_match: Option<String>,
/// Comma-separated list of ETags, or `*`.
///
/// `check_preconditions` fails with `Error::NotModified` when this is `*`
/// or one of the listed tags equals the object's ETag.
pub if_none_match: Option<String>,
/// `check_preconditions` fails with `Error::NotModified` when the object's
/// `last_modified` is not later than this time. Only consulted when
/// `if_none_match` is `None`.
pub if_modified_since: Option<DateTime<Utc>>,
/// `check_preconditions` fails with `Error::Precondition` when the object's
/// `last_modified` is later than this time. Only consulted when `if_match`
/// is `None`.
pub if_unmodified_since: Option<DateTime<Utc>>,
/// Portion of the object to fetch; `ObjectStore::get_range` sets this from
/// its `range` argument.
pub range: Option<GetRange>,
/// Version of the object to fetch (interpretation is left to the store
/// implementation; not visible in this file).
pub version: Option<String>,
/// Set by the default `ObjectStore::head`, which only reads the returned
/// metadata. NOTE(review): presumably lets a store skip fetching the body;
/// confirm against the store implementations.
pub head: bool,
}
impl GetOptions {
    /// Checks the conditional-request fields of `self` against `meta`.
    ///
    /// Returns `Error::Precondition` when `if_match` (or, only if `if_match`
    /// is unset, `if_unmodified_since`) is violated, and `Error::NotModified`
    /// when `if_none_match` (or, only if `if_none_match` is unset,
    /// `if_modified_since`) is violated. Returns `Ok(())` otherwise.
    fn check_preconditions(&self, meta: &ObjectMeta) -> Result<()> {
        // An object without an ETag is compared as if its tag were "*".
        let etag = meta.e_tag.as_deref().unwrap_or("*");
        let last_modified = meta.last_modified;

        // True when `etag` appears in a comma-separated list of tags.
        let listed_in = |tags: &str| tags.split(',').map(str::trim).any(|tag| tag == etag);

        // ETag condition wins over the date condition when both are supplied.
        match (&self.if_match, self.if_unmodified_since) {
            (Some(m), _) if m != "*" && !listed_in(m.as_str()) => {
                return Err(Error::Precondition {
                    path: meta.location.to_string(),
                    source: format!("{etag} does not match {m}").into(),
                });
            }
            (None, Some(date)) if last_modified > date => {
                return Err(Error::Precondition {
                    path: meta.location.to_string(),
                    source: format!("{date} < {last_modified}").into(),
                });
            }
            _ => {}
        }

        // Same precedence for the "not modified" pair of conditions.
        match (&self.if_none_match, self.if_modified_since) {
            (Some(m), _) if m == "*" || listed_in(m.as_str()) => {
                return Err(Error::NotModified {
                    path: meta.location.to_string(),
                    source: format!("{etag} matches {m}").into(),
                });
            }
            (None, Some(date)) if last_modified <= date => {
                return Err(Error::NotModified {
                    path: meta.location.to_string(),
                    source: format!("{date} >= {last_modified}").into(),
                });
            }
            _ => {}
        }

        Ok(())
    }
}
/// Result of a get request against an `ObjectStore`.
#[derive(Debug)]
pub struct GetResult {
/// The object's data, either as a local file or as a stream of bytes.
pub payload: GetResultPayload,
/// Metadata of the object; this is what the default `ObjectStore::head` returns.
pub meta: ObjectMeta,
/// Byte range that `bytes` and `into_stream` read from the payload.
pub range: Range<usize>,
/// Attributes attached to the object.
pub attributes: Attributes,
}
/// The data returned by a get request.
pub enum GetResultPayload {
/// An open file together with its path; the path is attached to errors
/// raised while reading the file.
File(std::fs::File, std::path::PathBuf),
/// A stream of byte chunks.
Stream(BoxStream<'static, Result<Bytes>>),
}
impl Debug for GetResultPayload {
    /// Prints only which variant this is; the file handle and the stream
    /// contents are not shown.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::File(..) => "GetResultPayload(File)",
            Self::Stream(..) => "GetResultPayload(Stream)",
        };
        f.write_str(label)
    }
}
impl GetResult {
/// Collects the payload into a single `Bytes` buffer.
///
/// For a `File` payload the read happens inside `maybe_spawn_blocking`: the
/// file is positioned at `range.start` and at most `range.end - range.start`
/// bytes are read. For a `Stream` payload the chunks are gathered with
/// `collect_bytes`, passing that same length as a size hint.
///
/// # Panics
///
/// On `wasm32` targets a `File` payload is not supported and hits
/// `unimplemented!`.
pub async fn bytes(self) -> Result<Bytes> {
let len = self.range.end - self.range.start;
match self.payload {
#[cfg(not(target_arch = "wasm32"))]
GetResultPayload::File(mut file, path) => {
maybe_spawn_blocking(move || {
file.seek(SeekFrom::Start(self.range.start as _))
.map_err(|source| local::Error::Seek {
source,
// Cloned because `path` is still needed for the read error below.
path: path.clone(),
})?;
let mut buffer = Vec::with_capacity(len);
// `take` caps the read at the requested length even if the file is longer.
file.take(len as _)
.read_to_end(&mut buffer)
.map_err(|source| local::Error::UnableToReadBytes { source, path })?;
Ok(buffer.into())
})
.await
}
GetResultPayload::Stream(s) => collect_bytes(s, Some(len)).await,
#[cfg(target_arch = "wasm32")]
_ => unimplemented!("File IO not implemented on wasm32."),
}
}
/// Converts the payload into a stream of `Bytes` chunks.
///
/// A `File` payload is handed to `local::chunked_stream` together with
/// `range` and a chunk size of 8 KiB; a `Stream` payload is returned as-is.
///
/// # Panics
///
/// On `wasm32` targets a `File` payload is not supported and hits
/// `unimplemented!`.
pub fn into_stream(self) -> BoxStream<'static, Result<Bytes>> {
match self.payload {
#[cfg(not(target_arch = "wasm32"))]
GetResultPayload::File(file, path) => {
const CHUNK_SIZE: usize = 8 * 1024;
local::chunked_stream(file, path, self.range, CHUNK_SIZE)
}
GetResultPayload::Stream(s) => s,
#[cfg(target_arch = "wasm32")]
_ => unimplemented!("File IO not implemented on wasm32."),
}
}
}
/// Selects how a put request treats the target location.
///
/// NOTE(review): the variant semantics are implemented by the individual
/// stores and are not visible in this file; the notes below follow the
/// variant names and should be confirmed against those implementations.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub enum PutMode {
/// The default mode; presumably replaces any object already at the location.
#[default]
Overwrite,
/// Presumably only succeeds when no object exists at the location yet.
Create,
/// Presumably only succeeds when the existing object matches the given `UpdateVersion`.
Update(UpdateVersion),
}
/// Identifies a particular state of an object for `PutMode::Update`.
///
/// Can be built from a `PutResult` via `From`, which copies both fields.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct UpdateVersion {
/// ETag of the object state, if known.
pub e_tag: Option<String>,
/// Version identifier of the object state, if known.
pub version: Option<String>,
}
impl From<PutResult> for UpdateVersion {
fn from(value: PutResult) -> Self {
Self {
e_tag: value.e_tag,
version: value.version,
}
}
}
/// Options for a put request.
///
/// Can be built from a `PutMode`, a `TagSet` or an `Attributes` value via
/// `From`, leaving the remaining fields at their defaults.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct PutOptions {
/// How the put treats the target location; defaults to `PutMode::Overwrite`.
pub mode: PutMode,
/// Tags to attach to the object.
pub tags: TagSet,
/// Attributes to attach to the object.
pub attributes: Attributes,
}
impl From<PutMode> for PutOptions {
    /// Builds options with the given `mode`; all other fields keep their
    /// default values.
    fn from(mode: PutMode) -> Self {
        let defaults = Self::default();
        Self { mode, ..defaults }
    }
}
impl From<TagSet> for PutOptions {
    /// Builds options carrying the given `tags`; all other fields keep their
    /// default values.
    fn from(tags: TagSet) -> Self {
        let defaults = Self::default();
        Self { tags, ..defaults }
    }
}
impl From<Attributes> for PutOptions {
    /// Builds options carrying the given `attributes`; all other fields keep
    /// their default values.
    fn from(attributes: Attributes) -> Self {
        let defaults = Self::default();
        Self {
            attributes,
            ..defaults
        }
    }
}
/// Options for a multipart put request.
///
/// Can be built from a `TagSet` or an `Attributes` value via `From`, leaving
/// the other field at its default.
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct PutMultipartOpts {
/// Tags to attach to the object.
pub tags: TagSet,
/// Attributes to attach to the object.
pub attributes: Attributes,
}
impl From<TagSet> for PutMultipartOpts {
    /// Builds multipart options carrying the given `tags`; the attributes keep
    /// their default value.
    fn from(tags: TagSet) -> Self {
        let defaults = Self::default();
        Self { tags, ..defaults }
    }
}
impl From<Attributes> for PutMultipartOpts {
    /// Builds multipart options carrying the given `attributes`; the tags keep
    /// their default value.
    fn from(attributes: Attributes) -> Self {
        let defaults = Self::default();
        Self {
            attributes,
            ..defaults
        }
    }
}
/// Result of a put request.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PutResult {
/// ETag of the written object, if the store reports one.
pub e_tag: Option<String>,
/// Version identifier of the written object, if the store reports one.
pub version: Option<String>,
}
/// A specialized `Result` whose error type defaults to this crate's `Error`.
pub type Result<T, E = Error> = std::result::Result<T, E>;
/// Error type returned by object store operations.
///
/// The `Display` text of each variant comes from its `snafu(display(...))`
/// attribute. Converting into `std::io::Error` maps `NotFound` to
/// `std::io::ErrorKind::NotFound` and every other variant to
/// `std::io::ErrorKind::Other`.
#[derive(Debug, Snafu)]
#[allow(missing_docs)]
pub enum Error {
/// A store-specific failure; `store` names the store that produced it.
#[snafu(display("Generic {} error: {}", store, source))]
Generic {
store: &'static str,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// No object exists at `path`.
#[snafu(display("Object at location {} not found: {}", path, source))]
NotFound {
path: String,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// A path was invalid; `context(false)` lets `?` convert a `path::Error` directly.
#[snafu(
display("Encountered object with invalid path: {}", source),
context(false)
)]
InvalidPath { source: path::Error },
/// A spawned task could not be joined; `context(false)` lets `?` convert a
/// `tokio::task::JoinError` directly.
#[snafu(display("Error joining spawned task: {}", source), context(false))]
JoinError { source: tokio::task::JoinError },
/// The requested operation is not supported.
#[snafu(display("Operation not supported: {}", source))]
NotSupported {
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// An object already exists at `path`.
#[snafu(display("Object at location {} already exists: {}", path, source))]
AlreadyExists {
path: String,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// A request precondition was not met; returned by
/// `GetOptions::check_preconditions` for `if_match` / `if_unmodified_since`.
#[snafu(display("Request precondition failure for path {}: {}", path, source))]
Precondition {
path: String,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// The object was not modified; returned by
/// `GetOptions::check_preconditions` for `if_none_match` / `if_modified_since`.
#[snafu(display("Object at location {} not modified: {}", path, source))]
NotModified {
path: String,
source: Box<dyn std::error::Error + Send + Sync + 'static>,
},
/// The operation has not been implemented yet.
#[snafu(display("Operation not yet implemented."))]
NotImplemented,
/// A configuration `key` was not recognised by `store`.
#[snafu(display("Configuration key: '{}' is not valid for store '{}'.", key, store))]
UnknownConfigurationKey { store: &'static str, key: String },
}
impl From<Error> for std::io::Error {
fn from(e: Error) -> Self {
let kind = match &e {
Error::NotFound { .. } => std::io::ErrorKind::NotFound,
_ => std::io::ErrorKind::Other,
};
Self::new(kind, e)
}
}
// Unit tests for this module, plus helpers shared with store-specific tests.
#[cfg(test)]
mod tests {
use super::*;
use crate::buffered::BufWriter;
use chrono::TimeZone;
use tokio::io::AsyncWriteExt;
// Returns early from the enclosing test unless the `TEST_INTEGRATION`
// environment variable is set.
macro_rules! maybe_skip_integration {
() => {
if std::env::var("TEST_INTEGRATION").is_err() {
eprintln!("Skipping integration test - set TEST_INTEGRATION");
return;
}
};
}
// Re-exported so other modules in the crate can use the macro by path.
pub(crate) use maybe_skip_integration;
// Lists `store` under a prefix built from `path_str`. The returned stream
// borrows only `store`, not the temporary `Path` created here.
fn list_store<'a>(
store: &'a dyn ObjectStore,
path_str: &str,
) -> BoxStream<'a, Result<ObjectMeta>> {
let path = Path::from(path_str);
store.list(Some(&path))
}
// Writes "hello world" to `file.txt`, requests a signed GET URL valid for
// 60 seconds, downloads it with `reqwest` and checks the body matches.
#[cfg(any(feature = "azure", feature = "aws"))]
pub async fn signing<T>(integration: &T)
where
T: ObjectStore + signer::Signer,
{
use reqwest::Method;
use std::time::Duration;
let data = Bytes::from("hello world");
let path = Path::from("file.txt");
integration.put(&path, data.clone().into()).await.unwrap();
let signed = integration
.signed_url(Method::GET, &path, Duration::from_secs(60))
.await
.unwrap();
let resp = reqwest::get(signed).await.unwrap();
let loaded = resp.bytes().await.unwrap();
assert_eq!(data, loaded);
}
// Writes three objects with the same tag set, through `put_opts`,
// `put_multipart_opts` and `BufWriter`. When `validate` is true it then
// fetches each object's tags with `get_tags`, parses the XML response and
// checks the tags equal what was written.
#[cfg(any(feature = "aws", feature = "azure"))]
pub async fn tagging<F, Fut>(storage: Arc<dyn ObjectStore>, validate: bool, get_tags: F)
where
F: Fn(Path) -> Fut + Send + Sync,
Fut: std::future::Future<Output = Result<reqwest::Response>> + Send,
{
use bytes::Buf;
use serde::Deserialize;
// Shape of the XML tagging response: a `TagSet` element holding `Tag`
// elements, each with `Key` and `Value` children.
#[derive(Deserialize)]
struct Tagging {
#[serde(rename = "TagSet")]
list: TagList,
}
#[derive(Debug, Deserialize)]
struct TagList {
#[serde(rename = "Tag")]
tags: Vec<Tag>,
}
#[derive(Debug, Deserialize, Eq, PartialEq)]
#[serde(rename_all = "PascalCase")]
struct Tag {
key: String,
value: String,
}
// Keys and values include punctuation and a space. The list is already in
// ascending key order, which the comparison after sorting relies on.
let tags = vec![
Tag {
key: "foo.com=bar/s".to_string(),
value: "bananas/foo.com-_".to_string(),
},
Tag {
key: "namespace/key.foo".to_string(),
value: "value with a space".to_string(),
},
];
let mut tag_set = TagSet::default();
for t in &tags {
tag_set.push(&t.key, &t.value)
}
// Single-request put.
let path = Path::from("tag_test");
storage
.put_opts(&path, "test".into(), tag_set.clone().into())
.await
.unwrap();
// Multipart put.
let multi_path = Path::from("tag_test_multi");
let mut write = storage
.put_multipart_opts(&multi_path, tag_set.clone().into())
.await
.unwrap();
write.put_part("foo".into()).await.unwrap();
write.complete().await.unwrap();
// Buffered writer.
let buf_path = Path::from("tag_test_buf");
let mut buf = BufWriter::new(storage, buf_path.clone()).with_tags(tag_set);
buf.write_all(b"foo").await.unwrap();
buf.shutdown().await.unwrap();
// Callers that cannot read tags back only exercise the write paths.
if !validate {
return;
}
for path in [path, multi_path, buf_path] {
let resp = get_tags(path.clone()).await.unwrap();
let body = resp.bytes().await.unwrap();
let mut resp: Tagging = quick_xml::de::from_reader(body.reader()).unwrap();
// Sort by key so the comparison does not depend on response order.
resp.list.tags.sort_by(|a, b| a.key.cmp(&b.key));
assert_eq!(resp.list.tags, tags);
}
}
// Listing a fresh in-memory store yields nothing; going through
// `list_store` also checks that the stream's lifetime is tied to the
// store rather than to the prefix path.
#[tokio::test]
async fn test_list_lifetimes() {
let store = memory::InMemory::new();
let mut stream = list_store(&store, "path");
assert!(stream.next().await.is_none());
}
// Exercises `GetOptions::check_preconditions` against an object with
// ETag "123" that was last modified at timestamp 100.
#[test]
fn test_preconditions() {
let mut meta = ObjectMeta {
location: Path::from("test"),
last_modified: Utc.timestamp_nanos(100),
size: 100,
e_tag: Some("123".to_string()),
version: None,
};
// No conditions set: always passes.
let mut options = GetOptions::default();
options.check_preconditions(&meta).unwrap();
// if_modified_since passes only when the object is strictly newer.
options.if_modified_since = Some(Utc.timestamp_nanos(50));
options.check_preconditions(&meta).unwrap();
options.if_modified_since = Some(Utc.timestamp_nanos(100));
options.check_preconditions(&meta).unwrap_err();
options.if_modified_since = Some(Utc.timestamp_nanos(101));
options.check_preconditions(&meta).unwrap_err();
// if_unmodified_since passes when the object is not newer than the date.
options = GetOptions::default();
options.if_unmodified_since = Some(Utc.timestamp_nanos(50));
options.check_preconditions(&meta).unwrap_err();
options.if_unmodified_since = Some(Utc.timestamp_nanos(100));
options.check_preconditions(&meta).unwrap();
options.if_unmodified_since = Some(Utc.timestamp_nanos(101));
options.check_preconditions(&meta).unwrap();
// if_match accepts a comma-separated list (whitespace trimmed) or "*".
options = GetOptions::default();
options.if_match = Some("123".to_string());
options.check_preconditions(&meta).unwrap();
options.if_match = Some("123,354".to_string());
options.check_preconditions(&meta).unwrap();
options.if_match = Some("354, 123,".to_string());
options.check_preconditions(&meta).unwrap();
options.if_match = Some("354".to_string());
options.check_preconditions(&meta).unwrap_err();
options.if_match = Some("*".to_string());
options.check_preconditions(&meta).unwrap();
// With if_match set, if_unmodified_since is not consulted.
options.if_unmodified_since = Some(Utc.timestamp_nanos(200));
options.check_preconditions(&meta).unwrap();
// if_none_match fails on "*" or when any listed tag matches.
options = GetOptions::default();
options.if_none_match = Some("123".to_string());
options.check_preconditions(&meta).unwrap_err();
options.if_none_match = Some("*".to_string());
options.check_preconditions(&meta).unwrap_err();
options.if_none_match = Some("1232".to_string());
options.check_preconditions(&meta).unwrap();
options.if_none_match = Some("23, 123".to_string());
options.check_preconditions(&meta).unwrap_err();
// if_none_match takes precedence: this date alone would pass, but the
// matching ETag still causes a failure.
options.if_modified_since = Some(Utc.timestamp_nanos(10));
options.check_preconditions(&meta).unwrap_err();
// Object without an ETag: "*" in if_none_match still fails ...
meta.e_tag = None;
options = GetOptions::default();
options.if_none_match = Some("*".to_string()); options.check_preconditions(&meta).unwrap_err();
// ... and "*" in if_match still passes.
options = GetOptions::default();
options.if_match = Some("*".to_string()); options.check_preconditions(&meta).unwrap();
}
}