Skip to main content

oxigdal_cloud/
lib.rs

1//! Advanced cloud storage backends for OxiGDAL
2//!
3//! This crate provides comprehensive cloud storage integration for OxiGDAL, including:
4//!
5//! - **Cloud Providers**: S3, Azure Blob Storage, Google Cloud Storage
6//! - **Authentication**: OAuth 2.0, service accounts, API keys, SAS tokens, IAM roles
7//! - **Advanced Caching**: Multi-level cache with memory and disk tiers, compression, LRU+LFU eviction
8//! - **Intelligent Prefetching**: Predictive prefetch, access pattern analysis, bandwidth management
9//! - **Retry Logic**: Exponential backoff, jitter, circuit breaker, retry budgets
10//! - **HTTP Backend**: Enhanced HTTP/HTTPS with authentication and retry support
11//!
12//! # Features
13//!
14//! - `s3` - AWS S3 support
15//! - `azure-blob` - Azure Blob Storage support
16//! - `gcs` - Google Cloud Storage support
17//! - `http` - HTTP/HTTPS backend
18//! - `cache` - Advanced caching layer
19//! - `prefetch` - Intelligent prefetching
20//! - `retry` - Retry logic with backoff
21//!
22//! # Examples
23//!
24//! ## AWS S3
25//!
26//! ```rust,no_run
27//! # #[cfg(feature = "s3")]
28//! # async fn example() -> oxigdal_cloud::Result<()> {
29//! use oxigdal_cloud::backends::S3Backend;
30//! use oxigdal_cloud::backends::CloudStorageBackend;
31//!
32//! let backend = S3Backend::new("my-bucket", "data/zarr")
33//!     .with_region("us-west-2");
34//!
35//! // Get object
36//! let data = backend.get("file.tif").await?;
37//!
38//! // Put object
39//! backend.put("output.tif", &data).await?;
40//!
41//! # Ok(())
42//! # }
43//! ```
44//!
45//! ## Multi-cloud Abstraction
46//!
47//! ```rust,no_run
48//! # async fn example() -> oxigdal_cloud::Result<()> {
49//! use oxigdal_cloud::CloudBackend;
50//!
51//! // Parse URL and create appropriate backend
52//! let backend = CloudBackend::from_url("s3://bucket/file.tif")?;
53//! let data = backend.get().await?;
54//!
55//! # Ok(())
56//! # }
57//! ```
58//!
59//! ## Advanced Caching
60//!
61//! ```rust,no_run
62//! # #[cfg(feature = "cache")]
63//! # async fn example() -> oxigdal_cloud::Result<()> {
64//! use oxigdal_cloud::cache::{CacheConfig, MultiLevelCache};
65//! use bytes::Bytes;
66//!
67//! let config = CacheConfig::new()
68//!     .with_max_memory_size(100 * 1024 * 1024) // 100 MB
69//!     .with_cache_dir("/tmp/oxigdal-cache");
70//!
71//! let cache = MultiLevelCache::new(config)?;
72//!
73//! // Cache data
74//! cache.put("key".to_string(), Bytes::from("data")).await?;
75//!
76//! // Retrieve from cache
77//! let data = cache.get(&"key".to_string()).await?;
78//!
79//! # Ok(())
80//! # }
81//! ```
82
83#![cfg_attr(not(feature = "std"), no_std)]
84// Allow partial documentation during development
85#![allow(missing_docs)]
86// Allow dead code for backend features
87#![allow(dead_code)]
88// Allow matches! suggestions - explicit patterns preferred for cloud errors
89#![allow(clippy::match_like_matches_macro)]
90// Allow expect() for internal cloud state invariants
91#![allow(clippy::expect_used)]
92// Allow complex types in cloud interfaces
93#![allow(clippy::type_complexity)]
94// Allow manual div_ceil for bandwidth calculations
95#![allow(clippy::manual_div_ceil)]
96// Allow unused variables in platform-specific code
97#![allow(unused_variables)]
98// Allow collapsible matches for explicit error handling
99#![allow(clippy::collapsible_match)]
100// Allow async fn in traits for cloud operations
101#![allow(async_fn_in_trait)]
102// Allow stripping prefix manually for URL path handling
103#![allow(clippy::manual_strip)]
104// Allow first element access with get(0)
105#![allow(clippy::get_first)]
106// Allow field assignment outside initializer
107#![allow(clippy::field_reassign_with_default)]
108// Allow unused imports in feature-gated modules
109#![allow(unused_imports)]
110// Allow method names that may conflict with std traits
111#![allow(clippy::should_implement_trait)]
112
113#[cfg(feature = "alloc")]
114extern crate alloc;
115
116pub mod auth;
117pub mod backends;
118#[cfg(feature = "cache")]
119pub mod cache;
120pub mod error;
121#[cfg(feature = "async")]
122pub mod multicloud;
123#[cfg(feature = "prefetch")]
124pub mod prefetch;
125#[cfg(feature = "retry")]
126pub mod retry;
127
128pub use error::{CloudError, Result};
129
130#[cfg(feature = "s3")]
131pub use backends::s3::S3Backend;
132
133#[cfg(feature = "azure-blob")]
134pub use backends::azure::AzureBlobBackend;
135
136#[cfg(feature = "gcs")]
137pub use backends::gcs::GcsBackend;
138
139#[cfg(feature = "http")]
140pub use backends::http::HttpBackend;
141
142#[cfg(feature = "async")]
143pub use multicloud::{
144    CloudProvider, CloudProviderConfig, CloudRegion, CrossCloudTransferConfig,
145    CrossCloudTransferResult, MultiCloudManager, MultiCloudManagerBuilder, ProviderHealth,
146    RoutingStrategy, TransferCostEstimate,
147};
148
149use url::Url;
150
151/// Multi-cloud storage backend abstraction: each variant pairs a provider backend with the location of one object, and is compiled in only when its crate feature is enabled
152#[derive(Debug)]
153pub enum CloudBackend {
154    /// AWS S3 backend (requires the `s3` feature)
155    #[cfg(feature = "s3")]
156    S3 {
157        /// S3 backend instance; `from_url` creates it for the bucket named by the URL host
158        backend: S3Backend,
159        /// Object key: the URL path with its leading `/` characters removed
160        key: String,
161    },
162
163    /// Azure Blob Storage backend (requires the `azure-blob` feature)
164    #[cfg(feature = "azure-blob")]
165    Azure {
166        /// Azure backend instance; `from_url` creates it from the URL username (account) and host (container)
167        backend: AzureBlobBackend,
168        /// Blob name: the URL path with its leading `/` characters removed
169        blob: String,
170    },
171
172    /// Google Cloud Storage backend (requires the `gcs` feature)
173    #[cfg(feature = "gcs")]
174    Gcs {
175        /// GCS backend instance; `from_url` creates it for the bucket named by the URL host
176        backend: GcsBackend,
177        /// Object name: the URL path with its leading `/` characters removed
178        object: String,
179    },
180
181    /// HTTP/HTTPS backend (requires the `http` feature); `put` rejects this variant with `CloudError::NotSupported`
182    #[cfg(feature = "http")]
183    Http {
184        /// HTTP backend instance; `from_url` creates it with a base URL that excludes the path
185        backend: HttpBackend,
186        /// Resource path, stored without its leading `/` characters
187        path: String,
188    },
189}
190
191impl CloudBackend {
192    /// Creates a cloud backend from a URL
193    ///
194    /// Supported URL formats:
195    /// - `s3://bucket/key` - AWS S3
196    /// - `az://container/blob` - Azure Blob Storage
197    /// - `gs://bucket/object` - Google Cloud Storage
198    /// - `<http://example.com/path>` or `<https://example.com/path>` - HTTP/HTTPS
199    ///
200    /// # Examples
201    ///
202    /// ```rust,no_run
203    /// # fn example() -> oxigdal_cloud::Result<()> {
204    /// use oxigdal_cloud::CloudBackend;
205    ///
206    /// let backend = CloudBackend::from_url("s3://my-bucket/data/file.tif")?;
207    /// # Ok(())
208    /// # }
209    /// ```
210    pub fn from_url(url: &str) -> Result<Self> {
211        let parsed = Url::parse(url)?;
212
213        match parsed.scheme() {
214            #[cfg(feature = "s3")]
215            "s3" => {
216                let bucket = parsed.host_str().ok_or_else(|| CloudError::InvalidUrl {
217                    url: url.to_string(),
218                })?;
219
220                let key = parsed.path().trim_start_matches('/').to_string();
221
222                Ok(Self::S3 {
223                    backend: S3Backend::new(bucket, ""),
224                    key,
225                })
226            }
227
228            #[cfg(feature = "azure-blob")]
229            "az" | "azure" => {
230                let container = parsed.host_str().ok_or_else(|| CloudError::InvalidUrl {
231                    url: url.to_string(),
232                })?;
233
234                // Account name should be in the username part of the URL
235                let account = parsed.username();
236                if account.is_empty() {
237                    return Err(CloudError::InvalidUrl {
238                        url: url.to_string(),
239                    });
240                }
241
242                let blob = parsed.path().trim_start_matches('/').to_string();
243
244                Ok(Self::Azure {
245                    backend: AzureBlobBackend::new(account, container),
246                    blob,
247                })
248            }
249
250            #[cfg(feature = "gcs")]
251            "gs" | "gcs" => {
252                let bucket = parsed.host_str().ok_or_else(|| CloudError::InvalidUrl {
253                    url: url.to_string(),
254                })?;
255
256                let object = parsed.path().trim_start_matches('/').to_string();
257
258                Ok(Self::Gcs {
259                    backend: GcsBackend::new(bucket),
260                    object,
261                })
262            }
263
264            #[cfg(feature = "http")]
265            "http" | "https" => {
266                // Reconstruct base URL without the path
267                let base_url = format!(
268                    "{}://{}",
269                    parsed.scheme(),
270                    parsed.host_str().ok_or_else(|| CloudError::InvalidUrl {
271                        url: url.to_string(),
272                    })?
273                );
274
275                let path = parsed.path().trim_start_matches('/').to_string();
276
277                Ok(Self::Http {
278                    backend: HttpBackend::new(base_url),
279                    path,
280                })
281            }
282
283            scheme => Err(CloudError::UnsupportedProtocol {
284                protocol: scheme.to_string(),
285            }),
286        }
287    }
288
289    /// Gets data from the cloud backend
290    #[cfg(feature = "async")]
291    pub async fn get(&self) -> Result<bytes::Bytes> {
292        use backends::CloudStorageBackend;
293
294        match self {
295            #[cfg(feature = "s3")]
296            Self::S3 { backend, key } => backend.get(key).await,
297
298            #[cfg(feature = "azure-blob")]
299            Self::Azure { backend, blob } => backend.get(blob).await,
300
301            #[cfg(feature = "gcs")]
302            Self::Gcs { backend, object } => backend.get(object).await,
303
304            #[cfg(feature = "http")]
305            Self::Http { backend, path } => backend.get(path).await,
306        }
307    }
308
309    /// Puts data to the cloud backend
310    #[cfg(feature = "async")]
311    pub async fn put(&self, data: &[u8]) -> Result<()> {
312        use backends::CloudStorageBackend;
313
314        match self {
315            #[cfg(feature = "s3")]
316            Self::S3 { backend, key } => backend.put(key, data).await,
317
318            #[cfg(feature = "azure-blob")]
319            Self::Azure { backend, blob } => backend.put(blob, data).await,
320
321            #[cfg(feature = "gcs")]
322            Self::Gcs { backend, object } => backend.put(object, data).await,
323
324            #[cfg(feature = "http")]
325            Self::Http { .. } => Err(CloudError::NotSupported {
326                operation: "HTTP backend is read-only".to_string(),
327            }),
328        }
329    }
330
331    /// Checks if the object exists
332    #[cfg(feature = "async")]
333    pub async fn exists(&self) -> Result<bool> {
334        use backends::CloudStorageBackend;
335
336        match self {
337            #[cfg(feature = "s3")]
338            Self::S3 { backend, key } => backend.exists(key).await,
339
340            #[cfg(feature = "azure-blob")]
341            Self::Azure { backend, blob } => backend.exists(blob).await,
342
343            #[cfg(feature = "gcs")]
344            Self::Gcs { backend, object } => backend.exists(object).await,
345
346            #[cfg(feature = "http")]
347            Self::Http { backend, path } => backend.exists(path).await,
348        }
349    }
350}
351
#[cfg(test)]
#[allow(clippy::panic)]
mod tests {
    use super::*;

    /// An `s3://` URL must produce the S3 variant, with the host as bucket and the
    /// slash-trimmed path as key.
    #[test]
    #[cfg(feature = "s3")]
    fn test_cloud_backend_from_url_s3() {
        let outcome = CloudBackend::from_url("s3://my-bucket/path/to/file.tif");
        assert!(outcome.is_ok());

        match outcome {
            Ok(CloudBackend::S3 { backend: s3, key }) => {
                assert_eq!(s3.bucket, "my-bucket");
                assert_eq!(key, "path/to/file.tif");
            }
            _ => panic!("Expected S3 backend"),
        }
    }

    /// A `gs://` URL must produce the GCS variant, with the host as bucket and the
    /// slash-trimmed path as object name.
    #[test]
    #[cfg(feature = "gcs")]
    fn test_cloud_backend_from_url_gcs() {
        let outcome = CloudBackend::from_url("gs://my-bucket/path/to/file.tif");
        assert!(outcome.is_ok());

        match outcome {
            Ok(CloudBackend::Gcs { backend: gcs, object }) => {
                assert_eq!(gcs.bucket, "my-bucket");
                assert_eq!(object, "path/to/file.tif");
            }
            _ => panic!("Expected GCS backend"),
        }
    }

    /// An `https://` URL must produce the HTTP variant, with the host inside the base
    /// URL and the slash-trimmed path as resource path.
    #[test]
    #[cfg(feature = "http")]
    fn test_cloud_backend_from_url_http() {
        let outcome = CloudBackend::from_url("https://example.com/path/to/file.tif");
        assert!(outcome.is_ok());

        match outcome {
            Ok(CloudBackend::Http { backend: http, path }) => {
                assert!(http.base_url.contains("example.com"));
                assert_eq!(path, "path/to/file.tif");
            }
            _ => panic!("Expected HTTP backend"),
        }
    }

    /// A URL whose scheme matches no backend must be rejected.
    #[test]
    fn test_cloud_backend_from_url_invalid() {
        assert!(CloudBackend::from_url("invalid://url").is_err());
    }
}