cloud_copy/
config.rs

1//! Implementation of cloud configuration.
2
3use std::num::NonZero;
4use std::thread::available_parallelism;
5use std::time::Duration;
6
7use secrecy::SecretString;
8use serde::Deserialize;
9use tokio_retry2::strategy::ExponentialFactorBackoff;
10use tokio_retry2::strategy::MaxInterval;
11
/// Number of retry attempts used for network operations when the
/// configuration does not specify a value.
const DEFAULT_RETRIES: usize = 5;
14
15/// Represents authentication configuration for S3.
16#[derive(Debug, Clone, Deserialize)]
17pub struct S3AuthConfig {
18    /// The AWS Access Key ID to use.
19    pub access_key_id: String,
20    /// The AWS Secret Access Key to use.
21    pub secret_access_key: SecretString,
22}
23
24/// Represents authentication configuration for Google Cloud Storage.
25#[derive(Debug, Clone, Deserialize)]
26pub struct GoogleAuthConfig {
27    /// The HMAC Access Key to use.
28    pub access_key: String,
29    /// The HMAC Secret to use.
30    pub secret: SecretString,
31}
32
33/// Represents configuration for Azure Storage.
34#[derive(Debug, Clone, Default, Deserialize)]
35pub struct AzureConfig {
36    /// Enables support for [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite).
37    ///
38    /// Requests for Azurite are expected to use host suffix
39    /// `blob.core.windows.net.localhost`.
40    ///
41    /// Any URLs that use the `az` scheme will be rewritten to use that suffix.
42    ///
43    /// This setting is primarily intended for local testing.
44    #[serde(default)]
45    pub use_azurite: bool,
46}
47
48/// Represents configuration for AWS S3.
49#[derive(Debug, Clone, Default, Deserialize)]
50pub struct S3Config {
51    /// The default region to apply to `s3` schemed URLs.
52    ///
53    /// Defaults to `us-east-1`.
54    #[serde(default)]
55    pub region: Option<String>,
56    /// The auth to use for S3.
57    ///
58    /// If `None`, no authentication header will be put on requests.
59    #[serde(default)]
60    pub auth: Option<S3AuthConfig>,
61    /// Enables support for [localstack](https://github.com/localstack/localstack).
62    ///
63    /// The domain suffix is expected to be `localhost.localstack.cloud`.
64    ///
65    /// Any URLs that use the `s3` scheme will be rewritten to use that suffix.
66    ///
67    /// This setting is primarily intended for local testing.
68    #[serde(default)]
69    pub use_localstack: bool,
70}
71
72/// Represents configuration for Google Cloud Storage.
73#[derive(Debug, Clone, Default, Deserialize)]
74pub struct GoogleConfig {
75    /// The auth to use for Google Cloud Storage.
76    ///
77    /// If `None`, no authentication header will be put on requests.
78    #[serde(default)]
79    pub auth: Option<GoogleAuthConfig>,
80}
81
82/// Configuration used in a cloud copy operation.
83#[derive(Debug, Clone, Default, Deserialize)]
84pub struct Config {
85    /// If `link_to_cache` is `true`, then a downloaded file that is already
86    /// present (and fresh) in the cache will be hard linked at the requested
87    /// destination instead of copied.
88    ///
89    /// If the creation of the hard link fails (for example, the cache exists on
90    /// a different file system than the destination path), then a copy to the
91    /// destination will be made instead.
92    ///
93    /// Note that cache files are created read-only; if the destination is
94    /// created as a hard link, it will also be read-only. It is not recommended
95    /// to make the destination writable as writing to the destination path
96    /// would corrupt the corresponding content entry in the cache.
97    ///
98    /// When `false`, a copy to the destination is always performed.
99    #[serde(default)]
100    pub link_to_cache: bool,
101    /// The block size to use for file transfers.
102    ///
103    /// The default block size depends on the cloud storage service.
104    #[serde(default)]
105    pub block_size: Option<u64>,
106    /// The parallelism level for network operations.
107    ///
108    /// Defaults to the host's available parallelism.
109    #[serde(default)]
110    pub parallelism: Option<usize>,
111    /// The number of retries to attempt for network operations.
112    ///
113    /// Defaults to `5`.
114    #[serde(default)]
115    pub retries: Option<usize>,
116    /// The Azure Storage configuration.
117    #[serde(default)]
118    pub azure: AzureConfig,
119    /// The AWS S3 configuration.
120    #[serde(default)]
121    pub s3: S3Config,
122    /// The Google Cloud Storage configuration.
123    #[serde(default)]
124    pub google: GoogleConfig,
125}
126
127impl Config {
128    /// Gets the parallelism supported for uploads and downloads.
129    ///
130    /// For uploads, this is the number of blocks that may be concurrently
131    /// transferred.
132    ///
133    /// For downloads, this is the number of blocks that may be concurrently
134    /// downloaded if the download supports ranged requests.
135    ///
136    /// Defaults to the host's available parallelism (or 1 if it cannot be
137    /// determined).
138    pub fn parallelism(&self) -> usize {
139        self.parallelism
140            .unwrap_or_else(|| available_parallelism().map(NonZero::get).unwrap_or(1))
141    }
142
143    /// Gets an iterator over the retry durations for network operations.
144    ///
145    /// Retries use an exponential power of 2 backoff, starting at 1 second with
146    /// a maximum duration of 10 minutes.
147    pub fn retry_durations<'a>(&self) -> impl Iterator<Item = Duration> + use<'a> {
148        const INITIAL_DELAY_MILLIS: u64 = 1000;
149        const BASE_FACTOR: f64 = 2.0;
150        const MAX_DURATION: Duration = Duration::from_secs(600);
151
152        ExponentialFactorBackoff::from_millis(INITIAL_DELAY_MILLIS, BASE_FACTOR)
153            .max_duration(MAX_DURATION)
154            .take(self.retries.unwrap_or(DEFAULT_RETRIES))
155    }
156}