cloud_copy/
config.rs

1//! Implementation of cloud configuration.
2
3use std::num::NonZero;
4use std::thread::available_parallelism;
5use std::time::Duration;
6
7use secrecy::SecretString;
8use serde::Deserialize;
9use tokio_retry2::strategy::ExponentialFactorBackoff;
10use tokio_retry2::strategy::MaxInterval;
11
/// Retry count used for network operations when the configuration does not
/// specify one.
const DEFAULT_RETRIES: usize = 5;
14
15/// Represents authentication configuration for S3.
16#[derive(Debug, Clone, Deserialize)]
17pub struct S3AuthConfig {
18    /// The AWS Access Key ID to use.
19    pub access_key_id: String,
20    /// The AWS Secret Access Key to use.
21    pub secret_access_key: SecretString,
22}
23
24/// Represents authentication configuration for Google Cloud Storage.
25#[derive(Debug, Clone, Deserialize)]
26pub struct GoogleAuthConfig {
27    /// The HMAC Access Key to use.
28    pub access_key: String,
29    /// The HMAC Secret to use.
30    pub secret: SecretString,
31}
32
33/// Represents configuration for Azure Storage.
34#[derive(Debug, Clone, Default, Deserialize)]
35pub struct AzureConfig {
36    /// Enables support for [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite).
37    ///
38    /// Requests for Azurite are expected to use host suffix
39    /// `blob.core.windows.net.localhost`.
40    ///
41    /// Any URLs that use the `az` scheme will be rewritten to use that suffix.
42    ///
43    /// This setting is primarily intended for local testing.
44    #[serde(default)]
45    pub use_azurite: bool,
46}
47
48/// Represents configuration for AWS S3.
49#[derive(Debug, Clone, Default, Deserialize)]
50pub struct S3Config {
51    /// The default region to apply to `s3` schemed URLs.
52    ///
53    /// Defaults to `us-east-1`.
54    #[serde(default)]
55    pub region: Option<String>,
56    /// The auth to use for S3.
57    ///
58    /// If `None`, no authentication header will be put on requests.
59    #[serde(default)]
60    pub auth: Option<S3AuthConfig>,
61    /// Enables support for [localstack](https://github.com/localstack/localstack).
62    ///
63    /// The domain suffix is expected to be `localhost.localstack.cloud`.
64    ///
65    /// Any URLs that use the `s3` scheme will be rewritten to use that suffix.
66    ///
67    /// This setting is primarily intended for local testing.
68    #[serde(default)]
69    pub use_localstack: bool,
70}
71
72/// Represents configuration for Google Cloud Storage.
73#[derive(Debug, Clone, Default, Deserialize)]
74pub struct GoogleConfig {
75    /// The auth to use for Google Cloud Storage.
76    ///
77    /// If `None`, no authentication header will be put on requests.
78    #[serde(default)]
79    pub auth: Option<GoogleAuthConfig>,
80}
81
82/// Configuration used in a cloud copy operation.
83#[derive(Debug, Clone, Default, Deserialize)]
84pub struct Config {
85    /// If `link_to_cache` is `true`, then a downloaded file that is already
86    /// present (and fresh) in the cache will be hard linked at the requested
87    /// destination instead of copied.
88    ///
89    /// If the creation of the hard link fails (for example, the cache exists on
90    /// a different file system than the destination path), then a copy to the
91    /// destination will be made instead.
92    ///
93    /// Note that cache files are created read-only; if the destination is
94    /// created as a hard link, it will also be read-only. It is not recommended
95    /// to make the destination writable as writing to the destination path
96    /// would corrupt the corresponding content entry in the cache.
97    ///
98    /// When `false`, a copy to the destination is always performed.
99    #[serde(default)]
100    pub link_to_cache: bool,
101    /// Whether or not the destination should be overwritten.
102    ///
103    /// If `false` and the destination is a local file that already exists, the
104    /// copy operation will fail.
105    ///
106    /// If `false` and the destination is a remote file, a network request will
107    /// be made for the URL; if the request succeeds, the copy operation will
108    /// fail.
109    #[serde(default)]
110    pub overwrite: bool,
111    /// The block size to use for file transfers.
112    ///
113    /// The default block size depends on the cloud storage service.
114    #[serde(default)]
115    pub block_size: Option<u64>,
116    /// The parallelism level for network operations.
117    ///
118    /// Defaults to the host's available parallelism.
119    #[serde(default)]
120    pub parallelism: Option<usize>,
121    /// The number of retries to attempt for network operations.
122    ///
123    /// Defaults to `5`.
124    #[serde(default)]
125    pub retries: Option<usize>,
126    /// The Azure Storage configuration.
127    #[serde(default)]
128    pub azure: AzureConfig,
129    /// The AWS S3 configuration.
130    #[serde(default)]
131    pub s3: S3Config,
132    /// The Google Cloud Storage configuration.
133    #[serde(default)]
134    pub google: GoogleConfig,
135}
136
137impl Config {
138    /// Gets the parallelism supported for uploads and downloads.
139    ///
140    /// For uploads, this is the number of blocks that may be concurrently
141    /// transferred.
142    ///
143    /// For downloads, this is the number of blocks that may be concurrently
144    /// downloaded if the download supports ranged requests.
145    ///
146    /// Defaults to the host's available parallelism (or 1 if it cannot be
147    /// determined).
148    pub fn parallelism(&self) -> usize {
149        self.parallelism
150            .unwrap_or_else(|| available_parallelism().map(NonZero::get).unwrap_or(1))
151    }
152
153    /// Gets an iterator over the retry durations for network operations.
154    ///
155    /// Retries use an exponential power of 2 backoff, starting at 1 second with
156    /// a maximum duration of 10 minutes.
157    pub fn retry_durations<'a>(&self) -> impl Iterator<Item = Duration> + use<'a> {
158        const INITIAL_DELAY_MILLIS: u64 = 1000;
159        const BASE_FACTOR: f64 = 2.0;
160        const MAX_DURATION: Duration = Duration::from_secs(600);
161
162        ExponentialFactorBackoff::from_millis(INITIAL_DELAY_MILLIS, BASE_FACTOR)
163            .max_duration(MAX_DURATION)
164            .take(self.retries.unwrap_or(DEFAULT_RETRIES))
165    }
166}