cloud_copy/config.rs
//! Implementation of cloud configuration.

use std::num::NonZero;
use std::thread::available_parallelism;
use std::time::Duration;

use secrecy::SecretString;
use serde::Deserialize;
use tokio_retry2::strategy::ExponentialFactorBackoff;
use tokio_retry2::strategy::MaxInterval;

/// The default number of retries for network operations.
const DEFAULT_RETRIES: usize = 5;

/// Represents authentication configuration for S3.
#[derive(Debug, Clone, Deserialize)]
pub struct S3AuthConfig {
    /// The AWS Access Key ID to use.
    pub access_key_id: String,
    /// The AWS Secret Access Key to use.
    pub secret_access_key: SecretString,
}

/// Represents authentication configuration for Google Cloud Storage.
#[derive(Debug, Clone, Deserialize)]
pub struct GoogleAuthConfig {
    /// The HMAC Access Key to use.
    pub access_key: String,
    /// The HMAC Secret to use.
    pub secret: SecretString,
}

/// Represents configuration for Azure Storage.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct AzureConfig {
    /// Enables support for [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite).
    ///
    /// Requests for Azurite are expected to use the host suffix
    /// `blob.core.windows.net.localhost`.
    ///
    /// Any URLs that use the `az` scheme will be rewritten to use that suffix.
    ///
    /// This setting is primarily intended for local testing.
    #[serde(default)]
    pub use_azurite: bool,
}

/// Represents configuration for AWS S3.
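///
/// # Examples
///
/// A minimal sketch of building this configuration directly in code for local
/// testing against localstack. The crate path `cloud_copy::config` and the
/// credential values are illustrative assumptions, and constructing a
/// `SecretString` from a `String` may vary with the `secrecy` version in use.
///
/// ```no_run
/// use cloud_copy::config::{S3AuthConfig, S3Config};
///
/// let s3 = S3Config {
///     // Default region applied to `s3` URLs.
///     region: Some("us-east-1".to_string()),
///     // Illustrative test credentials.
///     auth: Some(S3AuthConfig {
///         access_key_id: "test".to_string(),
///         secret_access_key: "test".to_string().into(),
///     }),
///     // Rewrite `s3` URLs to the localstack domain suffix.
///     use_localstack: true,
/// };
/// ```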
#[derive(Debug, Clone, Default, Deserialize)]
pub struct S3Config {
    /// The default region to apply to `s3`-schemed URLs.
    ///
    /// Defaults to `us-east-1`.
    #[serde(default)]
    pub region: Option<String>,
    /// The auth to use for S3.
    ///
    /// If `None`, no authentication headers will be added to requests.
    #[serde(default)]
    pub auth: Option<S3AuthConfig>,
    /// Enables support for [localstack](https://github.com/localstack/localstack).
    ///
    /// The domain suffix is expected to be `localhost.localstack.cloud`.
    ///
    /// Any URLs that use the `s3` scheme will be rewritten to use that suffix.
    ///
    /// This setting is primarily intended for local testing.
    #[serde(default)]
    pub use_localstack: bool,
}

/// Represents configuration for Google Cloud Storage.
#[derive(Debug, Clone, Default, Deserialize)]
pub struct GoogleConfig {
    /// The auth to use for Google Cloud Storage.
    ///
    /// If `None`, no authentication headers will be added to requests.
    #[serde(default)]
    pub auth: Option<GoogleAuthConfig>,
}

/// Configuration used in a cloud copy operation.
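///
/// Every field carries a serde default, so a configuration document only needs
/// to specify the fields it wants to override.
///
/// # Examples
///
/// A minimal sketch of building a configuration in code, overriding a few
/// fields and keeping the defaults for the rest (the crate path
/// `cloud_copy::config` is an assumption for illustration):
///
/// ```
/// use cloud_copy::config::Config;
///
/// let config = Config {
///     // Hard link downloads out of the cache when possible.
///     link_to_cache: true,
///     // Limit concurrent block transfers.
///     parallelism: Some(8),
///     // Retry failed network operations up to three times.
///     retries: Some(3),
///     ..Config::default()
/// };
///
/// assert_eq!(config.parallelism(), 8);
/// ```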
#[derive(Debug, Clone, Default, Deserialize)]
pub struct Config {
    /// If `link_to_cache` is `true`, then a downloaded file that is already
    /// present (and fresh) in the cache will be hard linked at the requested
    /// destination instead of copied.
    ///
    /// If the creation of the hard link fails (for example, the cache exists on
    /// a different file system than the destination path), then a copy to the
    /// destination will be made instead.
    ///
    /// Note that cache files are created read-only; if the destination is
    /// created as a hard link, it will also be read-only. It is not recommended
    /// to make the destination writable as writing to the destination path
    /// would corrupt the corresponding content entry in the cache.
    ///
    /// When `false`, a copy to the destination is always performed.
    #[serde(default)]
    pub link_to_cache: bool,
    /// Whether or not the destination should be overwritten.
    ///
    /// If `false` and the destination is a local file that already exists, the
    /// copy operation will fail.
    ///
    /// If `false` and the destination is a remote file, a network request will
    /// be made for the URL; if the request succeeds, the copy operation will
    /// fail.
    #[serde(default)]
    pub overwrite: bool,
    /// The block size to use for file transfers.
    ///
    /// The default block size depends on the cloud storage service.
    #[serde(default)]
    pub block_size: Option<u64>,
    /// The parallelism level for network operations.
    ///
    /// Defaults to the host's available parallelism.
    #[serde(default)]
    pub parallelism: Option<usize>,
    /// The number of retries to attempt for network operations.
    ///
    /// Defaults to `5`.
    #[serde(default)]
    pub retries: Option<usize>,
    /// The Azure Storage configuration.
    #[serde(default)]
    pub azure: AzureConfig,
    /// The AWS S3 configuration.
    #[serde(default)]
    pub s3: S3Config,
    /// The Google Cloud Storage configuration.
    #[serde(default)]
    pub google: GoogleConfig,
}

impl Config {
    /// Gets the parallelism supported for uploads and downloads.
    ///
    /// For uploads, this is the number of blocks that may be concurrently
    /// transferred.
    ///
    /// For downloads, this is the number of blocks that may be concurrently
    /// downloaded if the download supports ranged requests.
    ///
    /// Defaults to the host's available parallelism (or 1 if it cannot be
    /// determined).
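    ///
    /// # Examples
    ///
    /// A sketch of the fallback behavior (the crate path `cloud_copy::config`
    /// is an assumption for illustration):
    ///
    /// ```
    /// use cloud_copy::config::Config;
    ///
    /// // An explicitly configured value is returned as-is.
    /// let config = Config { parallelism: Some(4), ..Config::default() };
    /// assert_eq!(config.parallelism(), 4);
    ///
    /// // Otherwise the host's available parallelism is used, which is at
    /// // least 1.
    /// assert!(Config::default().parallelism() >= 1);
    /// ```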
    pub fn parallelism(&self) -> usize {
        self.parallelism
            .unwrap_or_else(|| available_parallelism().map(NonZero::get).unwrap_or(1))
    }

    /// Gets an iterator over the retry durations for network operations.
    ///
    /// Retries use an exponential backoff with a factor of 2, starting at
    /// 1 second, with a maximum duration of 10 minutes.
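    ///
    /// # Examples
    ///
    /// A sketch showing that the number of yielded delays follows the
    /// configured retry count; the delay values themselves come from the
    /// backoff strategy (the crate path `cloud_copy::config` is an assumption
    /// for illustration):
    ///
    /// ```
    /// use cloud_copy::config::Config;
    ///
    /// // The default configuration yields the default of 5 delays.
    /// assert_eq!(Config::default().retry_durations().count(), 5);
    ///
    /// // An explicit retry count bounds the iterator instead.
    /// let config = Config { retries: Some(2), ..Config::default() };
    /// assert_eq!(config.retry_durations().count(), 2);
    /// ```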
    pub fn retry_durations<'a>(&self) -> impl Iterator<Item = Duration> + use<'a> {
        const INITIAL_DELAY_MILLIS: u64 = 1000;
        const BASE_FACTOR: f64 = 2.0;
        const MAX_DURATION: Duration = Duration::from_secs(600);

        ExponentialFactorBackoff::from_millis(INITIAL_DELAY_MILLIS, BASE_FACTOR)
            .max_duration(MAX_DURATION)
            .take(self.retries.unwrap_or(DEFAULT_RETRIES))
    }
}
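
// A minimal sketch of tests for this module. The first test assumes the `toml`
// crate is available as a dev-dependency to exercise the serde defaults; the
// second relies only on this module.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn empty_document_uses_defaults() {
        // Every field is `#[serde(default)]`, so an empty document is valid.
        let config: Config = toml::from_str("").expect("empty config should deserialize");
        assert!(!config.link_to_cache);
        assert!(!config.overwrite);
        assert!(config.block_size.is_none());
        assert!(config.s3.auth.is_none());
        assert!(!config.s3.use_localstack);
        assert!(!config.azure.use_azurite);
    }

    #[test]
    fn retry_delays_do_not_decrease() {
        let delays: Vec<_> = Config::default().retry_durations().collect();
        assert_eq!(delays.len(), DEFAULT_RETRIES);
        assert!(delays.windows(2).all(|pair| pair[0] <= pair[1]));
    }
}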
166}