cloud_copy/config.rs
1//! Implementation of cloud configuration.
2
3use std::num::NonZero;
4use std::thread::available_parallelism;
5use std::time::Duration;
6
7use secrecy::SecretString;
8use serde::Deserialize;
9use tokio_retry2::strategy::ExponentialFactorBackoff;
10use tokio_retry2::strategy::MaxInterval;
11
/// Number of retries attempted for a network operation when the
/// configuration does not specify one.
const DEFAULT_RETRIES: usize = 5;
14
15/// Represents authentication configuration for S3.
16#[derive(Debug, Clone, Deserialize)]
17pub struct S3AuthConfig {
18 /// The AWS Access Key ID to use.
19 pub access_key_id: String,
20 /// The AWS Secret Access Key to use.
21 pub secret_access_key: SecretString,
22}
23
24/// Represents authentication configuration for Google Cloud Storage.
25#[derive(Debug, Clone, Deserialize)]
26pub struct GoogleAuthConfig {
27 /// The HMAC Access Key to use.
28 pub access_key: String,
29 /// The HMAC Secret to use.
30 pub secret: SecretString,
31}
32
33/// Represents configuration for Azure Storage.
34#[derive(Debug, Clone, Default, Deserialize)]
35pub struct AzureConfig {
36 /// Enables support for [Azurite](https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azurite).
37 ///
38 /// Requests for Azurite are expected to use host suffix
39 /// `blob.core.windows.net.localhost`.
40 ///
41 /// Any URLs that use the `az` scheme will be rewritten to use that suffix.
42 ///
43 /// This setting is primarily intended for local testing.
44 #[serde(default)]
45 pub use_azurite: bool,
46}
47
48/// Represents configuration for AWS S3.
49#[derive(Debug, Clone, Default, Deserialize)]
50pub struct S3Config {
51 /// The default region to apply to `s3` schemed URLs.
52 ///
53 /// Defaults to `us-east-1`.
54 #[serde(default)]
55 pub region: Option<String>,
56 /// The auth to use for S3.
57 ///
58 /// If `None`, no authentication header will be put on requests.
59 #[serde(default)]
60 pub auth: Option<S3AuthConfig>,
61 /// Enables support for [localstack](https://github.com/localstack/localstack).
62 ///
63 /// The domain suffix is expected to be `localhost.localstack.cloud`.
64 ///
65 /// Any URLs that use the `s3` scheme will be rewritten to use that suffix.
66 ///
67 /// This setting is primarily intended for local testing.
68 #[serde(default)]
69 pub use_localstack: bool,
70}
71
72/// Represents configuration for Google Cloud Storage.
73#[derive(Debug, Clone, Default, Deserialize)]
74pub struct GoogleConfig {
75 /// The auth to use for Google Cloud Storage.
76 ///
77 /// If `None`, no authentication header will be put on requests.
78 #[serde(default)]
79 pub auth: Option<GoogleAuthConfig>,
80}
81
82/// Configuration used in a cloud copy operation.
83#[derive(Debug, Clone, Default, Deserialize)]
84pub struct Config {
85 /// If `link_to_cache` is `true`, then a downloaded file that is already
86 /// present (and fresh) in the cache will be hard linked at the requested
87 /// destination instead of copied.
88 ///
89 /// If the creation of the hard link fails (for example, the cache exists on
90 /// a different file system than the destination path), then a copy to the
91 /// destination will be made instead.
92 ///
93 /// Note that cache files are created read-only; if the destination is
94 /// created as a hard link, it will also be read-only. It is not recommended
95 /// to make the destination writable as writing to the destination path
96 /// would corrupt the corresponding content entry in the cache.
97 ///
98 /// When `false`, a copy to the destination is always performed.
99 #[serde(default)]
100 pub link_to_cache: bool,
101 /// The block size to use for file transfers.
102 ///
103 /// The default block size depends on the cloud storage service.
104 #[serde(default)]
105 pub block_size: Option<u64>,
106 /// The parallelism level for network operations.
107 ///
108 /// Defaults to the host's available parallelism.
109 #[serde(default)]
110 pub parallelism: Option<usize>,
111 /// The number of retries to attempt for network operations.
112 ///
113 /// Defaults to `5`.
114 #[serde(default)]
115 pub retries: Option<usize>,
116 /// The Azure Storage configuration.
117 #[serde(default)]
118 pub azure: AzureConfig,
119 /// The AWS S3 configuration.
120 #[serde(default)]
121 pub s3: S3Config,
122 /// The Google Cloud Storage configuration.
123 #[serde(default)]
124 pub google: GoogleConfig,
125}
126
127impl Config {
128 /// Gets the parallelism supported for uploads and downloads.
129 ///
130 /// For uploads, this is the number of blocks that may be concurrently
131 /// transferred.
132 ///
133 /// For downloads, this is the number of blocks that may be concurrently
134 /// downloaded if the download supports ranged requests.
135 ///
136 /// Defaults to the host's available parallelism (or 1 if it cannot be
137 /// determined).
138 pub fn parallelism(&self) -> usize {
139 self.parallelism
140 .unwrap_or_else(|| available_parallelism().map(NonZero::get).unwrap_or(1))
141 }
142
143 /// Gets an iterator over the retry durations for network operations.
144 ///
145 /// Retries use an exponential power of 2 backoff, starting at 1 second with
146 /// a maximum duration of 10 minutes.
147 pub fn retry_durations<'a>(&self) -> impl Iterator<Item = Duration> + use<'a> {
148 const INITIAL_DELAY_MILLIS: u64 = 1000;
149 const BASE_FACTOR: f64 = 2.0;
150 const MAX_DURATION: Duration = Duration::from_secs(600);
151
152 ExponentialFactorBackoff::from_millis(INITIAL_DELAY_MILLIS, BASE_FACTOR)
153 .max_duration(MAX_DURATION)
154 .take(self.retries.unwrap_or(DEFAULT_RETRIES))
155 }
156}