// wdl_engine/config.rs
1//! Implementation of engine configuration.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::collections::HashSet;
6use std::path::Path;
7use std::path::PathBuf;
8use std::sync::Arc;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::anyhow;
13use anyhow::bail;
14use anyhow::ensure;
15use bytesize::ByteSize;
16use indexmap::IndexMap;
17use secrecy::ExposeSecret;
18use serde::Deserialize;
19use serde::Serialize;
20use tokio::process::Command;
21use tracing::error;
22use tracing::warn;
23use url::Url;
24
25use crate::CancellationContext;
26use crate::Events;
27use crate::SYSTEM;
28use crate::Value;
29use crate::backend::TaskExecutionBackend;
30use crate::convert_unit_string;
31use crate::path::is_supported_url;
32
/// The inclusive maximum number of task retries the engine supports.
pub(crate) const MAX_RETRIES: u64 = 100;

/// The default task shell.
pub(crate) const DEFAULT_TASK_SHELL: &str = "bash";

/// The default task container.
pub(crate) const DEFAULT_TASK_CONTAINER: &str = "ubuntu:latest";

/// The default backend name.
///
/// When `Config::backends` is empty and `Config::backend` has this value, the
/// engine's built-in default backend is used.
const DEFAULT_BACKEND_NAME: &str = "default";

/// The maximum size, in bytes, for an LSF job name prefix.
const MAX_LSF_JOB_NAME_PREFIX: usize = 100;

/// The string that replaces redacted serialization fields.
///
/// See [`SecretString`], which serializes as this value when redacted.
const REDACTED: &str = "<REDACTED>";

/// Configuration sentinel value indicating use a system cache directory.
///
/// Compared against string-typed `cache_dir` configuration values; see
/// [`cache_dir`] for how the system cache directory is resolved.
const CACHE_DIR_SENTINEL: &str = "system";
53
54/// Gets the default root cache directory for the user.
55pub(crate) fn cache_dir() -> Result<PathBuf> {
56    /// The subdirectory within the user's cache directory for all caches
57    const CACHE_DIR_ROOT: &str = "sprocket";
58
59    Ok(dirs::cache_dir()
60        .context("failed to determine user cache directory")?
61        .join(CACHE_DIR_ROOT))
62}
63
64/// Helper for `serde`.
65fn is_default_shell(shell: &str) -> bool {
66    shell == DEFAULT_TASK_SHELL
67}
68
69/// Helper for `serde`.
70fn get_default_shell() -> String {
71    DEFAULT_TASK_SHELL.to_string()
72}
73
74/// Helper for `serde`.
75fn get_default_container() -> String {
76    DEFAULT_TASK_CONTAINER.to_string()
77}
78
79/// Helper for `serde`.
80fn get_default_backend_name() -> String {
81    DEFAULT_BACKEND_NAME.to_string()
82}
83
84/// Helper for `serde`.
85fn get_sentinel_cache_dir() -> String {
86    CACHE_DIR_SENTINEL.to_string()
87}
88
/// Represents a secret string that is, by default, redacted for serialization.
///
/// This type is a wrapper around [`secrecy::SecretString`].
///
/// The custom `Serialize` implementation emits `<REDACTED>` unless
/// [`SecretString::unredact`] has been called.
#[derive(Debug, Clone)]
pub struct SecretString {
    /// The inner secret string.
    ///
    /// This type is not serializable.
    inner: secrecy::SecretString,
    /// Whether or not the secret string is redacted for serialization.
    ///
    /// If `true` (the default), `<REDACTED>` is serialized for the string's
    /// value.
    ///
    /// If `false`, the inner secret string is exposed for serialization.
    redacted: bool,
}
106
107impl SecretString {
108    /// Redacts the secret for serialization.
109    ///
110    /// By default, a [`SecretString`] is redacted; when redacted, the string is
111    /// replaced with `<REDACTED>` when serialized.
112    pub fn redact(&mut self) {
113        self.redacted = true;
114    }
115
116    /// Unredacts the secret for serialization.
117    pub fn unredact(&mut self) {
118        self.redacted = false;
119    }
120
121    /// Gets the inner [`secrecy::SecretString`].
122    pub fn inner(&self) -> &secrecy::SecretString {
123        &self.inner
124    }
125}
126
127impl From<String> for SecretString {
128    fn from(s: String) -> Self {
129        Self {
130            inner: s.into(),
131            redacted: true,
132        }
133    }
134}
135
136impl From<&str> for SecretString {
137    fn from(s: &str) -> Self {
138        Self {
139            inner: s.into(),
140            redacted: true,
141        }
142    }
143}
144
145impl Default for SecretString {
146    fn default() -> Self {
147        Self {
148            inner: Default::default(),
149            redacted: true,
150        }
151    }
152}
153
154impl serde::Serialize for SecretString {
155    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
156    where
157        S: serde::Serializer,
158    {
159        use secrecy::ExposeSecret;
160
161        if self.redacted {
162            serializer.serialize_str(REDACTED)
163        } else {
164            serializer.serialize_str(self.inner.expose_secret())
165        }
166    }
167}
168
169impl<'de> serde::Deserialize<'de> for SecretString {
170    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
171    where
172        D: serde::Deserializer<'de>,
173    {
174        let inner = secrecy::SecretString::deserialize(deserializer)?;
175        Ok(Self {
176            inner,
177            redacted: true,
178        })
179    }
180}
181
/// Creates a new type, which can be nulled, for use in configuration structs.
///
/// The inner type cannot be a `String` or the sentinel value will never be
/// recognized.
///
/// Parameters:
/// - `$name`: the generated newtype's identifier
/// - `$inner`: the wrapped value type (must not be `String`)
/// - `$sentinel`: string literal that deserializes to the `None` state
/// - `$value`: identifier bound to the candidate value in `$validation`
/// - `$validation`: boolean expression validating `$value`
/// - `$expected`: human-readable description used in error messages
/// - `$default`: the `Option<$inner>` used for `Default`
#[macro_export]
macro_rules! nullable_config_type {
    (
        $name:ident,
        $inner:ty,
        $sentinel:literal,
        $value:ident,
        $validation:expr,
        $expected:literal,
        $default:expr
    ) => {
        #[doc = concat!("Configuration for [`", stringify!($name), "`].")]
        #[derive(Clone, Debug)]
        pub struct $name(Option<$inner>);

        impl $name {
            #[doc = concat!("Get the inner [`", stringify!($inner), "`].")]
            pub fn inner(&self) -> Option<&$inner> {
                self.0.as_ref()
            }

            #[doc = concat!("Try to create a new `", stringify!($name), "` from a `", stringify!($inner), "`.")]
            pub fn try_new(val: Option<$inner>) -> std::result::Result<Self, anyhow::Error> {
                // `None` is always accepted; `Some` values must satisfy the
                // caller-provided validation expression.
                match val {
                    None => Ok(Self(None)),
                    Some($value) if $validation => Ok(Self(Some($value))),
                    Some($value) => Err(anyhow::anyhow!(format!(
                        "expected {}, got `{}`",
                        $expected, $value
                    ))),
                }
            }
        }

        impl Default for $name {
            fn default() -> Self {
                Self($default)
            }
        }

        impl Serialize for $name {
            fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
            where
                S: serde::Serializer,
            {
                // The `None` state round-trips as the sentinel string.
                match self {
                    $name(None) => $sentinel.serialize(serializer),
                    $name(Some(i)) => i.serialize(serializer),
                }
            }
        }

        impl<'de> Deserialize<'de> for $name {
            fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
            where
                D: serde::Deserializer<'de>,
            {
                // Local helper enum; note this shadows `crate::Value` within
                // the generated impl.
                #[derive(Deserialize)]
                #[serde(untagged)]
                enum Value {
                    Inner($inner),
                    Str(String),
                    Null,
                }

                // Accept an inner value (validated via `try_new`), the
                // sentinel string, or an explicit null; reject other strings.
                match Value::deserialize(deserializer)? {
                    Value::Inner(i) => $name::try_new(Some(i)).map_err(serde::de::Error::custom),
                    Value::Str(s) if s == $sentinel => Ok($name(None)),
                    Value::Str($value) => Err(serde::de::Error::custom(format!(
                        "expected {} or `{}`, got `{}`",
                        $expected, $sentinel, $value
                    ))),
                    Value::Null => Ok($name(None)),
                }
            }
        }
    };
}
264
/// Represents how an evaluation error or cancellation should be handled by the
/// engine.
///
/// Serialized in configuration under the key `fail` (see [`Config`]).
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum FailureMode {
    /// When an error is encountered or evaluation is canceled, evaluation waits
    /// for any outstanding tasks to complete.
    #[default]
    Slow,
    /// When an error is encountered or evaluation is canceled, any outstanding
    /// tasks that are executing are immediately canceled and evaluation waits
    /// for cancellation to complete.
    Fast,
}
279
/// Represents WDL evaluation configuration.
///
/// <div class="warning">
///
/// By default, serialization of [`Config`] will redact the values of secrets.
///
/// Use the [`Config::unredact`] method before serialization to prevent the
/// secrets from being redacted.
///
/// </div>
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Config {
    /// HTTP configuration.
    #[serde(default)]
    pub http: HttpConfig,
    /// Workflow evaluation configuration.
    #[serde(default)]
    pub workflow: WorkflowConfig,
    /// Task evaluation configuration.
    #[serde(default)]
    pub task: TaskConfig,
    /// The name of the backend to use.
    ///
    /// Defaults to `default`, which selects the engine's built-in default
    /// backend when `backends` is empty.
    #[serde(default = "get_default_backend_name")]
    pub backend: String,
    /// Task execution backends configuration.
    ///
    /// If the collection is empty and `backend` has the default value, the
    /// engine default backend is used.
    #[serde(default)]
    pub backends: IndexMap<String, BackendConfig>,
    /// Storage configuration.
    #[serde(default)]
    pub storage: StorageConfig,
    /// (Experimental) Avoid environment-specific output; default is `false`.
    ///
    /// If this option is `true`, selected error messages and log output will
    /// avoid emitting environment-specific output such as absolute paths
    /// and system resource counts.
    ///
    /// This is largely meant to support "golden testing" where a test's success
    /// depends on matching an expected set of outputs exactly. Cues that
    /// help users overcome errors, such as the path to a temporary
    /// directory or the number of CPUs available to the system, confound this
    /// style of testing. This flag is a best-effort experimental attempt to
    /// reduce the impact of these differences in order to allow a wider
    /// range of golden tests to be written.
    ///
    /// [`Config::validate`] rejects this option unless
    /// `experimental_features_enabled` is also `true`.
    #[serde(default)]
    pub suppress_env_specific_output: bool,
    /// (Experimental) Whether experimental features are enabled; default is
    /// `false`.
    ///
    /// Experimental features are provided to users with heavy caveats about
    /// their stability and rough edges. Use at your own risk, but feedback
    /// is quite welcome.
    #[serde(default)]
    pub experimental_features_enabled: bool,
    /// The failure mode for workflow or task evaluation.
    ///
    /// A value of [`FailureMode::Slow`] will result in evaluation waiting for
    /// executing tasks to complete upon error or interruption.
    ///
    /// A value of [`FailureMode::Fast`] will immediately attempt to cancel
    /// executing tasks upon error or interruption.
    #[serde(default, rename = "fail")]
    pub failure_mode: FailureMode,
}
347
348impl Default for Config {
349    fn default() -> Self {
350        Self {
351            http: Default::default(),
352            workflow: Default::default(),
353            task: Default::default(),
354            backend: get_default_backend_name(),
355            backends: Default::default(),
356            storage: Default::default(),
357            suppress_env_specific_output: Default::default(),
358            experimental_features_enabled: Default::default(),
359            failure_mode: Default::default(),
360        }
361    }
362}
363
364impl Config {
365    /// Validates the evaluation configuration.
366    pub async fn validate(&self) -> Result<()> {
367        self.http.validate()?;
368        self.workflow.validate()?;
369        self.task.validate()?;
370
371        if self.backends.is_empty() && self.backend == DEFAULT_BACKEND_NAME {
372            // we'll use the default
373        } else {
374            let backend = &self.backend;
375            if !self.backends.contains_key(backend) {
376                bail!("a backend named `{backend}` is not present in the configuration");
377            }
378        }
379
380        for backend in self.backends.values() {
381            backend.validate(self).await?;
382        }
383
384        self.storage.validate()?;
385
386        if self.suppress_env_specific_output && !self.experimental_features_enabled {
387            bail!("`suppress_env_specific_output` requires enabling experimental features");
388        }
389
390        Ok(())
391    }
392
393    /// Redacts the secrets contained in the configuration.
394    ///
395    /// By default, secrets are redacted for serialization.
396    pub fn redact(&mut self) {
397        for backend in self.backends.values_mut() {
398            backend.redact();
399        }
400
401        if let Some(auth) = &mut self.storage.azure.auth {
402            auth.redact();
403        }
404
405        if let Some(auth) = &mut self.storage.s3.auth {
406            auth.redact();
407        }
408
409        if let Some(auth) = &mut self.storage.google.auth {
410            auth.redact();
411        }
412    }
413
414    /// Unredacts the secrets contained in the configuration.
415    ///
416    /// Calling this method will expose secrets for serialization.
417    pub fn unredact(&mut self) {
418        for backend in self.backends.values_mut() {
419            backend.unredact();
420        }
421
422        if let Some(auth) = &mut self.storage.azure.auth {
423            auth.unredact();
424        }
425
426        if let Some(auth) = &mut self.storage.s3.auth {
427            auth.unredact();
428        }
429
430        if let Some(auth) = &mut self.storage.google.auth {
431            auth.unredact();
432        }
433    }
434
435    /// Gets the backend configuration.
436    ///
437    /// Returns an error if the configuration specifies a named backend that
438    /// isn't present in the configuration.
439    pub fn backend(&self) -> Result<Cow<'_, BackendConfig>> {
440        if !self.backends.is_empty() {
441            let backend = &self.backend;
442            return Ok(Cow::Borrowed(self.backends.get(backend).ok_or_else(
443                || anyhow!("a backend named `{backend}` is not present in the configuration"),
444            )?));
445        }
446        // Use the default
447        Ok(Cow::Owned(BackendConfig::default()))
448    }
449
450    /// Creates a new task execution backend based on this configuration.
451    pub(crate) async fn create_backend(
452        self: &Arc<Self>,
453        run_root_dir: &Path,
454        events: Events,
455        cancellation: CancellationContext,
456    ) -> Result<Arc<dyn TaskExecutionBackend>> {
457        use crate::backend::*;
458
459        match self.backend()?.as_ref() {
460            BackendConfig::Local(_) => {
461                warn!(
462                    "the engine is configured to use the local backend: tasks will not be run \
463                     inside of a container"
464                );
465                Ok(Arc::new(LocalBackend::new(
466                    self.clone(),
467                    events,
468                    cancellation,
469                )?))
470            }
471            BackendConfig::Docker(_) => Ok(Arc::new(
472                DockerBackend::new(self.clone(), events, cancellation).await?,
473            )),
474            BackendConfig::Tes(_) => Ok(Arc::new(
475                TesBackend::new(self.clone(), events, cancellation).await?,
476            )),
477            BackendConfig::LsfApptainer(_) => Ok(Arc::new(LsfApptainerBackend::new(
478                self.clone(),
479                run_root_dir,
480                events,
481                cancellation,
482            )?)),
483            BackendConfig::SlurmApptainer(_) => Ok(Arc::new(SlurmApptainerBackend::new(
484                self.clone(),
485                run_root_dir,
486                events,
487                cancellation,
488            )?)),
489        }
490    }
491}
492
/// Represents HTTP configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct HttpConfig {
    /// The HTTP download cache location.
    ///
    /// Defaults to an operating system specific cache directory for the user.
    #[serde(default = "get_sentinel_cache_dir")]
    pub cache_dir: String,
    /// The number of retries for transferring files.
    ///
    /// NOTE(review): unlike `cache_dir`, this field has no `#[serde(default)]`,
    /// so a configuration omitting it fails to deserialize — confirm intended.
    pub retries: usize,
    /// The maximum parallelism for file transfers.
    ///
    /// Defaults to the host's available parallelism.
    ///
    /// NOTE(review): also lacks `#[serde(default)]`; see `retries` above.
    pub parallelism: Parallelism,
}
509
// Generates the `Parallelism` newtype: a nullable positive `usize` where the
// sentinel string "available" (or null) means "use the host's available
// parallelism"; zero is rejected during construction/deserialization.
nullable_config_type!(
    Parallelism,
    usize,
    "available",
    value,
    value > 0,
    "a positive number",
    None
);
519
520impl Default for HttpConfig {
521    fn default() -> Self {
522        Self {
523            cache_dir: get_sentinel_cache_dir(),
524            retries: 5, // Default as defined in `cloud_copy`.
525            parallelism: Default::default(),
526        }
527    }
528}
529
530impl HttpConfig {
531    /// Validates the HTTP configuration.
532    pub fn validate(&self) -> Result<()> {
533        if let Some(parallelism) = self.parallelism.inner()
534            && *parallelism == 0
535        {
536            bail!("configuration value `http.parallelism` cannot be zero");
537        }
538        Ok(())
539    }
540
541    /// Get the HTTP cache dir.
542    pub fn cache_dir(&self) -> Result<PathBuf> {
543        const DOWNLOADS_CACHE_SUBDIR: &str = "downloads";
544
545        if self.using_system_cache_dir() {
546            cache_dir().map(|d| d.join(DOWNLOADS_CACHE_SUBDIR))
547        } else {
548            Ok(PathBuf::from(&self.cache_dir))
549        }
550    }
551
552    /// Is this configuration using a system cache dir?
553    pub fn using_system_cache_dir(&self) -> bool {
554        self.cache_dir == CACHE_DIR_SENTINEL
555    }
556}
557
/// Represents storage configuration.
///
/// Each field configures one supported cloud storage provider.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct StorageConfig {
    /// Azure Blob Storage configuration.
    #[serde(default)]
    pub azure: AzureStorageConfig,
    /// AWS S3 configuration.
    #[serde(default)]
    pub s3: S3StorageConfig,
    /// Google Cloud Storage configuration.
    #[serde(default)]
    pub google: GoogleStorageConfig,
}
572
573impl StorageConfig {
574    /// Validates the HTTP configuration.
575    pub fn validate(&self) -> Result<()> {
576        self.azure.validate()?;
577        self.s3.validate()?;
578        self.google.validate()?;
579        Ok(())
580    }
581}
582
/// Represents authentication information for Azure Blob Storage.
///
/// Both fields are required to be non-empty; see
/// [`AzureStorageAuthConfig::validate`].
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct AzureStorageAuthConfig {
    /// The Azure Storage account name to use.
    pub account_name: String,
    /// The Azure Storage access key to use.
    pub access_key: SecretString,
}
592
593impl AzureStorageAuthConfig {
594    /// Validates the Azure Blob Storage authentication configuration.
595    pub fn validate(&self) -> Result<()> {
596        if self.account_name.is_empty() {
597            bail!("configuration value `storage.azure.auth.account_name` is required");
598        }
599
600        if self.access_key.inner.expose_secret().is_empty() {
601            bail!("configuration value `storage.azure.auth.access_key` is required");
602        }
603
604        Ok(())
605    }
606
607    /// Redacts the secrets contained in the Azure Blob Storage storage
608    /// authentication configuration.
609    pub fn redact(&mut self) {
610        self.access_key.redact();
611    }
612
613    /// Unredacts the secrets contained in the Azure Blob Storage authentication
614    /// configuration.
615    pub fn unredact(&mut self) {
616        self.access_key.unredact();
617    }
618}
619
/// Represents configuration for Azure Blob Storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct AzureStorageConfig {
    /// The Azure Blob Storage authentication configuration.
    ///
    /// When `None`, no Azure credentials are configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<AzureStorageAuthConfig>,
}
628
629impl AzureStorageConfig {
630    /// Validates the Azure Blob Storage configuration.
631    pub fn validate(&self) -> Result<()> {
632        if let Some(auth) = &self.auth {
633            auth.validate()?;
634        }
635
636        Ok(())
637    }
638}
639
/// Represents authentication information for AWS S3 storage.
///
/// Both fields are required to be non-empty; see
/// [`S3StorageAuthConfig::validate`].
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct S3StorageAuthConfig {
    /// The AWS Access Key ID to use.
    pub access_key_id: String,
    /// The AWS Secret Access Key to use.
    pub secret_access_key: SecretString,
}
649
650impl S3StorageAuthConfig {
651    /// Validates the AWS S3 storage authentication configuration.
652    pub fn validate(&self) -> Result<()> {
653        if self.access_key_id.is_empty() {
654            bail!("configuration value `storage.s3.auth.access_key_id` is required");
655        }
656
657        if self.secret_access_key.inner.expose_secret().is_empty() {
658            bail!("configuration value `storage.s3.auth.secret_access_key` is required");
659        }
660
661        Ok(())
662    }
663
664    /// Redacts the secrets contained in the AWS S3 storage authentication
665    /// configuration.
666    pub fn redact(&mut self) {
667        self.secret_access_key.redact();
668    }
669
670    /// Unredacts the secrets contained in the AWS S3 storage authentication
671    /// configuration.
672    pub fn unredact(&mut self) {
673        self.secret_access_key.unredact();
674    }
675}
676
/// Represents configuration for AWS S3 storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct S3StorageConfig {
    /// The default region to use for S3-schemed URLs (e.g.
    /// `s3://<bucket>/<blob>`).
    ///
    /// Defaults to `us-east-1`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub region: Option<String>,

    /// The AWS S3 storage authentication configuration.
    ///
    /// When `None`, no S3 credentials are configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<S3StorageAuthConfig>,
}
692
693impl S3StorageConfig {
694    /// Validates the AWS S3 storage configuration.
695    pub fn validate(&self) -> Result<()> {
696        if let Some(auth) = &self.auth {
697            auth.validate()?;
698        }
699
700        Ok(())
701    }
702}
703
/// Represents authentication information for Google Cloud Storage.
///
/// Both fields are required to be non-empty; see
/// [`GoogleStorageAuthConfig::validate`].
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct GoogleStorageAuthConfig {
    /// The HMAC Access Key to use.
    pub access_key: String,
    /// The HMAC Secret to use.
    pub secret: SecretString,
}
713
714impl GoogleStorageAuthConfig {
715    /// Validates the Google Cloud Storage authentication configuration.
716    pub fn validate(&self) -> Result<()> {
717        if self.access_key.is_empty() {
718            bail!("configuration value `storage.google.auth.access_key` is required");
719        }
720
721        if self.secret.inner.expose_secret().is_empty() {
722            bail!("configuration value `storage.google.auth.secret` is required");
723        }
724
725        Ok(())
726    }
727
728    /// Redacts the secrets contained in the Google Cloud Storage authentication
729    /// configuration.
730    pub fn redact(&mut self) {
731        self.secret.redact();
732    }
733
734    /// Unredacts the secrets contained in the Google Cloud Storage
735    /// authentication configuration.
736    pub fn unredact(&mut self) {
737        self.secret.unredact();
738    }
739}
740
/// Represents configuration for Google Cloud Storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct GoogleStorageConfig {
    /// The Google Cloud Storage authentication configuration.
    ///
    /// When `None`, no Google Cloud credentials are configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<GoogleStorageAuthConfig>,
}
749
750impl GoogleStorageConfig {
751    /// Validates the Google Cloud Storage configuration.
752    pub fn validate(&self) -> Result<()> {
753        if let Some(auth) = &self.auth {
754            auth.validate()?;
755        }
756
757        Ok(())
758    }
759}
760
/// Represents workflow evaluation configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct WorkflowConfig {
    /// Scatter statement evaluation configuration.
    #[serde(default)]
    pub scatter: ScatterConfig,
}
769
770impl WorkflowConfig {
771    /// Validates the workflow configuration.
772    pub fn validate(&self) -> Result<()> {
773        self.scatter.validate()?;
774        Ok(())
775    }
776}
777
/// The default number of elements to concurrently process for a scatter
/// statement.
///
/// Used by `Default for ScatterConfig`.
const DEFAULT_SCATTER_CONCURRENCY: u64 = 1000;

/// Represents scatter statement evaluation configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct ScatterConfig {
    /// The number of scatter array elements to process concurrently.
    ///
    /// Defaults to `1000`.
    ///
    /// A value of `0` is invalid (rejected by [`ScatterConfig::validate`]).
    ///
    /// Lower values use less memory for evaluation and higher values may better
    /// saturate the task execution backend with tasks to execute for large
    /// scatters.
    ///
    /// This setting does not change how many tasks an execution backend can run
    /// concurrently, but may affect how many tasks are sent to the backend to
    /// run at a time.
    ///
    /// For example, if `concurrency` was set to 10 and we evaluate the
    /// following scatters:
    ///
    /// ```wdl
    /// scatter (i in range(100)) {
    ///     call my_task
    /// }
    ///
    /// scatter (j in range(100)) {
    ///     call my_task as my_task2
    /// }
    /// ```
    ///
    /// Here each scatter is independent and therefore there will be 20 calls
    /// (10 for each scatter) made concurrently. If the task execution
    /// backend can only execute 5 tasks concurrently, 5 tasks will execute
    /// and 15 will be "ready" to execute and waiting for an executing task
    /// to complete.
    ///
    /// If instead we evaluate the following scatters:
    ///
    /// ```wdl
    /// scatter (i in range(100)) {
    ///     scatter (j in range(100)) {
    ///         call my_task
    ///     }
    /// }
    /// ```
    ///
    /// Then there will be 100 calls (10*10 as 10 are made for each outer
    /// element) made concurrently. If the task execution backend can only
    /// execute 5 tasks concurrently, 5 tasks will execute and 95 will be
    /// "ready" to execute and waiting for an executing task to complete.
    ///
    /// <div class="warning">
    /// Warning: nested scatter statements cause exponential memory usage based
    /// on this value, as each scatter statement evaluation requires allocating
    /// new scopes for scatter array elements being processed. </div>
    pub concurrency: u64,
}
840
841impl Default for ScatterConfig {
842    fn default() -> Self {
843        Self {
844            concurrency: DEFAULT_SCATTER_CONCURRENCY,
845        }
846    }
847}
848
849impl ScatterConfig {
850    /// Validates the scatter configuration.
851    pub fn validate(&self) -> Result<()> {
852        if self.concurrency == 0 {
853            bail!("configuration value `workflow.scatter.concurrency` cannot be zero");
854        }
855
856        Ok(())
857    }
858}
859
/// Represents the supported call caching modes.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CallCachingMode {
    /// Call caching is disabled.
    ///
    /// The call cache is not checked and new entries are not added to the
    /// cache.
    ///
    /// This is the default value.
    #[default]
    Off,
    /// Call caching is enabled.
    ///
    /// The call cache is checked and new entries are added to the cache.
    ///
    /// Defaults the `cacheable` task hint to `true`.
    On,
    /// Call caching is enabled only for tasks that explicitly have a
    /// `cacheable` hint set to `true`.
    ///
    /// The call cache is checked and new entries are added to the cache *only*
    /// for tasks that have the `cacheable` hint set to `true`.
    ///
    /// Defaults the `cacheable` task hint to `false`.
    Explicit,
}
887
/// Represents the supported modes for calculating content digests.
///
/// Content digests are used as part of call caching to detect file changes.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContentDigestMode {
    /// Use a strong digest for file content.
    ///
    /// Strong digests require hashing all of the contents of a file; this may
    /// noticeably impact performance for very large files.
    ///
    /// This setting guarantees that a modified file will be detected.
    Strong,
    /// Use a weak digest for file content.
    ///
    /// A weak digest is based solely off of file metadata, such as size and
    /// last modified time.
    ///
    /// This setting cannot guarantee the detection of modified files and may
    /// result in a modified file not causing a call cache entry to be
    /// invalidated.
    ///
    /// However, it is substantially faster than using a strong digest.
    #[default]
    Weak,
}
912
/// Represents task evaluation configuration.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct TaskConfig {
    /// The default maximum number of retries to attempt if a task fails.
    ///
    /// A task's `max_retries` requirement will override this value.
    ///
    /// Values greater than [`MAX_RETRIES`] are rejected by
    /// [`TaskConfig::validate`].
    pub retries: Retries,
    /// The default container to use if a container is not specified in a task's
    /// requirements.
    #[serde(default = "get_default_container")]
    pub container: String,
    /// The default shell to use for tasks.
    ///
    /// <div class="warning">
    /// Warning: the use of a shell other than `bash` may lead to tasks that may
    /// not be portable to other execution engines.
    ///
    /// The shell must support a `-c` option to run a specific script file (i.e.
    /// an evaluated task command).
    ///
    /// Note that this option affects all task commands, so every container that
    /// is used must contain the specified shell.
    ///
    /// If using this setting causes your tasks to fail, please do not file an
    /// issue. </div>
    #[serde(
        default = "get_default_shell",
        skip_serializing_if = "is_default_shell"
    )]
    pub shell: String,
    /// The behavior when a task's `cpu` requirement cannot be met.
    pub cpu_limit_behavior: TaskResourceLimitBehavior,
    /// The behavior when a task's `memory` requirement cannot be met.
    pub memory_limit_behavior: TaskResourceLimitBehavior,
    /// The call cache directory to use for caching task execution results.
    ///
    /// The sentinel value `"system"` (the default) selects an operating system
    /// specific cache directory for the user; see [`TaskConfig::cache_dir`].
    #[serde(default = "get_sentinel_cache_dir")]
    pub cache_dir: String,
    /// The call caching mode to use for tasks.
    pub cache: CallCachingMode,
    /// The content digest mode to use.
    ///
    /// Used as part of call caching.
    pub digests: ContentDigestMode,
    /// Keys of task requirements to exclude from call cache checking.
    ///
    /// When specified, these requirement keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for requirements that may vary between runs
    /// but should not invalidate the cache (e.g., dynamic resource
    /// allocation).
    #[serde(default)]
    pub excluded_cache_requirements: HashSet<String>,
    /// Keys of task hints to exclude from call cache checking.
    ///
    /// When specified, these hint keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for hints that may vary between runs
    /// but should not invalidate the cache.
    #[serde(default)]
    pub excluded_cache_hints: HashSet<String>,
    /// Keys of task inputs to exclude from call cache checking.
    ///
    /// When specified, these input keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for inputs that may vary between runs
    /// but should not affect the task's output.
    #[serde(default)]
    pub excluded_cache_inputs: HashSet<String>,
}
988
// Generates the `Retries` configuration wrapper over an optional `u64`.
// The macro is project-local and its definition is not in view here; based on
// the arguments, values appear to be accepted only when `value <= MAX_RETRIES`,
// with "a number less than or equal to 100" used in error text and `None` as
// the default — NOTE(review): confirm against the macro definition.
nullable_config_type!(
    Retries,
    u64,
    "default",
    value,
    value <= MAX_RETRIES,
    "a number less than or equal to 100",
    None
);
998
999impl Default for TaskConfig {
1000    fn default() -> Self {
1001        Self {
1002            retries: Default::default(),
1003            container: get_default_container(),
1004            shell: get_default_shell(),
1005            cpu_limit_behavior: Default::default(),
1006            memory_limit_behavior: Default::default(),
1007            cache_dir: get_sentinel_cache_dir(),
1008            cache: Default::default(),
1009            digests: Default::default(),
1010            excluded_cache_requirements: Default::default(),
1011            excluded_cache_hints: Default::default(),
1012            excluded_cache_inputs: Default::default(),
1013        }
1014    }
1015}
1016
1017impl TaskConfig {
1018    /// Validates the task evaluation configuration.
1019    pub fn validate(&self) -> Result<()> {
1020        if self.retries.inner().cloned().unwrap_or(0) > MAX_RETRIES {
1021            bail!("configuration value `task.retries` cannot exceed {MAX_RETRIES}");
1022        }
1023
1024        Ok(())
1025    }
1026
1027    /// Get the configured cache dir if it is set.
1028    pub fn cache_dir(&self) -> Option<PathBuf> {
1029        if self.cache_dir == CACHE_DIR_SENTINEL {
1030            None
1031        } else {
1032            Some(PathBuf::from(&self.cache_dir))
1033        }
1034    }
1035}
1036
/// The behavior when a task resource requirement, such as `cpu` or `memory`,
/// cannot be met.
///
/// Serialized in `snake_case` (`try_with_max` / `deny`).
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum TaskResourceLimitBehavior {
    /// Try executing a task with the maximum amount of the resource available
    /// when the task's corresponding requirement cannot be met.
    TryWithMax,
    /// Do not execute a task if its corresponding requirement cannot be met.
    ///
    /// This is the default behavior.
    #[default]
    Deny,
}
1051
/// Represents supported task execution backends.
///
/// The serialized `type` field selects which backend variant is used.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum BackendConfig {
    /// Use the local task execution backend.
    Local(LocalBackendConfig),
    /// Use the Docker task execution backend.
    ///
    /// This is the default backend.
    Docker(DockerBackendConfig),
    /// Use the TES task execution backend.
    Tes(TesBackendConfig),
    /// Use the experimental LSF + Apptainer task execution backend.
    ///
    /// Requires enabling experimental features.
    LsfApptainer(LsfApptainerBackendConfig),
    /// Use the experimental Slurm + Apptainer task execution backend.
    ///
    /// Requires enabling experimental features.
    SlurmApptainer(SlurmApptainerBackendConfig),
}
1071
1072impl Default for BackendConfig {
1073    fn default() -> Self {
1074        Self::Docker(Default::default())
1075    }
1076}
1077
1078impl BackendConfig {
1079    /// Validates the backend configuration.
1080    pub async fn validate(&self, engine_config: &Config) -> Result<()> {
1081        match self {
1082            Self::Local(config) => config.validate(),
1083            Self::Docker(config) => config.validate(),
1084            Self::Tes(config) => config.validate(),
1085            Self::LsfApptainer(config) => config.validate(engine_config).await,
1086            Self::SlurmApptainer(config) => config.validate(engine_config).await,
1087        }
1088    }
1089
1090    /// Converts the backend configuration into a local backend configuration
1091    ///
1092    /// Returns `None` if the backend configuration is not local.
1093    pub fn as_local(&self) -> Option<&LocalBackendConfig> {
1094        match self {
1095            Self::Local(config) => Some(config),
1096            _ => None,
1097        }
1098    }
1099
1100    /// Converts the backend configuration into a Docker backend configuration
1101    ///
1102    /// Returns `None` if the backend configuration is not Docker.
1103    pub fn as_docker(&self) -> Option<&DockerBackendConfig> {
1104        match self {
1105            Self::Docker(config) => Some(config),
1106            _ => None,
1107        }
1108    }
1109
1110    /// Converts the backend configuration into a TES backend configuration
1111    ///
1112    /// Returns `None` if the backend configuration is not TES.
1113    pub fn as_tes(&self) -> Option<&TesBackendConfig> {
1114        match self {
1115            Self::Tes(config) => Some(config),
1116            _ => None,
1117        }
1118    }
1119
1120    /// Converts the backend configuration into a LSF Apptainer backend
1121    /// configuration
1122    ///
1123    /// Returns `None` if the backend configuration is not LSF Apptainer.
1124    pub fn as_lsf_apptainer(&self) -> Option<&LsfApptainerBackendConfig> {
1125        match self {
1126            Self::LsfApptainer(config) => Some(config),
1127            _ => None,
1128        }
1129    }
1130
1131    /// Converts the backend configuration into a Slurm Apptainer backend
1132    /// configuration
1133    ///
1134    /// Returns `None` if the backend configuration is not Slurm Apptainer.
1135    pub fn as_slurm_apptainer(&self) -> Option<&SlurmApptainerBackendConfig> {
1136        match self {
1137            Self::SlurmApptainer(config) => Some(config),
1138            _ => None,
1139        }
1140    }
1141
1142    /// Redacts the secrets contained in the backend configuration.
1143    pub fn redact(&mut self) {
1144        match self {
1145            Self::Local(_) | Self::Docker(_) | Self::LsfApptainer(_) | Self::SlurmApptainer(_) => {}
1146            Self::Tes(config) => config.redact(),
1147        }
1148    }
1149
1150    /// Unredacts the secrets contained in the backend configuration.
1151    pub fn unredact(&mut self) {
1152        match self {
1153            Self::Local(_) | Self::Docker(_) | Self::LsfApptainer(_) | Self::SlurmApptainer(_) => {}
1154            Self::Tes(config) => config.unredact(),
1155        }
1156    }
1157}
1158
/// Represents configuration for the local task execution backend.
///
/// <div class="warning">
/// Warning: the local task execution backend spawns processes on the host
/// directly without the use of a container; only use this backend on trusted
/// WDL. </div>
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct LocalBackendConfig {
    /// Set the number of CPUs available for task execution.
    ///
    /// Defaults to the number of logical CPUs for the host.
    ///
    /// The value cannot be zero or exceed the host's number of CPUs; both are
    /// rejected by [`LocalBackendConfig::validate`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu: Option<u64>,

    /// Set the total amount of memory for task execution as a unit string (e.g.
    /// `2 GiB`).
    ///
    /// Defaults to the total amount of memory for the host.
    ///
    /// The value cannot be zero or exceed the host's total amount of memory;
    /// both are rejected by [`LocalBackendConfig::validate`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<String>,
}
1185
1186impl LocalBackendConfig {
1187    /// Validates the local task execution backend configuration.
1188    pub fn validate(&self) -> Result<()> {
1189        if let Some(cpu) = self.cpu {
1190            if cpu == 0 {
1191                bail!("local backend configuration value `cpu` cannot be zero");
1192            }
1193
1194            let total = SYSTEM.cpus().len() as u64;
1195            if cpu > total {
1196                bail!(
1197                    "local backend configuration value `cpu` cannot exceed the virtual CPUs \
1198                     available to the host ({total})"
1199                );
1200            }
1201        }
1202
1203        if let Some(memory) = &self.memory {
1204            let memory = convert_unit_string(memory).with_context(|| {
1205                format!("local backend configuration value `memory` has invalid value `{memory}`")
1206            })?;
1207
1208            if memory == 0 {
1209                bail!("local backend configuration value `memory` cannot be zero");
1210            }
1211
1212            let total = SYSTEM.total_memory();
1213            if memory > total {
1214                bail!(
1215                    "local backend configuration value `memory` cannot exceed the total memory of \
1216                     the host ({total} bytes)"
1217                );
1218            }
1219        }
1220
1221        Ok(())
1222    }
1223}
1224
/// Default for [`DockerBackendConfig::cleanup`]: task containers are removed
/// after the task completes.
const fn cleanup_default() -> bool {
    true
}
1229
/// Represents configuration for the Docker backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct DockerBackendConfig {
    /// Whether or not to remove a task's container after the task completes.
    ///
    /// Defaults to `true`.
    #[serde(default = "cleanup_default")]
    pub cleanup: bool,
}
1240
1241impl DockerBackendConfig {
1242    /// Validates the Docker backend configuration.
1243    pub fn validate(&self) -> Result<()> {
1244        Ok(())
1245    }
1246}
1247
1248impl Default for DockerBackendConfig {
1249    fn default() -> Self {
1250        Self { cleanup: true }
1251    }
1252}
1253
/// Represents HTTP basic authentication configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct BasicAuthConfig {
    /// The HTTP basic authentication username.
    #[serde(default)]
    pub username: String,
    /// The HTTP basic authentication password.
    ///
    /// Held as a [`SecretString`] so it can be redacted (see
    /// [`BasicAuthConfig::redact`]).
    #[serde(default)]
    pub password: SecretString,
}
1265
1266impl BasicAuthConfig {
1267    /// Validates the HTTP basic auth configuration.
1268    pub fn validate(&self) -> Result<()> {
1269        Ok(())
1270    }
1271
1272    /// Redacts the secrets contained in the HTTP basic auth configuration.
1273    pub fn redact(&mut self) {
1274        self.password.redact();
1275    }
1276
1277    /// Unredacts the secrets contained in the HTTP basic auth configuration.
1278    pub fn unredact(&mut self) {
1279        self.password.unredact();
1280    }
1281}
1282
/// Represents HTTP bearer token authentication configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct BearerAuthConfig {
    /// The HTTP bearer authentication token.
    ///
    /// Held as a [`SecretString`] so it can be redacted (see
    /// [`BearerAuthConfig::redact`]).
    #[serde(default)]
    pub token: SecretString,
}
1291
1292impl BearerAuthConfig {
1293    /// Validates the HTTP bearer auth configuration.
1294    pub fn validate(&self) -> Result<()> {
1295        Ok(())
1296    }
1297
1298    /// Redacts the secrets contained in the HTTP bearer auth configuration.
1299    pub fn redact(&mut self) {
1300        self.token.redact();
1301    }
1302
1303    /// Unredacts the secrets contained in the HTTP bearer auth configuration.
1304    pub fn unredact(&mut self) {
1305        self.token.unredact();
1306    }
1307}
1308
/// Represents the kind of authentication for a TES backend.
///
/// The serialized `type` field selects the authentication scheme.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum TesBackendAuthConfig {
    /// Use basic authentication for the TES backend.
    Basic(BasicAuthConfig),
    /// Use bearer token authentication for the TES backend.
    Bearer(BearerAuthConfig),
}
1318
1319impl TesBackendAuthConfig {
1320    /// Validates the TES backend authentication configuration.
1321    pub fn validate(&self) -> Result<()> {
1322        match self {
1323            Self::Basic(config) => config.validate(),
1324            Self::Bearer(config) => config.validate(),
1325        }
1326    }
1327
1328    /// Redacts the secrets contained in the TES backend authentication
1329    /// configuration.
1330    pub fn redact(&mut self) {
1331        match self {
1332            Self::Basic(auth) => auth.redact(),
1333            Self::Bearer(auth) => auth.redact(),
1334        }
1335    }
1336
1337    /// Unredacts the secrets contained in the TES backend authentication
1338    /// configuration.
1339    pub fn unredact(&mut self) {
1340        match self {
1341            Self::Basic(auth) => auth.unredact(),
1342            Self::Bearer(auth) => auth.unredact(),
1343        }
1344    }
1345}
1346
1347/// Represents configuration for the Task Execution Service (TES) backend.
1348#[derive(Debug, Default, Clone, Serialize, Deserialize)]
1349#[serde(rename_all = "snake_case", deny_unknown_fields)]
1350pub struct TesBackendConfig {
1351    /// The URL of the Task Execution Service.
1352    #[serde(default, skip_serializing_if = "Option::is_none")]
1353    pub url: Option<Url>,
1354
1355    /// The authentication configuration for the TES backend.
1356    #[serde(default, skip_serializing_if = "Option::is_none")]
1357    pub auth: Option<TesBackendAuthConfig>,
1358
1359    /// The root cloud storage URL for storing inputs.
1360    #[serde(default, skip_serializing_if = "Option::is_none")]
1361    pub inputs: Option<Url>,
1362
1363    /// The root cloud storage URL for storing outputs.
1364    #[serde(default, skip_serializing_if = "Option::is_none")]
1365    pub outputs: Option<Url>,
1366
1367    /// The polling interval, in seconds, for checking task status.
1368    ///
1369    /// Defaults to 1 second.
1370    #[serde(default, skip_serializing_if = "Option::is_none")]
1371    pub interval: Option<u64>,
1372
1373    /// The number of retries after encountering an error communicating with the
1374    /// TES server.
1375    ///
1376    /// Defaults to no retries.
1377    pub retries: Option<u32>,
1378
1379    /// The maximum number of concurrent requests the backend will send to the
1380    /// TES server.
1381    ///
1382    /// Defaults to 10 concurrent requests.
1383    #[serde(default, skip_serializing_if = "Option::is_none")]
1384    pub max_concurrency: Option<u32>,
1385
1386    /// Whether or not the TES server URL may use an insecure protocol like
1387    /// HTTP.
1388    #[serde(default)]
1389    pub insecure: bool,
1390}
1391
1392impl TesBackendConfig {
1393    /// Validates the TES backend configuration.
1394    pub fn validate(&self) -> Result<()> {
1395        match &self.url {
1396            Some(url) => {
1397                if !self.insecure && url.scheme() != "https" {
1398                    bail!(
1399                        "TES backend configuration value `url` has invalid value `{url}`: URL \
1400                         must use a HTTPS scheme"
1401                    );
1402                }
1403            }
1404            None => bail!("TES backend configuration value `url` is required"),
1405        }
1406
1407        if let Some(auth) = &self.auth {
1408            auth.validate()?;
1409        }
1410
1411        if let Some(max_concurrency) = self.max_concurrency
1412            && max_concurrency == 0
1413        {
1414            bail!("TES backend configuration value `max_concurrency` cannot be zero");
1415        }
1416
1417        match &self.inputs {
1418            Some(url) => {
1419                if !is_supported_url(url.as_str()) {
1420                    bail!(
1421                        "TES backend storage configuration value `inputs` has invalid value \
1422                         `{url}`: URL scheme is not supported"
1423                    );
1424                }
1425
1426                if !url.path().ends_with('/') {
1427                    bail!(
1428                        "TES backend storage configuration value `inputs` has invalid value \
1429                         `{url}`: URL path must end with a slash"
1430                    );
1431                }
1432            }
1433            None => bail!("TES backend configuration value `inputs` is required"),
1434        }
1435
1436        match &self.outputs {
1437            Some(url) => {
1438                if !is_supported_url(url.as_str()) {
1439                    bail!(
1440                        "TES backend storage configuration value `outputs` has invalid value \
1441                         `{url}`: URL scheme is not supported"
1442                    );
1443                }
1444
1445                if !url.path().ends_with('/') {
1446                    bail!(
1447                        "TES backend storage configuration value `outputs` has invalid value \
1448                         `{url}`: URL path must end with a slash"
1449                    );
1450                }
1451            }
1452            None => bail!("TES backend storage configuration value `outputs` is required"),
1453        }
1454
1455        Ok(())
1456    }
1457
1458    /// Redacts the secrets contained in the TES backend configuration.
1459    pub fn redact(&mut self) {
1460        if let Some(auth) = &mut self.auth {
1461            auth.redact();
1462        }
1463    }
1464
1465    /// Unredacts the secrets contained in the TES backend configuration.
1466    pub fn unredact(&mut self) {
1467        if let Some(auth) = &mut self.auth {
1468            auth.unredact();
1469        }
1470    }
1471}
1472
1473/// Configuration for the Apptainer container runtime.
1474#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
1475#[serde(rename_all = "snake_case", deny_unknown_fields)]
1476pub struct ApptainerConfig {
1477    /// Path to the Apptainer (or Singularity) executable.
1478    ///
1479    /// Defaults to `"apptainer"`. Set to `"singularity"` or a full path
1480    /// (e.g., `/usr/local/bin/apptainer`) if the executable is not on `PATH`
1481    /// or if using Singularity instead.
1482    #[serde(default = "default_apptainer_executable")]
1483    pub executable: String,
1484
1485    /// Path to a shared directory for caching pulled `.sif` images.
1486    ///
1487    /// When set, pulled images are stored in this directory and shared
1488    /// across runs. When unset, images are stored in a per-run directory
1489    /// that is not shared.
1490    #[serde(default, skip_serializing_if = "Option::is_none")]
1491    pub image_cache_dir: Option<PathBuf>,
1492
1493    /// Additional command-line arguments to pass to `apptainer exec` when
1494    /// executing tasks.
1495    pub extra_apptainer_exec_args: Option<Vec<String>>,
1496}
1497
/// The default Apptainer executable name.
const DEFAULT_APPTAINER_EXECUTABLE: &str = "apptainer";

/// Returns the default Apptainer executable name for serde deserialization.
fn default_apptainer_executable() -> String {
    DEFAULT_APPTAINER_EXECUTABLE.to_string()
}
1505
1506impl Default for ApptainerConfig {
1507    fn default() -> Self {
1508        Self {
1509            executable: default_apptainer_executable(),
1510            image_cache_dir: None,
1511            extra_apptainer_exec_args: None,
1512        }
1513    }
1514}
1515
impl ApptainerConfig {
    /// Validate that Apptainer is appropriately configured.
    ///
    /// Currently a no-op that always succeeds; no environment-dependent
    /// checks (e.g. locating the configured executable) are performed yet.
    pub async fn validate(&self) -> Result<(), anyhow::Error> {
        Ok(())
    }
}
1522
/// Configuration for an LSF queue.
///
/// Each queue can optionally have per-task CPU and memory limits set so that
/// tasks which are too large to be scheduled on that queue will fail
/// immediately instead of pending indefinitely. In the future, these limits may
/// be populated or validated by live information from the cluster, but
/// for now they must be manually based on the user's understanding of the
/// cluster configuration.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct LsfQueueConfig {
    /// The name of the queue; this is the string passed to `bsub -q
    /// <queue_name>`.
    pub name: String,
    /// The maximum number of CPUs this queue can provision for a single task.
    ///
    /// `None` means no per-task CPU limit is configured for this queue.
    pub max_cpu_per_task: Option<u64>,
    /// The maximum memory this queue can provision for a single task.
    ///
    /// `None` means no per-task memory limit is configured for this queue.
    pub max_memory_per_task: Option<ByteSize>,
}
1542
1543impl LsfQueueConfig {
1544    /// Validate that this LSF queue exists according to the local `bqueues`.
1545    pub async fn validate(&self, name: &str) -> Result<(), anyhow::Error> {
1546        let queue = &self.name;
1547        ensure!(!queue.is_empty(), "{name}_lsf_queue name cannot be empty");
1548        if let Some(max_cpu_per_task) = self.max_cpu_per_task {
1549            ensure!(
1550                max_cpu_per_task > 0,
1551                "{name}_lsf_queue `{queue}` must allow at least 1 CPU to be provisioned"
1552            );
1553        }
1554        if let Some(max_memory_per_task) = self.max_memory_per_task {
1555            ensure!(
1556                max_memory_per_task.as_u64() > 0,
1557                "{name}_lsf_queue `{queue}` must allow at least some memory to be provisioned"
1558            );
1559        }
1560        match tokio::time::timeout(
1561            // 10 seconds is rather arbitrary; `bqueues` ordinarily returns extremely quickly, but
1562            // we don't want things to run away on a misconfigured system
1563            std::time::Duration::from_secs(10),
1564            Command::new("bqueues").arg(queue).output(),
1565        )
1566        .await
1567        {
1568            Ok(output) => {
1569                let output = output.context("validating LSF queue")?;
1570                if !output.status.success() {
1571                    let stdout = String::from_utf8_lossy(&output.stdout);
1572                    let stderr = String::from_utf8_lossy(&output.stderr);
1573                    error!(%stdout, %stderr, %queue, "failed to validate {name}_lsf_queue");
1574                    Err(anyhow!("failed to validate {name}_lsf_queue `{queue}`"))
1575                } else {
1576                    Ok(())
1577                }
1578            }
1579            Err(_) => Err(anyhow!(
1580                "timed out trying to validate {name}_lsf_queue `{queue}`"
1581            )),
1582        }
1583    }
1584}
1585
1586/// Configuration for the LSF + Apptainer backend.
1587// TODO ACF 2025-09-23: add a Apptainer/Singularity mode config that switches around executable
1588// name, env var names, etc.
1589#[derive(Debug, Default, Clone, serde::Deserialize, serde::Serialize)]
1590#[serde(rename_all = "snake_case", deny_unknown_fields)]
1591pub struct LsfApptainerBackendConfig {
1592    /// The task monitor polling interval, in seconds.
1593    ///
1594    /// Defaults to 30 seconds.
1595    #[serde(default, skip_serializing_if = "Option::is_none")]
1596    pub interval: Option<u64>,
1597    /// The maximum number of concurrent LSF operations the backend will
1598    /// perform.
1599    ///
1600    /// This controls the maximum concurrent number of `bsub` processes the
1601    /// backend will spawn to queue tasks.
1602    ///
1603    /// Defaults to 10 concurrent operations.
1604    #[serde(default, skip_serializing_if = "Option::is_none")]
1605    pub max_concurrency: Option<u32>,
1606    /// Which queue, if any, to specify when submitting normal jobs to LSF.
1607    ///
1608    /// This may be superseded by
1609    /// [`short_task_lsf_queue`][Self::short_task_lsf_queue],
1610    /// [`gpu_lsf_queue`][Self::gpu_lsf_queue], or
1611    /// [`fpga_lsf_queue`][Self::fpga_lsf_queue] for corresponding tasks.
1612    pub default_lsf_queue: Option<LsfQueueConfig>,
1613    /// Which queue, if any, to specify when submitting [short
1614    /// tasks](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#short_task) to LSF.
1615    ///
1616    /// This may be superseded by [`gpu_lsf_queue`][Self::gpu_lsf_queue] or
1617    /// [`fpga_lsf_queue`][Self::fpga_lsf_queue] for tasks which require
1618    /// specialized hardware.
1619    pub short_task_lsf_queue: Option<LsfQueueConfig>,
1620    /// Which queue, if any, to specify when submitting [tasks which require a
1621    /// GPU](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
1622    /// to LSF.
1623    pub gpu_lsf_queue: Option<LsfQueueConfig>,
1624    /// Which queue, if any, to specify when submitting [tasks which require an
1625    /// FPGA](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
1626    /// to LSF.
1627    pub fpga_lsf_queue: Option<LsfQueueConfig>,
1628    /// Additional command-line arguments to pass to `bsub` when submitting jobs
1629    /// to LSF.
1630    pub extra_bsub_args: Option<Vec<String>>,
1631    /// Prefix to add to every LSF job name before the task identifier. This is
1632    /// truncated as needed to satisfy the byte-oriented LSF job name limit.
1633    #[serde(default, skip_serializing_if = "Option::is_none")]
1634    pub job_name_prefix: Option<String>,
1635    /// The configuration of Apptainer, which is used as the container runtime
1636    /// on the compute nodes where LSF dispatches tasks.
1637    ///
1638    /// Note that this will likely be replaced by an abstraction over multiple
1639    /// container execution runtimes in the future, rather than being
1640    /// hardcoded to Apptainer.
1641    #[serde(default)]
1642    // TODO ACF 2025-10-16: temporarily flatten this into the overall config so that it doesn't
1643    // break existing serialized configs. We'll save breaking the config file format for when we
1644    // actually have meaningful composition of in-place runtimes.
1645    #[serde(flatten)]
1646    pub apptainer_config: ApptainerConfig,
1647}
1648
1649impl LsfApptainerBackendConfig {
1650    /// Validate that the backend is appropriately configured.
1651    pub async fn validate(&self, engine_config: &Config) -> Result<(), anyhow::Error> {
1652        if cfg!(not(unix)) {
1653            bail!("LSF + Apptainer backend is not supported on non-unix platforms");
1654        }
1655
1656        if !engine_config.experimental_features_enabled {
1657            bail!("LSF + Apptainer backend requires enabling experimental features");
1658        }
1659
1660        // Do what we can to validate options that are dependent on the dynamic
1661        // environment. These are a bit fraught, particularly if the behavior of
1662        // the external tools changes based on where a job gets dispatched, but
1663        // querying from the perspective of the current node allows
1664        // us to get better error messages in circumstances typical to a cluster.
1665        if let Some(queue) = &self.default_lsf_queue {
1666            queue.validate("default").await?;
1667        }
1668
1669        if let Some(queue) = &self.short_task_lsf_queue {
1670            queue.validate("short_task").await?;
1671        }
1672
1673        if let Some(queue) = &self.gpu_lsf_queue {
1674            queue.validate("gpu").await?;
1675        }
1676
1677        if let Some(queue) = &self.fpga_lsf_queue {
1678            queue.validate("fpga").await?;
1679        }
1680
1681        if let Some(prefix) = &self.job_name_prefix
1682            && prefix.len() > MAX_LSF_JOB_NAME_PREFIX
1683        {
1684            bail!(
1685                "LSF job name prefix `{prefix}` exceeds the maximum {MAX_LSF_JOB_NAME_PREFIX} \
1686                 bytes"
1687            );
1688        }
1689
1690        self.apptainer_config.validate().await?;
1691
1692        Ok(())
1693    }
1694
1695    /// Get the appropriate LSF queue for a task under this configuration.
1696    ///
1697    /// Specialized hardware requirements are prioritized over other
1698    /// characteristics, with FPGA taking precedence over GPU.
1699    pub(crate) fn lsf_queue_for_task(
1700        &self,
1701        requirements: &HashMap<String, Value>,
1702        hints: &HashMap<String, Value>,
1703    ) -> Option<&LsfQueueConfig> {
1704        // Specialized hardware gets priority.
1705        if let Some(queue) = self.fpga_lsf_queue.as_ref()
1706            && let Some(true) = requirements
1707                .get(wdl_ast::v1::TASK_REQUIREMENT_FPGA)
1708                .and_then(Value::as_boolean)
1709        {
1710            return Some(queue);
1711        }
1712
1713        if let Some(queue) = self.gpu_lsf_queue.as_ref()
1714            && let Some(true) = requirements
1715                .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
1716                .and_then(Value::as_boolean)
1717        {
1718            return Some(queue);
1719        }
1720
1721        // Then short tasks.
1722        if let Some(queue) = self.short_task_lsf_queue.as_ref()
1723            && let Some(true) = hints
1724                .get(wdl_ast::v1::TASK_HINT_SHORT_TASK)
1725                .and_then(Value::as_boolean)
1726        {
1727            return Some(queue);
1728        }
1729
1730        // Finally the default queue. If this is `None`, `bsub` gets run without a queue
1731        // argument and the cluster's default is used.
1732        self.default_lsf_queue.as_ref()
1733    }
1734}
1735
/// Configuration for a Slurm partition.
///
/// Each partition can optionally have per-task CPU and memory limits set so
/// that tasks which are too large to be scheduled on that partition will fail
/// immediately instead of pending indefinitely. In the future, these limits may
/// be populated or validated by live information from the cluster, but
/// for now they must be manually based on the user's understanding of the
/// cluster configuration.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SlurmPartitionConfig {
    /// The name of the partition; this is the string passed to `sbatch
    /// --partition=<partition_name>`.
    pub name: String,
    /// The maximum number of CPUs this partition can provision for a single
    /// task.
    ///
    /// `None` means no per-task CPU limit is configured for this partition.
    pub max_cpu_per_task: Option<u64>,
    /// The maximum memory this partition can provision for a single task.
    ///
    /// `None` means no per-task memory limit is configured for this partition.
    pub max_memory_per_task: Option<ByteSize>,
}
1756
1757impl SlurmPartitionConfig {
1758    /// Validate that this Slurm partition exists according to the local
1759    /// `sinfo`.
1760    pub async fn validate(&self, name: &str) -> Result<(), anyhow::Error> {
1761        let partition = &self.name;
1762        ensure!(
1763            !partition.is_empty(),
1764            "{name}_slurm_partition name cannot be empty"
1765        );
1766        if let Some(max_cpu_per_task) = self.max_cpu_per_task {
1767            ensure!(
1768                max_cpu_per_task > 0,
1769                "{name}_slurm_partition `{partition}` must allow at least 1 CPU to be provisioned"
1770            );
1771        }
1772        if let Some(max_memory_per_task) = self.max_memory_per_task {
1773            ensure!(
1774                max_memory_per_task.as_u64() > 0,
1775                "{name}_slurm_partition `{partition}` must allow at least some memory to be \
1776                 provisioned"
1777            );
1778        }
1779        match tokio::time::timeout(
1780            // 10 seconds is rather arbitrary; `scontrol` ordinarily returns extremely quickly, but
1781            // we don't want things to run away on a misconfigured system
1782            std::time::Duration::from_secs(10),
1783            Command::new("scontrol")
1784                .arg("show")
1785                .arg("partition")
1786                .arg(partition)
1787                .output(),
1788        )
1789        .await
1790        {
1791            Ok(output) => {
1792                let output = output.context("validating Slurm partition")?;
1793                if !output.status.success() {
1794                    let stdout = String::from_utf8_lossy(&output.stdout);
1795                    let stderr = String::from_utf8_lossy(&output.stderr);
1796                    error!(%stdout, %stderr, %partition, "failed to validate {name}_slurm_partition");
1797                    Err(anyhow!(
1798                        "failed to validate {name}_slurm_partition `{partition}`"
1799                    ))
1800                } else {
1801                    Ok(())
1802                }
1803            }
1804            Err(_) => Err(anyhow!(
1805                "timed out trying to validate {name}_slurm_partition `{partition}`"
1806            )),
1807        }
1808    }
1809}
1810
1811/// Configuration for the Slurm + Apptainer backend.
1812// TODO ACF 2025-09-23: add a Apptainer/Singularity mode config that switches around executable
1813// name, env var names, etc.
1814#[derive(Debug, Default, Clone, serde::Deserialize, serde::Serialize)]
1815#[serde(rename_all = "snake_case", deny_unknown_fields)]
1816pub struct SlurmApptainerBackendConfig {
1817    /// The task monitor polling interval, in seconds.
1818    ///
1819    /// Defaults to 30 seconds.
1820    #[serde(default, skip_serializing_if = "Option::is_none")]
1821    pub interval: Option<u64>,
1822    /// The maximum number of concurrent Slurm operations the backend will
1823    /// perform.
1824    ///
1825    /// This controls the maximum concurrent number of `sbatch` processes the
1826    /// backend will spawn to queue tasks.
1827    ///
1828    /// Defaults to 10 concurrent operations.
1829    #[serde(default, skip_serializing_if = "Option::is_none")]
1830    pub max_concurrency: Option<u32>,
1831    /// Which partition, if any, to specify when submitting normal jobs to
1832    /// Slurm.
1833    ///
1834    /// This may be superseded by
1835    /// [`short_task_slurm_partition`][Self::short_task_slurm_partition],
1836    /// [`gpu_slurm_partition`][Self::gpu_slurm_partition], or
1837    /// [`fpga_slurm_partition`][Self::fpga_slurm_partition] for corresponding
1838    /// tasks.
1839    pub default_slurm_partition: Option<SlurmPartitionConfig>,
1840    /// Which partition, if any, to specify when submitting [short
1841    /// tasks](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#short_task) to Slurm.
1842    ///
1843    /// This may be superseded by
1844    /// [`gpu_slurm_partition`][Self::gpu_slurm_partition] or
1845    /// [`fpga_slurm_partition`][Self::fpga_slurm_partition] for tasks which
1846    /// require specialized hardware.
1847    pub short_task_slurm_partition: Option<SlurmPartitionConfig>,
1848    /// Which partition, if any, to specify when submitting [tasks which require
1849    /// a GPU](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
1850    /// to Slurm.
1851    pub gpu_slurm_partition: Option<SlurmPartitionConfig>,
1852    /// Which partition, if any, to specify when submitting [tasks which require
1853    /// an FPGA](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
1854    /// to Slurm.
1855    pub fpga_slurm_partition: Option<SlurmPartitionConfig>,
1856    /// Additional command-line arguments to pass to `sbatch` when submitting
1857    /// jobs to Slurm.
1858    pub extra_sbatch_args: Option<Vec<String>>,
1859    /// Prefix to add to every Slurm job name before the task identifier.
1860    #[serde(default, skip_serializing_if = "Option::is_none")]
1861    pub job_name_prefix: Option<String>,
1862    /// The configuration of Apptainer, which is used as the container runtime
1863    /// on the compute nodes where Slurm dispatches tasks.
1864    ///
1865    /// Note that this will likely be replaced by an abstraction over multiple
1866    /// container execution runtimes in the future, rather than being
1867    /// hardcoded to Apptainer.
1868    #[serde(default)]
1869    // TODO ACF 2025-10-16: temporarily flatten this into the overall config so that it doesn't
1870    // break existing serialized configs. We'll save breaking the config file format for when we
1871    // actually have meaningful composition of in-place runtimes.
1872    #[serde(flatten)]
1873    pub apptainer_config: ApptainerConfig,
1874}
1875
1876impl SlurmApptainerBackendConfig {
1877    /// Validate that the backend is appropriately configured.
1878    pub async fn validate(&self, engine_config: &Config) -> Result<(), anyhow::Error> {
1879        if cfg!(not(unix)) {
1880            bail!("Slurm + Apptainer backend is not supported on non-unix platforms");
1881        }
1882        if !engine_config.experimental_features_enabled {
1883            bail!("Slurm + Apptainer backend requires enabling experimental features");
1884        }
1885
1886        // Do what we can to validate options that are dependent on the dynamic
1887        // environment. These are a bit fraught, particularly if the behavior of
1888        // the external tools changes based on where a job gets dispatched, but
1889        // querying from the perspective of the current node allows
1890        // us to get better error messages in circumstances typical to a cluster.
1891        if let Some(partition) = &self.default_slurm_partition {
1892            partition.validate("default").await?;
1893        }
1894        if let Some(partition) = &self.short_task_slurm_partition {
1895            partition.validate("short_task").await?;
1896        }
1897        if let Some(partition) = &self.gpu_slurm_partition {
1898            partition.validate("gpu").await?;
1899        }
1900        if let Some(partition) = &self.fpga_slurm_partition {
1901            partition.validate("fpga").await?;
1902        }
1903
1904        self.apptainer_config.validate().await?;
1905
1906        Ok(())
1907    }
1908
1909    /// Get the appropriate Slurm partition for a task under this configuration.
1910    ///
1911    /// Specialized hardware requirements are prioritized over other
1912    /// characteristics, with FPGA taking precedence over GPU.
1913    pub(crate) fn slurm_partition_for_task(
1914        &self,
1915        requirements: &HashMap<String, Value>,
1916        hints: &HashMap<String, Value>,
1917    ) -> Option<&SlurmPartitionConfig> {
1918        // TODO ACF 2025-09-26: what's the relationship between this code and
1919        // `TaskExecutionConstraints`? Should this be there instead, or be pulling
1920        // values from that instead of directly from `requirements` and `hints`?
1921
1922        // Specialized hardware gets priority.
1923        if let Some(partition) = self.fpga_slurm_partition.as_ref()
1924            && let Some(true) = requirements
1925                .get(wdl_ast::v1::TASK_REQUIREMENT_FPGA)
1926                .and_then(Value::as_boolean)
1927        {
1928            return Some(partition);
1929        }
1930
1931        if let Some(partition) = self.gpu_slurm_partition.as_ref()
1932            && let Some(true) = requirements
1933                .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
1934                .and_then(Value::as_boolean)
1935        {
1936            return Some(partition);
1937        }
1938
1939        // Then short tasks.
1940        if let Some(partition) = self.short_task_slurm_partition.as_ref()
1941            && let Some(true) = hints
1942                .get(wdl_ast::v1::TASK_HINT_SHORT_TASK)
1943                .and_then(Value::as_boolean)
1944        {
1945            return Some(partition);
1946        }
1947
1948        // Finally the default partition. If this is `None`, `sbatch` gets run without a
1949        // partition argument and the cluster's default is used.
1950        self.default_slurm_partition.as_ref()
1951    }
1952}
1953
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// `SecretString` must serialize as `<REDACTED>` by default, and the
    /// `unredact`/`redact` toggles must switch that behavior off and on.
    #[test]
    fn redacted_secret() {
        let mut secret: SecretString = "secret".into();

        assert_eq!(
            serde_json::to_string(&secret).unwrap(),
            format!(r#""{REDACTED}""#)
        );

        secret.unredact();
        assert_eq!(serde_json::to_string(&secret).unwrap(), r#""secret""#);

        secret.redact();
        assert_eq!(
            serde_json::to_string(&secret).unwrap(),
            format!(r#""{REDACTED}""#)
        );
    }

    /// Serializing a fully populated `Config` must not leak any of the
    /// secret values embedded in backend and storage auth settings.
    #[test]
    fn redacted_config() {
        let config = Config {
            backends: [
                (
                    "first".to_string(),
                    BackendConfig::Tes(TesBackendConfig {
                        auth: Some(TesBackendAuthConfig::Basic(BasicAuthConfig {
                            username: "foo".into(),
                            password: "secret".into(),
                        })),
                        ..Default::default()
                    }),
                ),
                (
                    "second".to_string(),
                    BackendConfig::Tes(TesBackendConfig {
                        auth: Some(TesBackendAuthConfig::Bearer(BearerAuthConfig {
                            token: "secret".into(),
                        })),
                        ..Default::default()
                    }),
                ),
            ]
            .into(),
            storage: StorageConfig {
                azure: AzureStorageConfig {
                    auth: Some(AzureStorageAuthConfig {
                        account_name: "foo".into(),
                        access_key: "secret".into(),
                    }),
                },
                s3: S3StorageConfig {
                    auth: Some(S3StorageAuthConfig {
                        access_key_id: "foo".into(),
                        secret_access_key: "secret".into(),
                    }),
                    ..Default::default()
                },
                google: GoogleStorageConfig {
                    auth: Some(GoogleStorageAuthConfig {
                        access_key: "foo".into(),
                        secret: "secret".into(),
                    }),
                },
            },
            ..Default::default()
        };

        let json = serde_json::to_string_pretty(&config).unwrap();
        // FIX: the assertion previously lacked the negation, so it asserted
        // that a secret *was* present — the opposite of what this test is
        // for. Look for `"secret"` in JSON *value* position specifically:
        // field names (e.g. `secret_access_key`, the Google `secret` key)
        // legitimately contain the word, but no secret value may survive
        // serialization unredacted.
        assert!(!json.contains(r#": "secret""#), "`{json}` contains a secret");
    }

    /// Exercises `Config::validate` across invalid and valid configurations:
    /// task retries, scatter concurrency, backend naming, local backend
    /// CPU/memory limits, TES settings, HTTP parallelism, and (on unix) the
    /// LSF job name prefix length limit.
    #[tokio::test]
    async fn test_config_validate() {
        // Test invalid task config
        let mut config = Config::default();
        config.task.retries = Retries(Some(255));
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `task.retries` cannot exceed 100"
        );

        // Test invalid scatter concurrency config
        let mut config = Config::default();
        config.workflow.scatter.concurrency = 0;
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `workflow.scatter.concurrency` cannot be zero"
        );

        // Test invalid backend name
        let config = Config {
            backend: "foo".into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "a backend named `foo` is not present in the configuration"
        );
        let config = Config {
            backend: "bar".into(),
            backends: [("foo".to_string(), BackendConfig::default())].into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "a backend named `bar` is not present in the configuration"
        );

        // Test a singular backend
        let config = Config {
            backend: "foo".to_string(),
            backends: [("foo".to_string(), BackendConfig::default())].into(),
            ..Default::default()
        };
        config.validate().await.expect("config should validate");

        // Test invalid local backend cpu config
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    cpu: Some(0),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `cpu` cannot be zero"
        );
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    cpu: Some(10000000),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert!(
            config
                .validate()
                .await
                .unwrap_err()
                .to_string()
                .starts_with(
                    "local backend configuration value `cpu` cannot exceed the virtual CPUs \
                     available to the host"
                )
        );

        // Test invalid local backend memory config
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("0 GiB".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `memory` cannot be zero"
        );
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("100 meows".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `memory` has invalid value `100 meows`"
        );

        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("1000 TiB".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert!(
            config
                .validate()
                .await
                .unwrap_err()
                .to_string()
                .starts_with(
                    "local backend configuration value `memory` cannot exceed the total memory of \
                     the host"
                )
        );

        // Test missing TES URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(Default::default()),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `url` is required"
        );

        // Test TES invalid max concurrency
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("https://example.com".parse().unwrap()),
                    max_concurrency: Some(0),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `max_concurrency` cannot be zero"
        );

        // Insecure TES URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("http://example.com".parse().unwrap()),
                    inputs: Some("http://example.com".parse().unwrap()),
                    outputs: Some("http://example.com".parse().unwrap()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `url` has invalid value `http://example.com/`: URL \
             must use a HTTPS scheme"
        );

        // Allow insecure URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("http://example.com".parse().unwrap()),
                    inputs: Some("http://example.com".parse().unwrap()),
                    outputs: Some("http://example.com".parse().unwrap()),
                    insecure: true,
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        config
            .validate()
            .await
            .expect("configuration should validate");

        // invalid Parallelism
        let mut config = Config::default();
        config.http.parallelism = Parallelism(Some(0));
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `http.parallelism` cannot be zero"
        );

        // valid Parallelism
        let mut config = Config::default();
        config.http.parallelism = Parallelism(Some(5));
        assert!(
            config.validate().await.is_ok(),
            "should pass for valid configuration"
        );
        let mut config = Config::default();
        config.http.parallelism = Parallelism(None);
        assert!(
            config.validate().await.is_ok(),
            "should pass for default (None)"
        );

        // Test invalid LSF job name prefix
        #[cfg(unix)]
        {
            let job_name_prefix = "A".repeat(MAX_LSF_JOB_NAME_PREFIX * 2);
            let mut config = Config {
                experimental_features_enabled: true,
                ..Default::default()
            };
            config.backends.insert(
                "default".to_string(),
                BackendConfig::LsfApptainer(LsfApptainerBackendConfig {
                    job_name_prefix: Some(job_name_prefix.clone()),
                    ..Default::default()
                }),
            );
            assert_eq!(
                config.validate().await.unwrap_err().to_string(),
                format!("LSF job name prefix `{job_name_prefix}` exceeds the maximum 100 bytes")
            );
        }
    }
}