// wdl_engine/config.rs
1//! Implementation of engine configuration.
2
3use std::borrow::Cow;
4use std::collections::HashMap;
5use std::collections::HashSet;
6use std::path::Path;
7use std::path::PathBuf;
8use std::sync::Arc;
9
10use anyhow::Context;
11use anyhow::Result;
12use anyhow::anyhow;
13use anyhow::bail;
14use anyhow::ensure;
15use bytesize::ByteSize;
16use indexmap::IndexMap;
17use secrecy::ExposeSecret;
18use serde::Deserialize;
19use serde::Serialize;
20use tokio::process::Command;
21use tracing::error;
22use tracing::warn;
23use url::Url;
24
25use crate::CancellationContext;
26use crate::Events;
27use crate::SYSTEM;
28use crate::Value;
29use crate::backend::TaskExecutionBackend;
30use crate::convert_unit_string;
31use crate::path::is_supported_url;
32
/// The inclusive maximum number of task retries the engine supports.
///
/// `TaskConfig::validate` rejects configurations whose `retries` value
/// exceeds this limit.
pub(crate) const MAX_RETRIES: u64 = 100;

/// The default task shell, used when `TaskConfig::shell` is not specified.
pub(crate) const DEFAULT_TASK_SHELL: &str = "bash";

/// The default backend name, used when `Config::backend` is not specified and
/// more than one backend is configured.
pub(crate) const DEFAULT_BACKEND_NAME: &str = "default";

/// The maximum size, in bytes, for an LSF job name prefix.
const MAX_LSF_JOB_NAME_PREFIX: usize = 100;

/// The string that replaces redacted serialization fields.
const REDACTED: &str = "<REDACTED>";
47
/// Gets the default root cache directory for the user.
///
/// Returns an error if the user's cache directory cannot be determined.
pub(crate) fn cache_dir() -> Result<PathBuf> {
    /// The subdirectory within the user's cache directory for all caches
    /// maintained by the engine.
    const CACHE_DIR_ROOT: &str = "sprocket";

    Ok(dirs::cache_dir()
        .context("failed to determine user cache directory")?
        .join(CACHE_DIR_ROOT))
}
57
/// Represents a secret string that is, by default, redacted for serialization.
///
/// This type is a wrapper around [`secrecy::SecretString`].
///
/// Use [`SecretString::redact`] and [`SecretString::unredact`] to control
/// whether the value is exposed when serialized.
#[derive(Debug, Clone)]
pub struct SecretString {
    /// The inner secret string.
    ///
    /// This type is not serializable.
    inner: secrecy::SecretString,
    /// Whether or not the secret string is redacted for serialization.
    ///
    /// If `true` (the default), `<REDACTED>` is serialized for the string's
    /// value.
    ///
    /// If `false`, the inner secret string is exposed for serialization.
    redacted: bool,
}
75
impl SecretString {
    /// Redacts the secret for serialization.
    ///
    /// By default, a [`SecretString`] is redacted; when redacted, the string is
    /// replaced with `<REDACTED>` when serialized.
    pub fn redact(&mut self) {
        self.redacted = true;
    }

    /// Unredacts the secret for serialization.
    ///
    /// After calling this method, serialization exposes the secret's actual
    /// value.
    pub fn unredact(&mut self) {
        self.redacted = false;
    }

    /// Gets the inner [`secrecy::SecretString`].
    pub fn inner(&self) -> &secrecy::SecretString {
        &self.inner
    }
}
95
96impl From<String> for SecretString {
97    fn from(s: String) -> Self {
98        Self {
99            inner: s.into(),
100            redacted: true,
101        }
102    }
103}
104
105impl From<&str> for SecretString {
106    fn from(s: &str) -> Self {
107        Self {
108            inner: s.into(),
109            redacted: true,
110        }
111    }
112}
113
impl Default for SecretString {
    /// Creates an empty secret string that is redacted for serialization.
    fn default() -> Self {
        Self {
            inner: Default::default(),
            redacted: true,
        }
    }
}
122
123impl serde::Serialize for SecretString {
124    fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
125    where
126        S: serde::Serializer,
127    {
128        use secrecy::ExposeSecret;
129
130        if self.redacted {
131            serializer.serialize_str(REDACTED)
132        } else {
133            serializer.serialize_str(self.inner.expose_secret())
134        }
135    }
136}
137
138impl<'de> serde::Deserialize<'de> for SecretString {
139    fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
140    where
141        D: serde::Deserializer<'de>,
142    {
143        let inner = secrecy::SecretString::deserialize(deserializer)?;
144        Ok(Self {
145            inner,
146            redacted: true,
147        })
148    }
149}
150
/// Represents how an evaluation error or cancellation should be handled by the
/// engine.
///
/// Serialized in configuration files as `slow` or `fast`.
#[derive(Debug, Default, Copy, Clone, Serialize, Deserialize, PartialEq, Eq, Hash)]
#[serde(rename_all = "snake_case")]
pub enum FailureMode {
    /// When an error is encountered or evaluation is canceled, evaluation waits
    /// for any outstanding tasks to complete.
    ///
    /// This is the default mode.
    #[default]
    Slow,
    /// When an error is encountered or evaluation is canceled, any outstanding
    /// tasks that are executing are immediately canceled and evaluation waits
    /// for cancellation to complete.
    Fast,
}
165
/// Represents WDL evaluation configuration.
///
/// <div class="warning">
///
/// By default, serialization of [`Config`] will redact the values of secrets.
///
/// Use the [`Config::unredact`] method before serialization to prevent the
/// secrets from being redacted.
///
/// </div>
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct Config {
    /// HTTP configuration.
    #[serde(default)]
    pub http: HttpConfig,
    /// Workflow evaluation configuration.
    #[serde(default)]
    pub workflow: WorkflowConfig,
    /// Task evaluation configuration.
    #[serde(default)]
    pub task: TaskConfig,
    /// The name of the backend to use.
    ///
    /// If not specified and `backends` has multiple entries, it will use a name
    /// of `default`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub backend: Option<String>,
    /// Task execution backends configuration.
    ///
    /// If the collection is empty and `backend` is not specified, the engine
    /// default backend is used.
    ///
    /// If the collection has exactly one entry and `backend` is not specified,
    /// the singular entry will be used.
    #[serde(default, skip_serializing_if = "IndexMap::is_empty")]
    pub backends: IndexMap<String, BackendConfig>,
    /// Storage configuration.
    #[serde(default)]
    pub storage: StorageConfig,
    /// (Experimental) Avoid environment-specific output; default is `false`.
    ///
    /// If this option is `true`, selected error messages and log output will
    /// avoid emitting environment-specific output such as absolute paths
    /// and system resource counts.
    ///
    /// This is largely meant to support "golden testing" where a test's success
    /// depends on matching an expected set of outputs exactly. Cues that
    /// help users overcome errors, such as the path to a temporary
    /// directory or the number of CPUs available to the system, confound this
    /// style of testing. This flag is a best-effort experimental attempt to
    /// reduce the impact of these differences in order to allow a wider
    /// range of golden tests to be written.
    ///
    /// Requires `experimental_features_enabled`; validation rejects this flag
    /// otherwise.
    #[serde(default)]
    pub suppress_env_specific_output: bool,
    /// (Experimental) Whether experimental features are enabled; default is
    /// `false`.
    ///
    /// Experimental features are provided to users with heavy caveats about
    /// their stability and rough edges. Use at your own risk, but feedback
    /// is quite welcome.
    #[serde(default)]
    pub experimental_features_enabled: bool,
    /// The failure mode for workflow or task evaluation.
    ///
    /// A value of [`FailureMode::Slow`] will result in evaluation waiting for
    /// executing tasks to complete upon error or interruption.
    ///
    /// A value of [`FailureMode::Fast`] will immediately attempt to cancel
    /// executing tasks upon error or interruption.
    ///
    /// Serialized under the key `fail`.
    #[serde(default, rename = "fail")]
    pub failure_mode: FailureMode,
}
239
240impl Config {
241    /// Validates the evaluation configuration.
242    pub async fn validate(&self) -> Result<()> {
243        self.http.validate()?;
244        self.workflow.validate()?;
245        self.task.validate()?;
246
247        if self.backend.is_none() && self.backends.len() < 2 {
248            // This is OK, we'll use either the singular backends entry (1) or
249            // the default (0)
250        } else {
251            // Check the backends map for the backend name (or "default")
252            let backend = self.backend.as_deref().unwrap_or(DEFAULT_BACKEND_NAME);
253            if !self.backends.contains_key(backend) {
254                bail!("a backend named `{backend}` is not present in the configuration");
255            }
256        }
257
258        for backend in self.backends.values() {
259            backend.validate(self).await?;
260        }
261
262        self.storage.validate()?;
263
264        if self.suppress_env_specific_output && !self.experimental_features_enabled {
265            bail!("`suppress_env_specific_output` requires enabling experimental features");
266        }
267
268        Ok(())
269    }
270
271    /// Redacts the secrets contained in the configuration.
272    ///
273    /// By default, secrets are redacted for serialization.
274    pub fn redact(&mut self) {
275        for backend in self.backends.values_mut() {
276            backend.redact();
277        }
278
279        if let Some(auth) = &mut self.storage.azure.auth {
280            auth.redact();
281        }
282
283        if let Some(auth) = &mut self.storage.s3.auth {
284            auth.redact();
285        }
286
287        if let Some(auth) = &mut self.storage.google.auth {
288            auth.redact();
289        }
290    }
291
292    /// Unredacts the secrets contained in the configuration.
293    ///
294    /// Calling this method will expose secrets for serialization.
295    pub fn unredact(&mut self) {
296        for backend in self.backends.values_mut() {
297            backend.unredact();
298        }
299
300        if let Some(auth) = &mut self.storage.azure.auth {
301            auth.unredact();
302        }
303
304        if let Some(auth) = &mut self.storage.s3.auth {
305            auth.unredact();
306        }
307
308        if let Some(auth) = &mut self.storage.google.auth {
309            auth.unredact();
310        }
311    }
312
313    /// Gets the backend configuration.
314    ///
315    /// Returns an error if the configuration specifies a named backend that
316    /// isn't present in the configuration.
317    pub fn backend(&self) -> Result<Cow<'_, BackendConfig>> {
318        if self.backend.is_some() || self.backends.len() >= 2 {
319            // Lookup the backend to use
320            let backend = self.backend.as_deref().unwrap_or(DEFAULT_BACKEND_NAME);
321            return Ok(Cow::Borrowed(self.backends.get(backend).ok_or_else(
322                || anyhow!("a backend named `{backend}` is not present in the configuration"),
323            )?));
324        }
325
326        if self.backends.len() == 1 {
327            // Use the singular entry
328            Ok(Cow::Borrowed(self.backends.values().next().unwrap()))
329        } else {
330            // Use the default
331            Ok(Cow::Owned(BackendConfig::default()))
332        }
333    }
334
335    /// Creates a new task execution backend based on this configuration.
336    pub(crate) async fn create_backend(
337        self: &Arc<Self>,
338        run_root_dir: &Path,
339        events: Events,
340        cancellation: CancellationContext,
341    ) -> Result<Arc<dyn TaskExecutionBackend>> {
342        use crate::backend::*;
343
344        match self.backend()?.as_ref() {
345            BackendConfig::Local(_) => {
346                warn!(
347                    "the engine is configured to use the local backend: tasks will not be run \
348                     inside of a container"
349                );
350                Ok(Arc::new(LocalBackend::new(
351                    self.clone(),
352                    events,
353                    cancellation,
354                )?))
355            }
356            BackendConfig::Docker(_) => Ok(Arc::new(
357                DockerBackend::new(self.clone(), events, cancellation).await?,
358            )),
359            BackendConfig::Tes(_) => Ok(Arc::new(
360                TesBackend::new(self.clone(), events, cancellation).await?,
361            )),
362            BackendConfig::LsfApptainer(_) => Ok(Arc::new(LsfApptainerBackend::new(
363                self.clone(),
364                run_root_dir,
365                events,
366                cancellation,
367            )?)),
368            BackendConfig::SlurmApptainer(_) => Ok(Arc::new(SlurmApptainerBackend::new(
369                self.clone(),
370                run_root_dir,
371                events,
372                cancellation,
373            )?)),
374        }
375    }
376}
377
/// Represents HTTP configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct HttpConfig {
    /// The HTTP download cache location.
    ///
    /// Defaults to an operating system specific cache directory for the user.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<PathBuf>,
    /// The number of retries for transferring files.
    ///
    /// Defaults to `5`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retries: Option<usize>,
    /// The maximum parallelism for file transfers.
    ///
    /// Defaults to the host's available parallelism.
    ///
    /// A value of `0` is invalid and rejected by validation.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub parallelism: Option<usize>,
}
398
399impl HttpConfig {
400    /// Validates the HTTP configuration.
401    pub fn validate(&self) -> Result<()> {
402        if let Some(parallelism) = self.parallelism
403            && parallelism == 0
404        {
405            bail!("configuration value `http.parallelism` cannot be zero");
406        }
407        Ok(())
408    }
409}
410
/// Represents storage configuration.
///
/// Groups the per-provider cloud storage configurations.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct StorageConfig {
    /// Azure Blob Storage configuration.
    #[serde(default)]
    pub azure: AzureStorageConfig,
    /// AWS S3 configuration.
    #[serde(default)]
    pub s3: S3StorageConfig,
    /// Google Cloud Storage configuration.
    #[serde(default)]
    pub google: GoogleStorageConfig,
}
425
impl StorageConfig {
    /// Validates the storage configuration.
    ///
    /// Validates each of the cloud provider configurations in turn.
    pub fn validate(&self) -> Result<()> {
        self.azure.validate()?;
        self.s3.validate()?;
        self.google.validate()?;
        Ok(())
    }
}
435
/// Represents authentication information for Azure Blob Storage.
///
/// Both fields are required when this configuration is present.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct AzureStorageAuthConfig {
    /// The Azure Storage account name to use.
    pub account_name: String,
    /// The Azure Storage access key to use.
    pub access_key: SecretString,
}
445
446impl AzureStorageAuthConfig {
447    /// Validates the Azure Blob Storage authentication configuration.
448    pub fn validate(&self) -> Result<()> {
449        if self.account_name.is_empty() {
450            bail!("configuration value `storage.azure.auth.account_name` is required");
451        }
452
453        if self.access_key.inner.expose_secret().is_empty() {
454            bail!("configuration value `storage.azure.auth.access_key` is required");
455        }
456
457        Ok(())
458    }
459
460    /// Redacts the secrets contained in the Azure Blob Storage storage
461    /// authentication configuration.
462    pub fn redact(&mut self) {
463        self.access_key.redact();
464    }
465
466    /// Unredacts the secrets contained in the Azure Blob Storage authentication
467    /// configuration.
468    pub fn unredact(&mut self) {
469        self.access_key.unredact();
470    }
471}
472
/// Represents configuration for Azure Blob Storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct AzureStorageConfig {
    /// The Azure Blob Storage authentication configuration.
    ///
    /// When absent, no Azure-specific authentication is configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<AzureStorageAuthConfig>,
}
481
482impl AzureStorageConfig {
483    /// Validates the Azure Blob Storage configuration.
484    pub fn validate(&self) -> Result<()> {
485        if let Some(auth) = &self.auth {
486            auth.validate()?;
487        }
488
489        Ok(())
490    }
491}
492
/// Represents authentication information for AWS S3 storage.
///
/// Both fields are required when this configuration is present.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct S3StorageAuthConfig {
    /// The AWS Access Key ID to use.
    pub access_key_id: String,
    /// The AWS Secret Access Key to use.
    pub secret_access_key: SecretString,
}
502
503impl S3StorageAuthConfig {
504    /// Validates the AWS S3 storage authentication configuration.
505    pub fn validate(&self) -> Result<()> {
506        if self.access_key_id.is_empty() {
507            bail!("configuration value `storage.s3.auth.access_key_id` is required");
508        }
509
510        if self.secret_access_key.inner.expose_secret().is_empty() {
511            bail!("configuration value `storage.s3.auth.secret_access_key` is required");
512        }
513
514        Ok(())
515    }
516
517    /// Redacts the secrets contained in the AWS S3 storage authentication
518    /// configuration.
519    pub fn redact(&mut self) {
520        self.secret_access_key.redact();
521    }
522
523    /// Unredacts the secrets contained in the AWS S3 storage authentication
524    /// configuration.
525    pub fn unredact(&mut self) {
526        self.secret_access_key.unredact();
527    }
528}
529
/// Represents configuration for AWS S3 storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct S3StorageConfig {
    /// The default region to use for S3-schemed URLs (e.g.
    /// `s3://<bucket>/<blob>`).
    ///
    /// Defaults to `us-east-1`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub region: Option<String>,

    /// The AWS S3 storage authentication configuration.
    ///
    /// When absent, no S3-specific authentication is configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<S3StorageAuthConfig>,
}
545
546impl S3StorageConfig {
547    /// Validates the AWS S3 storage configuration.
548    pub fn validate(&self) -> Result<()> {
549        if let Some(auth) = &self.auth {
550            auth.validate()?;
551        }
552
553        Ok(())
554    }
555}
556
/// Represents authentication information for Google Cloud Storage.
///
/// Both fields are required when this configuration is present.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct GoogleStorageAuthConfig {
    /// The HMAC Access Key to use.
    pub access_key: String,
    /// The HMAC Secret to use.
    pub secret: SecretString,
}
566
567impl GoogleStorageAuthConfig {
568    /// Validates the Google Cloud Storage authentication configuration.
569    pub fn validate(&self) -> Result<()> {
570        if self.access_key.is_empty() {
571            bail!("configuration value `storage.google.auth.access_key` is required");
572        }
573
574        if self.secret.inner.expose_secret().is_empty() {
575            bail!("configuration value `storage.google.auth.secret` is required");
576        }
577
578        Ok(())
579    }
580
581    /// Redacts the secrets contained in the Google Cloud Storage authentication
582    /// configuration.
583    pub fn redact(&mut self) {
584        self.secret.redact();
585    }
586
587    /// Unredacts the secrets contained in the Google Cloud Storage
588    /// authentication configuration.
589    pub fn unredact(&mut self) {
590        self.secret.unredact();
591    }
592}
593
/// Represents configuration for Google Cloud Storage.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct GoogleStorageConfig {
    /// The Google Cloud Storage authentication configuration.
    ///
    /// When absent, no Google-specific authentication is configured.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub auth: Option<GoogleStorageAuthConfig>,
}
602
603impl GoogleStorageConfig {
604    /// Validates the Google Cloud Storage configuration.
605    pub fn validate(&self) -> Result<()> {
606        if let Some(auth) = &self.auth {
607            auth.validate()?;
608        }
609
610        Ok(())
611    }
612}
613
/// Represents workflow evaluation configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct WorkflowConfig {
    /// Scatter statement evaluation configuration.
    #[serde(default)]
    pub scatter: ScatterConfig,
}
622
623impl WorkflowConfig {
624    /// Validates the workflow configuration.
625    pub fn validate(&self) -> Result<()> {
626        self.scatter.validate()?;
627        Ok(())
628    }
629}
630
/// Represents scatter statement evaluation configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct ScatterConfig {
    /// The number of scatter array elements to process concurrently.
    ///
    /// Defaults to `1000`.
    ///
    /// A value of `0` is invalid and rejected by validation.
    ///
    /// Lower values use less memory for evaluation and higher values may better
    /// saturate the task execution backend with tasks to execute for large
    /// scatters.
    ///
    /// This setting does not change how many tasks an execution backend can run
    /// concurrently, but may affect how many tasks are sent to the backend to
    /// run at a time.
    ///
    /// For example, if `concurrency` was set to 10 and we evaluate the
    /// following scatters:
    ///
    /// ```wdl
    /// scatter (i in range(100)) {
    ///     call my_task
    /// }
    ///
    /// scatter (j in range(100)) {
    ///     call my_task as my_task2
    /// }
    /// ```
    ///
    /// Here each scatter is independent and therefore there will be 20 calls
    /// (10 for each scatter) made concurrently. If the task execution
    /// backend can only execute 5 tasks concurrently, 5 tasks will execute
    /// and 15 will be "ready" to execute and waiting for an executing task
    /// to complete.
    ///
    /// If instead we evaluate the following scatters:
    ///
    /// ```wdl
    /// scatter (i in range(100)) {
    ///     scatter (j in range(100)) {
    ///         call my_task
    ///     }
    /// }
    /// ```
    ///
    /// Then there will be 100 calls (10*10 as 10 are made for each outer
    /// element) made concurrently. If the task execution backend can only
    /// execute 5 tasks concurrently, 5 tasks will execute and 95 will be
    /// "ready" to execute and waiting for an executing task to complete.
    ///
    /// <div class="warning">
    /// Warning: nested scatter statements cause exponential memory usage based
    /// on this value, as each scatter statement evaluation requires allocating
    /// new scopes for the scatter array elements being processed. </div>
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub concurrency: Option<u64>,
}
690
691impl ScatterConfig {
692    /// Validates the scatter configuration.
693    pub fn validate(&self) -> Result<()> {
694        if let Some(concurrency) = self.concurrency
695            && concurrency == 0
696        {
697            bail!("configuration value `workflow.scatter.concurrency` cannot be zero");
698        }
699
700        Ok(())
701    }
702}
703
/// Represents the supported call caching modes.
///
/// Serialized in configuration files as `off`, `on`, or `explicit`.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum CallCachingMode {
    /// Call caching is disabled.
    ///
    /// The call cache is not checked and new entries are not added to the
    /// cache.
    ///
    /// This is the default value.
    #[default]
    Off,
    /// Call caching is enabled.
    ///
    /// The call cache is checked and new entries are added to the cache.
    ///
    /// Defaults the `cacheable` task hint to `true`.
    On,
    /// Call caching is enabled only for tasks that explicitly have a
    /// `cacheable` hint set to `true`.
    ///
    /// The call cache is checked and new entries are added to the cache *only*
    /// for tasks that have the `cacheable` hint set to `true`.
    ///
    /// Defaults the `cacheable` task hint to `false`.
    Explicit,
}
731
/// Represents the supported modes for calculating content digests.
///
/// Serialized in configuration files as `strong` or `weak`.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ContentDigestMode {
    /// Use a strong digest for file content.
    ///
    /// Strong digests require hashing all of the contents of a file; this may
    /// noticeably impact performance for very large files.
    ///
    /// This setting guarantees that a modified file will be detected.
    Strong,
    /// Use a weak digest for file content.
    ///
    /// A weak digest is based solely off of file metadata, such as size and
    /// last modified time.
    ///
    /// This setting cannot guarantee the detection of modified files and may
    /// result in a modified file not causing a call cache entry to be
    /// invalidated.
    ///
    /// However, it is substantially faster than using a strong digest.
    ///
    /// This is the default value.
    #[default]
    Weak,
}
756
/// Represents task evaluation configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct TaskConfig {
    /// The default maximum number of retries to attempt if a task fails.
    ///
    /// A task's `max_retries` requirement will override this value.
    ///
    /// Defaults to 0 (no retries); may not exceed [`MAX_RETRIES`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retries: Option<u64>,
    /// The default container to use if a container is not specified in a task's
    /// requirements.
    ///
    /// Defaults to `ubuntu:latest`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub container: Option<String>,
    /// The default shell to use for tasks.
    ///
    /// Defaults to `bash`.
    ///
    /// <div class="warning">
    /// Warning: the use of a shell other than `bash` may lead to tasks that may
    /// not be portable to other execution engines.
    ///
    /// The shell must support a `-c` option to run a specific script file (i.e.
    /// an evaluated task command).
    ///
    /// Note that this option affects all task commands, so every container that
    /// is used must contain the specified shell.
    ///
    /// If using this setting causes your tasks to fail, please do not file an
    /// issue. </div>
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub shell: Option<String>,
    /// The behavior when a task's `cpu` requirement cannot be met.
    #[serde(default)]
    pub cpu_limit_behavior: TaskResourceLimitBehavior,
    /// The behavior when a task's `memory` requirement cannot be met.
    #[serde(default)]
    pub memory_limit_behavior: TaskResourceLimitBehavior,
    /// The call cache directory to use for caching task execution results.
    ///
    /// Defaults to an operating system specific cache directory for the user.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cache_dir: Option<PathBuf>,
    /// The call caching mode to use for tasks.
    #[serde(default)]
    pub cache: CallCachingMode,
    /// The content digest mode to use.
    ///
    /// Used as part of call caching.
    #[serde(default)]
    pub digests: ContentDigestMode,
    /// Keys of task requirements to exclude from call cache checking.
    ///
    /// When specified, these requirement keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for requirements that may vary between runs
    /// but should not invalidate the cache (e.g., dynamic resource
    /// allocation).
    #[serde(default)]
    pub excluded_cache_requirements: HashSet<String>,
    /// Keys of task hints to exclude from call cache checking.
    ///
    /// When specified, these hint keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for hints that may vary between runs
    /// but should not invalidate the cache.
    #[serde(default)]
    pub excluded_cache_hints: HashSet<String>,
    /// Keys of task inputs to exclude from call cache checking.
    ///
    /// When specified, these input keys will be ignored when
    /// calculating cache keys and validating cache entries.
    ///
    /// This can be useful for inputs that may vary between runs
    /// but should not affect the task's output.
    #[serde(default)]
    pub excluded_cache_inputs: HashSet<String>,
}
840
841impl TaskConfig {
842    /// Validates the task evaluation configuration.
843    pub fn validate(&self) -> Result<()> {
844        if self.retries.unwrap_or(0) > MAX_RETRIES {
845            bail!("configuration value `task.retries` cannot exceed {MAX_RETRIES}");
846        }
847
848        Ok(())
849    }
850}
851
/// The behavior when a task resource requirement, such as `cpu` or `memory`,
/// cannot be met.
///
/// Serialized in configuration files as `try_with_max` or `deny`.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub enum TaskResourceLimitBehavior {
    /// Try executing a task with the maximum amount of the resource available
    /// when the task's corresponding requirement cannot be met.
    TryWithMax,
    /// Do not execute a task if its corresponding requirement cannot be met.
    ///
    /// This is the default behavior.
    #[default]
    Deny,
}
866
/// Represents supported task execution backends.
///
/// Serialized as an internally tagged enum: the `type` field of the backend
/// configuration selects the variant.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum BackendConfig {
    /// Use the local task execution backend.
    Local(LocalBackendConfig),
    /// Use the Docker task execution backend.
    Docker(DockerBackendConfig),
    /// Use the TES task execution backend.
    Tes(TesBackendConfig),
    /// Use the experimental LSF + Apptainer task execution backend.
    ///
    /// Requires enabling experimental features.
    LsfApptainer(LsfApptainerBackendConfig),
    /// Use the experimental Slurm + Apptainer task execution backend.
    ///
    /// Requires enabling experimental features.
    SlurmApptainer(SlurmApptainerBackendConfig),
}
886
887impl Default for BackendConfig {
888    fn default() -> Self {
889        Self::Docker(Default::default())
890    }
891}
892
893impl BackendConfig {
894    /// Validates the backend configuration.
895    pub async fn validate(&self, engine_config: &Config) -> Result<()> {
896        match self {
897            Self::Local(config) => config.validate(),
898            Self::Docker(config) => config.validate(),
899            Self::Tes(config) => config.validate(),
900            Self::LsfApptainer(config) => config.validate(engine_config).await,
901            Self::SlurmApptainer(config) => config.validate(engine_config).await,
902        }
903    }
904
905    /// Converts the backend configuration into a local backend configuration
906    ///
907    /// Returns `None` if the backend configuration is not local.
908    pub fn as_local(&self) -> Option<&LocalBackendConfig> {
909        match self {
910            Self::Local(config) => Some(config),
911            _ => None,
912        }
913    }
914
915    /// Converts the backend configuration into a Docker backend configuration
916    ///
917    /// Returns `None` if the backend configuration is not Docker.
918    pub fn as_docker(&self) -> Option<&DockerBackendConfig> {
919        match self {
920            Self::Docker(config) => Some(config),
921            _ => None,
922        }
923    }
924
925    /// Converts the backend configuration into a TES backend configuration
926    ///
927    /// Returns `None` if the backend configuration is not TES.
928    pub fn as_tes(&self) -> Option<&TesBackendConfig> {
929        match self {
930            Self::Tes(config) => Some(config),
931            _ => None,
932        }
933    }
934
935    /// Converts the backend configuration into a LSF Apptainer backend
936    /// configuration
937    ///
938    /// Returns `None` if the backend configuration is not LSF Apptainer.
939    pub fn as_lsf_apptainer(&self) -> Option<&LsfApptainerBackendConfig> {
940        match self {
941            Self::LsfApptainer(config) => Some(config),
942            _ => None,
943        }
944    }
945
946    /// Converts the backend configuration into a Slurm Apptainer backend
947    /// configuration
948    ///
949    /// Returns `None` if the backend configuration is not Slurm Apptainer.
950    pub fn as_slurm_apptainer(&self) -> Option<&SlurmApptainerBackendConfig> {
951        match self {
952            Self::SlurmApptainer(config) => Some(config),
953            _ => None,
954        }
955    }
956
957    /// Redacts the secrets contained in the backend configuration.
958    pub fn redact(&mut self) {
959        match self {
960            Self::Local(_) | Self::Docker(_) | Self::LsfApptainer(_) | Self::SlurmApptainer(_) => {}
961            Self::Tes(config) => config.redact(),
962        }
963    }
964
965    /// Unredacts the secrets contained in the backend configuration.
966    pub fn unredact(&mut self) {
967        match self {
968            Self::Local(_) | Self::Docker(_) | Self::LsfApptainer(_) | Self::SlurmApptainer(_) => {}
969            Self::Tes(config) => config.unredact(),
970        }
971    }
972}
973
/// Represents configuration for the local task execution backend.
///
/// The constraints documented on the fields below are enforced by
/// [`LocalBackendConfig::validate`].
///
/// <div class="warning">
/// Warning: the local task execution backend spawns processes on the host
/// directly without the use of a container; only use this backend on trusted
/// WDL. </div>
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct LocalBackendConfig {
    /// Set the number of CPUs available for task execution.
    ///
    /// Defaults to the number of logical CPUs for the host.
    ///
    /// The value cannot be zero or exceed the host's number of CPUs.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub cpu: Option<u64>,

    /// Set the total amount of memory for task execution as a unit string (e.g.
    /// `2 GiB`).
    ///
    /// Defaults to the total amount of memory for the host.
    ///
    /// The value cannot be zero or exceed the host's total amount of memory.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub memory: Option<String>,
}
1000
1001impl LocalBackendConfig {
1002    /// Validates the local task execution backend configuration.
1003    pub fn validate(&self) -> Result<()> {
1004        if let Some(cpu) = self.cpu {
1005            if cpu == 0 {
1006                bail!("local backend configuration value `cpu` cannot be zero");
1007            }
1008
1009            let total = SYSTEM.cpus().len() as u64;
1010            if cpu > total {
1011                bail!(
1012                    "local backend configuration value `cpu` cannot exceed the virtual CPUs \
1013                     available to the host ({total})"
1014                );
1015            }
1016        }
1017
1018        if let Some(memory) = &self.memory {
1019            let memory = convert_unit_string(memory).with_context(|| {
1020                format!("local backend configuration value `memory` has invalid value `{memory}`")
1021            })?;
1022
1023            if memory == 0 {
1024                bail!("local backend configuration value `memory` cannot be zero");
1025            }
1026
1027            let total = SYSTEM.total_memory();
1028            if memory > total {
1029                bail!(
1030                    "local backend configuration value `memory` cannot exceed the total memory of \
1031                     the host ({total} bytes)"
1032                );
1033            }
1034        }
1035
1036        Ok(())
1037    }
1038}
1039
/// Gets the default value for the docker `cleanup` field.
///
/// Referenced by `#[serde(default = "cleanup_default")]` on
/// `DockerBackendConfig::cleanup`; the single source of truth for that
/// field's default.
const fn cleanup_default() -> bool {
    true
}
1044
/// Represents configuration for the Docker backend.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct DockerBackendConfig {
    /// Whether or not to remove a task's container after the task completes.
    ///
    /// Defaults to `true` (the serde default is supplied by
    /// [`cleanup_default`]).
    #[serde(default = "cleanup_default")]
    pub cleanup: bool,
}
1055
impl DockerBackendConfig {
    /// Validates the Docker backend configuration.
    ///
    /// Currently a no-op that always returns `Ok(())`: the only field is a
    /// plain boolean with no constraints to check.
    pub fn validate(&self) -> Result<()> {
        Ok(())
    }
}
1062
1063impl Default for DockerBackendConfig {
1064    fn default() -> Self {
1065        Self { cleanup: true }
1066    }
1067}
1068
/// Represents HTTP basic authentication configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct BasicAuthConfig {
    /// The HTTP basic authentication username.
    #[serde(default)]
    pub username: String,
    /// The HTTP basic authentication password.
    ///
    /// Stored as a [`SecretString`], so it is redacted by default when the
    /// configuration is serialized.
    #[serde(default)]
    pub password: SecretString,
}
1080
impl BasicAuthConfig {
    /// Validates the HTTP basic auth configuration.
    ///
    /// Currently a no-op that always returns `Ok(())`: there are no
    /// constraints on the username or password.
    pub fn validate(&self) -> Result<()> {
        Ok(())
    }

    /// Redacts the secrets contained in the HTTP basic auth configuration.
    ///
    /// Delegates to the password's own redaction.
    pub fn redact(&mut self) {
        self.password.redact();
    }

    /// Unredacts the secrets contained in the HTTP basic auth configuration.
    ///
    /// Delegates to the password's own unredaction.
    pub fn unredact(&mut self) {
        self.password.unredact();
    }
}
1097
/// Represents HTTP bearer token authentication configuration.
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct BearerAuthConfig {
    /// The HTTP bearer authentication token.
    ///
    /// Stored as a [`SecretString`], so it is redacted by default when the
    /// configuration is serialized.
    #[serde(default)]
    pub token: SecretString,
}
1106
impl BearerAuthConfig {
    /// Validates the HTTP bearer auth configuration.
    ///
    /// Currently a no-op that always returns `Ok(())`: there are no
    /// constraints on the token.
    pub fn validate(&self) -> Result<()> {
        Ok(())
    }

    /// Redacts the secrets contained in the HTTP bearer auth configuration.
    ///
    /// Delegates to the token's own redaction.
    pub fn redact(&mut self) {
        self.token.redact();
    }

    /// Unredacts the secrets contained in the HTTP bearer auth configuration.
    ///
    /// Delegates to the token's own unredaction.
    pub fn unredact(&mut self) {
        self.token.unredact();
    }
}
1123
/// Represents the kind of authentication for a TES backend.
///
/// Serialized as an internally tagged enum: the `type` field is either
/// `basic` or `bearer`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum TesBackendAuthConfig {
    /// Use basic authentication for the TES backend.
    Basic(BasicAuthConfig),
    /// Use bearer token authentication for the TES backend.
    Bearer(BearerAuthConfig),
}
1133
impl TesBackendAuthConfig {
    /// Validates the TES backend authentication configuration.
    ///
    /// Delegates to the validation of the selected authentication kind.
    pub fn validate(&self) -> Result<()> {
        match self {
            Self::Basic(config) => config.validate(),
            Self::Bearer(config) => config.validate(),
        }
    }

    /// Redacts the secrets contained in the TES backend authentication
    /// configuration.
    ///
    /// Delegates to the selected authentication kind.
    pub fn redact(&mut self) {
        match self {
            Self::Basic(auth) => auth.redact(),
            Self::Bearer(auth) => auth.redact(),
        }
    }

    /// Unredacts the secrets contained in the TES backend authentication
    /// configuration.
    ///
    /// Delegates to the selected authentication kind.
    pub fn unredact(&mut self) {
        match self {
            Self::Basic(auth) => auth.unredact(),
            Self::Bearer(auth) => auth.unredact(),
        }
    }
}
1161
1162/// Represents configuration for the Task Execution Service (TES) backend.
1163#[derive(Debug, Default, Clone, Serialize, Deserialize)]
1164#[serde(rename_all = "snake_case", deny_unknown_fields)]
1165pub struct TesBackendConfig {
1166    /// The URL of the Task Execution Service.
1167    #[serde(default, skip_serializing_if = "Option::is_none")]
1168    pub url: Option<Url>,
1169
1170    /// The authentication configuration for the TES backend.
1171    #[serde(default, skip_serializing_if = "Option::is_none")]
1172    pub auth: Option<TesBackendAuthConfig>,
1173
1174    /// The root cloud storage URL for storing inputs.
1175    #[serde(default, skip_serializing_if = "Option::is_none")]
1176    pub inputs: Option<Url>,
1177
1178    /// The root cloud storage URL for storing outputs.
1179    #[serde(default, skip_serializing_if = "Option::is_none")]
1180    pub outputs: Option<Url>,
1181
1182    /// The polling interval, in seconds, for checking task status.
1183    ///
1184    /// Defaults to 1 second.
1185    #[serde(default, skip_serializing_if = "Option::is_none")]
1186    pub interval: Option<u64>,
1187
1188    /// The number of retries after encountering an error communicating with the
1189    /// TES server.
1190    ///
1191    /// Defaults to no retries.
1192    pub retries: Option<u32>,
1193
1194    /// The maximum number of concurrent requests the backend will send to the
1195    /// TES server.
1196    ///
1197    /// Defaults to 10 concurrent requests.
1198    #[serde(default, skip_serializing_if = "Option::is_none")]
1199    pub max_concurrency: Option<u32>,
1200
1201    /// Whether or not the TES server URL may use an insecure protocol like
1202    /// HTTP.
1203    #[serde(default)]
1204    pub insecure: bool,
1205}
1206
1207impl TesBackendConfig {
1208    /// Validates the TES backend configuration.
1209    pub fn validate(&self) -> Result<()> {
1210        match &self.url {
1211            Some(url) => {
1212                if !self.insecure && url.scheme() != "https" {
1213                    bail!(
1214                        "TES backend configuration value `url` has invalid value `{url}`: URL \
1215                         must use a HTTPS scheme"
1216                    );
1217                }
1218            }
1219            None => bail!("TES backend configuration value `url` is required"),
1220        }
1221
1222        if let Some(auth) = &self.auth {
1223            auth.validate()?;
1224        }
1225
1226        if let Some(max_concurrency) = self.max_concurrency
1227            && max_concurrency == 0
1228        {
1229            bail!("TES backend configuration value `max_concurrency` cannot be zero");
1230        }
1231
1232        match &self.inputs {
1233            Some(url) => {
1234                if !is_supported_url(url.as_str()) {
1235                    bail!(
1236                        "TES backend storage configuration value `inputs` has invalid value \
1237                         `{url}`: URL scheme is not supported"
1238                    );
1239                }
1240
1241                if !url.path().ends_with('/') {
1242                    bail!(
1243                        "TES backend storage configuration value `inputs` has invalid value \
1244                         `{url}`: URL path must end with a slash"
1245                    );
1246                }
1247            }
1248            None => bail!("TES backend configuration value `inputs` is required"),
1249        }
1250
1251        match &self.outputs {
1252            Some(url) => {
1253                if !is_supported_url(url.as_str()) {
1254                    bail!(
1255                        "TES backend storage configuration value `outputs` has invalid value \
1256                         `{url}`: URL scheme is not supported"
1257                    );
1258                }
1259
1260                if !url.path().ends_with('/') {
1261                    bail!(
1262                        "TES backend storage configuration value `outputs` has invalid value \
1263                         `{url}`: URL path must end with a slash"
1264                    );
1265                }
1266            }
1267            None => bail!("TES backend storage configuration value `outputs` is required"),
1268        }
1269
1270        Ok(())
1271    }
1272
1273    /// Redacts the secrets contained in the TES backend configuration.
1274    pub fn redact(&mut self) {
1275        if let Some(auth) = &mut self.auth {
1276            auth.redact();
1277        }
1278    }
1279
1280    /// Unredacts the secrets contained in the TES backend configuration.
1281    pub fn unredact(&mut self) {
1282        if let Some(auth) = &mut self.auth {
1283            auth.unredact();
1284        }
1285    }
1286}
1287
/// Configuration for the Apptainer container runtime.
///
/// This configuration is `#[serde(flatten)]`-ed into the LSF and Slurm
/// backend configurations rather than nested under its own key.
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct ApptainerConfig {
    /// Path to the Apptainer (or Singularity) executable.
    ///
    /// Defaults to `"apptainer"`. Set to `"singularity"` or a full path
    /// (e.g., `/usr/local/bin/apptainer`) if the executable is not on `PATH`
    /// or if using Singularity instead.
    #[serde(default = "default_apptainer_executable")]
    pub executable: String,

    /// Path to a shared directory for caching pulled `.sif` images.
    ///
    /// When set, pulled images are stored in this directory and shared
    /// across runs. When unset, images are stored in a per-run directory
    /// that is not shared.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image_cache_dir: Option<PathBuf>,

    /// Additional command-line arguments to pass to `apptainer exec` when
    /// executing tasks.
    // NOTE(review): unlike `image_cache_dir`, this field has no
    // `#[serde(default, skip_serializing_if = "Option::is_none")]`
    // attributes, so `None` serializes as an explicit null — confirm whether
    // that is intentional.
    pub extra_apptainer_exec_args: Option<Vec<String>>,
}
1312
/// The default Apptainer executable name.
const DEFAULT_APPTAINER_EXECUTABLE: &str = "apptainer";

/// Returns the default Apptainer executable name for serde deserialization.
fn default_apptainer_executable() -> String {
    DEFAULT_APPTAINER_EXECUTABLE.to_string()
}
1320
1321impl Default for ApptainerConfig {
1322    fn default() -> Self {
1323        Self {
1324            executable: default_apptainer_executable(),
1325            image_cache_dir: None,
1326            extra_apptainer_exec_args: None,
1327        }
1328    }
1329}
1330
impl ApptainerConfig {
    /// Validate that Apptainer is appropriately configured.
    ///
    /// Currently a no-op that always returns `Ok(())`; no checks are
    /// performed. Presumably a placeholder for future validation (e.g.
    /// probing the configured executable) — TODO confirm.
    pub async fn validate(&self) -> Result<(), anyhow::Error> {
        Ok(())
    }
}
1337
/// Configuration for an LSF queue.
///
/// Each queue can optionally have per-task CPU and memory limits set so that
/// tasks which are too large to be scheduled on that queue will fail
/// immediately instead of pending indefinitely. In the future, these limits may
/// be populated or validated by live information from the cluster, but
/// for now they must be manually based on the user's understanding of the
/// cluster configuration.
///
/// The limits (when set) are sanity-checked by [`LsfQueueConfig::validate`].
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct LsfQueueConfig {
    /// The name of the queue; this is the string passed to `bsub -q
    /// <queue_name>`.
    pub name: String,
    /// The maximum number of CPUs this queue can provision for a single task.
    pub max_cpu_per_task: Option<u64>,
    /// The maximum memory this queue can provision for a single task.
    pub max_memory_per_task: Option<ByteSize>,
}
1357
1358impl LsfQueueConfig {
1359    /// Validate that this LSF queue exists according to the local `bqueues`.
1360    pub async fn validate(&self, name: &str) -> Result<(), anyhow::Error> {
1361        let queue = &self.name;
1362        ensure!(!queue.is_empty(), "{name}_lsf_queue name cannot be empty");
1363        if let Some(max_cpu_per_task) = self.max_cpu_per_task {
1364            ensure!(
1365                max_cpu_per_task > 0,
1366                "{name}_lsf_queue `{queue}` must allow at least 1 CPU to be provisioned"
1367            );
1368        }
1369        if let Some(max_memory_per_task) = self.max_memory_per_task {
1370            ensure!(
1371                max_memory_per_task.as_u64() > 0,
1372                "{name}_lsf_queue `{queue}` must allow at least some memory to be provisioned"
1373            );
1374        }
1375        match tokio::time::timeout(
1376            // 10 seconds is rather arbitrary; `bqueues` ordinarily returns extremely quickly, but
1377            // we don't want things to run away on a misconfigured system
1378            std::time::Duration::from_secs(10),
1379            Command::new("bqueues").arg(queue).output(),
1380        )
1381        .await
1382        {
1383            Ok(output) => {
1384                let output = output.context("validating LSF queue")?;
1385                if !output.status.success() {
1386                    let stdout = String::from_utf8_lossy(&output.stdout);
1387                    let stderr = String::from_utf8_lossy(&output.stderr);
1388                    error!(%stdout, %stderr, %queue, "failed to validate {name}_lsf_queue");
1389                    Err(anyhow!("failed to validate {name}_lsf_queue `{queue}`"))
1390                } else {
1391                    Ok(())
1392                }
1393            }
1394            Err(_) => Err(anyhow!(
1395                "timed out trying to validate {name}_lsf_queue `{queue}`"
1396            )),
1397        }
1398    }
1399}
1400
/// Configuration for the LSF + Apptainer backend.
// TODO ACF 2025-09-23: add a Apptainer/Singularity mode config that switches around executable
// name, env var names, etc.
#[derive(Debug, Default, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
// NOTE(review): serde documents `deny_unknown_fields` as unsupported in
// combination with `flatten` (used for `apptainer_config` below) — verify
// that unknown keys are actually rejected for this struct.
pub struct LsfApptainerBackendConfig {
    /// The task monitor polling interval, in seconds.
    ///
    /// Defaults to 30 seconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub interval: Option<u64>,
    /// The maximum number of concurrent LSF operations the backend will
    /// perform.
    ///
    /// This controls the maximum concurrent number of `bsub` processes the
    /// backend will spawn to queue tasks.
    ///
    /// Defaults to 10 concurrent operations.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrency: Option<u32>,
    /// Which queue, if any, to specify when submitting normal jobs to LSF.
    ///
    /// This may be superseded by
    /// [`short_task_lsf_queue`][Self::short_task_lsf_queue],
    /// [`gpu_lsf_queue`][Self::gpu_lsf_queue], or
    /// [`fpga_lsf_queue`][Self::fpga_lsf_queue] for corresponding tasks.
    pub default_lsf_queue: Option<LsfQueueConfig>,
    /// Which queue, if any, to specify when submitting [short
    /// tasks](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#short_task) to LSF.
    ///
    /// This may be superseded by [`gpu_lsf_queue`][Self::gpu_lsf_queue] or
    /// [`fpga_lsf_queue`][Self::fpga_lsf_queue] for tasks which require
    /// specialized hardware.
    pub short_task_lsf_queue: Option<LsfQueueConfig>,
    /// Which queue, if any, to specify when submitting [tasks which require a
    /// GPU](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
    /// to LSF.
    pub gpu_lsf_queue: Option<LsfQueueConfig>,
    /// Which queue, if any, to specify when submitting [tasks which require an
    /// FPGA](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
    /// to LSF.
    pub fpga_lsf_queue: Option<LsfQueueConfig>,
    /// Additional command-line arguments to pass to `bsub` when submitting jobs
    /// to LSF.
    pub extra_bsub_args: Option<Vec<String>>,
    /// Prefix to add to every LSF job name before the task identifier. This is
    /// truncated as needed to satisfy the byte-oriented LSF job name limit.
    ///
    /// Must not exceed `MAX_LSF_JOB_NAME_PREFIX` bytes (checked by
    /// `validate`).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub job_name_prefix: Option<String>,
    /// The configuration of Apptainer, which is used as the container runtime
    /// on the compute nodes where LSF dispatches tasks.
    ///
    /// Note that this will likely be replaced by an abstraction over multiple
    /// container execution runtimes in the future, rather than being
    /// hardcoded to Apptainer.
    #[serde(default)]
    // TODO ACF 2025-10-16: temporarily flatten this into the overall config so that it doesn't
    // break existing serialized configs. We'll save breaking the config file format for when we
    // actually have meaningful composition of in-place runtimes.
    #[serde(flatten)]
    pub apptainer_config: ApptainerConfig,
}
1463
1464impl LsfApptainerBackendConfig {
1465    /// Validate that the backend is appropriately configured.
1466    pub async fn validate(&self, engine_config: &Config) -> Result<(), anyhow::Error> {
1467        if cfg!(not(unix)) {
1468            bail!("LSF + Apptainer backend is not supported on non-unix platforms");
1469        }
1470
1471        if !engine_config.experimental_features_enabled {
1472            bail!("LSF + Apptainer backend requires enabling experimental features");
1473        }
1474
1475        // Do what we can to validate options that are dependent on the dynamic
1476        // environment. These are a bit fraught, particularly if the behavior of
1477        // the external tools changes based on where a job gets dispatched, but
1478        // querying from the perspective of the current node allows
1479        // us to get better error messages in circumstances typical to a cluster.
1480        if let Some(queue) = &self.default_lsf_queue {
1481            queue.validate("default").await?;
1482        }
1483
1484        if let Some(queue) = &self.short_task_lsf_queue {
1485            queue.validate("short_task").await?;
1486        }
1487
1488        if let Some(queue) = &self.gpu_lsf_queue {
1489            queue.validate("gpu").await?;
1490        }
1491
1492        if let Some(queue) = &self.fpga_lsf_queue {
1493            queue.validate("fpga").await?;
1494        }
1495
1496        if let Some(prefix) = &self.job_name_prefix
1497            && prefix.len() > MAX_LSF_JOB_NAME_PREFIX
1498        {
1499            bail!(
1500                "LSF job name prefix `{prefix}` exceeds the maximum {MAX_LSF_JOB_NAME_PREFIX} \
1501                 bytes"
1502            );
1503        }
1504
1505        self.apptainer_config.validate().await?;
1506
1507        Ok(())
1508    }
1509
1510    /// Get the appropriate LSF queue for a task under this configuration.
1511    ///
1512    /// Specialized hardware requirements are prioritized over other
1513    /// characteristics, with FPGA taking precedence over GPU.
1514    pub(crate) fn lsf_queue_for_task(
1515        &self,
1516        requirements: &HashMap<String, Value>,
1517        hints: &HashMap<String, Value>,
1518    ) -> Option<&LsfQueueConfig> {
1519        // Specialized hardware gets priority.
1520        if let Some(queue) = self.fpga_lsf_queue.as_ref()
1521            && let Some(true) = requirements
1522                .get(wdl_ast::v1::TASK_REQUIREMENT_FPGA)
1523                .and_then(Value::as_boolean)
1524        {
1525            return Some(queue);
1526        }
1527
1528        if let Some(queue) = self.gpu_lsf_queue.as_ref()
1529            && let Some(true) = requirements
1530                .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
1531                .and_then(Value::as_boolean)
1532        {
1533            return Some(queue);
1534        }
1535
1536        // Then short tasks.
1537        if let Some(queue) = self.short_task_lsf_queue.as_ref()
1538            && let Some(true) = hints
1539                .get(wdl_ast::v1::TASK_HINT_SHORT_TASK)
1540                .and_then(Value::as_boolean)
1541        {
1542            return Some(queue);
1543        }
1544
1545        // Finally the default queue. If this is `None`, `bsub` gets run without a queue
1546        // argument and the cluster's default is used.
1547        self.default_lsf_queue.as_ref()
1548    }
1549}
1550
/// Configuration for a Slurm partition.
///
/// Each partition can optionally have per-task CPU and memory limits set so
/// that tasks which are too large to be scheduled on that partition will fail
/// immediately instead of pending indefinitely. In the future, these limits may
/// be populated or validated by live information from the cluster, but
/// for now they must be manually based on the user's understanding of the
/// cluster configuration.
///
/// The limits (when set) are sanity-checked by
/// [`SlurmPartitionConfig::validate`].
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
pub struct SlurmPartitionConfig {
    /// The name of the partition; this is the string passed to `sbatch
    /// --partition=<partition_name>`.
    pub name: String,
    /// The maximum number of CPUs this partition can provision for a single
    /// task.
    pub max_cpu_per_task: Option<u64>,
    /// The maximum memory this partition can provision for a single task.
    pub max_memory_per_task: Option<ByteSize>,
}
1571
1572impl SlurmPartitionConfig {
1573    /// Validate that this Slurm partition exists according to the local
1574    /// `sinfo`.
1575    pub async fn validate(&self, name: &str) -> Result<(), anyhow::Error> {
1576        let partition = &self.name;
1577        ensure!(
1578            !partition.is_empty(),
1579            "{name}_slurm_partition name cannot be empty"
1580        );
1581        if let Some(max_cpu_per_task) = self.max_cpu_per_task {
1582            ensure!(
1583                max_cpu_per_task > 0,
1584                "{name}_slurm_partition `{partition}` must allow at least 1 CPU to be provisioned"
1585            );
1586        }
1587        if let Some(max_memory_per_task) = self.max_memory_per_task {
1588            ensure!(
1589                max_memory_per_task.as_u64() > 0,
1590                "{name}_slurm_partition `{partition}` must allow at least some memory to be \
1591                 provisioned"
1592            );
1593        }
1594        match tokio::time::timeout(
1595            // 10 seconds is rather arbitrary; `scontrol` ordinarily returns extremely quickly, but
1596            // we don't want things to run away on a misconfigured system
1597            std::time::Duration::from_secs(10),
1598            Command::new("scontrol")
1599                .arg("show")
1600                .arg("partition")
1601                .arg(partition)
1602                .output(),
1603        )
1604        .await
1605        {
1606            Ok(output) => {
1607                let output = output.context("validating Slurm partition")?;
1608                if !output.status.success() {
1609                    let stdout = String::from_utf8_lossy(&output.stdout);
1610                    let stderr = String::from_utf8_lossy(&output.stderr);
1611                    error!(%stdout, %stderr, %partition, "failed to validate {name}_slurm_partition");
1612                    Err(anyhow!(
1613                        "failed to validate {name}_slurm_partition `{partition}`"
1614                    ))
1615                } else {
1616                    Ok(())
1617                }
1618            }
1619            Err(_) => Err(anyhow!(
1620                "timed out trying to validate {name}_slurm_partition `{partition}`"
1621            )),
1622        }
1623    }
1624}
1625
/// Configuration for the Slurm + Apptainer backend.
// TODO ACF 2025-09-23: add a Apptainer/Singularity mode config that switches around executable
// name, env var names, etc.
#[derive(Debug, Default, Clone, serde::Deserialize, serde::Serialize)]
#[serde(rename_all = "snake_case", deny_unknown_fields)]
// NOTE(review): serde documents `deny_unknown_fields` as unsupported in
// combination with `flatten` (used for `apptainer_config` below) — verify
// that unknown keys are actually rejected for this struct.
pub struct SlurmApptainerBackendConfig {
    /// The task monitor polling interval, in seconds.
    ///
    /// Defaults to 30 seconds.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub interval: Option<u64>,
    /// The maximum number of concurrent Slurm operations the backend will
    /// perform.
    ///
    /// This controls the maximum concurrent number of `sbatch` processes the
    /// backend will spawn to queue tasks.
    ///
    /// Defaults to 10 concurrent operations.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_concurrency: Option<u32>,
    /// Which partition, if any, to specify when submitting normal jobs to
    /// Slurm.
    ///
    /// This may be superseded by
    /// [`short_task_slurm_partition`][Self::short_task_slurm_partition],
    /// [`gpu_slurm_partition`][Self::gpu_slurm_partition], or
    /// [`fpga_slurm_partition`][Self::fpga_slurm_partition] for corresponding
    /// tasks.
    pub default_slurm_partition: Option<SlurmPartitionConfig>,
    /// Which partition, if any, to specify when submitting [short
    /// tasks](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#short_task) to Slurm.
    ///
    /// This may be superseded by
    /// [`gpu_slurm_partition`][Self::gpu_slurm_partition] or
    /// [`fpga_slurm_partition`][Self::fpga_slurm_partition] for tasks which
    /// require specialized hardware.
    pub short_task_slurm_partition: Option<SlurmPartitionConfig>,
    /// Which partition, if any, to specify when submitting [tasks which require
    /// a GPU](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
    /// to Slurm.
    pub gpu_slurm_partition: Option<SlurmPartitionConfig>,
    /// Which partition, if any, to specify when submitting [tasks which require
    /// an FPGA](https://github.com/openwdl/wdl/blob/wdl-1.2/SPEC.md#hardware-accelerators-gpu-and--fpga)
    /// to Slurm.
    pub fpga_slurm_partition: Option<SlurmPartitionConfig>,
    /// Additional command-line arguments to pass to `sbatch` when submitting
    /// jobs to Slurm.
    pub extra_sbatch_args: Option<Vec<String>>,
    /// Prefix to add to every Slurm job name before the task identifier.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub job_name_prefix: Option<String>,
    /// The configuration of Apptainer, which is used as the container runtime
    /// on the compute nodes where Slurm dispatches tasks.
    ///
    /// Note that this will likely be replaced by an abstraction over multiple
    /// container execution runtimes in the future, rather than being
    /// hardcoded to Apptainer.
    #[serde(default)]
    // TODO ACF 2025-10-16: temporarily flatten this into the overall config so that it doesn't
    // break existing serialized configs. We'll save breaking the config file format for when we
    // actually have meaningful composition of in-place runtimes.
    #[serde(flatten)]
    pub apptainer_config: ApptainerConfig,
}
1690
1691impl SlurmApptainerBackendConfig {
1692    /// Validate that the backend is appropriately configured.
1693    pub async fn validate(&self, engine_config: &Config) -> Result<(), anyhow::Error> {
1694        if cfg!(not(unix)) {
1695            bail!("Slurm + Apptainer backend is not supported on non-unix platforms");
1696        }
1697        if !engine_config.experimental_features_enabled {
1698            bail!("Slurm + Apptainer backend requires enabling experimental features");
1699        }
1700
1701        // Do what we can to validate options that are dependent on the dynamic
1702        // environment. These are a bit fraught, particularly if the behavior of
1703        // the external tools changes based on where a job gets dispatched, but
1704        // querying from the perspective of the current node allows
1705        // us to get better error messages in circumstances typical to a cluster.
1706        if let Some(partition) = &self.default_slurm_partition {
1707            partition.validate("default").await?;
1708        }
1709        if let Some(partition) = &self.short_task_slurm_partition {
1710            partition.validate("short_task").await?;
1711        }
1712        if let Some(partition) = &self.gpu_slurm_partition {
1713            partition.validate("gpu").await?;
1714        }
1715        if let Some(partition) = &self.fpga_slurm_partition {
1716            partition.validate("fpga").await?;
1717        }
1718
1719        self.apptainer_config.validate().await?;
1720
1721        Ok(())
1722    }
1723
1724    /// Get the appropriate Slurm partition for a task under this configuration.
1725    ///
1726    /// Specialized hardware requirements are prioritized over other
1727    /// characteristics, with FPGA taking precedence over GPU.
1728    pub(crate) fn slurm_partition_for_task(
1729        &self,
1730        requirements: &HashMap<String, Value>,
1731        hints: &HashMap<String, Value>,
1732    ) -> Option<&SlurmPartitionConfig> {
1733        // TODO ACF 2025-09-26: what's the relationship between this code and
1734        // `TaskExecutionConstraints`? Should this be there instead, or be pulling
1735        // values from that instead of directly from `requirements` and `hints`?
1736
1737        // Specialized hardware gets priority.
1738        if let Some(partition) = self.fpga_slurm_partition.as_ref()
1739            && let Some(true) = requirements
1740                .get(wdl_ast::v1::TASK_REQUIREMENT_FPGA)
1741                .and_then(Value::as_boolean)
1742        {
1743            return Some(partition);
1744        }
1745
1746        if let Some(partition) = self.gpu_slurm_partition.as_ref()
1747            && let Some(true) = requirements
1748                .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
1749                .and_then(Value::as_boolean)
1750        {
1751            return Some(partition);
1752        }
1753
1754        // Then short tasks.
1755        if let Some(partition) = self.short_task_slurm_partition.as_ref()
1756            && let Some(true) = hints
1757                .get(wdl_ast::v1::TASK_HINT_SHORT_TASK)
1758                .and_then(Value::as_boolean)
1759        {
1760            return Some(partition);
1761        }
1762
1763        // Finally the default partition. If this is `None`, `sbatch` gets run without a
1764        // partition argument and the cluster's default is used.
1765        self.default_slurm_partition.as_ref()
1766    }
1767}
1768
#[cfg(test)]
mod test {
    use pretty_assertions::assert_eq;

    use super::*;

    /// A `SecretString` serializes as `<REDACTED>` by default, serializes its
    /// real value after `unredact`, and redacts again after `redact`.
    #[test]
    fn redacted_secret() {
        let mut secret: SecretString = "secret".into();

        assert_eq!(
            serde_json::to_string(&secret).unwrap(),
            format!(r#""{REDACTED}""#)
        );

        secret.unredact();
        assert_eq!(serde_json::to_string(&secret).unwrap(), r#""secret""#);

        secret.redact();
        assert_eq!(
            serde_json::to_string(&secret).unwrap(),
            format!(r#""{REDACTED}""#)
        );
    }

    /// Serializing a config populated with secrets in every auth location
    /// (TES basic/bearer, Azure, S3, and Google storage) must not leak any of
    /// the secret values into the output.
    #[test]
    fn redacted_config() {
        let config = Config {
            backends: [
                (
                    "first".to_string(),
                    BackendConfig::Tes(TesBackendConfig {
                        auth: Some(TesBackendAuthConfig::Basic(BasicAuthConfig {
                            username: "foo".into(),
                            password: "secret".into(),
                        })),
                        ..Default::default()
                    }),
                ),
                (
                    "second".to_string(),
                    BackendConfig::Tes(TesBackendConfig {
                        auth: Some(TesBackendAuthConfig::Bearer(BearerAuthConfig {
                            token: "secret".into(),
                        })),
                        ..Default::default()
                    }),
                ),
            ]
            .into(),
            storage: StorageConfig {
                azure: AzureStorageConfig {
                    auth: Some(AzureStorageAuthConfig {
                        account_name: "foo".into(),
                        access_key: "secret".into(),
                    }),
                },
                s3: S3StorageConfig {
                    auth: Some(S3StorageAuthConfig {
                        access_key_id: "foo".into(),
                        secret_access_key: "secret".into(),
                    }),
                    ..Default::default()
                },
                google: GoogleStorageConfig {
                    auth: Some(GoogleStorageAuthConfig {
                        access_key: "foo".into(),
                        secret: "secret".into(),
                    }),
                },
            },
            ..Default::default()
        };

        let json = serde_json::to_string_pretty(&config).unwrap();
        // Every secret value above is the literal string "secret"; redaction
        // works only if that string is absent from the serialized output.
        // (Fixed: the condition was previously missing the `!`, which made
        // the test assert that the secret WAS leaked — the failure message
        // only makes sense for the negated condition.)
        assert!(!json.contains("secret"), "`{json}` contains a secret");
    }

    /// Exercises `Config::validate` across invalid task, scatter, backend
    /// selection, local backend, TES backend, HTTP, and LSF configurations.
    #[tokio::test]
    async fn test_config_validate() {
        // Test invalid task config
        let mut config = Config::default();
        config.task.retries = Some(1000000);
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `task.retries` cannot exceed 100"
        );

        // Test invalid scatter concurrency config
        let mut config = Config::default();
        config.workflow.scatter.concurrency = Some(0);
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `workflow.scatter.concurrency` cannot be zero"
        );

        // Test invalid backend name
        let config = Config {
            backend: Some("foo".into()),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "a backend named `foo` is not present in the configuration"
        );
        let config = Config {
            backend: Some("bar".into()),
            backends: [("foo".to_string(), BackendConfig::default())].into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "a backend named `bar` is not present in the configuration"
        );

        // Test a singular backend
        let config = Config {
            backends: [("foo".to_string(), BackendConfig::default())].into(),
            ..Default::default()
        };
        config.validate().await.expect("config should validate");

        // Test invalid local backend cpu config
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    cpu: Some(0),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `cpu` cannot be zero"
        );
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    cpu: Some(10000000),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert!(
            config
                .validate()
                .await
                .unwrap_err()
                .to_string()
                .starts_with(
                    "local backend configuration value `cpu` cannot exceed the virtual CPUs \
                     available to the host"
                )
        );

        // Test invalid local backend memory config
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("0 GiB".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `memory` cannot be zero"
        );
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("100 meows".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "local backend configuration value `memory` has invalid value `100 meows`"
        );

        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Local(LocalBackendConfig {
                    memory: Some("1000 TiB".to_string()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert!(
            config
                .validate()
                .await
                .unwrap_err()
                .to_string()
                .starts_with(
                    "local backend configuration value `memory` cannot exceed the total memory of \
                     the host"
                )
        );

        // Test missing TES URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(Default::default()),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `url` is required"
        );

        // Test TES invalid max concurrency
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("https://example.com".parse().unwrap()),
                    max_concurrency: Some(0),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `max_concurrency` cannot be zero"
        );

        // Insecure TES URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("http://example.com".parse().unwrap()),
                    inputs: Some("http://example.com".parse().unwrap()),
                    outputs: Some("http://example.com".parse().unwrap()),
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "TES backend configuration value `url` has invalid value `http://example.com/`: URL \
             must use a HTTPS scheme"
        );

        // Allow insecure URL
        let config = Config {
            backends: [(
                "default".to_string(),
                BackendConfig::Tes(TesBackendConfig {
                    url: Some("http://example.com".parse().unwrap()),
                    inputs: Some("http://example.com".parse().unwrap()),
                    outputs: Some("http://example.com".parse().unwrap()),
                    insecure: true,
                    ..Default::default()
                }),
            )]
            .into(),
            ..Default::default()
        };
        config
            .validate()
            .await
            .expect("configuration should validate");

        let mut config = Config::default();
        config.http.parallelism = Some(0);
        assert_eq!(
            config.validate().await.unwrap_err().to_string(),
            "configuration value `http.parallelism` cannot be zero"
        );

        let mut config = Config::default();
        config.http.parallelism = Some(5);
        assert!(
            config.validate().await.is_ok(),
            "should pass for valid configuration"
        );

        let mut config = Config::default();
        config.http.parallelism = None;
        assert!(
            config.validate().await.is_ok(),
            "should pass for default (None)"
        );

        // Test invalid LSF job name prefix
        #[cfg(unix)]
        {
            let job_name_prefix = "A".repeat(MAX_LSF_JOB_NAME_PREFIX * 2);
            let mut config = Config {
                experimental_features_enabled: true,
                ..Default::default()
            };
            config.backends.insert(
                "default".to_string(),
                BackendConfig::LsfApptainer(LsfApptainerBackendConfig {
                    job_name_prefix: Some(job_name_prefix.clone()),
                    ..Default::default()
                }),
            );
            assert_eq!(
                config.validate().await.unwrap_err().to_string(),
                format!("LSF job name prefix `{job_name_prefix}` exceeds the maximum 100 bytes")
            );
        }
    }
}