wdl-engine 0.13.2

Execution engine for Workflow Description Language (WDL) documents.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
//! Support for using the Apptainer (a.k.a. Singularity) container runtime.
//!
//! There are two primary responsibilities of this module: `.sif` image cache
//! management and command script generation.
//!
//! The entrypoint for both of these is [`ApptainerRuntime::generate_script`].

use std::collections::HashMap;
use std::fmt::Write as _;
use std::path::Path;
use std::path::PathBuf;
use std::path::absolute;
use std::process::Stdio;
use std::sync::Arc;
use std::sync::Mutex;

use anyhow::Context as _;
use anyhow::Result;
use anyhow::anyhow;
use anyhow::bail;
use tokio::process::Command;
use tokio::sync::OnceCell;
use tokio_retry2::Retry;
use tokio_retry2::RetryError;
use tokio_retry2::strategy::ExponentialBackoff;
use tokio_util::sync::CancellationToken;
use tracing::debug;
use tracing::warn;

use crate::Value;
use crate::backend::ExecuteTaskRequest;
use crate::backend::PullResults;
use crate::config::ApptainerConfig;
use crate::v1::requirements::ContainerSource;

/// The name of the images cache directory.
///
/// Joined onto the runtime's root directory when no explicit image cache
/// directory is supplied to [`ApptainerRuntime::new`].
const IMAGES_CACHE_DIR: &str = "apptainer-images";

/// The guest working directory.
///
/// Used both as the bind-mount destination for the task's work directory and
/// as the `--pwd` of the container process.
const GUEST_WORK_DIR: &str = "/mnt/task/work";

/// The guest path for the command file.
///
/// The host command file is bind-mounted read-only at this path and executed
/// by the task shell inside the container.
const GUEST_COMMAND_PATH: &str = "/mnt/task/command";

/// The path to the container's stdout.
///
/// The task's host stdout file is bind-mounted here, and the command's
/// standard output is redirected to it from inside the container.
const GUEST_STDOUT_PATH: &str = "/mnt/task/stdout";

/// The path to the container's stderr.
///
/// The task's host stderr file is bind-mounted here, and the command's
/// standard error is redirected to it from inside the container.
const GUEST_STDERR_PATH: &str = "/mnt/task/stderr";

/// The environment variable prefix for Apptainer.
///
/// Generated scripts export task environment variables as
/// `APPTAINERENV_<NAME>`.
const APPTAINER_ENV_PREFIX: &str = "APPTAINERENV";

/// The environment variable prefix for Singularity.
///
/// Selected instead of the Apptainer prefix when the configured executable
/// name contains `singularity`.
const SINGULARITY_ENV_PREFIX: &str = "SINGULARITYENV";

/// Represents the Apptainer container runtime.
///
/// The runtime owns the location of the `.sif` image cache and remembers, per
/// container source, the image path produced by a previous pull so that later
/// requests for the same source can reuse it.
#[derive(Debug)]
pub struct ApptainerRuntime {
    /// The cache directory for `.sif` images.
    ///
    /// Stored as an absolute path (see [`ApptainerRuntime::new`]).
    cache_dir: PathBuf,
    /// The map of container source to `.sif` path.
    ///
    /// Each source maps to a shared [`OnceCell`] that `pull_image` initializes
    /// with the path of the pulled image; the mutex only guards access to the
    /// map itself and is released before any pull is awaited.
    images: Mutex<HashMap<ContainerSource, Arc<OnceCell<PathBuf>>>>,
}

impl ApptainerRuntime {
    /// Constructs an [`ApptainerRuntime`] rooted at the given directory.
    ///
    /// The `.sif` image cache lives in `image_cache_dir` when one is given;
    /// otherwise it lives in a default subdirectory of `root_dir`. Either
    /// way, the chosen location is stored as an absolute path.
    ///
    /// # Errors
    ///
    /// Returns an error if the cache directory path cannot be made absolute.
    pub fn new(root_dir: &Path, image_cache_dir: Option<&Path>) -> Result<Self> {
        // Pick the explicit cache location, falling back to the default one
        // under the root.
        let cache_dir = match image_cache_dir {
            Some(dir) => dir.to_path_buf(),
            None => root_dir.join(IMAGES_CACHE_DIR),
        };

        let absolute_cache_dir = absolute(&cache_dir).with_context(|| {
            format!(
                "failed to make path `{path}` absolute",
                path = cache_dir.display()
            )
        })?;

        Ok(Self {
            cache_dir: absolute_cache_dir,
            images: Mutex::default(),
        })
    }

    /// Produces the script that runs the given task under Apptainer.
    ///
    /// The task's container candidates are tried in order; the first image
    /// that can be obtained is used. On success, the generated script is
    /// returned together with the [`ContainerSource`] that was selected.
    /// `Ok(None)` is returned when the pull was cancelled via `token`.
    ///
    /// # Errors
    ///
    /// Returns an error if the task has no container constraint, if none of
    /// the candidate images could be pulled, or if script generation fails.
    ///
    /// # Shared filesystem assumptions
    ///
    /// The returned script should be run in an environment that shares a
    /// filesystem with the environment where this method is invoked, except
    /// for node-specific mounts like `/tmp` and `/var`. This assumption
    /// typically holds on HPC systems with shared filesystems like Lustre or
    /// GPFS.
    pub async fn generate_script(
        &self,
        config: &ApptainerConfig,
        shell: &str,
        request: &ExecuteTaskRequest<'_>,
        token: CancellationToken,
    ) -> Result<Option<(String, ContainerSource)>> {
        let candidates = request
            .constraints
            .container
            .as_deref()
            .ok_or_else(|| anyhow!("task does not use a container"))?;

        // A `None` here means the pull was cancelled rather than failed.
        let Some(results) = self
            .pull_first_available_image(&config.executable, candidates, token)
            .await
        else {
            return Ok(None);
        };

        // If no candidate succeeded, the collected results become the error.
        let (container, path) = results
            .successful_container()
            .map(|(container, path)| (container.clone(), path.clone()))
            .ok_or_else(|| anyhow!("{results}"))?;

        let script = self
            .generate_apptainer_script(config, shell, &path, request)
            .await?;

        Ok(Some((script, container)))
    }

    /// Generate the script, given a container path that's already assumed to be
    /// populated.
    ///
    /// The generated bash script:
    ///
    /// 1. exports every entry of the request's environment using the
    ///    runtime-specific prefix (`APPTAINERENV_` or `SINGULARITYENV_`);
    /// 2. invokes `<executable> -v exec` on the image with bind mounts for the
    ///    task inputs, command file, work directory, temp directories, and
    ///    the task's stdout/stderr files;
    /// 3. redirects the runtime's own stdout/stderr to `apptainer.stdout` and
    ///    `apptainer.stderr` in the attempt directory.
    ///
    /// Environment values are written as single-quoted shell words so that
    /// the host shell passes them through verbatim (no `$` expansion, command
    /// substitution, or backslash processing).
    ///
    /// This is a separate method in order to facilitate testing, and should not
    /// be called from outside this module.
    ///
    /// # Errors
    ///
    /// Returns an error if the container temp directories cannot be created
    /// or if a backend input is missing its local or guest path.
    async fn generate_apptainer_script(
        &self,
        config: &ApptainerConfig,
        shell: &str,
        container_sif: &Path,
        request: &ExecuteTaskRequest<'_>,
    ) -> Result<String> {
        /// Quotes `value` as a single POSIX shell word.
        ///
        /// Everything between single quotes is literal to the shell; an
        /// embedded single quote is written as `'\''` (close the quote, emit
        /// an escaped quote, reopen the quote).
        fn shell_single_quote(value: &str) -> String {
            let mut quoted = String::with_capacity(value.len() + 2);
            quoted.push('\'');
            for (index, piece) in value.split('\'').enumerate() {
                if index > 0 {
                    quoted.push_str("'\\''");
                }
                quoted.push_str(piece);
            }
            quoted.push('\'');
            quoted
        }

        // Create temp dirs for the container's execution within the attempt dir
        // hierarchy. On many HPC systems, `/tmp` is mapped to a relatively
        // small, local scratch disk that can fill up easily. Mapping the
        // container's `/tmp` and `/var/tmp` paths to the filesystem we're using
        // for other inputs and outputs prevents this from being a capacity problem,
        // though potentially at the expense of execution speed if the
        // non-`/tmp` filesystem is significantly slower.
        let container_tmp_path = request.temp_dir.join("container_tmp");
        let container_var_tmp_path = request.temp_dir.join("container_var_tmp");
        for (guest_dir, host_dir) in [
            ("/tmp", &container_tmp_path),
            ("/var/tmp", &container_var_tmp_path),
        ] {
            tokio::fs::DirBuilder::new()
                .recursive(true)
                .create(host_dir)
                .await
                .with_context(|| {
                    format!(
                        "failed to create container {guest_dir} directory at `{path}`",
                        path = host_dir.display()
                    )
                })?;
        }

        // Singularity and Apptainer use different prefixes for variables that
        // should be injected into the container environment.
        let env_prefix = if config.executable.contains("singularity") {
            SINGULARITY_ENV_PREFIX
        } else {
            APPTAINER_ENV_PREFIX
        };

        let mut apptainer_command = String::new();
        writeln!(&mut apptainer_command, "#!/usr/bin/env bash")?;
        for (k, v) in request.env.iter() {
            // Rust's `Debug` escaping is not shell quoting: it leaves `$` and
            // backticks active inside double quotes. Single-quote instead so
            // the value is passed through unchanged.
            writeln!(
                &mut apptainer_command,
                "export {env_prefix}_{k}={value}",
                value = shell_single_quote(v)
            )?;
        }
        writeln!(&mut apptainer_command, "{} -v exec \\", config.executable)?;
        writeln!(&mut apptainer_command, "--pwd \"{GUEST_WORK_DIR}\" \\")?;
        writeln!(&mut apptainer_command, "--containall --cleanenv \\")?;

        // Task inputs are mounted read-only at their guest paths.
        for input in request.backend_inputs {
            writeln!(
                &mut apptainer_command,
                "--mount type=bind,src=\"{host_path}\",dst=\"{guest_path}\",ro \\",
                host_path = input
                    .local_path()
                    .ok_or_else(|| anyhow!("input not localized: {input:?}"))?
                    .display(),
                guest_path = input
                    .guest_path()
                    .ok_or_else(|| anyhow!("guest path missing: {input:?}"))?,
            )?;
        }
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"{GUEST_COMMAND_PATH}\",ro \\",
            request.command_path().display()
        )?;
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"{GUEST_WORK_DIR}\" \\",
            request.work_dir().display()
        )?;
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"/tmp\" \\",
            container_tmp_path.display()
        )?;
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"/var/tmp\" \\",
            container_var_tmp_path.display()
        )?;
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"{GUEST_STDOUT_PATH}\" \\",
            request.stdout_path().display()
        )?;
        writeln!(
            &mut apptainer_command,
            "--mount type=bind,src=\"{}\",dst=\"{GUEST_STDERR_PATH}\" \\",
            request.stderr_path().display()
        )?;

        // Only pass `--nv` when the task explicitly sets the GPU requirement
        // to `true`.
        if let Some(true) = request
            .requirements
            .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
            .and_then(Value::as_boolean)
        {
            writeln!(&mut apptainer_command, "--nv \\")?;
        }

        // User-configured extra arguments are emitted as-is, one per line.
        for arg in config
            .extra_apptainer_exec_args
            .as_deref()
            .unwrap_or_default()
        {
            writeln!(&mut apptainer_command, "{arg} \\")?;
        }

        writeln!(&mut apptainer_command, "\"{}\" \\", container_sif.display())?;
        // Run the command file with the task shell inside the container,
        // sending its output to the bind-mounted stdout/stderr files.
        writeln!(
            &mut apptainer_command,
            "{shell} -c \"\\\"{GUEST_COMMAND_PATH}\\\" > \\\"{GUEST_STDOUT_PATH}\\\" 2> \
             \\\"{GUEST_STDERR_PATH}\\\"\" \\"
        )?;

        // The runtime's own output is captured separately from the task's.
        let attempt_dir = request.attempt_dir;
        let apptainer_stdout_path = attempt_dir.join("apptainer.stdout");
        let apptainer_stderr_path = attempt_dir.join("apptainer.stderr");
        writeln!(
            &mut apptainer_command,
            "> \"{stdout}\" 2> \"{stderr}\"",
            stdout = apptainer_stdout_path.display(),
            stderr = apptainer_stderr_path.display()
        )?;
        Ok(apptainer_command)
    }

    /// Pulls the image for the given container source and returns the path to
    /// the image file (SIF).
    ///
    /// If the container source is already a SIF file, the given source path is
    /// returned.
    ///
    /// If the image has already been pulled, the pull is skipped and the path
    /// to the previous location is returned. An image file that already
    /// exists at the expected cache location is likewise reused without
    /// pulling.
    ///
    /// Returns `Ok(None)` if `token` is cancelled before the pull completes.
    ///
    /// # Errors
    ///
    /// Returns an error if the container source is unknown, if the cache
    /// directory for the image cannot be created, or if the pull fails after
    /// all retries.
    pub(crate) async fn pull_image(
        &self,
        executable: &str,
        container: &ContainerSource,
        token: CancellationToken,
    ) -> Result<Option<PathBuf>> {
        // For local SIF files, return the path directly.
        if let ContainerSource::SifFile(path) = container {
            return Ok(Some(path.clone()));
        }

        // For unknown container sources, error early.
        if let ContainerSource::Unknown(s) = container {
            bail!("unknown container source `{s}`");
        }

        // For registry-based images, pull and cache. The map lock is only
        // held long enough to fetch or insert the cell for this source; it is
        // released before anything is awaited.
        let once = {
            let mut map = self.images.lock().expect("image map lock poisoned");
            map.entry(container.clone())
                .or_insert_with(|| Arc::new(OnceCell::new()))
                .clone()
        };

        let pull = once.get_or_try_init(|| async move {
            // File and unknown sources were handled above, so the remaining
            // sources are expected to carry both a scheme and a name.
            let scheme = container
                .scheme()
                .expect("registry container source should have a scheme");
            let name = container
                .name()
                .expect("registry container source should have a name");

            // Map `scheme://a/b:tag` to `<cache>/scheme/a/b/tag.sif` by
            // turning every `/`- or `:`-separated part into a path component.
            let mut path = self.cache_dir.join(scheme);
            for part in name.split(['/', ':']) {
                path.push(part);
            }

            path.add_extension("sif");

            // Use the async filesystem API so the executor thread is not
            // blocked; as with `Path::exists`, errors are treated as "not
            // cached".
            if tokio::fs::try_exists(&path).await.unwrap_or(false) {
                debug!(path = %path.display(), "Apptainer image `{container:#}` already cached; using existing image");
                return Ok(path);
            }

            if let Some(parent) = path.parent() {
                tokio::fs::create_dir_all(parent).await.with_context(|| {
                    format!(
                        "failed to create directory `{parent}`",
                        parent = parent.display()
                    )
                })?;
            }

            let container = format!("{container:#}");
            let executable = executable.to_string();

            Retry::spawn_notify(
                // TODO ACF 2025-09-22: configure the retry behavior based on actual experience
                // with flakiness of the container registries. This is a
                // finger-in-the-wind guess at some reasonable parameters that
                // shouldn't lead to us making our own problems worse by
                // overwhelming registries with repeated retries.
                ExponentialBackoff::from_millis(50)
                    .max_delay_millis(60_000)
                    .take(10),
                || Self::try_pull_image(&executable, &container, &path),
                {
                    let executable = executable.clone();
                    move |e: &anyhow::Error, _| {
                        warn!(e = %e, "`{executable} pull` failed");
                    }
                },
            )
            .await
            .with_context(|| format!("failed pulling Apptainer image `{container}`"))?;

            debug!(path = %path.display(), "Apptainer image `{container}` pulled successfully");
            Ok(path)
        });

        // Whichever finishes first wins: cancellation yields `None`, while a
        // completed pull yields the image path (or the pull error).
        tokio::select! {
            _ = token.cancelled() => Ok(None),
            res = pull => res.map(|p| Some(p.clone())),
        }
    }

    /// Tries each candidate container source in order until one can be pulled.
    ///
    /// Every attempt is recorded in the returned [`PullResults`]: failures
    /// are recorded and the next candidate is tried, while the first success
    /// is recorded and ends the search. If every candidate fails, the results
    /// contain only failures. `None` is returned as soon as a pull reports
    /// that it was cancelled.
    pub(crate) async fn pull_first_available_image(
        &self,
        executable: &str,
        candidates: &[ContainerSource],
        token: CancellationToken,
    ) -> Option<PullResults<PathBuf>> {
        let mut results = PullResults::default();

        for candidate in candidates {
            debug!("attempting to pull container image `{candidate:#}`");

            let outcome = self.pull_image(executable, candidate, token.clone()).await;
            let path = match outcome {
                // A failed candidate is recorded; move on to the next one.
                Err(e) => {
                    warn!("failed to pull container image `{candidate:#}`: {e:#}");
                    results.push(candidate.clone(), Err(e));
                    continue;
                }
                // Cancellation aborts the whole search.
                Ok(None) => return None,
                Ok(Some(path)) => path,
            };

            debug!("successfully pulled container image `{candidate:#}`");
            results.push(candidate.clone(), Ok(path));
            return Some(results);
        }

        Some(results)
    }

    /// Makes a single attempt at pulling an image with `<executable> pull`.
    ///
    /// The difficult part is classifying a failure as transient or permanent.
    /// When in doubt, the failure is treated as transient: a permanent failure
    /// then takes longer to bring down an execution, but that is preferable
    /// to a transient failure ending a long-running task before a retry.
    ///
    /// `apptainer pull` has no well-defined interface for telling the two
    /// apart, so classification is based on known stderr fragments; the list
    /// can grow as more output patterns are recognized.
    ///
    /// Failing to spawn or wait for the process is always permanent.
    async fn try_pull_image(
        executable: &str,
        image: &str,
        path: &Path,
    ) -> Result<(), RetryError<anyhow::Error>> {
        /// Strings observed in `apptainer pull` stderr in unrecoverable
        /// conditions. Finding one of these in the output marks the attempt
        /// as a permanent failure.
        const PERMANENT_FAILURE_MARKERS: [&str; 2] = ["manifest unknown", "403 (Forbidden)"];

        debug!("spawning `{executable}` to pull image `{image}`");

        let mut command = Command::new(executable);
        command
            .stdin(Stdio::null())
            .stdout(Stdio::piped())
            .stderr(Stdio::piped())
            .arg("pull")
            .arg(path)
            .arg(image);

        // If the system can't handle spawning a process, we're better off failing quickly
        let child = command
            .spawn()
            .with_context(|| {
                format!(
                    "failed to spawn `{executable} pull '{path}' '{image}'`",
                    path = path.display()
                )
            })
            .map_err(RetryError::permanent)?;

        let output = child
            .wait_with_output()
            .await
            .with_context(|| format!("failed to wait for `{executable}`"))
            .map_err(RetryError::permanent)?;

        if output.status.success() {
            return Ok(());
        }

        // Non-UTF-8 stderr can't be inspected, so it is never classified as
        // permanent.
        let stderr_text = str::from_utf8(&output.stderr).ok();
        let permanent = stderr_text.is_some_and(|text| {
            PERMANENT_FAILURE_MARKERS
                .iter()
                .any(|marker| text.contains(*marker))
        });

        let e = anyhow!(
            "`{executable}` failed: {status}: {stderr}",
            status = output.status,
            stderr = stderr_text.unwrap_or("<output not UTF-8>").trim()
        );

        Err(if permanent {
            RetryError::permanent(e)
        } else {
            RetryError::transient(e)
        })
    }
}

/// Tests for Apptainer script generation.
///
/// Both tests use a `file://` URL pointing at a `.sif` path that does not
/// exist as the only container candidate (presumably parsed as a local SIF
/// source, for which no pull is attempted — confirm against
/// `ContainerSource`'s parser).
#[cfg(test)]
mod tests {
    use indexmap::IndexMap;
    use tempfile::TempDir;
    use url::Url;

    use super::*;
    use crate::ONE_GIBIBYTE;
    use crate::TaskInputs;
    use crate::backend::ExecuteTaskRequest;
    use crate::backend::TaskExecutionConstraints;
    use crate::config::DEFAULT_TASK_SHELL;

    /// Checks that script generation completes without error for a minimal
    /// task request.
    #[tokio::test]
    async fn example_task_generates() {
        let root = TempDir::new().unwrap();

        // Include a value containing double quotes to exercise quoting of
        // exported environment variables.
        let mut env = IndexMap::new();
        env.insert("FOO".to_string(), "bar".to_string());
        env.insert("BAZ".to_string(), "\"quux\"".to_string());

        let runtime = ApptainerRuntime::new(&root.path().join("runs"), None).unwrap();
        let _ = runtime
            .generate_script(
                &ApptainerConfig::default(),
                DEFAULT_TASK_SHELL,
                &ExecuteTaskRequest {
                    id: "example-task",
                    command: "echo hello",
                    inputs: &TaskInputs::default(),
                    backend_inputs: &[],
                    requirements: &Default::default(),
                    hints: &Default::default(),
                    env: &env,
                    constraints: &TaskExecutionConstraints {
                        container: Some(vec![
                            String::from(
                                Url::from_file_path(root.path().join("non-existent.sif")).unwrap(),
                            )
                            .parse()
                            .unwrap(),
                        ]),
                        cpu: 1.0,
                        memory: ONE_GIBIBYTE as u64,
                        gpu: Default::default(),
                        fpga: Default::default(),
                        disks: Default::default(),
                    },
                    attempt_dir: &root.path().join("0"),
                    temp_dir: &root.path().join("temp"),
                },
                CancellationToken::new(),
            )
            .await
            .inspect_err(|e| eprintln!("{e:#?}"))
            .expect("example task script should generate");
    }

    // `shellcheck` works quite differently on Windows, and since we're not going to
    // run Apptainer on Windows anytime soon, we limit this test to Unixy
    // systems
    /// Checks that the generated script passes `shellcheck` at `style`
    /// severity. Requires `shellcheck` to be available on `PATH`.
    #[cfg(unix)]
    #[tokio::test]
    async fn example_task_shellchecks() {
        use tokio::process::Command;

        use crate::config::DEFAULT_TASK_SHELL;

        let root = TempDir::new().unwrap();

        // Include a value containing double quotes to exercise quoting of
        // exported environment variables.
        let mut env = IndexMap::new();
        env.insert("FOO".to_string(), "bar".to_string());
        env.insert("BAZ".to_string(), "\"quux\"".to_string());

        let runtime = ApptainerRuntime::new(&root.path().join("runs"), None).unwrap();
        let (script, _) = runtime
            .generate_script(
                &ApptainerConfig::default(),
                DEFAULT_TASK_SHELL,
                &ExecuteTaskRequest {
                    id: "example-task",
                    command: "echo hello",
                    inputs: &TaskInputs::default(),
                    backend_inputs: &[],
                    requirements: &Default::default(),
                    hints: &Default::default(),
                    env: &env,
                    constraints: &TaskExecutionConstraints {
                        container: Some(vec![
                            String::from(
                                Url::from_file_path(root.path().join("non-existent.sif")).unwrap(),
                            )
                            .parse()
                            .unwrap(),
                        ]),
                        cpu: 1.0,
                        memory: ONE_GIBIBYTE as u64,
                        gpu: Default::default(),
                        fpga: Default::default(),
                        disks: Default::default(),
                    },
                    attempt_dir: &root.path().join("0"),
                    temp_dir: &root.path().join("temp"),
                },
                CancellationToken::new(),
            )
            .await
            .inspect_err(|e| eprintln!("{e:#?}"))
            .expect("example task script should generate")
            .expect("operation should not be canceled");
        // Write the script to disk so `shellcheck` can lint it as a file.
        let script_file = root.path().join("apptainer_script");
        tokio::fs::write(&script_file, &script)
            .await
            .expect("can write script to disk");
        let shellcheck_status = Command::new("shellcheck")
            .arg("--shell=bash")
            .arg("--severity=style")
            // all the quotes in the generated `--mount` args look suspicious but are okay
            .arg("--exclude=SC2140")
            .arg(&script_file)
            .status()
            .await
            .unwrap();
        assert!(shellcheck_status.success());
    }
}