wdl_engine/backend/
apptainer.rs

1//! Support for using Apptainer (a.k.a. Singularity) as an in-place container
2//! runtime for task execution.
3//!
4//! There are two primary responsibilities of this module: `.sif` image cache
5//! management and command script generation. The entrypoint for both of these
6//! is [`ApptainerState::prepare_apptainer_command()`].
7
8use std::fmt::Write as _;
9use std::path::Path;
10
11use anyhow::Context as _;
12use anyhow::anyhow;
13use images::ApptainerImages;
14use tokio_util::sync::CancellationToken;
15use tracing::warn;
16
17use super::TaskSpawnRequest;
18use crate::Value;
19
20mod images;
21
/// The guest working directory.
///
/// The task's host work directory is bind-mounted at this path, and it is
/// passed to `apptainer exec` as `--pwd`.
const GUEST_WORK_DIR: &str = "/mnt/task/work";

/// The guest path for the command file.
///
/// The host command file is bind-mounted read-only at this path, and this is
/// the path that is executed via `bash -c` inside the container.
const GUEST_COMMAND_PATH: &str = "/mnt/task/command";

/// The guest path that the command's stdout is redirected to.
///
/// The task's host stdout file is bind-mounted at this path.
const GUEST_STDOUT_PATH: &str = "/mnt/task/stdout";

/// The guest path that the command's stderr is redirected to.
///
/// The task's host stderr file is bind-mounted at this path.
const GUEST_STDERR_PATH: &str = "/mnt/task/stderr";
33
/// Configuration for the Apptainer container runtime.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct ApptainerConfig {
    /// Additional command-line arguments to pass to `apptainer exec` when
    /// executing tasks.
    ///
    /// Each argument is written verbatim (without any quoting) on its own
    /// line of the generated script, after the built-in options and before
    /// the image path.
    pub extra_apptainer_exec_args: Option<Vec<String>>,
    /// Deprecated field.
    ///
    /// This was kept for compatibility with previous versions of the Apptainer
    /// configuration fields, and may be removed in a future version. The
    /// Apptainer images are now stored at the top level root directory of an
    /// evaluation.
    ///
    /// Setting this field only causes a warning to be logged during
    /// validation.
    #[serde(default)]
    #[deprecated]
    pub apptainer_images_dir: Option<String>,
}
50
51impl ApptainerConfig {
52    /// Validate that Apptainer is appropriately configured.
53    pub async fn validate(&self) -> Result<(), anyhow::Error> {
54        #[expect(deprecated)]
55        if self.apptainer_images_dir.is_some() {
56            warn!(
57                "`apptainer_images_dir` is deprecated and no longer has an effect. Converted \
58                 images are stored in the output directory for each run."
59            );
60        }
61        Ok(())
62    }
63}
64
/// The state of an Apptainer backend for a given top-level execution.
///
/// As Apptainer-converted images are shared throughout all task executions of a
/// given invocation, only one of these state structures should be constructed
/// per top-level task/workflow execution.
#[derive(Debug)]
pub struct ApptainerState {
    /// The Apptainer configuration, cloned from the configuration supplied at
    /// construction.
    config: ApptainerConfig,
    /// The image store created from the run root directory; it is used to
    /// obtain the `.sif` path for a container.
    images: ApptainerImages,
}
77
78impl ApptainerState {
79    /// Create a new [`ApptainerState`].
80    // TODO ACF 2025-11-18: Here's a good example of why we should have separate
81    // config types for user-facing configuration and internal use. The root dir is
82    // really a configuration option, but we want the engine to be able to specify
83    // it based on how it's invoked. We *don't* want it to be something the user of
84    // `sprocket` can put in their `sprocket.toml`. We can and have played games
85    // with hiding certain fields from `serde`, but really we should rearrange these
86    // types so that we have more direct control over what's in the user interface
87    // vs not.
88    pub fn new(config: &ApptainerConfig, run_root_dir: &Path) -> Self {
89        let images = ApptainerImages::new(run_root_dir);
90        Self {
91            config: config.clone(),
92            images,
93        }
94    }
95
96    /// Prepare for an Apptainer execution by ensuring the image cache is
97    /// populated with the necessary container, and return a Bash script
98    /// that invokes the task's `command` in the container context.
99    ///
100    /// # Shared filesystem assumptions
101    ///
102    /// The returned script should be run in an environment that shares a
103    /// filesystem with the environment where this method is invoked, except
104    /// for node-specific mounts like `/tmp` and `/var`. This assumption
105    /// typically holds on HPC systems with shared filesystems like Lustre or
106    /// GPFS.
107    pub async fn prepare_apptainer_command(
108        &self,
109        container: &str,
110        cancellation_token: CancellationToken,
111        spawn_request: &TaskSpawnRequest,
112    ) -> Result<String, anyhow::Error> {
113        let container_sif = self
114            .images
115            .sif_for_container(container, cancellation_token)
116            .await?;
117        self.generate_apptainer_script(&container_sif, spawn_request)
118            .await
119    }
120
121    /// Generate the script, given a container path that's already assumed to be
122    /// populated.
123    ///
124    /// This is a separate method in order to facilitate testing, and should not
125    /// be called from outside this module.
126    async fn generate_apptainer_script(
127        &self,
128        container_sif: &Path,
129        spawn_request: &TaskSpawnRequest,
130    ) -> Result<String, anyhow::Error> {
131        // Create a temp dir for the container's execution within the attempt dir
132        // hierarchy. On many HPC systems, `/tmp` is mapped to a relatively
133        // small, local scratch disk that can fill up easily. Mapping the
134        // container's `/tmp` and `/var/tmp` paths to the filesystem we're using
135        // for other inputs and outputs prevents this from being a capacity problem,
136        // though potentially at the expense of execution speed if the
137        // non-`/tmp` filesystem is significantly slower.
138        let container_tmp_path = spawn_request.temp_dir().join("container_tmp");
139        tokio::fs::DirBuilder::new()
140            .recursive(true)
141            .create(&container_tmp_path)
142            .await
143            .with_context(|| {
144                format!(
145                    "failed to create container /tmp directory at `{path}`",
146                    path = container_tmp_path.display()
147                )
148            })?;
149        let container_var_tmp_path = spawn_request.temp_dir().join("container_var_tmp");
150        tokio::fs::DirBuilder::new()
151            .recursive(true)
152            .create(&container_var_tmp_path)
153            .await
154            .with_context(|| {
155                format!(
156                    "failed to create container /var/tmp directory at `{path}`",
157                    path = container_var_tmp_path.display()
158                )
159            })?;
160
161        let mut apptainer_command = String::new();
162        writeln!(&mut apptainer_command, "#!/usr/bin/env bash")?;
163        for (k, v) in spawn_request.env().iter() {
164            writeln!(&mut apptainer_command, "export APPTAINERENV_{k}={v:?}")?;
165        }
166        writeln!(&mut apptainer_command, "apptainer -v exec \\")?;
167        writeln!(&mut apptainer_command, "--pwd \"{GUEST_WORK_DIR}\" \\")?;
168        writeln!(&mut apptainer_command, "--containall --cleanenv \\")?;
169        for input in spawn_request.inputs() {
170            writeln!(
171                &mut apptainer_command,
172                "--mount type=bind,src=\"{host_path}\",dst=\"{guest_path}\",ro \\",
173                host_path = input
174                    .local_path()
175                    .ok_or_else(|| anyhow!("input not localized: {input:?}"))?
176                    .display(),
177                guest_path = input
178                    .guest_path()
179                    .ok_or_else(|| anyhow!("guest path missing: {input:?}"))?,
180            )?;
181        }
182        writeln!(
183            &mut apptainer_command,
184            "--mount type=bind,src=\"{}\",dst=\"{GUEST_COMMAND_PATH}\",ro \\",
185            spawn_request.wdl_command_host_path().display()
186        )?;
187        writeln!(
188            &mut apptainer_command,
189            "--mount type=bind,src=\"{}\",dst=\"{GUEST_WORK_DIR}\" \\",
190            spawn_request.wdl_work_dir_host_path().display()
191        )?;
192        writeln!(
193            &mut apptainer_command,
194            "--mount type=bind,src=\"{}\",dst=\"/tmp\" \\",
195            container_tmp_path.display()
196        )?;
197        writeln!(
198            &mut apptainer_command,
199            "--mount type=bind,src=\"{}\",dst=\"/var/tmp\" \\",
200            container_var_tmp_path.display()
201        )?;
202        writeln!(
203            &mut apptainer_command,
204            "--mount type=bind,src=\"{}\",dst=\"{GUEST_STDOUT_PATH}\" \\",
205            spawn_request.wdl_stdout_host_path().display()
206        )?;
207        writeln!(
208            &mut apptainer_command,
209            "--mount type=bind,src=\"{}\",dst=\"{GUEST_STDERR_PATH}\" \\",
210            spawn_request.wdl_stderr_host_path().display()
211        )?;
212        if let Some(true) = spawn_request
213            .requirements()
214            .get(wdl_ast::v1::TASK_REQUIREMENT_GPU)
215            .and_then(Value::as_boolean)
216        {
217            writeln!(&mut apptainer_command, "--nv \\")?;
218        }
219        if let Some(args) = &self.config.extra_apptainer_exec_args {
220            for arg in args {
221                writeln!(&mut apptainer_command, "{arg} \\")?;
222            }
223        }
224        writeln!(&mut apptainer_command, "\"{}\" \\", container_sif.display())?;
225        writeln!(
226            &mut apptainer_command,
227            "bash -c \"\\\"{GUEST_COMMAND_PATH}\\\" > \\\"{GUEST_STDOUT_PATH}\\\" 2> \
228             \\\"{GUEST_STDERR_PATH}\\\"\" \\"
229        )?;
230        let attempt_dir = spawn_request.attempt_dir();
231        let apptainer_stdout_path = attempt_dir.join("apptainer.stdout");
232        let apptainer_stderr_path = attempt_dir.join("apptainer.stderr");
233        writeln!(
234            &mut apptainer_command,
235            "> \"{stdout}\" 2> \"{stderr}\"",
236            stdout = apptainer_stdout_path.display(),
237            stderr = apptainer_stderr_path.display()
238        )?;
239        Ok(apptainer_command)
240    }
241}
242
#[cfg(test)]
mod tests {
    use std::collections::HashMap;
    use std::sync::Arc;

    use indexmap::IndexMap;
    use tempfile::TempDir;
    use tokio::process::Command;

    use super::*;
    use crate::TaskSpawnInfo;
    use crate::http::Transferer;
    use crate::v1::test::TestEnv;

    /// Build a minimal task fixture rooted in a fresh temporary directory.
    ///
    /// The returned [`TempDir`] must be kept alive for as long as the state
    /// and spawn request are in use, since their paths live inside it.
    fn mk_example_task() -> (TempDir, ApptainerState, TaskSpawnRequest) {
        let tmp = tempfile::tempdir().unwrap();
        let root = tmp.path().to_path_buf();
        let state = ApptainerState::new(&ApptainerConfig::default(), &root);

        // One plain value and one containing double quotes, to exercise the
        // quoting of exported environment variables.
        let env: IndexMap<String, String> = [("FOO", "bar"), ("BAZ", "\"quux\"")]
            .into_iter()
            .map(|(name, value)| (name.to_string(), value.to_string()))
            .collect();

        let info = TaskSpawnInfo::new(
            "echo hello".to_string(),
            vec![],
            HashMap::new().into(),
            HashMap::new().into(),
            env.into(),
            Arc::new(TestEnv::default()) as Arc<dyn Transferer>,
        );
        let spawn_request = TaskSpawnRequest {
            id: "example_task".to_string(),
            info,
            attempt: 0,
            attempt_dir: root.join("0"),
            task_eval_root: root.clone(),
            temp_dir: root.join("tmp"),
        };
        (tmp, state, spawn_request)
    }

    /// Generate the script for the example task, panicking (after printing
    /// the error) if generation fails.
    ///
    /// The temporary directory is returned alongside the script so that the
    /// caller controls how long it lives.
    async fn generate_example_script() -> (TempDir, String) {
        let (tmp, state, spawn_request) = mk_example_task();
        let sif_path = tmp.path().join("non-existent.sif");
        let script = state
            .generate_apptainer_script(&sif_path, &spawn_request)
            .await
            .inspect_err(|e| eprintln!("{e:#?}"))
            .expect("example task script should generate");
        (tmp, script)
    }

    #[tokio::test]
    async fn example_task_generates() {
        // Only checks that generation succeeds; the script itself is unused.
        let (_tmp, _script) = generate_example_script().await;
    }

    #[tokio::test]
    // `shellcheck` works quite differently on Windows, and since we're not going to run Apptainer
    // on Windows anytime soon, we limit this test to Unixy systems
    #[cfg(unix)]
    async fn example_task_shellchecks() {
        let (tmp, script) = generate_example_script().await;
        let script_file = tmp.path().join("apptainer_script");
        tokio::fs::write(&script_file, &script)
            .await
            .expect("can write script to disk");

        // `SC2140` is excluded because all the quotes in the generated
        // `--mount` args look suspicious but are okay.
        let shellcheck_status = Command::new("shellcheck")
            .args(["--shell=bash", "--severity=style", "--exclude=SC2140"])
            .arg(&script_file)
            .status()
            .await
            .unwrap();
        assert!(shellcheck_status.success());
    }
}
318}