row/scheduler.rs
1// Copyright (c) 2024-2025 The Regents of the University of Michigan.
2// Part of row, released under the BSD 3-Clause License.
3
4pub mod bash;
5pub mod slurm;
6
7use serde_json::Value;
8use std::collections::{HashMap, HashSet};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use std::sync::atomic::AtomicBool;
12
13use crate::Error;
14use crate::workflow::Action;
15
16/// A `Scheduler` creates and submits job scripts.
17pub trait Scheduler {
18 /** Make a job script given an `Action` and a list of directories.
19
20 # Arguments
21 * `action`: The action to submit.
22 * `directories`: The directories to include in the submission.
23 * `workspace_path`: The relative path to the workspace directory from the workflow root.
24 * `directory_values`: Maps directory names to JSON values.
25
26 `make_script` must use expand `{workspace_path`} and `{\JSON pointer}`
27 templates in the action's command.
28
29 # Returns
30 A `String` containing the job script.
31
32 # Errors
33 Returns `Err<row::Error>` when the script cannot be created.
34 */
35 fn make_script(
36 &self,
37 action: &Action,
38 directories: &[PathBuf],
39 workspace_path: &Path,
40 directory_values: &HashMap<PathBuf, Value>,
41 ) -> Result<String, Error>;
42
43 /** Submit a job to the scheduler.
44
45 # Arguments
46 * `workflow_root`: The working directory the action should be submitted from.
47 * `action`: The action to submit.
48 * `directories`: The directories to include in the submission.
49 * `workspace_path`: The relative path to the workspace directory from the workflow root.
50 * `directory_values`: Maps directory names to JSON values.
51 * `should_terminate`: Set to true when the user terminates the process.
52
53 # Returns
54 `Ok(job_id_option)` on success.
55 Schedulers that queue jobs should set `job_id_option = Some(job_id)`.
56 Schedulers that execute jobs immediately should set `job_id_option = None`.
57
58 # Early termination.
59 Implementations should periodically check `should_terminate` and
60 exit early (if possible) with `Err(Error::Interrupted)` when set.
61
62 # Errors
63 Returns `Err(row::Error)` on error, which may be due to a non-zero exit
64 status from the submission.
65 */
66 fn submit(
67 &self,
68 workflow_root: &Path,
69 action: &Action,
70 directories: &[PathBuf],
71 workspace_path: &Path,
72 directory_values: &HashMap<PathBuf, Value>,
73 should_terminate: Arc<AtomicBool>,
74 ) -> Result<Option<u32>, Error>;
75
76 /** Query the scheduler and determine which jobs remain active.
77
78 # Arguments
79 * `jobs`: Identifiers to query
80
81 `active_jobs` returns a `ActiveJobs` object, which provides the final
82 result via a method. This allows implementations to be asynchronous so
83 that long-running subprocesses can complete in the background while the
84 collar performs other work.
85
86 # Errors
87 Returns `Err<row::Error>` when the job queue query cannot be executed.
88 */
89 fn active_jobs(&self, jobs: &[u32]) -> Result<Box<dyn ActiveJobs>, Error>;
90}
91
92/// Deferred result containing jobs that are still active on the cluster.
93pub trait ActiveJobs {
94 /** Complete the operation and return the currently active jobs.
95
96 # Errors
97 Returns `Err<row::Error>` when the job queue query cannot be executed.
98 */
99 fn get(self: Box<Self>) -> Result<HashSet<u32>, Error>;
100}