row/
scheduler.rs

1// Copyright (c) 2024-2025 The Regents of the University of Michigan.
2// Part of row, released under the BSD 3-Clause License.
3
4pub mod bash;
5pub mod slurm;
6
7use serde_json::Value;
8use std::collections::{HashMap, HashSet};
9use std::path::{Path, PathBuf};
10use std::sync::Arc;
11use std::sync::atomic::AtomicBool;
12
13use crate::Error;
14use crate::workflow::Action;
15
16/// A `Scheduler` creates and submits job scripts.
17pub trait Scheduler {
18    /** Make a job script given an `Action` and a list of directories.
19
20    # Arguments
21    * `action`: The action to submit.
22    * `directories`: The directories to include in the submission.
23    * `workspace_path`: The relative path to the workspace directory from the workflow root.
24    * `directory_values`: Maps directory names to JSON values.
25
26    `make_script` must use expand `{workspace_path`} and `{\JSON pointer}`
27    templates in the action's command.
28
29    # Returns
30    A `String` containing the job script.
31
32    # Errors
33    Returns `Err<row::Error>` when the script cannot be created.
34    */
35    fn make_script(
36        &self,
37        action: &Action,
38        directories: &[PathBuf],
39        workspace_path: &Path,
40        directory_values: &HashMap<PathBuf, Value>,
41    ) -> Result<String, Error>;
42
43    /** Submit a job to the scheduler.
44
45    # Arguments
46    * `workflow_root`: The working directory the action should be submitted from.
47    * `action`: The action to submit.
48    * `directories`: The directories to include in the submission.
49    * `workspace_path`: The relative path to the workspace directory from the workflow root.
50    * `directory_values`: Maps directory names to JSON values.
51    * `should_terminate`: Set to true when the user terminates the process.
52
53    # Returns
54    `Ok(job_id_option)` on success.
55    Schedulers that queue jobs should set `job_id_option = Some(job_id)`.
56    Schedulers that execute jobs immediately should set `job_id_option = None`.
57
58    # Early termination.
59    Implementations should periodically check `should_terminate` and
60    exit early (if possible) with `Err(Error::Interrupted)` when set.
61
62    # Errors
63    Returns `Err(row::Error)` on error, which may be due to a non-zero exit
64    status from the submission.
65    */
66    fn submit(
67        &self,
68        workflow_root: &Path,
69        action: &Action,
70        directories: &[PathBuf],
71        workspace_path: &Path,
72        directory_values: &HashMap<PathBuf, Value>,
73        should_terminate: Arc<AtomicBool>,
74    ) -> Result<Option<u32>, Error>;
75
76    /** Query the scheduler and determine which jobs remain active.
77
78    # Arguments
79    * `jobs`: Identifiers to query
80
81    `active_jobs` returns a `ActiveJobs` object, which provides the final
82    result via a method. This allows implementations to be asynchronous so
83    that long-running subprocesses can complete in the background while the
84    collar performs other work.
85
86    # Errors
87    Returns `Err<row::Error>` when the job queue query cannot be executed.
88    */
89    fn active_jobs(&self, jobs: &[u32]) -> Result<Box<dyn ActiveJobs>, Error>;
90}
91
92/// Deferred result containing jobs that are still active on the cluster.
93pub trait ActiveJobs {
94    /** Complete the operation and return the currently active jobs.
95
96    # Errors
97    Returns `Err<row::Error>` when the job queue query cannot be executed.
98    */
99    fn get(self: Box<Self>) -> Result<HashSet<u32>, Error>;
100}