slurm_spank_compat/
lib.rs

1//!Rust bindings for writing Slurm SPANK Plugins
2//!# Introduction
//!This crate allows you to write Slurm SPANK plugins using Rust. To learn more
//!about the capabilities available through SPANK, please refer to the official
//![`SPANK documentation`].
6//!
7//! [`SPANK documentation`]: https://slurm.schedmd.com/spank.html
8//!
9//!To create a SPANK plugin using this crate, you need to define a struct for
10//!which you implement the [`Plugin`] trait and to make it available as a SPANK
11//!plugin using the [`SPANK_PLUGIN!`] macro.
12//!
13//!The methods of the Plugin trait correspond to the callbacks defined by the
14//!SPANK API such as [`init_post_opt`], [`task_post_fork`] etc. These methods
15//!have a default implementation which means you only need to implement the
16//!callbacks relevant for your plugin.
17//!
18//! [`init_post_opt`]: crate::Plugin::init_post_opt
19//!
20//! [`task_post_fork`]: crate::Plugin::task_post_fork
21//!
//!Each callback method is passed a [`SpankHandle`] reference which allows you
//!to interact with Slurm through the SPANK API.
24//!
25//!When returning an [`Err`] from a callback an error message will be displayed
26//!and/or logged by default, depending on the context. This behaviour may be
27//!overridden by the [`report_error`] method. A default [`Subscriber`] is also
28//!configured to facilitate the use of the [`tracing`] crate for logging and
29//!error reporting while using SPANK log facilities, such as in the example
30//!below. This can be overridden by the [`setup`] method.
31//!
32//! [`report_error`]: crate::Plugin::report_error
33//!
34//! [`Subscriber`]: tracing::Subscriber
35//!
36//! [`setup`]: crate::Plugin::setup
37//!
38//!# Example: hello.so
39//!The following example implements a simple hello world plugin. A more complete
40//!example is provided in the example directory of the repository which shows
41//!how to implement the same renice plugin that is given as an example of the C
42//!SPANK API in the Slurm [`SPANK documentation`].
43//!```rust,no_run
44#![doc = include_str!("hello.md")]
45//! ```
46//! The following Cargo.toml can be used to build this example plugin
47//!```toml
48//! [package]
49//! name = "slurm-spank-hello"
50//! version = "0.1.0"
51//! edition = "2021"
52//!
53//! [lib]
54//! crate-type = ["cdylib"]
55//!
56//! [dependencies]
57//! eyre = "0.6.8"
58//! tracing = "0.1.37"
59//! slurm-spank-compat = "0.1"
60//!```
61use lazy_static::lazy_static;
62use libc::{gid_t, pid_t, uid_t};
63use num_enum::{FromPrimitive, IntoPrimitive, TryFromPrimitive};
64use std::borrow::Cow;
65use std::collections::HashMap;
66use std::convert::{TryFrom, TryInto};
67use std::error::Error;
68use std::ffi::{CStr, CString, OsStr, OsString};
69use std::fmt;
70use std::os::raw::{c_char, c_int};
71use std::os::unix::ffi::OsStrExt;
72use std::panic::catch_unwind;
73use std::panic::UnwindSafe;
74use std::sync::Mutex;
75use std::{ptr, slice};
76use tracing::{error, span};
77use tracing_core::{Event, Subscriber};
78use tracing_subscriber::fmt::{
79    format::Writer, layer, FmtContext, FormatEvent, FormatFields, FormattedFields,
80};
81use tracing_subscriber::prelude::*;
82use tracing_subscriber::registry::LookupSpan;
83use tracing_subscriber::{EnvFilter, Registry};
84
// Low-level FFI declarations used by the safe wrappers in this crate. The lint
// allowances cover the C-style identifiers declared inside the module.
#[allow(non_upper_case_globals)]
#[allow(non_camel_case_types)]
#[allow(non_snake_case)]
#[doc(hidden)]
pub mod spank_sys {
    // Hand-written, C-layout option descriptor handed to
    // `spank_option_register` and `spank_option_getopt`.
    // NOTE(review): presumably mirrors `struct spank_option` from Slurm's
    // `spank.h` — confirm the field order against the header.
    #[repr(C)]
    #[derive(Debug, Copy, Clone)]
    pub struct spank_option {
        // NUL-terminated option name.
        pub name: *const ::std::os::raw::c_char,
        // NUL-terminated argument description, or null when there is none.
        pub arginfo: *const ::std::os::raw::c_char,
        // NUL-terminated usage text, or null when there is none.
        pub usage: *const ::std::os::raw::c_char,
        // Non-zero when the option takes an argument.
        pub has_arg: ::std::os::raw::c_int,
        // Integer value associated with the option; this crate stores the
        // option's index in its option table here when registering.
        pub val: ::std::os::raw::c_int,
        // Optional callback attached to the option.
        pub cb: spank_opt_cb_f,
    }

    // The remaining declarations come from a file located in `OUT_DIR`
    // (presumably generated by the build script — TODO confirm).
    include!(concat!(env!("OUT_DIR"), "/generated.rs"));
}
103
104#[doc(hidden)]
105pub use byte_strings;
106
/// Handle to the Slurm interface exposed to SPANK plugins. It provides methods
/// to query Slurm from a plugin.
///
/// A handle bundles the raw SPANK handle with the plugin arguments and the
/// option cache that the methods of this type read from and write to.
pub struct SpankHandle<'a> {
    // Raw SPANK handle forwarded to every `spank_sys` call.
    spank: spank_sys::spank_t,
    // Number of entries in `argv`.
    argc: c_int,
    // Plugin argument vector, exposed through `plugin_argv`.
    argv: *const *const c_char,
    // Registered option names and cached option values.
    opt_cache: &'a mut OptionCache,
}
115
// Generates a `SpankHandle` accessor that wraps `spank_sys::spank_get_item`
// for a single `SpankItem`. Any doc attributes placed before the method name
// are forwarded to the generated method. Three forms are supported:
//
// 1. `name, item, arg_name, arg_type, result_type` — getter taking one extra
//    argument that is passed to `spank_get_item` before the result pointer.
// 2. `name, item, &str` — getter returning a borrowed UTF-8 string.
// 3. `name, item, result_type` — getter without extra argument.
macro_rules! spank_item_getter {
    // Form 1: item lookup keyed by an additional argument. `ESPANK_NOEXIST`
    // is mapped to a dedicated error built from the argument.
    ($(#[$outer:meta])* $name:ident, $spank_item:path, $arg_name:ident, $arg_type:ty, $result_type:ty) => {
        $(#[$outer])*
        pub fn $name(&self, $arg_name: $arg_type) -> Result<$result_type, SpankError> {
            // The result slot starts at the type's default and is filled in by
            // `spank_get_item` through `res_ptr`.
            let mut res: $result_type = <$result_type>::default();
            let res_ptr: *mut $result_type = &mut res;
            match unsafe {
                spank_sys::spank_get_item(self.spank, $spank_item.into(), $arg_name, res_ptr)
            } {
                spank_sys::ESPANK_SUCCESS => Ok(res),
                spank_sys::slurm_err_t_ESPANK_NOEXIST => Err(SpankError::from_noexist($arg_name)),
                e => Err(SpankError::from_spank_item("spank_get_item", $spank_item, e)),
            }
        }
    };
    // Form 2: string item. Panics if Slurm reports success but leaves the
    // pointer null; returns `SpankError::Utf8Error` if the bytes are not
    // valid UTF-8.
    ($(#[$outer:meta])* $name:ident, $spank_item:path, &str) => {
        $(#[$outer])*
        pub fn $name(&self) -> Result<&str, SpankError> {
            let mut res: *const c_char = ptr::null_mut();
            let res_ptr: *mut *const c_char = &mut res;
            match unsafe { spank_sys::spank_get_item(self.spank, $spank_item.into(), res_ptr) } {
                spank_sys::ESPANK_SUCCESS => {
                    if res.is_null() {
                        panic!("Received unexpected null pointer from spank_get_item")
                    } else {
                        let cstr = unsafe { CStr::from_ptr(res) };
                        cstr.to_str()
                            .map_err(|_| SpankError::Utf8Error(cstr.to_string_lossy().to_string()))
                    }
                }
                e => Err(SpankError::from_spank_item("spank_get_item", $spank_item, e)),
            }
        }
    };
    // Form 3: plain value item written into a default-initialised slot.
    ($(#[$outer:meta])* $name:ident, $spank_item:path,$result_type:ty) => {
        $(#[$outer])*
        pub fn $name(&self) -> Result<$result_type, SpankError> {
            let mut res: $result_type = <$result_type>::default();
            let res_ptr: *mut $result_type = &mut res;
            match unsafe { spank_sys::spank_get_item(self.spank, $spank_item.into(), res_ptr) } {
                spank_sys::ESPANK_SUCCESS => Ok(res),
                e => Err(SpankError::from_spank_item("spank_get_item", $spank_item, e)),
            }
        }
    };
}
162
/// Converts an OS string value into text, replacing invalid UTF-8 sequences
/// with U+FFFD.
///
/// A borrowed input stays borrowed when it is already valid UTF-8; an owned
/// input always produces an owned `String`, reusing its buffer when valid.
fn os_value_to_lossy(value: Cow<'_, OsStr>) -> Cow<'_, str> {
    let owned = match value {
        Cow::Borrowed(borrowed) => return borrowed.to_string_lossy(),
        Cow::Owned(owned) => owned,
    };
    Cow::Owned(
        owned
            .into_string()
            .unwrap_or_else(|raw| raw.to_string_lossy().into_owned()),
    )
}
172
173fn os_value_to_str(value: Cow<'_, OsStr>) -> Result<Cow<'_, str>, SpankError> {
174    match value {
175        Cow::Borrowed(value) => Ok(Cow::from(
176            value
177                .to_str()
178                .ok_or_else(|| SpankError::from_os_str(value))?,
179        )),
180        Cow::Owned(value) => {
181            Ok(Cow::from(value.into_string().map_err(|value_err| {
182                SpankError::from_os_str(&value_err)
183            })?))
184        }
185    }
186}
187
// XXX: Unfortunately, according to the documentation, there are some contexts
// where you can only use callbacks (init_post_opt) and others where you can
// only use getopt (prolog/epilog). This is an attempt at providing a uniform
// interface by caching the results of callbacks or of calls to getopt, which
// feels quite hackish. We should try to find a cleaner interface.
/// Cache of the options registered by a plugin and of the values seen for
/// them.
#[derive(Default, Debug)]
#[doc(hidden)]
pub struct OptionCache {
    /// Names of the registered options, in registration order. The index of a
    /// name is the `val` passed to Slurm when the option is registered.
    pub options: Vec<String>,
    /// Values recorded per option name. An entry holding `None` is reported as
    /// "set" by `is_option_set` but yields no value (presumably a flag option
    /// — TODO confirm against the option callback).
    pub values: HashMap<String, Option<OsString>>,
}
199
200impl<'a> SpankHandle<'a> {
201    /// Returns the context in which the calling plugin is loaded.
202    pub fn context(&self) -> Result<Context, SpankError> {
203        let ctx = unsafe { spank_sys::spank_context() };
204        Context::try_from(ctx).map_err(|_| {
205            SpankError::from_spank("spank_context", spank_sys::slurm_err_t_ESPANK_ERROR)
206        })
207    }
208
209    /// Registers a plugin-provided option dynamically. This function is only
210    /// valid when called from a plugin's `init()`, and must be guaranteed to be
211    /// called in all contexts in which it is used (local, remote, allocator).
212    pub fn register_option(&mut self, spank_opt: SpankOption) -> Result<(), SpankError> {
213        let arginfo = match &spank_opt.arginfo {
214            None => None,
215            Some(info) => Some(CString::new(info as &str).map_err(|_| SpankError::from_str(info))?),
216        };
217        let name = CString::new(&spank_opt.name as &str)
218            .map_err(|_| SpankError::from_str(&spank_opt.name))?;
219        let usage = match spank_opt.usage {
220            None => None,
221            Some(usage) => {
222                Some(CString::new(&usage as &str).map_err(|_| SpankError::from_str(&usage))?)
223            }
224        };
225
226        let mut c_spank_opt = spank_sys::spank_option {
227            name: name.as_ptr(),
228            has_arg: arginfo.is_some() as i32,
229            cb: Some(spank_option_callback),
230            arginfo: match arginfo {
231                Some(ref arginfo) => arginfo.as_ptr(),
232                None => ptr::null(),
233            },
234            usage: match usage {
235                Some(ref usage) => usage.as_ptr(),
236                None => ptr::null(),
237            },
238            val: self
239                .opt_cache
240                .options
241                .len()
242                .try_into()
243                .expect("Argument table overflow"),
244        };
245
246        match unsafe { spank_sys::spank_option_register(self.spank, &mut c_spank_opt) } {
247            spank_sys::ESPANK_SUCCESS => {
248                self.opt_cache.options.push(spank_opt.name);
249                Ok(())
250            }
251            e => Err(SpankError::from_spank("spank_option_register", e)),
252        }
253    }
254
255    /// Returns the list of arguments configured in the `plugstack.conf` file
256    /// for this plugin
257    pub fn plugin_argv(&self) -> Result<Vec<&str>, SpankError> {
258        self.argv_to_vec(self.argc as usize, self.argv)
259    }
260
261    /// Prepends the vector of str `argv` to the argument vector of the task
262    /// to be spawned. This function can be invoked from the following
263    /// functions: slurm_spank_task_init_privileged, and slurm_spank_task_init.
264    ///
265    /// An error is returned if called outside of a task context or if the
266    /// argument vector is invalid.
267    pub fn prepend_task_argv(&self, argv: Vec<&str>) -> Result<(), SpankError> {
268        let c_argv: Vec<CString> = argv
269            .iter()
270            .map(|&arg| CString::new(arg).map_err(|_| SpankError::from_str(arg)))
271            .collect::<Result<Vec<CString>, SpankError>>()?;
272
273        self.prepend_task_cstring(c_argv)
274    }
275
276    /// Prepends the vector of OsStr `argv` to the argument vector of the task
277    /// to be spawned. This function can be invoked from the following
278    /// functions: slurm_spank_task_init_privileged, and slurm_spank_task_init.
279    ///
280    /// An error is returned if called outside of a task context or if the
281    /// argument vector is invalid.
282    pub fn prepend_task_argv_os(&self, argv: Vec<&OsStr>) -> Result<(), SpankError> {
283        let c_argv: Vec<CString> = argv
284            .iter()
285            .map(|&arg| {
286                CString::new(arg.as_bytes())
287                    .map_err(|_| SpankError::CStringError(arg.to_string_lossy().to_string()))
288            })
289            .collect::<Result<Vec<CString>, SpankError>>()?;
290
291        self.prepend_task_cstring(c_argv)
292    }
293
294    fn prepend_task_cstring(&self, argv: Vec<CString>) -> Result<(), SpankError> {
295        let mut c_argv_ptrs: Vec<*const c_char> = argv.iter().map(|arg| arg.as_ptr()).collect();
296        let c_argv_ptr: *mut *const c_char = c_argv_ptrs.as_mut_ptr();
297        let count = i32::try_from(argv.len()).map_err(|_| SpankError::Overflow(argv.len()))?;
298
299        match unsafe { spank_sys::spank_prepend_task_argv(self.spank, count, c_argv_ptr) } {
300            spank_sys::ESPANK_SUCCESS => Ok(()),
301            e => Err(SpankError::from_spank("spank_prepend_task_argv", e)),
302        }
303    }
304
305    fn argv_to_vec(
306        &self,
307        argc: usize,
308        argv: *const *const c_char,
309    ) -> Result<Vec<&str>, SpankError> {
310        unsafe { slice::from_raw_parts(argv, argc) }
311            .iter()
312            .map(|&arg| {
313                let cstr = unsafe { CStr::from_ptr(arg) };
314                cstr.to_str().map_err(|_| SpankError::from_cstr(cstr))
315            })
316            .collect::<Result<Vec<_>, _>>()
317    }
318
319    fn argv_to_vec_os(&self, argc: usize, argv: *const *const c_char) -> Vec<&OsStr> {
320        unsafe { slice::from_raw_parts(argv, argc) }
321            .iter()
322            .map(|&arg| OsStr::from_bytes(unsafe { CStr::from_ptr(arg) }.to_bytes()))
323            .collect()
324    }
325
326    ///  Retrieves the environment variable `name` from the job's environment as
327    ///  a String
328    ///
329    ///  This function returns Ok(none) if the environment variable is not set.
330    ///  It returns an error if the value is not a valid UTF-8 string or if
331    ///  called outside of remote context. To access job environment variables
332    ///  from local context, use std::env directly
333    pub fn getenv<N: AsRef<OsStr>>(&self, name: N) -> Result<Option<String>, SpankError> {
334        match self.do_getenv_os(name, spank_sys::spank_getenv)? {
335            None => Ok(None),
336            Some(env) => Ok(Some(
337                env.into_string().map_err(|e| SpankError::from_os_str(&e))?,
338            )),
339        }
340    }
341
342    ///  Retrieves the environment variable `name` from the job's environment as
343    ///  a lossy String
344    ///
345    ///  If the value contains invalid UTF-8 code points, those invalid points
346    ///  will be replaced with � (U+FFFD). This function returns Ok(none) if the
347    ///  environment variable is not set. It returns an error if called outside
348    ///  of remote context. To access job environment variables from local
349    ///  context, use std::env directly
350    pub fn getenv_lossy<N: AsRef<OsStr>>(&self, name: N) -> Result<Option<String>, SpankError> {
351        self.do_getenv_os(name, spank_sys::spank_getenv)
352            .map(|env| env.map(|s| s.to_string_lossy().into_owned()))
353    }
354
355    ///  Retrieves the environment variable `name` from the job's environment as
356    ///  an OsString
357    ///
358    ///  The return value is an OsString which can hold arbitrary sequences of
359    ///  bytes on Unix-like systems. This function returns Ok(none) if the
360    ///  environment variable is not set. It returns an error if called outside
361    ///  of remote context. To access job environment variables from local
362    ///  context, use std::env directly
363    pub fn getenv_os<N: AsRef<OsStr>>(&self, name: N) -> Result<Option<OsString>, SpankError> {
364        self.do_getenv_os(name, spank_sys::spank_getenv)
365    }
366
367    ///  Retrieves the environment variable `name` from the job's control
368    ///  environment as a String
369    ///
370    ///  This function returns Ok(none) if the environment variable is not set.
371    ///  It returns an error if the value is not a valid UTF-8 string or if
372    ///  called outside of local/allocator context. To access job control environment
373    ///  variables from job script context, use std::env directly.
374    pub fn job_control_getenv<N: AsRef<OsStr>>(
375        &self,
376        name: N,
377    ) -> Result<Option<String>, SpankError> {
378        match self.do_getenv_os(name, spank_sys::spank_job_control_getenv)? {
379            None => Ok(None),
380            Some(env) => Ok(Some(
381                env.into_string().map_err(|e| SpankError::from_os_str(&e))?,
382            )),
383        }
384    }
385
386    ///  Retrieves the environment variable `name` from the job's control
387    ///  environment as a lossy String
388    ///
389    ///  If the value contains invalid UTF-8 code points, those invalid points
390    ///  will be replaced with � (U+FFFD). This function returns Ok(none) if the
391    ///  environment variable is not set. It returns an error if called outside
392    ///  of local/allocator context. To access job control environment variables from
393    ///  job script context, use std::env directly.
394    pub fn job_control_getenv_lossy<N: AsRef<OsStr>>(
395        &self,
396        name: N,
397    ) -> Result<Option<String>, SpankError> {
398        self.do_getenv_os(name, spank_sys::spank_job_control_getenv)
399            .map(|env| env.map(|s| s.to_string_lossy().into_owned()))
400    }
401
402    ///  Retrieves the environment variable `name` from the job's control
403    ///  environment as an OsString
404    ///
405    ///  The return value is an OsString which can hold arbitrary sequences of
406    ///  bytes on Unix-like systems. This function returns Ok(none) if the
407    ///  environment variable is not set. It returns an error if called outside
408    ///  of local/allocator context. To access job control environment variables from
409    ///  job script context, use std::env directly.
410    pub fn job_control_getenv_os<N: AsRef<OsStr>>(
411        &self,
412        name: N,
413    ) -> Result<Option<OsString>, SpankError> {
414        self.do_getenv_os(name, spank_sys::spank_job_control_getenv)
415    }
416
417    fn do_getenv_os<N: AsRef<OsStr>>(
418        &self,
419        name: N,
420        spank_fn: unsafe extern "C" fn(
421            spank_sys::spank_t,
422            *const c_char,
423            *mut c_char,
424            c_int,
425        ) -> spank_sys::spank_err_t,
426    ) -> Result<Option<OsString>, SpankError> {
427        let mut max_size = 4096;
428        let c_name = CString::new(name.as_ref().as_bytes())
429            .map_err(|_| SpankError::from_str(&name.as_ref().to_string_lossy()))?;
430        loop {
431            let mut buffer = vec![0; max_size];
432            let buffer_ptr = buffer.as_mut_ptr();
433            match unsafe {
434                spank_fn(
435                    self.spank,
436                    c_name.as_ptr(),
437                    buffer_ptr as *mut c_char,
438                    max_size as i32,
439                )
440            } {
441                spank_sys::slurm_err_t_ESPANK_ENV_NOEXIST => return Ok(None),
442                spank_sys::ESPANK_SUCCESS => {
443                    let cstr = unsafe { CStr::from_ptr(buffer_ptr) };
444                    return Ok(Some(OsStr::from_bytes(cstr.to_bytes()).to_os_string()));
445                }
446                spank_sys::slurm_err_t_ESPANK_NOSPACE => {
447                    max_size *= 2;
448                    continue;
449                }
450                e => return Err(SpankError::from_spank("spank_getenv", e)),
451            }
452        }
453    }
454
455    /// Sets the environment variable `name` in the job's environment to the
456    /// provided `value`.
457    ///
458    /// Existing values will be overwritten if `overwrite` is set. This function
459    /// will return an error if called outside of remote context. To access job
460    /// environment variables from local context, use std::env directly.
461    pub fn setenv<N: AsRef<OsStr>, V: AsRef<OsStr>>(
462        &self,
463        name: N,
464        value: V,
465        overwrite: bool,
466    ) -> Result<(), SpankError> {
467        self.do_setenv(name, value, overwrite, spank_sys::spank_setenv)
468    }
469
470    /// Sets the environment variable `name` in the job's control environment to
471    /// the provided `value`.
472    ///
473    /// Existing values will be overwritten if `overwrite` is set. This function
474    /// will return an error if called outside of local context. To access job
475    /// control environment variables from remote context, use std::env directly.
476    pub fn job_control_setenv<N: AsRef<OsStr>, V: AsRef<OsStr>>(
477        &self,
478        name: N,
479        value: V,
480        overwrite: bool,
481    ) -> Result<(), SpankError> {
482        self.do_setenv(name, value, overwrite, spank_sys::spank_job_control_setenv)
483    }
484
485    pub fn do_setenv<N: AsRef<OsStr>, V: AsRef<OsStr>>(
486        &self,
487        name: N,
488        value: V,
489        overwrite: bool,
490        spank_fn: unsafe extern "C" fn(
491            spank_sys::spank_t,
492            *const c_char,
493            *const c_char,
494            c_int,
495        ) -> spank_sys::spank_err_t,
496    ) -> Result<(), SpankError> {
497        let c_name = CString::new(name.as_ref().as_bytes())
498            .map_err(|_| SpankError::from_os_str(name.as_ref()))?;
499        let c_value = CString::new(value.as_ref().as_bytes())
500            .map_err(|_| SpankError::from_os_str(value.as_ref()))?;
501
502        match unsafe {
503            spank_fn(
504                self.spank,
505                c_name.as_ptr(),
506                c_value.as_ptr(),
507                overwrite as c_int,
508            )
509        } {
510            spank_sys::ESPANK_SUCCESS => Ok(()),
511            spank_sys::slurm_err_t_ESPANK_ENV_EXISTS => Err(SpankError::EnvExists(
512                name.as_ref().to_string_lossy().to_string(),
513            )),
514            e => Err(SpankError::from_spank("spank_setenv", e)),
515        }
516    }
517
518    /// Unsets the environment variable `name` in the job's environment.
519    ///
520    /// This function is a no-op if the variable is already unset. It will return an
521    /// error if called outside of remote context. To access the job variables
522    /// from local context, use std::env directly.
523    pub fn unsetenv<N: AsRef<OsStr>>(&self, name: N) -> Result<(), SpankError> {
524        self.do_unsetenv(name, spank_sys::spank_unsetenv)
525    }
526
527    /// Unsets the environment variable `name` in the job's control environment.
528    ///
529    /// This function is a no-op if the variable is already unset. It will
530    /// return an error if called outside of local/allocator context. To access job
531    /// control environment variables from remote context, use std::env
532    /// directly.
533    pub fn job_control_unsetenv<N: AsRef<OsStr>>(&self, name: N) -> Result<(), SpankError> {
534        self.do_unsetenv(name, spank_sys::spank_job_control_unsetenv)
535    }
536
537    fn do_unsetenv<N: AsRef<OsStr>>(
538        &self,
539        name: N,
540        spank_fn: unsafe extern "C" fn(spank_sys::spank_t, *const c_char) -> spank_sys::spank_err_t,
541    ) -> Result<(), SpankError> {
542        let c_name = CString::new(name.as_ref().as_bytes())
543            .map_err(|_| SpankError::from_os_str(name.as_ref()))?;
544
545        match unsafe { spank_fn(self.spank, c_name.as_ptr()) } {
546            spank_sys::ESPANK_SUCCESS => Ok(()),
547            e => Err(SpankError::from_spank("spank_unsetenv", e)),
548        }
549    }
550
551    fn getopt_os(&self, name: &str) -> Result<Option<OsString>, SpankError> {
552        let name_c = if let Ok(n) = CString::new(name) {
553            n
554        } else {
555            return Err(SpankError::from_str(name));
556        };
557
558        let mut c_spank_opt = spank_sys::spank_option {
559            name: name_c.as_ptr(),
560            has_arg: 1,
561            cb: None,
562            usage: ptr::null(),
563            arginfo: ptr::null(),
564            val: 0,
565        };
566
567        let mut optarg: *mut c_char = ptr::null_mut();
568
569        match unsafe { spank_sys::spank_option_getopt(self.spank, &mut c_spank_opt, &mut optarg) } {
570            spank_sys::ESPANK_SUCCESS => {
571                if !optarg.is_null() {
572                    Ok(Some(
573                        OsStr::from_bytes(unsafe { CStr::from_ptr(optarg) }.to_bytes())
574                            .to_os_string(),
575                    ))
576                } else {
577                    Ok(None)
578                }
579            }
580            e => Err(SpankError::from_spank("spank_option_getopt", e)),
581        }
582    }
583    /// Returns the value set for the option `name` as a lossy String
584    ///
585    /// If the value contains invalid UTF-8 code points, those invalid points
586    /// will be replaced with � (U+FFFD). If the option was specified multiple
587    /// times, this function returns the last value provided.
588    ///
589    /// *WARNING*: If options have not yet been processed (e.g in init callbacks
590    /// or all slurmd contexts), this function will always return None.
591    ///
592    /// *WARNING*: This function always returns None for options which don't
593    /// take values (flag options created without takes_value()) no matter whether
594    /// they were used or not. To check whether a flag was set, use
595    /// is_option_set.
596    pub fn get_option_value_lossy(&self, name: &str) -> Option<Cow<'_, str>> {
597        self.get_option_value_os(name).map(os_value_to_lossy)
598    }
599
600    /// Returns the value set for the option `name` as a String
601    ///
602    /// An error is returned if the value cannot be converted to a String. If
603    /// the option was specified multiple times, it returns the last value
604    /// provided.
605    ///
606    /// *WARNING*: If options have not yet been processed (e.g in init callbacks
607    /// or all slurmd contexts), this function will always return None.
608    ///
609    /// *WARNING*: This function always returns None for options which don't
610    /// take values (flag options created without takes_value()) no matter whether
611    /// they were used or not. To check whether a flag was set, use
612    /// is_option_set.
613    pub fn get_option_value(&self, name: &str) -> Result<Option<Cow<'_, str>>, SpankError> {
614        match self.get_option_value_os(name) {
615            Some(val) => Ok(Some(os_value_to_str(val)?)),
616            None => Ok(None),
617        }
618    }
619
620    /// Returns the value set for the option `name` as an OsString
621    ///
622    /// If the option was specified multiple times, it returns the last value
623    /// provided.
624    ///
625    /// *WARNING*: If options have not yet been processed (e.g in init callbacks
626    /// or all slurmd contexts), this function will always return None.
627    ///
628    /// *WARNING*: This function always returns None for options which don't
629    /// take values (flag options created without takes_value()) no matter whether
630    /// they were used or not. To check whether a flag was set, use
631    /// get_option_count.
632    pub fn get_option_value_os(&self, name: &str) -> Option<Cow<'_, OsStr>> {
633        match self.context() {
634            Ok(Context::JobScript) => self
635                .getopt_os(name)
636                .ok() // We made sure call from the correct context
637                .map(|opt| opt.map(Cow::from))
638                .unwrap_or(None),
639            _ => {
640                if let Some(Some(ref value)) = self.opt_cache.values.get(name) {
641                    Some(Cow::from(value))
642                } else {
643                    None
644                }
645            }
646        }
647    }
648
649    /// Returns whether an option was set
650    ///
651    /// Use this function to process flag options.
652    ///
653    /// *WARNING*: If options have not yet been processed (e.g in init callbacks
654    /// or all slurmd contexts), this function will always return false.
655    pub fn is_option_set(&self, name: &str) -> bool {
656        match self.context() {
657            Ok(Context::JobScript) => self.getopt_os(name).is_ok(),
658            _ => self.opt_cache.values.get(name).is_some(),
659        }
660    }
661
    spank_item_getter!(
        /// Returns the primary group id (`SpankItem::JobGid`)
        job_gid,
        SpankItem::JobGid,
        gid_t
    );
    spank_item_getter!(
        /// Returns the user id (`SpankItem::JobUid`)
        job_uid,
        SpankItem::JobUid,
        uid_t
    );
    spank_item_getter!(
        /// Returns the job id (`SpankItem::JobId`)
        job_id,
        SpankItem::JobId,
        u32
    );
    spank_item_getter!(
        /// Returns the job step id (`SpankItem::JobStepid`)
        job_stepid,
        SpankItem::JobStepid,
        u32
    );
    spank_item_getter!(
        /// Returns the total number of nodes in job (`SpankItem::JobNnodes`)
        job_nnodes,
        SpankItem::JobNnodes,
        u32
    );
    spank_item_getter!(
        /// Returns the relative id of this node (`SpankItem::JobNodeid`)
        job_nodeid,
        SpankItem::JobNodeid,
        u32
    );
    spank_item_getter!(
        /// Returns the number of local tasks (`SpankItem::JobLocalTaskCount`)
        job_local_task_count,
        SpankItem::JobLocalTaskCount,
        u32
    );
    spank_item_getter!(
        /// Returns the total number of tasks in job
        /// (`SpankItem::JobTotalTaskCount`)
        job_total_task_count,
        SpankItem::JobTotalTaskCount,
        u32
    );
    spank_item_getter!(
        /// Returns the number of CPUs used by this job (`SpankItem::JobNcpus`)
        job_ncpus,
        SpankItem::JobNcpus,
        u16
    );
716
717    /// Returns the job command arguments as Vec<&str>. An error is returned if
718    /// arguments are not valid UTF-8
719    pub fn job_argv(&self) -> Result<Vec<&str>, SpankError> {
720        self.job_argv_c()
721            .and_then(|(argc, argv)| self.argv_to_vec(argc, argv))
722    }
723
724    /// Returns the job command args as Vec<&OsStr>
725    pub fn job_argv_os(&self) -> Result<Vec<&OsStr>, SpankError> {
726        self.job_argv_c()
727            .map(|(argc, argv)| self.argv_to_vec_os(argc, argv))
728    }
729
730    fn job_argv_c(&self) -> Result<(usize, *const *const c_char), SpankError> {
731        let mut argc: c_int = 0;
732        let mut argv: *const *const c_char = ptr::null_mut();
733
734        let argc_ptr: *mut c_int = &mut argc;
735        let argv_ptr: *mut *const *const c_char = &mut argv;
736
737        match unsafe {
738            spank_sys::spank_get_item(self.spank, SpankItem::JobArgv.into(), argc_ptr, argv_ptr)
739        } {
740            spank_sys::ESPANK_SUCCESS => {
741                if argv.is_null() {
742                    panic!("spank_get_item returned unexpected NULL ptr");
743                }
744                Ok((argc as usize, argv))
745            }
746            e => Err(SpankError::from_spank("spank_get_item", e)),
747        }
748    }
749
750    /// Returns the job environment variables as a Vec<&str>. An error is
751    /// returned if variables are not valid UTF-8
752    pub fn job_env(&self) -> Result<Vec<&str>, SpankError> {
753        self.job_env_c()
754            .and_then(|(argc, argv)| self.argv_to_vec(argc, argv))
755    }
756
757    /// Returns the job environment variables as an array of Vec<&OsStr>
758    pub fn job_env_os(&self) -> Result<Vec<&OsStr>, SpankError> {
759        self.job_env_c()
760            .map(|(argc, argv)| self.argv_to_vec_os(argc, argv))
761    }
762
763    fn job_env_c(&self) -> Result<(usize, *const *const c_char), SpankError> {
764        let mut envv: *const *const c_char = ptr::null_mut();
765
766        match unsafe { spank_sys::spank_get_item(self.spank, SpankItem::JobEnv.into(), &mut envv) }
767        {
768            spank_sys::ESPANK_SUCCESS => {
769                if envv.is_null() {
770                    panic!("spank_get_item returned unexpected NULL ptr")
771                }
772                let mut argc: isize = 0;
773                while !unsafe { *envv.offset(argc) }.is_null() {
774                    argc += 1;
775                }
776                Ok((argc as usize, envv))
777            }
778            e => Err(SpankError::from_spank("spank_get_item", e)),
779        }
780    }
781
782    spank_item_getter!(
783        /// Returns the local task id (SPANK item `S_TASK_ID`)
784        task_id,
785        SpankItem::TaskId,
786        c_int
787    );
788
789    spank_item_getter!(
790        /// Returns the global task id (SPANK item `S_TASK_GLOBAL_ID`)
791        task_global_id,
792        SpankItem::TaskGlobalId,
793        u32
794    );
795
796    spank_item_getter!(
797        /// Returns the exit status of the current task, if it has exited
798        task_exit_status,
799        SpankItem::TaskExitStatus,
800        c_int
801    );
802
803    spank_item_getter!(
804        /// Returns the pid of the current task (SPANK item `S_TASK_PID`)
805        task_pid,
806        SpankItem::TaskPid,
807        pid_t
808    );
809    spank_item_getter!(
810        /// Returns the global task id corresponding to the specified pid
811        pid_to_global_id,
812        SpankItem::JobPidToGlobalId,
813        pid,
814        pid_t,
815        u32
816    );
817    spank_item_getter!(
818        /// Returns the local task id corresponding to the specified pid
819        pid_to_local_id,
820        SpankItem::JobPidToLocalId,
821        pid,
822        pid_t,
823        u32
824    );
825    spank_item_getter!(
826        /// Returns the global task id corresponding to the specified local id
827        local_to_global_id,
828        SpankItem::JobLocalToGlobalId,
829        local_id,
830        u32,
831        u32
832    );
833    spank_item_getter!(
834        /// Returns the local task id corresponding to the specified global id
835        global_to_local_id,
836        SpankItem::JobGlobalToLocalId,
837        global_id,
838        u32,
839        u32
840    );
841
842    /// Returns the list of supplementary gids for the current job
843    pub fn job_supplementary_gids(&self) -> Result<Vec<gid_t>, SpankError> {
844        let mut gidc: c_int = 0;
845        let mut gidv: *const gid_t = ptr::null_mut();
846
847        let gidc_ptr: *mut c_int = &mut gidc;
848        let gidv_ptr: *mut *const gid_t = &mut gidv;
849
850        match unsafe {
851            spank_sys::spank_get_item(
852                self.spank,
853                SpankItem::JobSupplementaryGids.into(),
854                gidv_ptr,
855                gidc_ptr,
856            )
857        } {
858            spank_sys::ESPANK_SUCCESS => {
859                Ok(unsafe { slice::from_raw_parts(gidv, gidc as usize) }.to_vec())
860            }
861            e => Err(SpankError::from_spank("spank_get_item", e)),
862        }
863    }
864
865    spank_item_getter!(
866        /// Returns the current Slurm version as a string
867        slurm_version,
868        SpankItem::SlurmVersion,
869        &str
870    );
871
872    spank_item_getter!(
873        /// Returns the major release number of Slurm as a string
874        slurm_version_major,
875        SpankItem::SlurmVersionMajor,
876        &str
877    );
878    spank_item_getter!(
879        /// Returns the minor release number of Slurm as a string
880        slurm_version_minor,
881        SpankItem::SlurmVersionMinor,
882        &str
883    );
884    spank_item_getter!(
885        /// Returns the micro release number of Slurm as a string
886        slurm_version_micro,
887        SpankItem::SlurmVersionMicro,
888        &str
889    );
890    spank_item_getter!(
891        /// Returns the number of CPUs allocated per task. Returns 1 if the `--overcommit` option is used
892        step_cpus_per_task,
893        SpankItem::StepCpusPerTask,
894        u64
895    );
896
897    spank_item_getter!(
898        /// Returns the list of allocated cores for the job as a string
899        job_alloc_cores,
900        SpankItem::JobAllocCores,
901        &str
902    );
903    spank_item_getter!(
904        /// Returns the amount of allocated memory for the job in MB
905        job_alloc_mem,
906        SpankItem::JobAllocMem,
907        u64
908    );
909    spank_item_getter!(
910        /// Returns the list of allocated cores for the step as a string
911        step_alloc_cores,
912        SpankItem::StepAllocCores,
913        &str
914    );
915    spank_item_getter!(
916        /// Returns the amount of allocated memory for the step in MB
917        step_alloc_mem,
918        SpankItem::StepAllocMem,
919        u64
920    );
921    spank_item_getter!(
922        /// Returns the restart count for the job
923        slurm_restart_count,
924        SpankItem::SlurmRestartCount,
925        u32
926    );
927    spank_item_getter!(
928        /// Returns the job array id
929        job_array_id,
930        SpankItem::JobArrayId,
931        u32
932    );
933    spank_item_getter!(
934        /// Returns the job array task id
935        job_array_task_id,
936        SpankItem::JobArrayTaskId,
937        u32
938    );
939}
940
/// Converts `msg` to a `CString`, rendering every embedded NUL character as
/// the character `0`.
fn cstring_escape_null(msg: &str) -> CString {
    // XXX: NUL characters cannot be passed to the slurm log functions, and a
    // plugin author could hardly do anything useful with an error here, so
    // they are shown as a 0 in the logs instead.
    let sanitized = msg.replace('\u{0000}', "0");

    // Cannot panic: every NUL character was replaced above.
    CString::new(sanitized).unwrap()
}
951
/// Log level for SPANK logging functions
///
/// Each variant selects the matching `slurm_*` logging function in
/// [`spank_log`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LogLevel {
    /// Logged through `slurm_error`
    Error,
    /// Logged through `slurm_info`
    Info,
    /// Logged through `slurm_verbose`
    Verbose,
    /// Logged through `slurm_debug`
    Debug,
    /// Logged through `slurm_debug2`
    Debug2,
    /// Logged through `slurm_debug3`
    Debug3,
}
961
// NUL-terminated "%s" format passed as the first argument of the variadic
// Slurm logging functions; the message is always passed as its sole argument.
962static FORMAT_STRING: [u8; 3] = *b"%s\0";
963
964/// Log messages through SPANK
965pub fn spank_log(level: LogLevel, msg: &str) {
966    let c_msg = cstring_escape_null(msg);
967    let c_format_string = FORMAT_STRING.as_ptr() as *const c_char;
968
969    match level {
970        LogLevel::Error => unsafe { spank_sys::slurm_error(c_format_string, c_msg.as_ptr()) },
971        LogLevel::Info => unsafe { spank_sys::slurm_info(c_format_string, c_msg.as_ptr()) },
972        LogLevel::Verbose => unsafe { spank_sys::slurm_verbose(c_format_string, c_msg.as_ptr()) },
973        LogLevel::Debug => unsafe { spank_sys::slurm_debug(c_format_string, c_msg.as_ptr()) },
974        LogLevel::Debug2 => unsafe { spank_sys::slurm_debug2(c_format_string, c_msg.as_ptr()) },
975        LogLevel::Debug3 => unsafe { spank_sys::slurm_debug3(c_format_string, c_msg.as_ptr()) },
976    }
977}
978
979pub fn slurm_spank_log(msg: &str) {
980    let c_msg = cstring_escape_null(msg);
981    let c_format_string = FORMAT_STRING.as_ptr() as *const c_char;
982    unsafe { spank_sys::slurm_spank_log(c_format_string, c_msg.as_ptr()) }
983}
984
/// Log messages through SPANK at the error level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_error {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Error, &message);
    }};
}
992
/// Log messages through SPANK at the info level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_info {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Info, &message);
    }};
}
1000
/// Log messages through SPANK at the verbose level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_verbose {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Verbose, &message);
    }};
}
1008
/// Log messages through SPANK at the debug level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_debug {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Debug, &message);
    }};
}
1016
/// Log messages through SPANK at the debug2 level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_debug2 {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Debug2, &message);
    }};
}
1024
/// Log messages through SPANK at the debug3 level
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_debug3 {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::spank_log($crate::LogLevel::Debug3, &message);
    }};
}
1032
/// Log messages back to the user at the error level without prepending "error:"
///
/// Accepts the same arguments as `format!`.
#[macro_export]
macro_rules! spank_log_user {
    ($($arg:tt)*) => {{
        let message = format!($($arg)*);
        $crate::slurm_spank_log(&message);
    }};
}
1040
1041// XXX: Slurm should only call us in a sequential and non-reentrant way but Rust
1042// doesn't know that. The overhead of locking these Mutex at each Slurm callback
1043// should be negligible and we'll get a clear error if something is called out
1044// of order by mistake. However this is not ideal because it requires the Plugin
1045// to be Send which can be restricting. We should probably confirm with Slurm
1046// devs that all calls are sequential and switch to a static mut or similar.
1047lazy_static! {
    // Option cache shared by the option callback (which records values in it)
    // and by every SPANK callback (which receives it mutably).
1048    static ref OPTION_CACHE: Mutex<OptionCache> = Mutex::new(OptionCache::default());
    // The plugin instance; `None` until the first callback creates it from
    // `P::default()`.
1049    static ref PLUGIN: Mutex<Option<Box<dyn Plugin>>> = Mutex::new(None);
1050}
1051
1052#[doc(hidden)]
1053pub fn spank_callback_with_globals<P: Plugin + Default + 'static, F>(func: F) -> c_int
1054    where
1055        F: FnOnce(&mut dyn Plugin, &mut OptionCache, bool) -> Result<(), Box<dyn Error>> + UnwindSafe,
1056{
1057    let unwind_res = catch_unwind(|| {
1058        // These Mutexes should never be contended unless something unreoverable
1059        // happened before
1060        let mut opt_cache = OPTION_CACHE
1061            .try_lock()
1062            .expect("Failed to acquire global options mutex");
1063        let mut plugin_option = PLUGIN
1064            .try_lock()
1065            .expect("Failed to acquire global plugin mutex");
1066
1067        let mut need_setup = false;
1068
1069        let mut plugin = plugin_option.take().unwrap_or_else(|| {
1070            let p = P::default();
1071            need_setup = true;
1072            Box::new(p)
1073        });
1074
1075        let err = match func(plugin.as_mut(), &mut opt_cache, need_setup) {
1076            Ok(()) => 0,
1077            Err(_) => -1,
1078        };
1079        plugin_option.replace(plugin);
1080
1081        err
1082    });
1083
1084    match unwind_res {
1085        Ok(e) => e,
1086        Err(panic) => {
1087            let panic_string = panic
1088                .downcast::<&str>()
1089                .map(|b| b.to_string())
1090                .or_else(|panic| panic.downcast::<String>().map(|s| *s))
1091                .unwrap_or_else(|_| "non-string panic".to_string());
1092
1093            spank_log_error!(
1094                "Caught panic while running spank callback: {}",
1095                panic_string
1096            );
1097            -1
1098        }
1099    }
1100}
1101
1102#[no_mangle]
1103// We pass this callback to process all spank options
1104// It just stores which options were set in a cache for later retrieval
1105extern "C" fn spank_option_callback(
1106    val: std::os::raw::c_int,
1107    optarg: *const std::os::raw::c_char,
1108    _remote: std::os::raw::c_int,
1109) -> std::os::raw::c_int {
1110    // This Mutex should never be contended unless something unrecoverable
1111    // already happened before
1112    let mut opt_cache = OPTION_CACHE
1113        .try_lock()
1114        .expect("Failed to acquire global options mutex");
1115
1116    let name = opt_cache.options.get(val as usize).cloned();
1117
1118    let name = match name {
1119        None => {
1120            spank_log(
1121                LogLevel::Error,
1122                &format!(
1123                    "Internal spank-rs error: received unexpected option callback {}",
1124                    val
1125                ),
1126            );
1127            return -1;
1128        }
1129        Some(name) => name,
1130    };
1131
1132    let optarg = {
1133        if optarg.is_null() {
1134            None
1135        } else {
1136            Some(
1137                std::ffi::OsStr::from_bytes(unsafe { std::ffi::CStr::from_ptr(optarg) }.to_bytes())
1138                    .to_os_string(),
1139            )
1140        }
1141    };
1142
1143    opt_cache.values.insert(name, optarg);
1144    0
1145}
1146
1147#[doc(hidden)]
1148// This function only public so that it may be called from the callbacks
1149// generated by the macro. It should not be called to create handles manually.
1150pub fn init_spank_handle(
1151    spank: spank_sys::spank_t,
1152    argc: c_int,
1153    argv: *const *const c_char,
1154    opt_cache: &mut OptionCache,
1155) -> SpankHandle {
1156    SpankHandle {
1157        spank,
1158        argc,
1159        argv,
1160        opt_cache,
1161    }
1162}
1163
1164#[doc(hidden)]
1165// This function is only public so that it may be called from the callbacks
1166// generated by the macro.
1167pub fn make_cb_span(id: &str, cb: &str, ctx: &str, task_id: Option<u32>) -> tracing::Span {
1168    if let Some(task_id) = task_id {
1169        span!(tracing::Level::DEBUG, "spank", id, cb, ctx, task_id)
1170    } else {
1171        span!(tracing::Level::DEBUG, "spank", id, cb, ctx)
1172    }
1173}
1174
1175pub use spank_sys::SLURM_VERSION_NUMBER;
1176
#[macro_export]
/// Export a Plugin to make it available to the Slurm plugin loader
///
/// # Example
///
///```rust,no_run
///SPANK_PLUGIN!(b"renice", SLURM_VERSION_NUMBER, SpankRenice);
///```
///
/// The first argument is the name of the SPANK plugin. It has to be provided as a byte string.
///
/// The second argument is the Slurm version for which the plugin is built, specified in hexadecimal (2 digits per version component).
/// The SLURM_VERSION_NUMBER constant can be used. It refers to the version of the Slurm headers that the plugin is built against.
///
/// The last argument is a struct for which the Plugin trait has been implemented
///
/// The macro exports the `plugin_name`, `plugin_type` and `plugin_version`
/// symbols and one `slurm_spank_*` function per SPANK callback, each
/// forwarding to the matching method of the Plugin trait.
macro_rules! SPANK_PLUGIN {
    ($spank_name:literal, $spank_version:expr, $spank_ty:ty) => {
        // Size in bytes of the byte-string literal, used to size `plugin_name`.
        const fn byte_string_size<T>(_: &T) -> usize {
            std::mem::size_of::<T>()
        }
        #[no_mangle]
        pub static plugin_name: [u8; byte_string_size($spank_name) + 1] =
            *$crate::byte_strings::concat_bytes!($spank_name, "\0");
        #[no_mangle]
        pub static mut plugin_type: [u8; 6] = *b"spank\0";
        #[no_mangle]
        pub static plugin_version: std::os::raw::c_uint = $spank_version;

        // Compile-time check that the given type implements the Plugin trait.
        // The trait is named through `$crate` so that the macro does not
        // depend on `Plugin` being imported at the call site.
        fn _check_spank_trait<T: $crate::Plugin>() {}
        fn _t() {
            _check_spank_trait::<$spank_ty>()
        }

        macro_rules! spank_hook {
            ($c_spank_cb:ident, $rust_spank_cb:ident) => {
                #[no_mangle]
                #[doc(hidden)]
                pub extern "C" fn $c_spank_cb(
                    spank: $crate::spank_sys::spank_t,
                    ac: std::os::raw::c_int,
                    argv: *const *const std::os::raw::c_char,
                ) -> std::os::raw::c_int {
                    $crate::spank_callback_with_globals::<$spank_ty, _>(
                        |plugin, options, need_setup| {
                            let mut spank = $crate::init_spank_handle(spank, ac, argv, options);

                            // First callback for this plugin instance: run
                            // the one-time setup before anything else.
                            if need_setup {
                                plugin.setup(&mut spank).map_err(|e| {
                                    plugin.report_error(e.as_ref());
                                    e
                                })?;
                            }

                            // The fallback string is only built when the
                            // context lookup fails.
                            let context = spank
                                .context()
                                .map(|ctx| format!("{:?}", ctx))
                                .unwrap_or_else(|_| "Error".to_string());

                            let tid = spank.task_global_id().ok();

                            let span = $crate::make_cb_span(
                                std::ffi::CStr::from_bytes_with_nul(&plugin_name)?.to_str()?,
                                stringify!($c_spank_cb),
                                &context,
                                tid,
                            );
                            let _guard = span.enter();

                            unsafe {
                                plugin.$rust_spank_cb(&mut spank).map_err(|e| {
                                    plugin.report_error(e.as_ref());
                                    e
                                })
                            }
                        },
                    )
                }
            };
        }

        spank_hook!(slurm_spank_init, init);
        spank_hook!(slurm_spank_job_prolog, job_prolog);
        spank_hook!(slurm_spank_init_post_opt, init_post_opt);
        spank_hook!(slurm_spank_local_user_init, local_user_init);
        spank_hook!(slurm_spank_user_init, user_init);
        spank_hook!(slurm_spank_task_init_privileged, task_init_privileged);
        spank_hook!(slurm_spank_task_init, task_init);
        spank_hook!(slurm_spank_task_post_fork, task_post_fork);
        spank_hook!(slurm_spank_task_exit, task_exit);
        spank_hook!(slurm_spank_job_epilog, job_epilog);
        spank_hook!(slurm_spank_slurmd_exit, slurmd_exit);
        spank_hook!(slurm_spank_exit, exit);
    };
}
1271
1272/// Implement this trait to create a SPANK plugin
1273/// # Safety
1274/// The task callbacks (task_init, task_init_privileged, ...) are called from child processes which slurmstepd creates by forking itself.
1275/// This may lead to deadlocks or other issues if the Rust plugin is multi-threaded (see <https://man7.org/linux/man-pages/man7/signal-safety.7.html>)
1276#[allow(unused_variables)]
1277pub unsafe trait Plugin: Send {
1278    /// Called just after plugins are loaded.
1279    ///
1280    /// In remote context, this is just after job step is initialized. This
1281    /// function is called before any plugin option processing.
1282    fn init(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1283        Ok(())
1284    }
1285
1286    /// Called at the same time as the job prolog.
1287    ///
1288    /// If this function returns an error and the SPANK plugin that contains it
1289    /// is required in the plugstack.conf, the node that this is run on will be
1290    /// drained.
1291    fn job_prolog(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1292        Ok(())
1293    }
1294
1295    /// Called at the same point as slurm_spank_init, but after all user options
1296    /// to the plugin have been processed.
1297    ///
1298    /// The reason that the init and init_post_opt callbacks are separated is so
1299    /// that plugins can process system-wide options specified in plugstack.conf
1300    /// in the init callback, then process user options, and finally take some
1301    /// action in slurm_spank_init_post_opt if necessary. In the case of a
1302    /// heterogeneous job, slurm_spank_init is invoked once per job component.
1303    fn init_post_opt(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1304        Ok(())
1305    }
1306
1307    /// Called in local (srun) context only after all options have been
1308    /// processed.
1309    ///
1310    /// This is called after the job ID and step IDs are available. This happens
1311    /// in srun after the allocation is made, but before tasks are launched.
1312    fn local_user_init(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1313        Ok(())
1314    }
1315
1316    /// Called after privileges are temporarily dropped. (remote context only)
1317    fn user_init(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1318        Ok(())
1319    }
1320    /// Called for each task just after fork, but before all elevated privileges
1321    /// are dropped. (remote context only)
1322    fn task_init_privileged(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1323        Ok(())
1324    }
1325
1326    /// Called for each task just before execve (2).
1327    ///
1328    /// If you are restricting memory with cgroups, memory allocated here will be
1329    /// in the job's cgroup. (remote context only)
1330    fn task_init(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1331        Ok(())
1332    }
1333
1334    /// Called for each task from parent process after fork (2) is complete.
1335    ///
1336    ///  Due to the fact that slurmd does not exec any tasks until all tasks
1337    ///  have completed fork (2), this call is guaranteed to run before the user
1338    ///  task is executed. (remote context only)
1339    fn task_post_fork(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1340        Ok(())
1341    }
1342
1343    /// Called for each task as its exit status is collected by Slurm. (remote context only)
1344    fn task_exit(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1345        Ok(())
1346    }
1347
1348    /// Called at the same time as the job epilog.
1349    ///
1350    /// If this function returns an error and the SPANK plugin that contains it
1351    /// is required in the plugstack.conf, the node that this is run on will be
1352    /// drained.
1353    fn job_epilog(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1354        Ok(())
1355    }
1356
1357    /// Called in slurmd when the daemon is shut down.
1358    fn slurmd_exit(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1359        Ok(())
1360    }
1361
1362    /// Called once just before slurmstepd exits in remote context. In local
1363    /// context, called before srun exits.
1364    fn exit(&mut self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1365        Ok(())
1366    }
1367
1368    /// Called each time an Err Result is returned from a SPANK callback
1369    ///
1370    /// The default implementation logs errors through SPANK along with their
1371    /// causes.
1372    fn report_error(&self, error: &dyn Error) {
1373        // TODO: use error iterators once they're stable
1374        let mut report = error.to_string();
1375        let mut error = error;
1376        while let Some(source) = error.source() {
1377            report.push_str(&format!(": {}", source));
1378            error = source;
1379        }
1380        error!("{}", &report);
1381    }
1382
1383    /// Called before the first callback from SPANK
1384    ///
1385    /// The default implementation configures a tracing Subscriber.
1386    fn setup(&self, spank: &mut SpankHandle) -> Result<(), Box<dyn Error>> {
1387        let default_level = match spank.context()? {
1388            Context::Local | Context::Allocator => "error",
1389            _ => "debug",
1390        };
1391        let filter_layer =
1392            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(default_level));
1393        let fmt_layer = layer()
1394            .with_ansi(false)
1395            .event_format(SpankTraceFormatter {})
1396            .with_writer(SpankTraceWriter {});
1397        Registry::default()
1398            .with(filter_layer)
1399            .with(fmt_layer)
1400            .init();
1401        Ok(())
1402    }
1403}
1404
1405struct SpankTraceFormatter;
1406
1407impl<S, N> FormatEvent<S, N> for SpankTraceFormatter
1408    where
1409        S: Subscriber + for<'a> LookupSpan<'a>,
1410        N: for<'a> FormatFields<'a> + 'static,
1411{
1412    fn format_event(
1413        &self,
1414        ctx: &FmtContext<'_, S, N>,
1415        mut writer: Writer,
1416        event: &Event<'_>,
1417    ) -> fmt::Result {
1418        // Write level
1419        let level = *event.metadata().level();
1420        write!(writer, "{}: ", level.to_string().to_lowercase())?;
1421
1422        // Write spans and fields of each span
1423        ctx.visit_spans(|span| {
1424            write!(writer, "{}", span.name())?;
1425
1426            let ext = span.extensions();
1427
1428            // `FormattedFields` is a a formatted representation of the span's
1429            // fields, which is stored in its extensions by the `fmt` layer's
1430            // `new_span` method. The fields will have been formatted
1431            // by the same field formatter that's provided to the event
1432            // formatter in the `FmtContext`.
1433            let fields = &ext
1434                .get::<FormattedFields<N>>()
1435                .expect("will never be `None`");
1436
1437            if !fields.is_empty() {
1438                write!(writer, "{{{}}}", fields)?;
1439            }
1440            write!(writer, ": ")?;
1441
1442            Ok(())
1443        })?;
1444
1445        // Write fields on the event
1446        ctx.field_format().format_fields(writer, event)
1447    }
1448}
1449
1450struct SpankTraceWriter {}
1451
1452impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SpankTraceWriter {
1453    type Writer = Self;
1454
1455    fn make_writer(&self) -> Self::Writer {
1456        Self {}
1457    }
1458}
1459
1460impl std::io::Write for SpankTraceWriter {
1461    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1462        let c_string = CString::new(buf)
1463            .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e.to_string()))?;
1464
1465        unsafe {
1466            spank_sys::slurm_info(FORMAT_STRING.as_ptr() as *const c_char, c_string.as_ptr())
1467        };
1468
1469        Ok(buf.len())
1470    }
1471
1472    fn flush(&mut self) -> std::io::Result<()> {
1473        Ok(())
1474    }
1475}
1476
1477#[derive(Debug, Copy, Clone, PartialEq, IntoPrimitive)]
1478#[repr(u32)]
1479enum SpankItem {
1480    JobGid = spank_sys::spank_item_S_JOB_GID,
1481    JobUid = spank_sys::spank_item_S_JOB_UID,
1482    JobId = spank_sys::spank_item_S_JOB_ID,
1483    JobStepid = spank_sys::spank_item_S_JOB_STEPID,
1484    JobNnodes = spank_sys::spank_item_S_JOB_NNODES,
1485    JobNodeid = spank_sys::spank_item_S_JOB_NODEID,
1486    JobLocalTaskCount = spank_sys::spank_item_S_JOB_LOCAL_TASK_COUNT,
1487    JobTotalTaskCount = spank_sys::spank_item_S_JOB_TOTAL_TASK_COUNT,
1488    JobNcpus = spank_sys::spank_item_S_JOB_NCPUS,
1489    JobArgv = spank_sys::spank_item_S_JOB_ARGV,
1490    JobEnv = spank_sys::spank_item_S_JOB_ENV,
1491    TaskId = spank_sys::spank_item_S_TASK_ID,
1492    TaskGlobalId = spank_sys::spank_item_S_TASK_GLOBAL_ID,
1493    TaskExitStatus = spank_sys::spank_item_S_TASK_EXIT_STATUS,
1494    TaskPid = spank_sys::spank_item_S_TASK_PID,
1495    JobPidToGlobalId = spank_sys::spank_item_S_JOB_PID_TO_GLOBAL_ID,
1496    JobPidToLocalId = spank_sys::spank_item_S_JOB_PID_TO_LOCAL_ID,
1497    JobLocalToGlobalId = spank_sys::spank_item_S_JOB_LOCAL_TO_GLOBAL_ID,
1498    JobGlobalToLocalId = spank_sys::spank_item_S_JOB_GLOBAL_TO_LOCAL_ID,
1499    JobSupplementaryGids = spank_sys::spank_item_S_JOB_SUPPLEMENTARY_GIDS,
1500    SlurmVersion = spank_sys::spank_item_S_SLURM_VERSION,
1501    SlurmVersionMajor = spank_sys::spank_item_S_SLURM_VERSION_MAJOR,
1502    SlurmVersionMinor = spank_sys::spank_item_S_SLURM_VERSION_MINOR,
1503    SlurmVersionMicro = spank_sys::spank_item_S_SLURM_VERSION_MICRO,
1504    StepCpusPerTask = spank_sys::spank_item_S_STEP_CPUS_PER_TASK,
1505    JobAllocCores = spank_sys::spank_item_S_JOB_ALLOC_CORES,
1506    JobAllocMem = spank_sys::spank_item_S_JOB_ALLOC_MEM,
1507    StepAllocCores = spank_sys::spank_item_S_STEP_ALLOC_CORES,
1508    StepAllocMem = spank_sys::spank_item_S_STEP_ALLOC_MEM,
1509    SlurmRestartCount = spank_sys::spank_item_S_SLURM_RESTART_COUNT,
1510    JobArrayId = spank_sys::spank_item_S_JOB_ARRAY_ID,
1511    JobArrayTaskId = spank_sys::spank_item_S_JOB_ARRAY_TASK_ID,
1512}
1513
1514#[derive(Debug, Copy, Clone, PartialEq, Eq, IntoPrimitive, FromPrimitive)]
1515#[repr(u32)]
1516/// Errors returned by the underlying SPANK API
///
/// Each variant carries the numeric value of the matching
/// `slurm_err_t_ESPANK_*` constant from `spank_sys`.
1517pub enum SpankApiError {
    // Marked as the `num_enum` default variant.
    // NOTE(review): presumably this makes unknown codes convert to `Generic` -
    // confirm against the num_enum documentation.
1518    #[num_enum(default)]
1519    Generic = spank_sys::slurm_err_t_ESPANK_ERROR,
1520    BadArg = spank_sys::slurm_err_t_ESPANK_BAD_ARG,
1521    NotTask = spank_sys::slurm_err_t_ESPANK_NOT_TASK,
1522    EnvExists = spank_sys::slurm_err_t_ESPANK_ENV_EXISTS,
1523    EnvNotExist = spank_sys::slurm_err_t_ESPANK_ENV_NOEXIST,
1524    NoSpace = spank_sys::slurm_err_t_ESPANK_NOSPACE,
1525    NotRemote = spank_sys::slurm_err_t_ESPANK_NOT_REMOTE,
1526    NoExist = spank_sys::slurm_err_t_ESPANK_NOEXIST,
1527    NotExecd = spank_sys::slurm_err_t_ESPANK_NOT_EXECD,
1528    NotAvail = spank_sys::slurm_err_t_ESPANK_NOT_AVAIL,
1529    NotLocal = spank_sys::slurm_err_t_ESPANK_NOT_LOCAL,
1530}
1531
1532impl Error for SpankApiError {}
1533
1534impl fmt::Display for SpankApiError {
1535    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1536        let cerr = unsafe { CStr::from_ptr(spank_sys::spank_strerror(*self as u32)) };
1537
1538        if let Ok(err) = cerr.to_str() {
1539            write!(f, "{}", err)
1540        } else {
1541            write!(f, "Unknown Error")
1542        }
1543    }
1544}
1545
1546impl Error for SpankError {}
1547
1548#[derive(Debug, Clone)]
1549/// Main Error enum for interfaces provided by this crate
1550pub enum SpankError {
1551    CStringError(String),
1552    EnvExists(String),
1553    IdNotFound(u32),
1554    PidNotFound(pid_t),
1555    SpankAPI(String, SpankApiError),
1556    Utf8Error(String),
1557    Overflow(usize),
1558}
1559
1560impl SpankError {
1561    fn from_os_str(s: &OsStr) -> SpankError {
1562        SpankError::Utf8Error(s.to_string_lossy().to_string())
1563    }
1564    fn from_str(s: &str) -> SpankError {
1565        SpankError::CStringError(s.to_string())
1566    }
1567    fn from_cstr(s: &CStr) -> SpankError {
1568        SpankError::CStringError(s.to_string_lossy().to_string())
1569    }
1570    fn from_spank(name: &str, err: u32) -> SpankError {
1571        SpankError::SpankAPI(name.to_owned(), SpankApiError::from(err))
1572    }
1573    fn from_spank_item(name: &str, arg: SpankItem, err: u32) -> SpankError {
1574        SpankError::SpankAPI(format!("{}({:?})", name, arg), SpankApiError::from(err))
1575    }
1576}
1577
1578trait FromNoExist<T> {
1579    fn from_noexist(v: T) -> SpankError;
1580}
1581
1582impl FromNoExist<u32> for SpankError {
1583    fn from_noexist(v: u32) -> SpankError {
1584        SpankError::IdNotFound(v)
1585    }
1586}
1587
1588impl FromNoExist<pid_t> for SpankError {
1589    fn from_noexist(v: pid_t) -> SpankError {
1590        SpankError::PidNotFound(v)
1591    }
1592}
1593
1594impl fmt::Display for SpankError {
1595    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1596        match self {
1597            SpankError::SpankAPI(name, e) => {
1598                write!(f, "Error calling SPANK API function {}: {}", name, e)
1599            }
1600            SpankError::Utf8Error(s) => write!(f, "Cannot parse {} as UTF-8", s),
1601            SpankError::CStringError(s) => {
1602                write!(f, "String {} cannot be converted to a C string", s)
1603            }
1604            SpankError::EnvExists(s) => write!(
1605                f,
1606                "Environment variable {} exists and overwrite was not set",
1607                s
1608            ),
1609            SpankError::PidNotFound(p) => write!(f, "Could not find pid {}", p),
1610            SpankError::IdNotFound(i) => write!(f, "Could not find id {}", i),
1611            SpankError::Overflow(u) => write!(f, "Integer overflow: {}", u),
1612        }
1613    }
1614}
1615
1616#[derive(Debug, Copy, Clone, PartialEq, Eq, IntoPrimitive, TryFromPrimitive)]
1617#[repr(u32)]
1618/// Context in which a plugin is loaded during a Slurm job
///
/// Each variant carries the numeric value of the matching
/// `spank_context_S_CTX_*` constant from `spank_sys`.
1619pub enum Context {
1620    // We don't represent the error context here, as errors are better embedded in Results
1621    Local = spank_sys::spank_context_S_CTX_LOCAL,
1622    Remote = spank_sys::spank_context_S_CTX_REMOTE,
1623    Allocator = spank_sys::spank_context_S_CTX_ALLOCATOR,
1624    Slurmd = spank_sys::spank_context_S_CTX_SLURMD,
1625    JobScript = spank_sys::spank_context_S_CTX_JOB_SCRIPT,
1626}
1627
/// SPANK plugin command-line option that can be registered with
/// SpankHandle::register_option
///
/// Built with [`SpankOption::new`] and refined with the chainable
/// [`usage`](SpankOption::usage) and
/// [`takes_value`](SpankOption::takes_value) methods.
#[derive(Debug, Clone)]
pub struct SpankOption {
    /// Name of the option
    name: String,
    /// Name of the option's argument, if it takes one
    arginfo: Option<String>,
    /// Usage text of the option, if any
    usage: Option<String>,
}

impl SpankOption {
    /// Creates an option called `name`, with no usage text and no argument.
    pub fn new(name: &str) -> Self {
        SpankOption {
            name: name.to_string(),
            arginfo: None,
            usage: None,
        }
    }

    /// Sets the usage text of the option and returns the updated option.
    pub fn usage(mut self, usage: &str) -> Self {
        self.usage = Some(usage.to_string());
        self
    }

    /// Declares that the option takes a value named `arg_name` and returns
    /// the updated option.
    pub fn takes_value(mut self, arg_name: &str) -> Self {
        self.arginfo = Some(arg_name.to_string());
        self
    }
}