treadmill_rs/api/
switchboard.rs

1pub mod jobs;
2pub mod supervisors;
3
4use crate::api::supervisor_puppet::ParameterValue;
5use crate::api::switchboard_supervisor::{
6    JobInitializingStage, JobUserExitStatus, RestartPolicy, RunningJobState,
7};
8use crate::image::manifest::ImageId;
9use base64::Engine;
10use chrono::{DateTime, Utc};
11use http::StatusCode;
12use serde::{Deserialize, Serialize};
13use serde_with::{base64::Base64, serde_as};
14use std::collections::HashMap;
15use std::fmt::{Display, Formatter};
16use subtle::{Choice, ConstantTimeEq};
17use uuid::Uuid;
18
19pub trait JsonProxiedStatus: Serialize + for<'de> Deserialize<'de> {
20    fn status_code(&self) -> StatusCode;
21}
22
23/// Request Body that [`login_handler`] expects.
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct LoginRequest {
26    pub user_identifier: String,
27    pub password: String,
28}
29
30#[serde_as]
31#[derive(Debug, Serialize, Deserialize, Eq, Copy, Clone)]
32// Use `serde_with::serde_as` since `serde` by itself doesn't support arrays larger than 32 items,
33// and also because `serde_with` has builtin base64-encoding support.
34pub struct AuthToken(#[serde_as(as = "Base64")] pub [u8; 128]);
35impl AuthToken {
36    pub fn encode_for_http(self) -> String {
37        base64::prelude::BASE64_STANDARD.encode(self.0)
38    }
39}
40impl ConstantTimeEq for AuthToken {
41    fn ct_eq(&self, other: &Self) -> Choice {
42        // IMPORTANT: use ConstantTimeEq to mitigate possible timing attacks:
43        // [`subtle::ConstantTimeEq`] is implemented for [u8] so this is sufficient
44        ConstantTimeEq::ct_eq(&self.0[..], &other.0[..])
45    }
46}
47impl PartialEq for AuthToken {
48    fn eq(&self, other: &Self) -> bool {
49        self.ct_eq(other).into()
50    }
51}
52
53/// Response Body that [`login_handler`] emits.
54///
55/// Indicates that the user successfully authenticated, and was issued `token`, which inherits the
56/// user's credentials, and will expire at `expires_at`.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct LoginResponse {
59    pub token: AuthToken,
60    pub expires_at: DateTime<Utc>,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize)]
64#[serde(tag = "type")]
65#[serde(rename_all = "snake_case")]
66pub enum JobInitSpec {
67    /// Resume a previously started job.
68    ResumeJob { job_id: Uuid },
69
70    /// Restart a job.
71    RestartJob { job_id: Uuid },
72
73    /// Which image to base this job off. If the image is not locally cached
74    /// at the supervisor, it will be fetched using its manifest prior to
75    /// executing the job.
76    ///
77    /// Images are content-addressed by the SHA-256 digest of their
78    /// manifest.
79    Image { image_id: ImageId },
80}
81
82#[derive(Serialize, Deserialize, Debug, Clone)]
83#[serde(rename_all = "snake_case")]
84pub struct JobRequest {
85    /// What kind of job this is.
86    pub init_spec: JobInitSpec,
87
88    /// The set of initial SSH keys to deploy onto the image.
89    ///
90    /// The image's configuration of the Treadmill puppet daemon determines
91    /// how and whether these keys will be loaded.
92    pub ssh_keys: Vec<String>,
93
94    pub restart_policy: RestartPolicy,
95
96    /// A hash map of parameters provided to this job execution. These
97    /// parameters are provided to the puppet daemon.
98    pub parameters: HashMap<String, ParameterValue>,
99
100    /// The tag configuration.
101    ///
102    /// FIXME: TO BE SPECIFIED
103    pub tag_config: String,
104
105    #[serde(with = "crate::util::chrono::optional_duration")]
106    pub override_timeout: Option<chrono::Duration>,
107}
108
109// In accordance with the Job Lifecycle document.
110#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
111#[serde(rename_all = "snake_case")]
112pub enum ExitStatus {
113    SupervisorMatchError,
114    QueueTimeout,
115    InternalSupervisorError,
116    SupervisorHostStartFailure,
117    SupervisorDroppedJob,
118    JobCanceled,
119    JobTimeout,
120    WorkloadFinishedSuccess,
121    WorkloadFinishedError,
122    WorkloadFinishedUnknown,
123}
124impl From<JobUserExitStatus> for ExitStatus {
125    fn from(value: JobUserExitStatus) -> Self {
126        match value {
127            JobUserExitStatus::Success => Self::WorkloadFinishedSuccess,
128            JobUserExitStatus::Error => Self::WorkloadFinishedError,
129            JobUserExitStatus::Unknown => Self::WorkloadFinishedUnknown,
130        }
131    }
132}
133impl Display for ExitStatus {
134    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
135        let s = match self {
136            ExitStatus::SupervisorMatchError => "failed to match",
137            ExitStatus::QueueTimeout => "timed out in queue",
138            ExitStatus::InternalSupervisorError => "internal supervisor error",
139            ExitStatus::SupervisorHostStartFailure => "supervisor/host start failure",
140            ExitStatus::SupervisorDroppedJob => "supervisor dropped job",
141            ExitStatus::WorkloadFinishedError => "user-defined workload terminated with error",
142            ExitStatus::WorkloadFinishedSuccess => "user-defined workload terminated with success",
143            ExitStatus::WorkloadFinishedUnknown => {
144                "user-defined workload terminated with unknown status"
145            }
146            ExitStatus::JobCanceled => "canceled",
147            ExitStatus::JobTimeout => "job timed out while dispatched",
148        };
149        f.write_str(s)
150    }
151}
152/// Represents the finalized state of a job.
153#[derive(Debug, Clone, Serialize, Deserialize)]
154pub struct JobResult {
155    /// The job's ID.
156    pub job_id: Uuid,
157    /// If the job was dispatched, the ID of the supervisor it was dispatched on.
158    /// Otherwise [`None`].
159    pub supervisor_id: Option<Uuid>,
160    /// Exit status of the job.
161    pub exit_status: ExitStatus,
162    /// Optional output.
163    pub host_output: Option<String>,
164    /// Time at which the switchboard processed the termination.
165    pub terminated_at: DateTime<Utc>,
166}
167/// Represents a point in a job's lifecycle.
168#[derive(Debug, Clone, Serialize, Deserialize)]
169#[serde(tag = "type", rename_all = "snake_case")]
170pub enum JobState {
171    Queued,
172    Scheduled,
173    Initializing { stage: JobInitializingStage },
174    Ready,
175    Terminating,
176    Terminated,
177}
178impl TryFrom<JobState> for RunningJobState {
179    type Error = JobState;
180
181    fn try_from(value: JobState) -> Result<Self, Self::Error> {
182        match value {
183            JobState::Queued => Err(value),
184            JobState::Scheduled => Err(value),
185            JobState::Initializing { stage } => Ok(Self::Initializing { stage }),
186            JobState::Ready => Ok(Self::Ready),
187            JobState::Terminating => Ok(Self::Terminating),
188            JobState::Terminated => Err(value),
189        }
190    }
191}
192#[derive(Debug, Clone, Serialize, Deserialize)]
193#[serde(tag = "event_type", rename_all = "snake_case")]
194pub enum JobEvent {
195    StateTransition {
196        state: JobState,
197        status_message: Option<String>,
198    },
199    DeclareWorkloadExitStatus {
200        workload_exit_status: JobUserExitStatus,
201        workload_output: Option<String>,
202    },
203    SetExitStatus {
204        exit_status: ExitStatus,
205        status_message: Option<String>,
206    },
207    FinalizeResult {
208        job_result: JobResult,
209    },
210}
211
212#[derive(Debug, Clone, Serialize, Deserialize)]
213pub struct JobSshEndpoint {
214    pub host: String,
215    pub port: u16,
216}
217
218/// This is exclusively an API type
219#[derive(Debug, Clone, Serialize, Deserialize)]
220pub struct ExtendedJobState {
221    #[serde(flatten)]
222    pub state: JobState,
223    pub dispatched_to_supervisor: Option<Uuid>,
224    #[serde(default)] // Backwards-compatibility with clients which do not expect this field
225    pub ssh_endpoints: Option<Vec<JobSshEndpoint>>,
226    #[serde(default)] // Backwards-compatibility with clients which do not expect this field
227    pub ssh_user: Option<String>,
228    #[serde(default)] // Backwards-compatibility with clients which do not expect this field
229    pub ssh_host_keys: Option<Vec<String>>,
230    pub result: Option<JobResult>,
231}
232/// Represents the status of a job as of some point in time.
233#[derive(Debug, Clone, Serialize, Deserialize)]
234pub struct JobStatus {
235    pub state: ExtendedJobState,
236    pub as_of: DateTime<Utc>,
237}
238
239/// Note the difference with [`switchboard_supervisor`](super::switchboard_supervisor)'s
240/// [`SupervisorStatus`](super::switchboard_supervisor::ReportedSupervisorStatus): that one is concerned
241/// solely primarily with over-the-wire communication, so it does not have 'Disconnected' variants.
242/// However, on the switchboard side, we do need those, hence the differrent set of variants.
243#[derive(Debug, Clone, Serialize, Deserialize)]
244#[serde(tag = "status")]
245#[serde(rename_all = "snake_case")]
246pub enum SupervisorStatus {
247    Busy {
248        job_id: Uuid,
249        job_state: RunningJobState,
250    },
251    BusyDisconnected {
252        job_id: Uuid,
253        job_state: RunningJobState,
254    },
255    Idle,
256    Disconnected,
257}