forge_orchestration/
job.rs

//! Job and Task definitions for Forge orchestration
//!
//! ## Table of Contents
//! - **Job**: Top-level workload definition (similar to a K8s Deployment)
//! - **TaskGroup**: Group of related tasks with a shared lifecycle
//! - **Task**: Individual executable unit
//! - **Driver**: Execution driver (Exec, Docker, etc.)
//! - **Resources**: CPU and memory requirements, plus optional disk, network, and GPU
9
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12use uuid::Uuid;
13
14/// Execution driver for tasks
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
16pub enum Driver {
17    /// Execute a binary directly
18    Exec,
19    /// Run in a Docker container
20    Docker,
21    /// Run in a Podman container
22    Podman,
23    /// Run as a raw fork/exec
24    RawExec,
25    /// Java application
26    Java,
27    /// QEMU virtual machine
28    Qemu,
29}
30
31impl Default for Driver {
32    fn default() -> Self {
33        Self::Exec
34    }
35}
36
37/// Resource requirements for a task
38#[derive(Debug, Clone, Serialize, Deserialize)]
39pub struct Resources {
40    /// CPU in MHz
41    pub cpu: u32,
42    /// Memory in MB
43    pub memory: u32,
44    /// Disk in MB (optional)
45    pub disk: Option<u32>,
46    /// Network bandwidth in Mbps (optional)
47    pub network: Option<u32>,
48    /// GPU count (optional)
49    pub gpu: Option<u32>,
50}
51
52impl Resources {
53    /// Create new resource requirements
54    pub fn new(cpu: u32, memory: u32) -> Self {
55        Self {
56            cpu,
57            memory,
58            disk: None,
59            network: None,
60            gpu: None,
61        }
62    }
63
64    /// Set disk requirement
65    pub fn with_disk(mut self, disk: u32) -> Self {
66        self.disk = Some(disk);
67        self
68    }
69
70    /// Set network requirement
71    pub fn with_network(mut self, network: u32) -> Self {
72        self.network = Some(network);
73        self
74    }
75
76    /// Set GPU requirement
77    pub fn with_gpu(mut self, gpu: u32) -> Self {
78        self.gpu = Some(gpu);
79        self
80    }
81}
82
83impl Default for Resources {
84    fn default() -> Self {
85        Self::new(100, 128)
86    }
87}
88
89/// Scaling configuration for a task group
90#[derive(Debug, Clone, Serialize, Deserialize)]
91pub struct ScalingConfig {
92    /// Minimum instance count
93    pub min: u32,
94    /// Maximum instance count
95    pub max: u32,
96    /// Desired instance count
97    pub desired: u32,
98}
99
100impl ScalingConfig {
101    /// Create new scaling config
102    pub fn new(min: u32, max: u32) -> Self {
103        Self {
104            min,
105            max,
106            desired: min,
107        }
108    }
109
110    /// Set desired count
111    pub fn with_desired(mut self, desired: u32) -> Self {
112        self.desired = desired.clamp(self.min, self.max);
113        self
114    }
115}
116
117impl Default for ScalingConfig {
118    fn default() -> Self {
119        Self::new(1, 1)
120    }
121}
122
123/// Individual task within a task group
124#[derive(Debug, Clone, Serialize, Deserialize)]
125pub struct Task {
126    /// Task name
127    pub name: String,
128    /// Execution driver
129    pub driver: Driver,
130    /// Command to execute
131    pub command: Option<String>,
132    /// Command arguments
133    pub args: Vec<String>,
134    /// Environment variables
135    pub env: HashMap<String, String>,
136    /// Resource requirements
137    pub resources: Resources,
138    /// Artifact URLs to download
139    pub artifacts: Vec<String>,
140    /// Health check configuration
141    pub health_check: Option<HealthCheck>,
142    /// Task metadata
143    pub metadata: HashMap<String, String>,
144}
145
146impl Task {
147    /// Create a new task with the given name
148    pub fn new(name: impl Into<String>) -> Self {
149        Self {
150            name: name.into(),
151            driver: Driver::default(),
152            command: None,
153            args: Vec::new(),
154            env: HashMap::new(),
155            resources: Resources::default(),
156            artifacts: Vec::new(),
157            health_check: None,
158            metadata: HashMap::new(),
159        }
160    }
161
162    /// Set the execution driver
163    pub fn driver(mut self, driver: Driver) -> Self {
164        self.driver = driver;
165        self
166    }
167
168    /// Set the command to execute
169    pub fn command(mut self, cmd: impl Into<String>) -> Self {
170        self.command = Some(cmd.into());
171        self
172    }
173
174    /// Set command arguments
175    pub fn args(mut self, args: Vec<impl Into<String>>) -> Self {
176        self.args = args.into_iter().map(|a| a.into()).collect();
177        self
178    }
179
180    /// Add an environment variable
181    pub fn env(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
182        self.env.insert(key.into(), value.into());
183        self
184    }
185
186    /// Set resource requirements (CPU MHz, Memory MB)
187    pub fn resources(mut self, cpu: u32, memory: u32) -> Self {
188        self.resources = Resources::new(cpu, memory);
189        self
190    }
191
192    /// Set full resource configuration
193    pub fn with_resources(mut self, resources: Resources) -> Self {
194        self.resources = resources;
195        self
196    }
197
198    /// Add an artifact URL
199    pub fn artifact(mut self, url: impl Into<String>) -> Self {
200        self.artifacts.push(url.into());
201        self
202    }
203
204    /// Set health check
205    pub fn health_check(mut self, check: HealthCheck) -> Self {
206        self.health_check = Some(check);
207        self
208    }
209
210    /// Add metadata
211    pub fn metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
212        self.metadata.insert(key.into(), value.into());
213        self
214    }
215}
216
217/// Health check configuration
218#[derive(Debug, Clone, Serialize, Deserialize)]
219pub struct HealthCheck {
220    /// Check type
221    pub check_type: HealthCheckType,
222    /// Check interval in seconds
223    pub interval_secs: u32,
224    /// Timeout in seconds
225    pub timeout_secs: u32,
226    /// Path for HTTP checks
227    pub path: Option<String>,
228    /// Port for network checks
229    pub port: Option<u16>,
230}
231
232impl HealthCheck {
233    /// Create an HTTP health check
234    pub fn http(path: impl Into<String>, port: u16) -> Self {
235        Self {
236            check_type: HealthCheckType::Http,
237            interval_secs: 10,
238            timeout_secs: 2,
239            path: Some(path.into()),
240            port: Some(port),
241        }
242    }
243
244    /// Create a TCP health check
245    pub fn tcp(port: u16) -> Self {
246        Self {
247            check_type: HealthCheckType::Tcp,
248            interval_secs: 10,
249            timeout_secs: 2,
250            path: None,
251            port: Some(port),
252        }
253    }
254
255    /// Create a script health check
256    pub fn script() -> Self {
257        Self {
258            check_type: HealthCheckType::Script,
259            interval_secs: 30,
260            timeout_secs: 5,
261            path: None,
262            port: None,
263        }
264    }
265
266    /// Set check interval
267    pub fn interval(mut self, secs: u32) -> Self {
268        self.interval_secs = secs;
269        self
270    }
271
272    /// Set timeout
273    pub fn timeout(mut self, secs: u32) -> Self {
274        self.timeout_secs = secs;
275        self
276    }
277}
278
279/// Health check type
280#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
281pub enum HealthCheckType {
282    /// HTTP GET check
283    Http,
284    /// TCP connection check
285    Tcp,
286    /// Script execution check
287    Script,
288    /// gRPC health check
289    Grpc,
290}
291
292/// Group of related tasks
293#[derive(Debug, Clone, Serialize, Deserialize)]
294pub struct TaskGroup {
295    /// Group name
296    pub name: String,
297    /// Tasks in this group
298    pub tasks: Vec<Task>,
299    /// Scaling configuration
300    pub scaling: ScalingConfig,
301    /// Restart policy
302    pub restart_policy: RestartPolicy,
303    /// Network configuration
304    pub network: Option<NetworkConfig>,
305    /// Group metadata
306    pub metadata: HashMap<String, String>,
307}
308
309impl TaskGroup {
310    /// Create a new task group
311    pub fn new(name: impl Into<String>) -> Self {
312        Self {
313            name: name.into(),
314            tasks: Vec::new(),
315            scaling: ScalingConfig::default(),
316            restart_policy: RestartPolicy::default(),
317            network: None,
318            metadata: HashMap::new(),
319        }
320    }
321
322    /// Add a task to the group
323    pub fn task(mut self, task: Task) -> Self {
324        self.tasks.push(task);
325        self
326    }
327
328    /// Set scaling configuration
329    pub fn scaling(mut self, min: u32, max: u32) -> Self {
330        self.scaling = ScalingConfig::new(min, max);
331        self
332    }
333
334    /// Set restart policy
335    pub fn restart_policy(mut self, policy: RestartPolicy) -> Self {
336        self.restart_policy = policy;
337        self
338    }
339
340    /// Set network configuration
341    pub fn network(mut self, config: NetworkConfig) -> Self {
342        self.network = Some(config);
343        self
344    }
345}
346
347/// Restart policy for task groups
348#[derive(Debug, Clone, Serialize, Deserialize)]
349pub struct RestartPolicy {
350    /// Number of restart attempts
351    pub attempts: u32,
352    /// Delay between restarts in seconds
353    pub delay_secs: u32,
354    /// Restart mode
355    pub mode: RestartMode,
356}
357
358impl Default for RestartPolicy {
359    fn default() -> Self {
360        Self {
361            attempts: 3,
362            delay_secs: 15,
363            mode: RestartMode::Fail,
364        }
365    }
366}
367
368/// Restart mode
369#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
370pub enum RestartMode {
371    /// Fail after max attempts
372    Fail,
373    /// Keep retrying indefinitely
374    Delay,
375}
376
377/// Network configuration for task groups
378#[derive(Debug, Clone, Serialize, Deserialize)]
379pub struct NetworkConfig {
380    /// Network mode
381    pub mode: NetworkMode,
382    /// Port mappings
383    pub ports: Vec<PortMapping>,
384}
385
386impl NetworkConfig {
387    /// Create bridge network config
388    pub fn bridge() -> Self {
389        Self {
390            mode: NetworkMode::Bridge,
391            ports: Vec::new(),
392        }
393    }
394
395    /// Create host network config
396    pub fn host() -> Self {
397        Self {
398            mode: NetworkMode::Host,
399            ports: Vec::new(),
400        }
401    }
402
403    /// Add a port mapping
404    pub fn port(mut self, label: impl Into<String>, to: u16) -> Self {
405        self.ports.push(PortMapping {
406            label: label.into(),
407            to,
408            static_port: None,
409        });
410        self
411    }
412
413    /// Add a static port mapping
414    pub fn static_port(mut self, label: impl Into<String>, port: u16) -> Self {
415        self.ports.push(PortMapping {
416            label: label.into(),
417            to: port,
418            static_port: Some(port),
419        });
420        self
421    }
422}
423
424/// Network mode
425#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
426pub enum NetworkMode {
427    /// Bridge networking
428    Bridge,
429    /// Host networking
430    Host,
431    /// No networking
432    None,
433}
434
435/// Port mapping configuration
436#[derive(Debug, Clone, Serialize, Deserialize)]
437pub struct PortMapping {
438    /// Port label
439    pub label: String,
440    /// Container port
441    pub to: u16,
442    /// Static host port (if any)
443    pub static_port: Option<u16>,
444}
445
446/// Job state
447#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
448pub enum JobState {
449    /// Job is pending submission
450    Pending,
451    /// Job is running
452    Running,
453    /// Job completed successfully
454    Complete,
455    /// Job failed
456    Failed,
457    /// Job was stopped
458    Stopped,
459}
460
461/// Top-level job definition
462#[derive(Debug, Clone, Serialize, Deserialize)]
463pub struct Job {
464    /// Unique job ID
465    pub id: String,
466    /// Job name
467    pub name: String,
468    /// Job type
469    pub job_type: JobType,
470    /// Task groups
471    pub groups: Vec<TaskGroup>,
472    /// Job priority (0-100)
473    pub priority: u8,
474    /// Datacenter constraints
475    pub datacenters: Vec<String>,
476    /// Job state
477    pub state: JobState,
478    /// Job metadata
479    pub metadata: HashMap<String, String>,
480    /// Creation timestamp
481    pub created_at: chrono::DateTime<chrono::Utc>,
482}
483
484impl Job {
485    /// Create a new job with the given name
486    pub fn new(name: impl Into<String>) -> Self {
487        let name = name.into();
488        Self {
489            id: format!("{}-{}", &name, &Uuid::new_v4().to_string()[..8]),
490            name,
491            job_type: JobType::Service,
492            groups: Vec::new(),
493            priority: 50,
494            datacenters: vec!["dc1".to_string()],
495            state: JobState::Pending,
496            metadata: HashMap::new(),
497            created_at: chrono::Utc::now(),
498        }
499    }
500
501    /// Set job type
502    pub fn job_type(mut self, job_type: JobType) -> Self {
503        self.job_type = job_type;
504        self
505    }
506
507    /// Add a task group with a single task
508    pub fn with_group(mut self, group_name: impl Into<String>, task: Task) -> Self {
509        let group = TaskGroup::new(group_name).task(task);
510        self.groups.push(group);
511        self
512    }
513
514    /// Add a full task group
515    pub fn group(mut self, group: TaskGroup) -> Self {
516        self.groups.push(group);
517        self
518    }
519
520    /// Set job priority
521    pub fn priority(mut self, priority: u8) -> Self {
522        self.priority = priority.min(100);
523        self
524    }
525
526    /// Set datacenters
527    pub fn datacenters(mut self, dcs: Vec<impl Into<String>>) -> Self {
528        self.datacenters = dcs.into_iter().map(|d| d.into()).collect();
529        self
530    }
531
532    /// Add metadata
533    pub fn metadata(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
534        self.metadata.insert(key.into(), value.into());
535        self
536    }
537
538    /// Get total task count across all groups
539    pub fn task_count(&self) -> usize {
540        self.groups.iter().map(|g| g.tasks.len()).sum()
541    }
542
543    /// Get total desired instance count
544    pub fn desired_count(&self) -> u32 {
545        self.groups.iter().map(|g| g.scaling.desired).sum()
546    }
547}
548
549/// Job type
550#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
551pub enum JobType {
552    /// Long-running service
553    Service,
554    /// Batch job (runs to completion)
555    Batch,
556    /// System job (runs on all nodes)
557    System,
558    /// Parameterized job (dispatch-based)
559    Parameterized,
560}
561
562impl Default for JobType {
563    fn default() -> Self {
564        Self::Service
565    }
566}
567
#[cfg(test)]
mod tests {
    use super::*;

    /// The fluent `Job` builder keeps the name, priority and the one group/task it was given.
    #[test]
    fn test_job_builder() {
        let server = Task::new("server")
            .driver(Driver::Exec)
            .command("/usr/bin/server")
            .args(vec!["--port", "8080"])
            .resources(500, 256);

        let job = Job::new("my-service")
            .job_type(JobType::Service)
            .priority(75)
            .with_group("api", server);

        assert_eq!(job.name, "my-service");
        assert_eq!(job.priority, 75);
        assert_eq!(job.groups.len(), 1);
        assert_eq!(job.groups[0].tasks[0].name, "server");
    }

    /// `TaskGroup::scaling` stores the requested bounds.
    #[test]
    fn test_task_group_scaling() {
        let worker = Task::new("worker");
        let group = TaskGroup::new("workers").task(worker).scaling(2, 10);

        assert_eq!(group.scaling.min, 2);
        assert_eq!(group.scaling.max, 10);
    }

    /// Optional resources set through the builder show up next to the mandatory ones.
    #[test]
    fn test_resources_builder() {
        let res = Resources::new(1000, 512).with_gpu(2).with_disk(10000);

        assert_eq!(res.cpu, 1000);
        assert_eq!(res.memory, 512);
        assert_eq!(res.gpu, Some(2));
        assert_eq!(res.disk, Some(10000));
    }
}