forge_orchestration/sdk/
client.rs1use super::{forge_api_url, SdkError, SdkResult, FORGE_API_ENV};
4use reqwest::Client;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::time::Duration;
8use tracing::{debug, info};
9
10#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct JobInfo {
13 pub id: String,
15 pub name: String,
17 pub status: String,
19 pub groups: Vec<GroupInfo>,
21}
22
23#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct GroupInfo {
26 pub name: String,
28 pub count: u32,
30 pub desired: u32,
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct AllocationInfo {
37 pub id: String,
39 pub job_id: String,
41 pub task_group: String,
43 pub node_id: String,
45 pub status: String,
47}
48
49#[derive(Clone)]
51pub struct ForgeClient {
52 client: Client,
53 base_url: String,
54}
55
56impl ForgeClient {
57 pub fn from_env() -> SdkResult<Self> {
59 let base_url = forge_api_url().ok_or(SdkError::NotConfigured(FORGE_API_ENV))?;
60 Self::new(base_url)
61 }
62
63 pub fn new(base_url: impl Into<String>) -> SdkResult<Self> {
65 let client = Client::builder()
66 .timeout(Duration::from_secs(30))
67 .build()
68 .map_err(|e| SdkError::api(format!("Failed to create HTTP client: {}", e)))?;
69
70 Ok(Self {
71 client,
72 base_url: base_url.into().trim_end_matches('/').to_string(),
73 })
74 }
75
76 fn url(&self, path: &str) -> String {
77 format!("{}{}", self.base_url, path)
78 }
79
80 pub async fn health(&self) -> SdkResult<bool> {
82 let resp = self.client.get(self.url("/health")).send().await?;
83 Ok(resp.status().is_success())
84 }
85
86 pub async fn list_jobs(&self) -> SdkResult<Vec<JobInfo>> {
88 let resp = self
89 .client
90 .get(self.url("/api/v1/jobs"))
91 .send()
92 .await?
93 .error_for_status()
94 .map_err(|e| SdkError::api(e.to_string()))?;
95
96 resp.json()
97 .await
98 .map_err(|e| SdkError::api(e.to_string()))
99 }
100
101 pub async fn get_job(&self, job_id: &str) -> SdkResult<JobInfo> {
103 let resp = self
104 .client
105 .get(self.url(&format!("/api/v1/jobs/{}", job_id)))
106 .send()
107 .await?
108 .error_for_status()
109 .map_err(|e| SdkError::api(e.to_string()))?;
110
111 resp.json()
112 .await
113 .map_err(|e| SdkError::api(e.to_string()))
114 }
115
116 pub async fn get_allocations(&self, job_id: &str) -> SdkResult<Vec<AllocationInfo>> {
118 let resp = self
119 .client
120 .get(self.url(&format!("/api/v1/jobs/{}/allocations", job_id)))
121 .send()
122 .await?
123 .error_for_status()
124 .map_err(|e| SdkError::api(e.to_string()))?;
125
126 resp.json()
127 .await
128 .map_err(|e| SdkError::api(e.to_string()))
129 }
130
131 pub async fn report_metrics(&self, metrics: &MetricsReport) -> SdkResult<()> {
133 self.client
134 .post(self.url("/api/v1/metrics"))
135 .json(metrics)
136 .send()
137 .await?
138 .error_for_status()
139 .map_err(|e| SdkError::api(e.to_string()))?;
140
141 debug!("Metrics reported");
142 Ok(())
143 }
144
145 pub async fn heartbeat(&self, alloc_id: &str, task: &str) -> SdkResult<()> {
147 let payload = serde_json::json!({
148 "alloc_id": alloc_id,
149 "task": task,
150 "timestamp": chrono::Utc::now().to_rfc3339()
151 });
152
153 self.client
154 .post(self.url("/api/v1/heartbeat"))
155 .json(&payload)
156 .send()
157 .await?
158 .error_for_status()
159 .map_err(|e| SdkError::api(e.to_string()))?;
160
161 debug!(alloc_id = %alloc_id, "Heartbeat sent");
162 Ok(())
163 }
164}
165
166#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct MetricsReport {
169 pub alloc_id: String,
171 pub task: String,
173 pub cpu: f64,
175 pub memory: f64,
177 #[serde(default)]
179 pub custom: HashMap<String, f64>,
180}
181
182impl MetricsReport {
183 pub fn new(alloc_id: impl Into<String>, task: impl Into<String>) -> Self {
185 Self {
186 alloc_id: alloc_id.into(),
187 task: task.into(),
188 cpu: 0.0,
189 memory: 0.0,
190 custom: HashMap::new(),
191 }
192 }
193
194 pub fn cpu(mut self, cpu: f64) -> Self {
196 self.cpu = cpu.clamp(0.0, 1.0);
197 self
198 }
199
200 pub fn memory(mut self, memory: f64) -> Self {
202 self.memory = memory.clamp(0.0, 1.0);
203 self
204 }
205
206 pub fn metric(mut self, name: impl Into<String>, value: f64) -> Self {
208 self.custom.insert(name.into(), value);
209 self
210 }
211}
212
213pub fn start_heartbeat(client: ForgeClient, alloc_id: String, task: String, interval_secs: u64) {
215 tokio::spawn(async move {
216 let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
217
218 loop {
219 interval.tick().await;
220
221 if let Err(e) = client.heartbeat(&alloc_id, &task).await {
222 tracing::warn!(error = %e, "Heartbeat failed");
223 }
224 }
225 });
226
227 info!(interval = interval_secs, "Heartbeat task started");
228}