forge_orchestration/sdk/
client.rs

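//! Forge API client for the SDK.
//!
//! Provides [`ForgeClient`] for querying jobs and allocations, reporting
//! metrics, and sending heartbeats, plus [`start_heartbeat`] for running
//! heartbeats in a background task.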
2
3use super::{forge_api_url, SdkError, SdkResult, FORGE_API_ENV};
4use reqwest::Client;
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7use std::time::Duration;
8use tracing::{debug, info};
9
10/// Job information from Forge API
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct JobInfo {
13    /// Job ID
14    pub id: String,
15    /// Job name
16    pub name: String,
17    /// Job status
18    pub status: String,
19    /// Task groups
20    pub groups: Vec<GroupInfo>,
21}
22
23/// Task group information
24#[derive(Debug, Clone, Serialize, Deserialize)]
25pub struct GroupInfo {
26    /// Group name
27    pub name: String,
28    /// Current instance count
29    pub count: u32,
30    /// Desired instance count
31    pub desired: u32,
32}
33
34/// Allocation information
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct AllocationInfo {
37    /// Allocation ID
38    pub id: String,
39    /// Job ID
40    pub job_id: String,
41    /// Task group
42    pub task_group: String,
43    /// Node ID
44    pub node_id: String,
45    /// Status
46    pub status: String,
47}
48
49/// Forge API client
50#[derive(Clone)]
51pub struct ForgeClient {
52    client: Client,
53    base_url: String,
54}
55
56impl ForgeClient {
57    /// Create a new client from environment
58    pub fn from_env() -> SdkResult<Self> {
59        let base_url = forge_api_url().ok_or(SdkError::NotConfigured(FORGE_API_ENV))?;
60        Self::new(base_url)
61    }
62
63    /// Create a new client with explicit URL
64    pub fn new(base_url: impl Into<String>) -> SdkResult<Self> {
65        let client = Client::builder()
66            .timeout(Duration::from_secs(30))
67            .build()
68            .map_err(|e| SdkError::api(format!("Failed to create HTTP client: {}", e)))?;
69
70        Ok(Self {
71            client,
72            base_url: base_url.into().trim_end_matches('/').to_string(),
73        })
74    }
75
76    fn url(&self, path: &str) -> String {
77        format!("{}{}", self.base_url, path)
78    }
79
80    /// Check API health
81    pub async fn health(&self) -> SdkResult<bool> {
82        let resp = self.client.get(self.url("/health")).send().await?;
83        Ok(resp.status().is_success())
84    }
85
86    /// List all jobs
87    pub async fn list_jobs(&self) -> SdkResult<Vec<JobInfo>> {
88        let resp = self
89            .client
90            .get(self.url("/api/v1/jobs"))
91            .send()
92            .await?
93            .error_for_status()
94            .map_err(|e| SdkError::api(e.to_string()))?;
95
96        resp.json()
97            .await
98            .map_err(|e| SdkError::api(e.to_string()))
99    }
100
101    /// Get job by ID
102    pub async fn get_job(&self, job_id: &str) -> SdkResult<JobInfo> {
103        let resp = self
104            .client
105            .get(self.url(&format!("/api/v1/jobs/{}", job_id)))
106            .send()
107            .await?
108            .error_for_status()
109            .map_err(|e| SdkError::api(e.to_string()))?;
110
111        resp.json()
112            .await
113            .map_err(|e| SdkError::api(e.to_string()))
114    }
115
116    /// Get allocations for a job
117    pub async fn get_allocations(&self, job_id: &str) -> SdkResult<Vec<AllocationInfo>> {
118        let resp = self
119            .client
120            .get(self.url(&format!("/api/v1/jobs/{}/allocations", job_id)))
121            .send()
122            .await?
123            .error_for_status()
124            .map_err(|e| SdkError::api(e.to_string()))?;
125
126        resp.json()
127            .await
128            .map_err(|e| SdkError::api(e.to_string()))
129    }
130
131    /// Report metrics to Forge
132    pub async fn report_metrics(&self, metrics: &MetricsReport) -> SdkResult<()> {
133        self.client
134            .post(self.url("/api/v1/metrics"))
135            .json(metrics)
136            .send()
137            .await?
138            .error_for_status()
139            .map_err(|e| SdkError::api(e.to_string()))?;
140
141        debug!("Metrics reported");
142        Ok(())
143    }
144
145    /// Send a heartbeat
146    pub async fn heartbeat(&self, alloc_id: &str, task: &str) -> SdkResult<()> {
147        let payload = serde_json::json!({
148            "alloc_id": alloc_id,
149            "task": task,
150            "timestamp": chrono::Utc::now().to_rfc3339()
151        });
152
153        self.client
154            .post(self.url("/api/v1/heartbeat"))
155            .json(&payload)
156            .send()
157            .await?
158            .error_for_status()
159            .map_err(|e| SdkError::api(e.to_string()))?;
160
161        debug!(alloc_id = %alloc_id, "Heartbeat sent");
162        Ok(())
163    }
164}
165
166/// Metrics report payload
167#[derive(Debug, Clone, Serialize, Deserialize)]
168pub struct MetricsReport {
169    /// Allocation ID
170    pub alloc_id: String,
171    /// Task name
172    pub task: String,
173    /// CPU utilization (0.0 - 1.0)
174    pub cpu: f64,
175    /// Memory utilization (0.0 - 1.0)
176    pub memory: f64,
177    /// Custom metrics
178    #[serde(default)]
179    pub custom: HashMap<String, f64>,
180}
181
182impl MetricsReport {
183    /// Create a new metrics report
184    pub fn new(alloc_id: impl Into<String>, task: impl Into<String>) -> Self {
185        Self {
186            alloc_id: alloc_id.into(),
187            task: task.into(),
188            cpu: 0.0,
189            memory: 0.0,
190            custom: HashMap::new(),
191        }
192    }
193
194    /// Set CPU utilization
195    pub fn cpu(mut self, cpu: f64) -> Self {
196        self.cpu = cpu.clamp(0.0, 1.0);
197        self
198    }
199
200    /// Set memory utilization
201    pub fn memory(mut self, memory: f64) -> Self {
202        self.memory = memory.clamp(0.0, 1.0);
203        self
204    }
205
206    /// Add a custom metric
207    pub fn metric(mut self, name: impl Into<String>, value: f64) -> Self {
208        self.custom.insert(name.into(), value);
209        self
210    }
211}
212
213/// Start a background heartbeat task
214pub fn start_heartbeat(client: ForgeClient, alloc_id: String, task: String, interval_secs: u64) {
215    tokio::spawn(async move {
216        let mut interval = tokio::time::interval(Duration::from_secs(interval_secs));
217
218        loop {
219            interval.tick().await;
220
221            if let Err(e) = client.heartbeat(&alloc_id, &task).await {
222                tracing::warn!(error = %e, "Heartbeat failed");
223            }
224        }
225    });
226
227    info!(interval = interval_secs, "Heartbeat task started");
228}