Skip to main content

mockforge_registry_server/deployment/
flyio.rs

1//! Fly.io API integration for deploying mock services
2
3use anyhow::{Context, Result};
4use serde::{Deserialize, Serialize};
5use std::collections::HashMap;
6
/// Client for the Fly.io APIs used to deploy and manage mock services.
///
/// Every request made through this client authenticates with
/// `Authorization: Bearer <api_token>`.
pub struct FlyioClient {
    /// Fly.io API token, sent as the bearer credential on each request.
    api_token: String,
    /// Root URL that the `/v1/apps/...` REST paths are appended to.
    base_url: String,
}
12
13#[derive(Debug, Serialize, Deserialize)]
14pub struct FlyioApp {
15    pub id: String,
16    #[serde(default)]
17    pub name: Option<String>,
18    #[serde(default)]
19    pub hostname: Option<String>,
20    #[serde(default)]
21    pub organization: Option<FlyioOrganization>,
22    #[serde(default)]
23    pub status: Option<String>,
24}
25
26#[derive(Debug, Serialize, Deserialize)]
27pub struct FlyioOrganization {
28    pub id: String,
29    #[serde(default)]
30    pub name: Option<String>,
31    #[serde(default)]
32    pub slug: Option<String>,
33}
34
35#[derive(Debug, Serialize, Deserialize)]
36pub struct FlyioMachine {
37    pub id: String,
38    pub name: String,
39    pub state: String,
40    pub region: String,
41    pub image_ref: Option<FlyioImageRef>,
42    pub config: FlyioMachineConfig,
43}
44
45#[derive(Debug, Serialize, Deserialize)]
46pub struct FlyioImageRef {
47    pub registry: String,
48    pub repository: String,
49    pub tag: String,
50    pub digest: String,
51}
52
53#[derive(Debug, Serialize, Deserialize)]
54pub struct FlyioMachineConfig {
55    pub image: String,
56    pub env: HashMap<String, String>,
57    pub services: Vec<FlyioService>,
58    pub checks: Option<HashMap<String, FlyioCheck>>,
59    /// VM resource sizing. `None` means accept Fly's API default
60    /// (currently `shared-cpu-1x:256MB`); existing deployments and
61    /// non-plugin-enabled machines stay on that default. Cloud
62    /// plugins requires explicit sizing — see
63    /// `docs/plugins/security/cloud-runtime-sidecar-spike.md` for
64    /// the measured-on-Fly numbers and tier table.
65    #[serde(default, skip_serializing_if = "Option::is_none")]
66    pub guest: Option<FlyioGuest>,
67}
68
69/// VM sizing block sent to Fly Machines API as `config.guest`.
70/// Mirrors the API's expected JSON shape:
71/// <https://fly.io/docs/machines/api/machines-resource/#machine-config-object-properties>
72#[derive(Debug, Clone, Serialize, Deserialize)]
73pub struct FlyioGuest {
74    /// `"shared"` or `"performance"`.
75    pub cpu_kind: String,
76    pub cpus: u32,
77    pub memory_mb: u32,
78}
79
80impl FlyioGuest {
81    /// `shared-cpu-1x:256MB` — Fly's default for the registry today.
82    /// Used when no plugins are attached so existing hosted-mocks
83    /// keep their current footprint.
84    pub fn shared_256() -> Self {
85        Self {
86            cpu_kind: "shared".into(),
87            cpus: 1,
88            memory_mb: 256,
89        }
90    }
91
92    /// `shared-cpu-1x:512MB` — Pro tier with cloud plugins. Real-Fly
93    /// measurement on the spike showed mockforge + sidecar idle at
94    /// ~166 MB / 256 MB (65%), with only 71 MB headroom; bumping to
95    /// 512 MB gives 316 MB headroom — comfortable for ≤5 plugins
96    /// at typical sizes.
97    pub fn shared_512() -> Self {
98        Self {
99            cpu_kind: "shared".into(),
100            cpus: 1,
101            memory_mb: 512,
102        }
103    }
104
105    /// `shared-cpu-1x:1024MB` — Team tier with cloud plugins
106    /// (≤25 plugins per mock).
107    pub fn shared_1024() -> Self {
108        Self {
109            cpu_kind: "shared".into(),
110            cpus: 1,
111            memory_mb: 1024,
112        }
113    }
114
115    /// `shared-cpu-2x:2048MB` — Enterprise tier with metered
116    /// pass-through. Two cores so heavy plugin workloads don't
117    /// starve mockforge's request path.
118    pub fn shared_2x_2048() -> Self {
119        Self {
120            cpu_kind: "shared".into(),
121            cpus: 2,
122            memory_mb: 2048,
123        }
124    }
125
126    /// Pick the right Fly machine size for a hosted-mock deployment
127    /// based on org plan + whether cloud plugins are attached.
128    ///
129    /// Without plugins, every tier stays on the existing 256 MB
130    /// default — no behavior change for legacy deployments.
131    ///
132    /// With plugins, the floor bumps according to the
133    /// real-microVM measurements in
134    /// `docs/plugins/security/cloud-runtime-sidecar-spike.md`:
135    /// idle on 256 MB sits at ~166 MB / 65% utilization with only
136    /// ~71 MB headroom. That's not enough for the plugin runtime to
137    /// grow into without OOM-killing mockforge. 512 MB gives 316 MB
138    /// headroom which fits ≤5 plugins comfortably; Team's larger
139    /// plugin count needs 1024 MB.
140    pub fn for_hosted_mock(plan: &str, plugins_enabled: bool) -> Self {
141        if !plugins_enabled {
142            return Self::shared_256();
143        }
144        match plan.to_lowercase().as_str() {
145            "free" => Self::shared_256(), // Plugins not available on Free.
146            "pro" => Self::shared_512(),
147            "team" => Self::shared_1024(),
148            // Unknown / future plans (e.g. "enterprise") get the
149            // largest currently-available shape. The handler should
150            // refuse plugin attachment for unknown plans before
151            // ever reaching this; defaulting big is fail-safe.
152            _ => Self::shared_2x_2048(),
153        }
154    }
155}
156
157/// Registry authentication for pulling private Docker images
158#[derive(Debug, Serialize, Deserialize)]
159pub struct FlyioRegistryAuth {
160    pub server: String,
161    pub username: String,
162    pub password: String,
163}
164
165#[derive(Debug, Serialize, Deserialize)]
166pub struct FlyioService {
167    pub protocol: String,
168    pub internal_port: u16,
169    pub ports: Vec<FlyioPort>,
170}
171
172#[derive(Debug, Serialize, Deserialize)]
173pub struct FlyioPort {
174    pub port: u16,
175    pub handlers: Vec<String>,
176}
177
178#[derive(Debug, Serialize, Deserialize)]
179pub struct FlyioCheck {
180    #[serde(rename = "type")]
181    pub check_type: String,
182    pub port: u16,
183    pub grace_period: String,
184    pub interval: String,
185    pub method: String,
186    pub timeout: String,
187    pub tls_skip_verify: bool,
188    pub path: Option<String>,
189}
190
191impl FlyioClient {
192    pub fn new(api_token: String) -> Self {
193        Self {
194            api_token,
195            base_url: "https://api.machines.dev".to_string(),
196        }
197    }
198
199    /// Get the API token (used for Fly.io registry auth)
200    pub fn api_token(&self) -> &str {
201        &self.api_token
202    }
203
204    /// Create a new Fly.io app
205    pub async fn create_app(&self, app_name: &str, org_slug: &str) -> Result<FlyioApp> {
206        let client = reqwest::Client::new();
207        let url = format!("{}/v1/apps", self.base_url);
208
209        let payload = serde_json::json!({
210            "app_name": app_name,
211            "org_slug": org_slug,
212        });
213
214        let response = client
215            .post(&url)
216            .header("Authorization", format!("Bearer {}", self.api_token))
217            .header("Content-Type", "application/json")
218            .json(&payload)
219            .send()
220            .await
221            .context("Failed to create Fly.io app")?;
222
223        let status = response.status();
224        if !status.is_success() {
225            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
226
227            // Handle "Name has already been taken" (422) — the app exists, fetch it instead
228            if status.as_u16() == 422 && error_text.contains("already been taken") {
229                tracing::info!("Fly.io app '{}' already exists, fetching existing app", app_name);
230                return self.get_app(app_name).await;
231            }
232
233            anyhow::bail!("Failed to create Fly.io app: {} - {}", status, error_text);
234        }
235
236        let app: FlyioApp = response.json().await.context("Failed to parse Fly.io app response")?;
237
238        Ok(app)
239    }
240
241    /// Create a machine (instance) for the app
242    pub async fn create_machine(
243        &self,
244        app_name: &str,
245        config: FlyioMachineConfig,
246        region: &str,
247        registry_auth: Option<FlyioRegistryAuth>,
248    ) -> Result<FlyioMachine> {
249        let client = reqwest::Client::new();
250        let url = format!("{}/v1/apps/{}/machines", self.base_url, app_name);
251
252        let mut payload = serde_json::json!({
253            "config": config,
254            "region": region,
255        });
256        if let Some(auth) = registry_auth {
257            payload["config"]["image_registry_auth"] =
258                serde_json::to_value(auth).context("Failed to serialize registry auth")?;
259        }
260
261        let response = client
262            .post(&url)
263            .header("Authorization", format!("Bearer {}", self.api_token))
264            .header("Content-Type", "application/json")
265            .json(&payload)
266            .send()
267            .await
268            .context("Failed to create Fly.io machine")?;
269
270        let status = response.status();
271        if !status.is_success() {
272            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
273            anyhow::bail!("Failed to create Fly.io machine: {} - {}", status, error_text);
274        }
275
276        let machine: FlyioMachine =
277            response.json().await.context("Failed to parse Fly.io machine response")?;
278
279        Ok(machine)
280    }
281
282    /// Update a machine's configuration (image, env vars, etc.)
283    pub async fn update_machine(
284        &self,
285        app_name: &str,
286        machine_id: &str,
287        config: FlyioMachineConfig,
288        registry_auth: Option<FlyioRegistryAuth>,
289    ) -> Result<FlyioMachine> {
290        let client = reqwest::Client::new();
291        let url = format!("{}/v1/apps/{}/machines/{}", self.base_url, app_name, machine_id);
292
293        let mut payload = serde_json::json!({
294            "config": config,
295        });
296        if let Some(auth) = registry_auth {
297            payload["config"]["image_registry_auth"] =
298                serde_json::to_value(auth).context("Failed to serialize registry auth")?;
299        }
300
301        let response = client
302            .post(&url)
303            .header("Authorization", format!("Bearer {}", self.api_token))
304            .header("Content-Type", "application/json")
305            .json(&payload)
306            .send()
307            .await
308            .context("Failed to update Fly.io machine")?;
309
310        let status = response.status();
311        if !status.is_success() {
312            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
313            anyhow::bail!("Failed to update Fly.io machine: {} - {}", status, error_text);
314        }
315
316        let machine: FlyioMachine =
317            response.json().await.context("Failed to parse Fly.io machine response")?;
318
319        Ok(machine)
320    }
321
322    /// Get machine status
323    pub async fn get_machine(&self, app_name: &str, machine_id: &str) -> Result<FlyioMachine> {
324        let client = reqwest::Client::new();
325        let url = format!("{}/v1/apps/{}/machines/{}", self.base_url, app_name, machine_id);
326
327        let response = client
328            .get(&url)
329            .header("Authorization", format!("Bearer {}", self.api_token))
330            .send()
331            .await
332            .context("Failed to get Fly.io machine")?;
333
334        let status = response.status();
335        if !status.is_success() {
336            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
337            anyhow::bail!("Failed to get Fly.io machine: {} - {}", status, error_text);
338        }
339
340        let machine: FlyioMachine =
341            response.json().await.context("Failed to parse Fly.io machine response")?;
342
343        Ok(machine)
344    }
345
346    /// Stop a running machine (graceful shutdown, keeps the machine around
347    /// so it can be restarted later without recreating its config).
348    pub async fn stop_machine(&self, app_name: &str, machine_id: &str) -> Result<()> {
349        let client = reqwest::Client::new();
350        let url = format!("{}/v1/apps/{}/machines/{}/stop", self.base_url, app_name, machine_id);
351
352        let response = client
353            .post(&url)
354            .header("Authorization", format!("Bearer {}", self.api_token))
355            .header("Content-Type", "application/json")
356            .json(&serde_json::json!({}))
357            .send()
358            .await
359            .context("Failed to stop Fly.io machine")?;
360
361        let status = response.status();
362        if !status.is_success() {
363            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
364            anyhow::bail!("Failed to stop Fly.io machine: {} - {}", status, error_text);
365        }
366
367        Ok(())
368    }
369
370    /// Start a stopped machine.
371    pub async fn start_machine(&self, app_name: &str, machine_id: &str) -> Result<()> {
372        let client = reqwest::Client::new();
373        let url = format!("{}/v1/apps/{}/machines/{}/start", self.base_url, app_name, machine_id);
374
375        let response = client
376            .post(&url)
377            .header("Authorization", format!("Bearer {}", self.api_token))
378            .header("Content-Type", "application/json")
379            .send()
380            .await
381            .context("Failed to start Fly.io machine")?;
382
383        let status = response.status();
384        if !status.is_success() {
385            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
386            anyhow::bail!("Failed to start Fly.io machine: {} - {}", status, error_text);
387        }
388
389        Ok(())
390    }
391
392    /// Delete a machine
393    pub async fn delete_machine(&self, app_name: &str, machine_id: &str) -> Result<()> {
394        let client = reqwest::Client::new();
395        let url = format!("{}/v1/apps/{}/machines/{}", self.base_url, app_name, machine_id);
396
397        let response = client
398            .delete(&url)
399            .header("Authorization", format!("Bearer {}", self.api_token))
400            .send()
401            .await
402            .context("Failed to delete Fly.io machine")?;
403
404        let status = response.status();
405        if !status.is_success() {
406            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
407            anyhow::bail!("Failed to delete Fly.io machine: {} - {}", status, error_text);
408        }
409
410        Ok(())
411    }
412
413    /// Delete a Fly.io app
414    pub async fn delete_app(&self, app_name: &str) -> Result<()> {
415        let client = reqwest::Client::new();
416        let url = format!("{}/v1/apps/{}", self.base_url, app_name);
417
418        let response = client
419            .delete(&url)
420            .header("Authorization", format!("Bearer {}", self.api_token))
421            .send()
422            .await
423            .context("Failed to delete Fly.io app")?;
424
425        let status = response.status();
426        if !status.is_success() {
427            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
428            anyhow::bail!("Failed to delete Fly.io app: {} - {}", status, error_text);
429        }
430
431        Ok(())
432    }
433
434    /// Allocate a shared IPv4 and a dedicated IPv6 address for an app
435    pub async fn allocate_ips(&self, app_name: &str) -> Result<()> {
436        let client = reqwest::Client::new();
437        let graphql_url = "https://api.fly.io/graphql";
438
439        // Allocate shared IPv4
440        let ipv4_query = serde_json::json!({
441            "query": "mutation($input: AllocateIPAddressInput!) { allocateIpAddress(input: $input) { ipAddress { id address type } } }",
442            "variables": {
443                "input": {
444                    "appId": app_name,
445                    "type": "shared_v4"
446                }
447            }
448        });
449
450        let response = client
451            .post(graphql_url)
452            .header("Authorization", format!("Bearer {}", self.api_token))
453            .json(&ipv4_query)
454            .send()
455            .await
456            .context("Failed to allocate shared IPv4")?;
457
458        if !response.status().is_success() {
459            let error_text = response.text().await.unwrap_or_default();
460            anyhow::bail!("Failed to allocate shared IPv4: {}", error_text);
461        }
462
463        // Allocate IPv6
464        let ipv6_query = serde_json::json!({
465            "query": "mutation($input: AllocateIPAddressInput!) { allocateIpAddress(input: $input) { ipAddress { id address type } } }",
466            "variables": {
467                "input": {
468                    "appId": app_name,
469                    "type": "v6"
470                }
471            }
472        });
473
474        let response = client
475            .post(graphql_url)
476            .header("Authorization", format!("Bearer {}", self.api_token))
477            .json(&ipv6_query)
478            .send()
479            .await
480            .context("Failed to allocate IPv6")?;
481
482        if !response.status().is_success() {
483            let error_text = response.text().await.unwrap_or_default();
484            anyhow::bail!("Failed to allocate IPv6: {}", error_text);
485        }
486
487        Ok(())
488    }
489
490    /// Get app info
491    pub async fn get_app(&self, app_name: &str) -> Result<FlyioApp> {
492        let client = reqwest::Client::new();
493        let url = format!("{}/v1/apps/{}", self.base_url, app_name);
494
495        let response = client
496            .get(&url)
497            .header("Authorization", format!("Bearer {}", self.api_token))
498            .send()
499            .await
500            .context("Failed to get Fly.io app")?;
501
502        let status = response.status();
503        if !status.is_success() {
504            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
505            anyhow::bail!("Failed to get Fly.io app: {} - {}", status, error_text);
506        }
507
508        let app: FlyioApp = response.json().await.context("Failed to parse Fly.io app response")?;
509
510        Ok(app)
511    }
512
513    /// Add a custom domain certificate to an app
514    ///
515    /// This tells Fly.io to provision a Let's Encrypt TLS certificate for the
516    /// given hostname and route traffic for that hostname to this app via SNI.
517    pub async fn add_certificate(&self, app_name: &str, hostname: &str) -> Result<()> {
518        let client = reqwest::Client::new();
519        let graphql_url = "https://api.fly.io/graphql";
520
521        let query = serde_json::json!({
522            "query": "mutation($appId: ID!, $hostname: String!) { addCertificate(appId: $appId, hostname: $hostname) { certificate { id hostname } } }",
523            "variables": {
524                "appId": app_name,
525                "hostname": hostname
526            }
527        });
528
529        let response = client
530            .post(graphql_url)
531            .header("Authorization", format!("Bearer {}", self.api_token))
532            .json(&query)
533            .send()
534            .await
535            .context("Failed to add certificate")?;
536
537        if !response.status().is_success() {
538            let error_text = response.text().await.unwrap_or_default();
539            anyhow::bail!("Failed to add certificate for {}: {}", hostname, error_text);
540        }
541
542        // Check for GraphQL-level errors
543        let body: serde_json::Value =
544            response.json().await.context("Failed to parse certificate response")?;
545        if let Some(errors) = body.get("errors") {
546            // "already exists" is fine — idempotent
547            let err_str = errors.to_string();
548            if !err_str.contains("already exists") {
549                anyhow::bail!("Failed to add certificate for {}: {}", hostname, err_str);
550            }
551        }
552
553        Ok(())
554    }
555
556    /// Remove a custom domain certificate from an app
557    pub async fn delete_certificate(&self, app_name: &str, hostname: &str) -> Result<()> {
558        let client = reqwest::Client::new();
559        let graphql_url = "https://api.fly.io/graphql";
560
561        let query = serde_json::json!({
562            "query": "mutation($appId: ID!, $hostname: String!) { deleteCertificate(appId: $appId, hostname: $hostname) { app { name } } }",
563            "variables": {
564                "appId": app_name,
565                "hostname": hostname
566            }
567        });
568
569        let response = client
570            .post(graphql_url)
571            .header("Authorization", format!("Bearer {}", self.api_token))
572            .json(&query)
573            .send()
574            .await
575            .context("Failed to delete certificate")?;
576
577        if !response.status().is_success() {
578            let error_text = response.text().await.unwrap_or_default();
579            anyhow::bail!("Failed to delete certificate for {}: {}", hostname, error_text);
580        }
581
582        Ok(())
583    }
584
585    /// List machines for an app
586    pub async fn list_machines(&self, app_name: &str) -> Result<Vec<FlyioMachine>> {
587        let client = reqwest::Client::new();
588        let url = format!("{}/v1/apps/{}/machines", self.base_url, app_name);
589
590        let response = client
591            .get(&url)
592            .header("Authorization", format!("Bearer {}", self.api_token))
593            .send()
594            .await
595            .context("Failed to list Fly.io machines")?;
596
597        let status = response.status();
598        if !status.is_success() {
599            let error_text = response.text().await.unwrap_or_else(|_| "Unknown error".to_string());
600            anyhow::bail!("Failed to list Fly.io machines: {} - {}", status, error_text);
601        }
602
603        let machines: Vec<FlyioMachine> =
604            response.json().await.context("Failed to parse Fly.io machines response")?;
605
606        Ok(machines)
607    }
608}
609
#[cfg(test)]
mod guest_tests {
    use super::*;

    /// Builds a placeholder machine config in which only `guest` varies.
    fn config_with_guest(guest: Option<FlyioGuest>) -> FlyioMachineConfig {
        FlyioMachineConfig {
            image: "img".into(),
            env: HashMap::new(),
            services: vec![],
            checks: None,
            guest,
        }
    }

    /// Sizing chosen for `plan` when cloud plugins are attached.
    fn with_plugins(plan: &str) -> FlyioGuest {
        FlyioGuest::for_hosted_mock(plan, true)
    }

    #[test]
    fn for_hosted_mock_no_plugins_uses_legacy_256() {
        // Hosted-mocks without cloud plugins must keep their current
        // 256 MB footprint, so legacy deployments never see a surprise
        // pricing bump.
        let plans = ["free", "pro", "team", "enterprise", "weird-future-plan"];
        for plan in plans {
            let sizing = FlyioGuest::for_hosted_mock(plan, false);
            assert_eq!(sizing.memory_mb, 256, "plan {} without plugins must stay at 256MB", plan);
            assert_eq!(sizing.cpu_kind, "shared");
            assert_eq!(sizing.cpus, 1);
        }
    }

    #[test]
    fn for_hosted_mock_pro_with_plugins_bumps_to_512() {
        let sizing = with_plugins("pro");
        assert_eq!(sizing.memory_mb, 512);
        assert_eq!(sizing.cpus, 1);
    }

    #[test]
    fn for_hosted_mock_team_with_plugins_bumps_to_1024() {
        let sizing = with_plugins("team");
        assert_eq!(sizing.memory_mb, 1024);
        assert_eq!(sizing.cpus, 1);
    }

    #[test]
    fn for_hosted_mock_free_with_plugins_stays_256() {
        // Plugins aren't available on Free and the handler rejects the
        // request first, but if this path is reached anyway a Free org's
        // machine must not be upsized by accident.
        assert_eq!(with_plugins("free").memory_mb, 256);
    }

    #[test]
    fn for_hosted_mock_unknown_plan_with_plugins_fails_safe_high() {
        // Unknown plans (e.g. a future "enterprise") get the largest
        // shape instead of being under-provisioned.
        let sizing = with_plugins("enterprise");
        assert_eq!(sizing.memory_mb, 2048);
        assert_eq!(sizing.cpus, 2);
    }

    #[test]
    fn for_hosted_mock_plan_string_is_case_insensitive() {
        assert_eq!(with_plugins("PRO").memory_mb, 512);
        assert_eq!(with_plugins("Team").memory_mb, 1024);
    }

    #[test]
    fn machine_config_serialization_omits_guest_when_none() {
        // Call sites that pass `guest: None` must produce JSON with no
        // `guest` key at all, so that Fly's API default applies.
        let json = serde_json::to_string(&config_with_guest(None)).unwrap();
        assert!(!json.contains("guest"), "guest=None must be omitted, got {}", json);
    }

    #[test]
    fn machine_config_serializes_guest_when_set() {
        let cfg = config_with_guest(Some(FlyioGuest::shared_512()));
        let json = serde_json::to_string(&cfg).unwrap();
        for needle in ["\"guest\"", "\"memory_mb\":512", "\"cpu_kind\":\"shared\""] {
            assert!(json.contains(needle));
        }
    }
}
697}