Skip to main content

ryra_core/
deploy.rs

1//! Blue/green deploy: the runtime-agnostic step sequence for a zero-downtime
2//! color swap.
3//!
4//! A blue/green service runs two interchangeable slots, `blue` and `green`, on
5//! their own ports. At any moment one is *live* (Caddy routes to it) and the
6//! other is idle. A deploy readies the new version on the idle slot, proves it
7//! healthy, swaps the Caddy upstream over with a graceful reload (no dropped
8//! connections), then stops the old slot. Because the old slot lingers through
9//! the swap, rollback is just the reverse upstream swap — no rebuild.
10//!
11//! The *sequence* is identical regardless of how a slot is realized — that's
12//! the whole point. A slot is "an immutable artifact + a port", and the two
13//! runtimes only differ in how that artifact is produced:
14//!
15//! - **podman**: the artifact is an image; readying the idle slot is a
16//!   [`Step::PullImage`], and each color is its own quadlet/container. Immutable
17//!   for free, so this is the baseline and covers every language.
18//! - **native**: the artifact is the idle color's own working dir (a synced,
19//!   separately-built copy of the source), so a Python/C++/Node/Rust process
20//!   keeps serving from its slot while the new code builds in the other. Readying
21//!   the idle slot is a [`Step::Build`] in that slot's dir.
22//!
23//! Either way the swap below is the same five moves, which is why this lives in
24//! one runtime-agnostic builder.
25
26use crate::GeneratedFile;
27use crate::Step;
28use crate::registry::service_def::Color;
29
30/// systemd unit name (without the `.service` suffix) for one color slot.
31///
32/// Native units and podman quadlet-units both follow `<service>-<color>`, so
33/// the swap plan never has to branch on runtime to name what it starts and
34/// stops. Mirrors how a single-instance service's unit is just `<service>`.
35pub fn color_unit(service_name: &str, color: Color) -> String {
36    format!("{service_name}-{color}")
37}
38
39/// Expand a podman quadlet bundle for a blue/green service: replace the single
40/// main `<service>.container` with its two color variants
41/// (`<service>-blue.container`, `<service>-green.container`), leaving every aux
42/// quadlet (a bundled DB, a network, a volume) untouched — only the routable
43/// app container is doubled. The main container is identified by filename, the
44/// same convention the bundle renderer uses to inject ExecStartPre, so both
45/// slots inherit those hooks.
46pub fn expand_color_quadlets(files: Vec<GeneratedFile>, service_name: &str) -> Vec<GeneratedFile> {
47    let main = format!("{service_name}.container");
48    let mut out = Vec::with_capacity(files.len() + 1);
49    for f in files {
50        let is_main = f.path.file_name().and_then(|n| n.to_str()) == Some(main.as_str());
51        if is_main {
52            for color in [Color::Blue, Color::Green] {
53                out.push(GeneratedFile {
54                    path: f
55                        .path
56                        .with_file_name(color_quadlet_filename(service_name, color)),
57                    content: podman_color_quadlet(&f.content, service_name, color),
58                });
59            }
60        } else {
61            out.push(f);
62        }
63    }
64    out
65}
66
67/// Quadlet filename for one color slot: `<service>-<color>.container`.
68/// systemd's generator turns that into the `<service>-<color>.service` unit
69/// that [`color_unit`] names, so the two stay in lockstep.
70pub fn color_quadlet_filename(service_name: &str, color: Color) -> String {
71    format!("{service_name}-{color}.container")
72}
73
74/// Env-var name carrying one color slot's host port: `SERVICE_PORT_HTTP_BLUE`
75/// from the base `SERVICE_PORT_HTTP`. The two slots can't share a host port
76/// (only one process binds it), so a blue/green install allocates a pair and
77/// the renders below reference the color-specific one.
78pub fn color_port_var(base_port_var: &str, color: Color) -> String {
79    format!("{base_port_var}_{}", color.as_str().to_uppercase())
80}
81
82/// Rewrite a podman main-container quadlet into one color slot's variant:
83/// rename the container and point every published host port at the
84/// color-specific env var. The image, volumes, env file, and health command
85/// are untouched — both slots are the same artifact, differing only in
86/// identity and port. Aux quadlets (a bundled DB) are never colorized; only
87/// the routable app container is.
88pub fn podman_color_quadlet(content: &str, service_name: &str, color: Color) -> String {
89    let mut out = String::with_capacity(content.len() + 16);
90    for line in content.lines() {
91        let trimmed = line.trim_start();
92        if let Some(rest) = trimmed.strip_prefix("ContainerName=") {
93            let indent = &line[..line.len() - trimmed.len()];
94            // Only rewrite the app container's own name; defensively leave any
95            // already-suffixed name alone so a re-render is idempotent.
96            if rest.trim() == service_name {
97                out.push_str(&format!("{indent}ContainerName={service_name}-{color}\n"));
98                continue;
99            }
100        }
101        out.push_str(&colorize_port_vars(line, color));
102        out.push('\n');
103    }
104    out
105}
106
107/// Suffix every `${SERVICE_PORT_<NAME>}` reference on a line with the color
108/// (`${SERVICE_PORT_HTTP}` -> `${SERVICE_PORT_HTTP_BLUE}`). Leaves the
109/// container-port side of `PublishPort=host:container` and everything else
110/// untouched, since only the host port comes from a `SERVICE_PORT_*` var.
111fn colorize_port_vars(line: &str, color: Color) -> String {
112    const MARKER: &str = "${SERVICE_PORT_";
113    let suffix = format!("_{}", color.as_str().to_uppercase());
114    let mut out = String::with_capacity(line.len() + suffix.len());
115    let mut rest = line;
116    while let Some(pos) = rest.find(MARKER) {
117        let (before, from_marker) = rest.split_at(pos);
118        out.push_str(before);
119        // The var name runs from after the marker up to the closing `}`.
120        match from_marker[MARKER.len()..].find('}') {
121            Some(close_rel) => {
122                let close = MARKER.len() + close_rel;
123                out.push_str(&from_marker[..close]); // ${SERVICE_PORT_HTTP
124                // Don't double-suffix if it's already colorized.
125                if !from_marker[..close].ends_with(&suffix) {
126                    out.push_str(&suffix);
127                }
128                out.push('}');
129                rest = &from_marker[close + 1..];
130            }
131            // Malformed (no closing brace) — emit verbatim and stop scanning.
132            None => {
133                out.push_str(from_marker);
134                return out;
135            }
136        }
137    }
138    out.push_str(rest);
139    out
140}
141
142/// Render one color slot's systemd unit for a native (non-container) service.
143///
144/// Shares the shape of the single-instance native unit but with two
145/// differences that make blue/green work for *any* language: the process runs
146/// from the color's own isolated working dir (`colors/<color>/` — a synced,
147/// separately-built copy of the source, so an interpreted runtime's lazily-read
148/// source files can't be mutated out from under the live slot), and an explicit
149/// `Environment=` overrides the port so the two slots bind different ones. The
150/// `Environment=` line comes *after* `EnvironmentFile=` so it wins over the
151/// base `SERVICE_PORT_HTTP` from `.env`.
152pub fn native_color_unit(p: &NativeColorUnit) -> String {
153    format!(
154        "[Unit]\n\
155         Description={description} ({color})\n\
156         After=network.target\n\
157         \n\
158         [Service]\n\
159         Type=simple\n\
160         WorkingDirectory={workdir}\n\
161         EnvironmentFile={home}/.env\n\
162         Environment=SERVICE_HOME={home}\n\
163         Environment={port_var}={port}\n\
164         Environment=PATH=%h/.local/bin:%h/.cargo/bin:%h/.bun/bin:%h/.deno/bin:%h/go/bin:/usr/local/bin:/usr/bin:/bin\n\
165         ExecStart=/bin/sh -c 'exec {run}'\n\
166         Restart=always\n\
167         RestartSec=5\n\
168         \n\
169         [Install]\n\
170         WantedBy=default.target\n",
171        description = p.description,
172        color = p.color,
173        workdir = p.workdir,
174        home = p.home,
175        port_var = p.port_var,
176        port = p.port,
177        run = p.run,
178    )
179}
180
181/// Inputs to [`native_color_unit`]. `workdir` is the color's isolated slot
182/// dir; `port` is the slot's allocated host port; `run` is the service's
183/// `[service].run` command, executed unchanged in the slot dir.
184pub struct NativeColorUnit<'a> {
185    pub description: &'a str,
186    pub color: Color,
187    pub workdir: &'a str,
188    pub home: &'a str,
189    pub port_var: &'a str,
190    pub port: u16,
191    pub run: &'a str,
192}
193
/// Everything [`color_swap_steps`] needs, assembled by the caller from the
/// per-runtime render. Keeping this a plain data struct (rather than threading
/// the registry/exposure through) makes the swap logic a pure function the
/// tests can pin without a live host.
pub struct ColorSwap {
    /// Base service name. [`color_swap_steps`] combines it with a color via
    /// [`color_unit`] to name the unit it starts and the unit it stops.
    pub service_name: String,
    /// The slot serving traffic right now. The new version rolls onto
    /// `live.other()`.
    pub live: Color,
    /// Readies the idle slot's artifact: a [`Step::Build`] (native) or
    /// [`Step::PullImage`] (podman). `None` when the artifact is already in
    /// place and nothing needs (re)building.
    pub prepare: Option<Step>,
    /// Health probe against the *idle* slot's own port — ryra won't move
    /// traffic until this returns 200.
    pub health_url: String,
    /// Passed through as the `timeout_secs` of the plan's
    /// [`Step::WaitForHttpHealthy`] step.
    pub health_timeout_secs: u32,
    /// The re-rendered Caddyfile ([`Step::WriteFile`]) with the upstream
    /// repointed at the idle color. `None` for a loopback install with no
    /// Caddy route, where the swap still works (the new slot simply takes over
    /// once the old one stops) but there's nothing to repoint.
    pub caddy_rewrite: Option<Step>,
}
217
218impl ColorSwap {
219    /// The slot the new version rolls onto — and the value the caller should
220    /// persist as the install's new `active_color` once the plan succeeds.
221    pub fn target(&self) -> Color {
222        self.live.other()
223    }
224}
225
226/// Build the ordered step list for a zero-downtime color swap.
227///
228/// The order is load-bearing: prepare and start the idle slot, *then* gate on
229/// its health, *then* swap Caddy, and only then stop the old slot. If the
230/// health gate times out the plan aborts before the Caddy swap, so the old slot
231/// is still live and still routed — a failed deploy is a no-op, not an outage.
232pub fn color_swap_steps(swap: ColorSwap) -> Vec<Step> {
233    let target = swap.target();
234    let start_unit = color_unit(&swap.service_name, target);
235    let stop_unit = color_unit(&swap.service_name, swap.live);
236
237    let mut steps = Vec::new();
238    if let Some(prepare) = swap.prepare {
239        steps.push(prepare);
240    }
241    steps.push(Step::StartService { unit: start_unit });
242    steps.push(Step::WaitForHttpHealthy {
243        url: swap.health_url,
244        expect_status: 200,
245        timeout_secs: swap.health_timeout_secs,
246    });
247    // Atomic cutover: rewrite the upstream, then reload Caddy (graceful — it
248    // drains in-flight requests on the old upstream rather than dropping them).
249    if let Some(rewrite) = swap.caddy_rewrite {
250        steps.push(rewrite);
251        steps.push(Step::ReloadCaddy);
252    }
253    steps.push(Step::StopService { unit: stop_unit });
254    steps
255}
256
// Unit tests for the blue/green helpers: the swap plan's step order, the
// podman/native render transforms, and the naming contract between the plan
// and the renders.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::GeneratedFile;
    use std::path::PathBuf;

    /// Fixture: a `Step::WriteFile` standing in for the re-rendered Caddyfile.
    /// The tests only check that a write step is present and where it sits in
    /// the plan, never the file's content.
    fn caddy_write() -> Step {
        Step::WriteFile(GeneratedFile {
            path: PathBuf::from("/etc/caddy/Caddyfile"),
            content: "reverse_proxy app-green:8080".into(),
        })
    }

    /// With blue live, the swap target is green.
    #[test]
    fn target_is_the_other_color() {
        let swap = ColorSwap {
            service_name: "app".into(),
            live: Color::Blue,
            prepare: None,
            health_url: "http://127.0.0.1:9001/healthz".into(),
            health_timeout_secs: 60,
            caddy_rewrite: None,
        };
        assert_eq!(swap.target(), Color::Green);
    }

    /// Pins the full six-step order for a podman-style swap (pull image as the
    /// prepare step, Caddy rewrite present).
    #[test]
    fn podman_swap_has_canonical_order() {
        let steps = color_swap_steps(ColorSwap {
            service_name: "app".into(),
            live: Color::Green,
            prepare: Some(Step::PullImage {
                image: "ghcr.io/me/app:v2".into(),
            }),
            health_url: "http://127.0.0.1:9001/healthz".into(),
            health_timeout_secs: 60,
            caddy_rewrite: Some(caddy_write()),
        });
        // prepare -> start idle (blue) -> health -> caddy write -> reload -> stop old (green)
        assert!(matches!(steps[0], Step::PullImage { .. }));
        assert!(matches!(&steps[1], Step::StartService { unit } if unit == "app-blue"));
        assert!(matches!(steps[2], Step::WaitForHttpHealthy { .. }));
        assert!(matches!(steps[3], Step::WriteFile(_)));
        assert!(matches!(steps[4], Step::ReloadCaddy));
        assert!(matches!(&steps[5], Step::StopService { unit } if unit == "app-green"));
        assert_eq!(steps.len(), 6);
    }

    /// A native swap puts the caller-supplied build step first, then starts
    /// the idle unit, and stops the old unit last.
    #[test]
    fn native_swap_builds_the_idle_slot_first() {
        let steps = color_swap_steps(ColorSwap {
            service_name: "api".into(),
            live: Color::Blue,
            prepare: Some(Step::Build {
                dir: PathBuf::from("/srv/api/colors/green"),
                command: "cargo build --release".into(),
            }),
            health_url: "http://127.0.0.1:9002/healthz".into(),
            health_timeout_secs: 120,
            caddy_rewrite: Some(caddy_write()),
        });
        // The build runs in the *idle* (green) slot's dir, never touching the
        // live (blue) slot still serving — language-agnostic isolation.
        match &steps[0] {
            Step::Build { dir, .. } => assert!(dir.ends_with("colors/green")),
            _ => panic!("expected Build step first"),
        }
        assert!(matches!(&steps[1], Step::StartService { unit } if unit == "api-green"));
        // Index 5 is the last step because both prepare and a Caddy rewrite
        // are present (six steps in total).
        assert!(matches!(&steps[5], Step::StopService { unit } if unit == "api-blue"));
    }

    // --- render transforms ---------------------------------------------

    // Sample main-container quadlet used as input for the podman render tests.
    const AUTHELIA_QUADLET: &str = "\
[Container]
Image=docker.io/authelia/authelia:4.39
ContainerName=authelia
Network=authelia.network
PublishPort=${SERVICE_PORT_HTTP}:9091
Volume=${SERVICE_HOME}/config:/config:Z
EnvironmentFile=%h/.local/share/services/authelia/.env
";

    /// The color render renames the container and repoints the published host
    /// port at the color-specific var, for both colors, leaving the rest of
    /// the quadlet as it was.
    #[test]
    fn podman_quadlet_renames_container_and_colorizes_port() {
        let blue = podman_color_quadlet(AUTHELIA_QUADLET, "authelia", Color::Blue);
        assert!(blue.contains("ContainerName=authelia-blue"));
        assert!(!blue.contains("ContainerName=authelia\n"));
        assert!(blue.contains("PublishPort=${SERVICE_PORT_HTTP_BLUE}:9091"));
        // Image, network, volume, env file untouched — same artifact.
        assert!(blue.contains("Image=docker.io/authelia/authelia:4.39"));
        assert!(blue.contains("Network=authelia.network"));
        assert!(blue.contains("services/authelia/.env"));

        let green = podman_color_quadlet(AUTHELIA_QUADLET, "authelia", Color::Green);
        assert!(green.contains("ContainerName=authelia-green"));
        assert!(green.contains("PublishPort=${SERVICE_PORT_HTTP_GREEN}:9091"));
    }

    /// Rendering the same color twice yields the same text as rendering once.
    #[test]
    fn podman_quadlet_render_is_idempotent() {
        // Re-rendering an already-colorized quadlet must not double-suffix.
        let once = podman_color_quadlet(AUTHELIA_QUADLET, "authelia", Color::Blue);
        let twice = podman_color_quadlet(&once, "authelia", Color::Blue);
        assert_eq!(once, twice);
    }

    /// The color-specific port var is the base var plus `_BLUE` / `_GREEN`.
    #[test]
    fn color_port_var_appends_uppercased_color() {
        assert_eq!(
            color_port_var("SERVICE_PORT_HTTP", Color::Blue),
            "SERVICE_PORT_HTTP_BLUE"
        );
        assert_eq!(
            color_port_var("SERVICE_PORT_HTTP", Color::Green),
            "SERVICE_PORT_HTTP_GREEN"
        );
    }

    /// The native unit runs from the slot's own working dir, places the port
    /// override after `EnvironmentFile=`, and wraps the run command in
    /// `/bin/sh -c 'exec …'`.
    #[test]
    fn native_color_unit_isolates_workdir_and_overrides_port() {
        let unit = native_color_unit(&NativeColorUnit {
            description: "Demo API",
            color: Color::Green,
            workdir: "/home/u/.local/share/services/api/colors/green",
            home: "/home/u/.local/share/services/api",
            port_var: "SERVICE_PORT_HTTP",
            port: 9002,
            run: "python -m app",
        });
        assert!(unit.contains("WorkingDirectory=/home/u/.local/share/services/api/colors/green"));
        // The port override must come AFTER EnvironmentFile so it wins.
        let envfile = unit.find("EnvironmentFile=").unwrap();
        let port_override = unit.find("Environment=SERVICE_PORT_HTTP=9002").unwrap();
        assert!(
            port_override > envfile,
            "port override must follow EnvironmentFile"
        );
        assert!(unit.contains("ExecStart=/bin/sh -c 'exec python -m app'"));
        assert!(unit.contains("Description=Demo API (green)"));
    }

    // --- plan + render consistency, across several service shapes -------

    /// The unit names the swap plan starts/stops MUST match the unit names the
    /// renders produce, or a deploy would start a slot that doesn't exist.
    /// This pins that contract for a podman service end to end.
    #[test]
    fn e2e_podman_service_plan_matches_rendered_slots() {
        let svc = "authelia";
        let live = Color::Blue;

        // Render both slots the way the install path would.
        let blue_file = color_quadlet_filename(svc, Color::Blue);
        let green_file = color_quadlet_filename(svc, Color::Green);
        assert_eq!(blue_file, "authelia-blue.container");
        assert_eq!(green_file, "authelia-green.container");

        // Build the deploy plan (live=blue, so it rolls onto green).
        let swap = ColorSwap {
            service_name: svc.into(),
            live,
            prepare: Some(Step::PullImage {
                image: "authelia:4.40".into(),
            }),
            health_url: "http://127.0.0.1:9002/api/health".into(),
            health_timeout_secs: 60,
            caddy_rewrite: Some(caddy_write()),
        };
        let target = swap.target();
        let steps = color_swap_steps(swap);

        // The unit started is the green slot's unit, whose quadlet file we render.
        let started = match &steps[1] {
            Step::StartService { unit } => unit.clone(),
            _ => panic!("expected StartService at index 1"),
        };
        assert_eq!(started, color_unit(svc, target));
        assert_eq!(format!("{started}.container"), green_file);
    }

    /// Same contract for a native (here: Python) service — proving the swap
    /// choreography is runtime-agnostic.
    #[test]
    fn e2e_native_python_service_plan_matches_rendered_slots() {
        let svc = "api";
        let green_unit = native_color_unit(&NativeColorUnit {
            description: "API",
            color: Color::Green,
            workdir: "/srv/api/colors/green",
            home: "/srv/api",
            port_var: "SERVICE_PORT_HTTP",
            port: 9002,
            run: "python -m app",
        });
        // The rendered green slot runs on 9002; the plan's health probe must
        // hit that same port.
        assert!(green_unit.contains("Environment=SERVICE_PORT_HTTP=9002"));

        let steps = color_swap_steps(ColorSwap {
            service_name: svc.into(),
            live: Color::Blue,
            prepare: Some(Step::Build {
                dir: "/srv/api/colors/green".into(),
                command: "pip install -r requirements.txt".into(),
            }),
            health_url: "http://127.0.0.1:9002/healthz".into(),
            health_timeout_secs: 90,
            caddy_rewrite: None,
        });
        assert!(matches!(&steps[1], Step::StartService { unit } if unit == "api-green"));
        match &steps[0] {
            Step::Build { dir, .. } => assert_eq!(dir.to_str().unwrap(), "/srv/api/colors/green"),
            _ => panic!("expected Build in the green slot dir"),
        }
    }

    /// Without a prepare step or a Caddy rewrite the plan shrinks to three
    /// steps and contains no Caddy reload.
    #[test]
    fn loopback_install_skips_caddy_but_still_swaps() {
        let steps = color_swap_steps(ColorSwap {
            service_name: "app".into(),
            live: Color::Blue,
            prepare: None,
            health_url: "http://127.0.0.1:9002/healthz".into(),
            health_timeout_secs: 30,
            caddy_rewrite: None,
        });
        // No prepare, no caddy: start idle -> health -> stop old.
        assert!(matches!(&steps[0], Step::StartService { unit } if unit == "app-green"));
        assert!(matches!(steps[1], Step::WaitForHttpHealthy { .. }));
        assert!(matches!(&steps[2], Step::StopService { unit } if unit == "app-blue"));
        assert!(!steps.iter().any(|s| matches!(s, Step::ReloadCaddy)));
        assert_eq!(steps.len(), 3);
    }
}