Skip to main content

ryra_core/
upgrade.rs

1//! Diff and upgrade flows for already-installed services.
2//!
3//! "Upgrade" means: re-render an installed service's quadlet + configs
4//! against the current registry, replace any files whose content changed,
5//! and restart the unit. The render path is shared with `add_service`
6//! (driven via [`PlanMode::Upgrade`]); the side-effect steps differ.
7//!
8//! Drift detection is grounded in `service.manifest` — the per-install render
9//! manifest written by `ryra add`. Each tracked file is in one of these
10//! states:
11//!
12//! - **Unchanged**: on-disk content matches what the registry would render.
13//! - **Modified**: registry rendered output differs, but on-disk hash still
14//!   matches the manifest, so we know the file is ours and can be safely
15//!   overwritten.
16//! - **Drift**: on-disk hash matches *neither* the manifest nor the planned
17//!   content — i.e. the user hand-edited it. Refused without `--force`.
18//! - **Added**: file is in the planned set but not in the manifest (registry
19//!   added it).
20//! - **Removed**: file is in the manifest but not in the planned set (registry
21//!   stopped shipping it).
22//!
23//! `.env` is excluded throughout: it carries generated secrets that legitimately
24//! drift across restarts, and re-rendering it on upgrade would clobber rotated
25//! credentials. Its absence from the manifest is the source of truth for that.
26
27use std::collections::{BTreeMap, BTreeSet};
28use std::path::{Path, PathBuf};
29use std::time::SystemTime;
30
31use crate::error::{Error, Result};
32use crate::exposure::Exposure;
33use crate::generate::GeneratedFile;
34use crate::manifest;
35use crate::metadata::{Metadata, load_metadata};
36use crate::registry::resolve::ServiceRef;
37use crate::registry::service_def::{Color, DeployStrategy, Runtime};
38use crate::{
39    AddResult, PlanMode, REGISTRY_DEFAULT, Step, add_service, caddy, deploy, is_service_installed,
40    paths::metadata_path, resolve_registry_dir, service_home,
41};
42
43// --- Native source-staleness ("a rebuild would pick up new code") ----------
44//
// Config drift is detected by `diff_service` (below). But a `runtime =
46// "native"` service can change *without* its rendered config changing: you
47// edit the source and a `cargo build` / `bun install` / restart would ship it.
48// `service.toml` is unchanged, so the diff is clean and the service still looks
49// up to date. This module fills that gap with a language-agnostic signal: did
50// any source file change since the running process last started?
51//
52// The signal is the running process's own start time (no state is written
53// anywhere): we ask systemd for the unit's MainPID and read its start time from
54// `/proc/<pid>/stat`, then flag staleness when any source file is newer. That
55// works for *anything* systemd can run (bash, Python, Node, Rust, C++, ...) --
56// we never inspect a toolchain or look for a "binary". It's a *hint*, not a
57// gate: the remedy is always an idempotent `ryra upgrade`, and the comparison
58// is read-only, so a false positive just costs a needless rebuild.
59
/// Directory names the source-staleness walk never descends into: the usual
/// build-output and dependency directories across ecosystems. Dotdirs (`.git`,
/// editor/tool state) are not listed here -- the walker rejects those by their
/// leading `.`. The list is best-effort and language-agnostic: staleness is
/// only a hint, so a missing entry at worst shows a spurious "upgrade
/// available" that an idempotent `ryra upgrade` clears.
///
/// Kept in alphabetical order; lookups are by membership only.
const IGNORED_DIRS: &[&str] = &[
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "out",
    "target",
    "vendor",
    "venv",
];
75
76/// True if any regular file under `dir` (skipping [`IGNORED_DIRS`] and dotdirs)
77/// was modified after `since`. Stops at the first newer file; symlinks are not
78/// followed. Unreadable dirs/files are skipped (a hint, not a hard check).
79fn any_file_newer_than(dir: &Path, since: SystemTime) -> bool {
80    let Ok(entries) = std::fs::read_dir(dir) else {
81        return false;
82    };
83    for entry in entries.flatten() {
84        let Ok(file_type) = entry.file_type() else {
85            continue;
86        };
87        let path = entry.path();
88        if file_type.is_dir() {
89            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
90            if name.starts_with('.') || IGNORED_DIRS.contains(&name) {
91                continue;
92            }
93            if any_file_newer_than(&path, since) {
94                return true;
95            }
96        } else if file_type.is_file()
97            && let Ok(mtime) = entry.metadata().and_then(|m| m.modified())
98            && mtime > since
99        {
100            return true;
101        }
102    }
103    false
104}
105
106/// Rebuild the `ServiceRef` we stashed at install time (mirrors `replan`), so
107/// the source dir can be resolved the same way an upgrade would.
108fn service_ref_for(metadata: &Metadata, service_name: &str) -> ServiceRef {
109    if metadata.registry.is_empty() || metadata.registry == REGISTRY_DEFAULT {
110        ServiceRef::Default(service_name.to_string())
111    } else if crate::registry::resolve::is_path_like(&metadata.registry) {
112        ServiceRef::Path {
113            dir: PathBuf::from(&metadata.registry),
114            name: service_name.to_string(),
115        }
116    } else {
117        ServiceRef::Custom {
118            registry: metadata.registry.clone(),
119            service: service_name.to_string(),
120        }
121    }
122}
123
/// Asks `systemctl --user show <service_name>.service -p MainPID --value` for
/// the unit's main PID.
///
/// Returns `None` when `systemctl` cannot be spawned, exits unsuccessfully,
/// prints something that is not a `u32`, or reports PID 0 (which is what a
/// stopped unit reports).
fn unit_main_pid(service_name: &str) -> Option<u32> {
    let unit = format!("{service_name}.service");
    let output = std::process::Command::new("systemctl")
        .arg("--user")
        .arg("show")
        .arg(&unit)
        .args(["-p", "MainPID", "--value"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    match stdout.trim().parse::<u32>() {
        // PID 0 means "no main process", not a real PID.
        Ok(0) | Err(_) => None,
        Ok(pid) => Some(pid),
    }
}
144
/// Wall-clock start time of process `pid`.
///
/// Combines field 22 of `/proc/<pid>/stat` (`starttime`, in clock ticks since
/// boot) with the `btime` line of `/proc/stat` (boot time as a Unix epoch, in
/// seconds). The result has whole-second resolution: ticks are integer-divided
/// down to seconds before being added to `btime`.
///
/// Returns `None` if either file cannot be read or parsed -- e.g. the process
/// has exited or `/proc` is unavailable.
fn process_start_time(pid: u32) -> Option<SystemTime> {
    // USER_HZ: the kernel's /proc clock-tick rate. Fixed at 100 on every
    // mainstream Linux (it is part of the ABI, not the runtime CPU tick), so
    // hardcoding it avoids a libc/sysconf dependency.
    const USER_HZ: u64 = 100;

    let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?;
    // Field 2 (comm) is parenthesised and may itself contain spaces or `)`,
    // so the remaining fields only start after the LAST `)`. The first token
    // there is field 3 (state); starttime is field 22, i.e. token index 19.
    let (_, fields_after_comm) = stat.rsplit_once(')')?;
    let starttime_ticks = fields_after_comm
        .split_whitespace()
        .nth(19)?
        .parse::<u64>()
        .ok()?;

    let boot_epoch_secs = std::fs::read_to_string("/proc/stat")
        .ok()?
        .lines()
        .find_map(|line| line.strip_prefix("btime ")?.trim().parse::<u64>().ok())?;

    let started_epoch_secs = boot_epoch_secs + starttime_ticks / USER_HZ;
    Some(std::time::UNIX_EPOCH + std::time::Duration::from_secs(started_epoch_secs))
}
168
/// How one tracked file compares across three views: what the registry would
/// render now (the plan), what is on disk, and the hash recorded in the
/// install's manifest.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffKind {
    /// The file on disk already hashes to the planned render. Nothing to do.
    Unchanged,
    /// The planned render differs from disk, but the file is known to be
    /// ryra-owned and safe to overwrite: either the on-disk hash still equals
    /// the manifest hash, or the manifest lists the file and it has gone
    /// missing from disk (it is restored).
    Modified,
    /// The file exists on disk, differs from the planned render, and cannot
    /// be shown to be ryra's own bytes: its hash differs from the manifest's,
    /// or there is no manifest entry to compare against (e.g. a service
    /// installed before the manifest feature). Treated as a hand edit;
    /// upgrade refuses to overwrite it without `--force`.
    Drift,
    /// The file is in the planned render, is not on disk, and has no manifest
    /// entry -- the registry started shipping it.
    Added,
    /// The file has a manifest entry but the registry no longer renders it.
    /// Upgrade deletes it.
    Removed,
}
190
191#[derive(Debug, Clone)]
192pub struct DiffEntry {
193    pub path: PathBuf,
194    pub kind: DiffKind,
195}
196
197/// One env var the registry expects in `.env` that the user's `.env`
198/// doesn't have. By design env tracking is *append-only* — we never flag
199/// a present-but-different value as drift, and we never propose
200/// removing a key. Users may have manually edited values or added their
201/// own keys; clobbering those would be the larger harm.
202///
203/// `kind` and `prompt` come straight from the registry's `EnvVar`
204/// definition, so the CLI can route Prompted / Required additions
205/// through the same interactive prompt that `ryra add` uses, while
206/// silently appending Default ones.
207#[derive(Debug, Clone)]
208pub struct EnvAddition {
209    pub key: String,
210    pub value: String,
211    pub kind: crate::registry::service_def::EnvKind,
212    pub prompt: Option<String>,
213}
214
215/// Result of comparing the registry's render to what's on disk.
216#[derive(Debug, Clone)]
217pub struct DiffResult {
218    pub service: String,
219    pub entries: Vec<DiffEntry>,
220    /// Static env vars the registry expects but the user's `.env` is
221    /// missing. Empty when the `.env` already covers everything tracked.
222    pub env_additions: Vec<EnvAddition>,
223    /// `runtime = "native"` only: the source changed since the running process
224    /// started, so a rebuild/restart would ship new code even though the
225    /// rendered config is unchanged. Always `false` for podman services and
226    /// stopped natives. Orthogonal to [`Self::is_clean`] (which is config-only)
227    /// -- a service is upgradable when the diff is dirty *or* this is set.
228    pub source_stale: bool,
229}
230
231impl DiffResult {
232    /// True when nothing about the install would change — neither files
233    /// nor env vars.
234    pub fn is_clean(&self) -> bool {
235        self.entries
236            .iter()
237            .all(|e| matches!(e.kind, DiffKind::Unchanged))
238            && self.env_additions.is_empty()
239    }
240
241    /// Files the user hand-edited. Upgrade must refuse to overwrite these
242    /// without `--force`.
243    pub fn drifted(&self) -> Vec<&DiffEntry> {
244        self.entries
245            .iter()
246            .filter(|e| matches!(e.kind, DiffKind::Drift))
247            .collect()
248    }
249}
250
251/// Reconstruct the planning inputs we stashed at install time and feed them
252/// back through `add_service` in upgrade mode. Returns the planned step
253/// list and the planned-file content map (path → content). The richer
254/// per-env metadata lives on `AddResult.tracked_envs`.
255async fn replan(service_name: &str) -> Result<Replanned> {
256    if !is_service_installed(service_name) {
257        return Err(Error::ServiceNotInstalled(service_name.to_string()));
258    }
259    let metadata = load_metadata(service_name)?
260        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
261
262    let exposure = match metadata.url.as_deref() {
263        Some(url) => Exposure::from_url(url),
264        None => Exposure::Loopback,
265    };
266
267    let service_ref = service_ref_for(&metadata, service_name);
268    let repo_dir = resolve_registry_dir(&service_ref).await?;
269    // The service's own dir under the resolved registry (where a native build/
270    // run happens). Surfaced so callers — the source-staleness check below —
271    // reuse this single resolution instead of resolving again.
272    let source_dir = crate::registry::find_service(&repo_dir, service_name)?.service_dir;
273    let native = matches!(metadata.runtime, Runtime::Native);
274
275    // Recover existing host ports from the install's `.env` so the
276    // re-render lands on the same numbers. Without this every dynamically
277    // allocated port shifts because `port_in_use` reports them taken.
278    let port_overrides = read_existing_ports(service_name)?;
279
280    // Trivial port-in-use closure: the upgrade caller pins every port via
281    // `port_overrides`, so the closure is never consulted. Returning false
282    // unconditionally is safe — no allocation runs.
283    let port_in_use = |_p: u16| false;
284
285    let enabled_groups: BTreeSet<String> = metadata.enabled_groups.iter().cloned().collect();
286    let selected_choices = metadata.selected_choices.clone();
287    // Recover the install's existing `.env` values so a re-render reuses what's
288    // already configured instead of re-demanding it. A required choice/group
289    // member (e.g. an `external` database's `DATABASE_URL`) is provided once at
290    // install and lives in the `.env`; without seeding it here the render treats
291    // it as "no value" and the upgrade/diff errors on a service that's running
292    // fine. Same rationale as `port_overrides` above: upgrade re-renders against
293    // the existing install, it doesn't re-ask for what's already set.
294    let env_overrides = read_existing_env_keys(service_name)?;
295    let result = add_service(crate::AddServiceParams {
296        service_name,
297        exposure: &exposure,
298        auth: match metadata.auth.clone() {
299            Some(kind) => crate::AuthChoice::Native(kind),
300            None => crate::AuthChoice::None,
301        },
302        // SMTP and backup enablement are per-install state — persisted by
303        // `ryra add` and `ryra config`. Upgrade preserves whatever the
304        // user picked.
305        enable_smtp: metadata.smtp_enabled,
306        enable_backup: metadata.backup_enabled,
307        env_overrides: &env_overrides,
308        enabled_groups: &enabled_groups,
309        selected_choices: &selected_choices,
310        registry_name: &metadata.registry,
311        repo_dir: &repo_dir,
312        pre_built_ctx: None,
313        port_in_use: &port_in_use,
314        // ACME mode is only consumed when adding the reverse proxy itself;
315        // upgrade never needs to seed the TLS snippet.
316        acme_mode: None,
317        mode: PlanMode::Upgrade,
318        port_overrides: &port_overrides,
319        // Upgrade preserves the on-disk `.env` via the append-only env_additions
320        // path (it skips this plan's `.env` WriteFile), so it doesn't seed the
321        // merge here; and a re-render never relaxes required-var validation.
322        existing_env_file: None,
323        allow_unset_required: false,
324    })?;
325
326    let mut planned: BTreeMap<PathBuf, String> = BTreeMap::new();
327    for step in &result.steps {
328        if let Step::WriteFile(file) = step {
329            planned.insert(file.path.clone(), file.content.clone());
330        }
331    }
332    Ok(Replanned {
333        result,
334        planned,
335        source_dir,
336        native,
337    })
338}
339
/// Output of [`replan`]: the re-rendered plan plus the resolved source
/// location, so callers don't resolve the registry a second time.
struct Replanned {
    /// Everything `add_service` returned for the upgrade-mode render
    /// (including the step list and `tracked_envs`).
    result: AddResult,
    /// Path -> content for every `Step::WriteFile` in `result.steps`.
    planned: BTreeMap<PathBuf, String>,
    /// The service's source dir (where a native build/run happens).
    source_dir: PathBuf,
    /// Whether this is a `runtime = "native"` install.
    native: bool,
}
350
351/// Parse the on-disk `.env` for a service into a key→value map. Lines
352/// without `=`, comments, and blanks are skipped. Returns an empty map if
353/// the file is absent — caller decides whether that's a soft error.
354fn read_existing_env_keys(service_name: &str) -> Result<BTreeMap<String, String>> {
355    let env_path = service_home(service_name)?.join(".env");
356    let mut out: BTreeMap<String, String> = BTreeMap::new();
357    let content = match std::fs::read_to_string(&env_path) {
358        Ok(c) => c,
359        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out),
360        Err(source) => {
361            return Err(Error::FileRead {
362                path: env_path,
363                source,
364            });
365        }
366    };
367    for line in content.lines() {
368        let line = line.trim();
369        if line.is_empty() || line.starts_with('#') {
370            continue;
371        }
372        if let Some((k, v)) = line.split_once('=') {
373            out.insert(k.trim().to_string(), v.to_string());
374        }
375    }
376    Ok(out)
377}
378
379/// Parse `SERVICE_PORT_<NAME>=<port>` lines out of an installed service's
380/// `.env`. Returns a name → port map (lowercased name, matching the
381/// `[[ports]]` definition in service.toml). Also used by the metrics
382/// bridge to resolve host-network scrape targets retroactively.
383pub(crate) fn read_existing_ports(service_name: &str) -> Result<BTreeMap<String, u16>> {
384    let env_path = service_home(service_name)?.join(".env");
385    let mut overrides = BTreeMap::new();
386    let content = match std::fs::read_to_string(&env_path) {
387        Ok(c) => c,
388        // No .env yet means a half-installed service; let the planner
389        // re-allocate. (`add_service` will then surface a richer error if
390        // the install is genuinely broken.)
391        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(overrides),
392        Err(source) => {
393            return Err(Error::FileRead {
394                path: env_path,
395                source,
396            });
397        }
398    };
399    for line in content.lines() {
400        let line = line.trim();
401        if line.is_empty() || line.starts_with('#') {
402            continue;
403        }
404        let Some((key, value)) = line.split_once('=') else {
405            continue;
406        };
407        let Some(name) = key.strip_prefix("SERVICE_PORT_") else {
408            continue;
409        };
410        if let Ok(port) = value.trim().parse::<u16>() {
411            overrides.insert(name.to_ascii_lowercase(), port);
412        }
413    }
414    Ok(overrides)
415}
416
417/// Files we never want to flag as drift. The `.env` carries generated secrets
418/// that rotate at runtime; `service.manifest` itself is the manifest, not a
419/// tracked file; the auth bridge's CA bundle and `/etc/hosts` overlay are
420/// rewritten by an `ExecStartPre` hook on every start, so their on-disk bytes
421/// never match ryra's seed. All are excluded from the planned set during
422/// diffing so they don't appear as Removed/Added/Drift.
423fn should_skip_path(path: &std::path::Path, manifest_file: &std::path::Path) -> bool {
424    if path == manifest_file {
425        return true;
426    }
427    if crate::auth_bridge::is_hook_rewritten(path) {
428        return true;
429    }
430    matches!(path.file_name().and_then(|n| n.to_str()), Some(".env"))
431}
432
433/// Compute the diff between the registry's render and what's on disk for an
434/// installed service.
435pub async fn diff_service(service_name: &str) -> Result<DiffResult> {
436    let Replanned {
437        result,
438        planned,
439        source_dir,
440        native,
441    } = replan(service_name).await?;
442
443    // Native source-staleness rides along with the diff (same resolution, no
444    // second registry lookup): has any source file changed since the running
445    // process started? See the module note above on why this is the signal.
446    let source_stale = native
447        && unit_main_pid(service_name)
448            .and_then(process_start_time)
449            .is_some_and(|started| any_file_newer_than(&source_dir, started));
450
451    let manifest_file = manifest::manifest_path(service_name)?;
452    let (manifest_entries, _manifest_envs) = manifest::load(service_name)?.unwrap_or_default();
453    let manifest_by_path: BTreeMap<PathBuf, String> = manifest_entries
454        .into_iter()
455        .map(|e| (e.path, e.sha256))
456        .collect();
457
458    // Env additions: registry-expected static keys missing from the user's
459    // `.env`. Append-only — we ignore present-but-different values
460    // (could be a manual override) and never propose removals (could be
461    // a key the user added themselves that the registry happens not to
462    // ship). The registry-side list comes from the freshly-rendered
463    // `tracked_envs` (which carries kind + prompt for the CLI), not the
464    // on-disk manifest — that's the source of truth.
465    let existing_env = read_existing_env_keys(service_name)?;
466    let env_additions: Vec<EnvAddition> = result
467        .tracked_envs
468        .iter()
469        .filter(|p| !existing_env.contains_key(&p.key))
470        .map(|p| EnvAddition {
471            key: p.key.clone(),
472            value: p.value.clone(),
473            kind: p.kind.clone(),
474            prompt: p.prompt.clone(),
475        })
476        .collect();
477
478    let mut entries: Vec<DiffEntry> = Vec::new();
479    let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
480
481    // Walk planned files first — Added / Modified / Drift / Unchanged.
482    for (path, content) in &planned {
483        if should_skip_path(path, &manifest_file) {
484            continue;
485        }
486        seen.insert(path.clone());
487        let planned_hash = manifest::hash_bytes(content.as_bytes());
488        let on_disk_hash = if path.exists() {
489            Some(manifest::hash_file(path)?)
490        } else {
491            None
492        };
493        let manifest_hash = manifest_by_path.get(path);
494
495        let kind = match (on_disk_hash.as_deref(), manifest_hash.map(String::as_str)) {
496            // File doesn't exist on disk.
497            (None, Some(_)) | (None, None) => match manifest_hash {
498                Some(_) => DiffKind::Modified, // we wrote it, user deleted it; restore
499                None => DiffKind::Added,       // registry adds it, fresh write
500            },
501            // On-disk content already matches what the registry would render.
502            (Some(d), _) if d == planned_hash => DiffKind::Unchanged,
503            // No manifest entry → can't tell if the user touched it.
504            // Conservative: treat as drift so --force is required once.
505            (Some(_), None) => DiffKind::Drift,
506            // On-disk matches the manifest but not the planned render →
507            // ryra-owned, safe to overwrite.
508            (Some(d), Some(l)) if d == l => DiffKind::Modified,
509            // On-disk matches neither lock nor plan → user hand-edited.
510            (Some(_), Some(_)) => DiffKind::Drift,
511        };
512        entries.push(DiffEntry {
513            path: path.clone(),
514            kind,
515        });
516    }
517
518    // Walk manifest entries that the planner no longer emits — Removed.
519    for path in manifest_by_path.keys() {
520        if seen.contains(path) {
521            continue;
522        }
523        if should_skip_path(path, &manifest_file) {
524            continue;
525        }
526        entries.push(DiffEntry {
527            path: path.clone(),
528            kind: DiffKind::Removed,
529        });
530    }
531
532    entries.sort_by(|a, b| a.path.cmp(&b.path));
533    Ok(DiffResult {
534        service: service_name.to_string(),
535        entries,
536        env_additions,
537        source_stale,
538    })
539}
540
541/// Plan a zero-downtime color swap for a `deploy = "blue-green"` install.
542///
543/// Returns `None` when the service isn't blue/green, so [`upgrade_service`] can
544/// fall through to its normal restart-based flow. Otherwise the plan:
545///   1. re-renders both color quadlets/units + reloads systemd (so the idle
546///      slot picks up any new image tag or config), keeping `.env` untouched;
547///   2. starts the *idle* slot and gates on its health endpoint;
548///   3. repoints the Caddy upstream at the idle slot and reloads gracefully;
549///   4. stops the old slot and flips `active_color` in metadata.
550///
551/// A health-gate timeout aborts before step 3, leaving the old slot live and
552/// routed — a failed deploy is a no-op, never an outage.
553pub async fn blue_green_swap(service_name: &str) -> Result<Option<UpgradeResult>> {
554    if !is_service_installed(service_name) {
555        return Err(Error::ServiceNotInstalled(service_name.to_string()));
556    }
557    let metadata = load_metadata(service_name)?
558        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
559
560    // Resolve the registry def to read the deploy strategy + health path.
561    let service_ref = service_ref_for(&metadata, service_name);
562    let repo_dir = resolve_registry_dir(&service_ref).await?;
563    let reg = crate::registry::find_service(&repo_dir, service_name)?;
564    let def = &reg.def;
565    if def.service.deploy != DeployStrategy::BlueGreen {
566        return Ok(None);
567    }
568    let health_check = def.service.health_check.clone().ok_or_else(|| {
569        Error::Template(format!(
570            "{service_name}: deploy = \"blue-green\" but no health_check — validation should have caught this"
571        ))
572    })?;
573
574    // Which slot is live, and which we're rolling onto.
575    let live = metadata.active_color.unwrap_or(Color::Blue);
576    let target = live.other();
577
578    // The idle slot's host port, from the install's `.env`
579    // (`SERVICE_PORT_HTTP_GREEN` etc., written by the blue/green add path).
580    let primary_port_name = def
581        .ports
582        .iter()
583        .find(|p| p.name.eq_ignore_ascii_case("http"))
584        .or_else(|| def.ports.first())
585        .map(|p| p.name.clone())
586        .ok_or_else(|| {
587            Error::Template(format!("{service_name}: blue/green needs a routable port"))
588        })?;
589    let existing_ports = read_existing_ports(service_name)?;
590    let target_key = format!("{}_{}", primary_port_name.to_ascii_lowercase(), target);
591    let target_port = existing_ports.get(&target_key).copied().ok_or_else(|| {
592        Error::Template(format!(
593            "{service_name}: missing {} in .env — reinstall to allocate the blue/green port pair",
594            deploy::color_port_var(
595                &format!("SERVICE_PORT_{}", primary_port_name.to_uppercase()),
596                target
597            )
598        ))
599    })?;
600    let health_url = format!("http://127.0.0.1:{target_port}{health_check}");
601
602    // Re-render the install (Upgrade mode): emits both color quadlets/units and
603    // pulls any new image. Keep those file writes + pulls + daemon-reload, but
604    // drop the add path's StartService/StopService (we orchestrate the swap
605    // ourselves), its `.env` write (preserve secrets), and its metadata write
606    // (we flip active_color below instead of resetting it to blue).
607    let replanned = replan(service_name).await?;
608    let env_filename = std::ffi::OsStr::new(".env");
609    let metadata_file = metadata_path(service_name)?;
610    // Never re-sync or rebuild the LIVE slot's working dir — that's the whole
611    // point of the isolation (an in-flight Python/Node process must not have its
612    // source mutated). Drop any SyncDir/Build that targets `colors/<live>`;
613    // keep the idle slot's. (Podman has no such steps — it re-pulls the image,
614    // which is harmless — so this is a native-only filter in practice.)
615    let live_slot = format!("colors/{live}");
616    let touches_live = |p: &std::path::Path| p.to_string_lossy().contains(&live_slot);
617    let mut steps: Vec<Step> = Vec::new();
618    for step in replanned.result.steps {
619        match step {
620            Step::StartService { .. } | Step::StopService { .. } => continue,
621            Step::WriteFile(GeneratedFile { ref path, .. })
622                if path.file_name() == Some(env_filename) || *path == metadata_file =>
623            {
624                continue;
625            }
626            Step::SyncDir { ref dst, .. } if touches_live(dst) => continue,
627            Step::Build { ref dir, .. } if touches_live(dir) => continue,
628            other => steps.push(other),
629        }
630    }
631
632    // Caddy: repoint the upstream at the idle slot. Only when the install has a
633    // routed URL and a Caddyfile exists (loopback installs swap without it).
634    let caddy_rewrite =
635        blue_green_caddy_rewrite(service_name, def, &metadata, target, target_port)?;
636
637    // The runtime-agnostic swap: start idle -> health-gate -> caddy reload ->
638    // stop old. Artifact prep (pull/build) already rode along in `steps` above.
639    steps.extend(deploy::color_swap_steps(deploy::ColorSwap {
640        service_name: service_name.to_string(),
641        live,
642        prepare: None,
643        health_url,
644        health_timeout_secs: def.service.health_timeout_secs(),
645        caddy_rewrite,
646    }));
647
648    // Flip active_color so the next deploy rolls back onto `live`.
649    let mut new_metadata = metadata.clone();
650    new_metadata.active_color = Some(target);
651    steps.push(Step::WriteFile(GeneratedFile {
652        path: metadata_file,
653        content: toml::to_string_pretty(&new_metadata)?,
654    }));
655
656    Ok(Some(UpgradeResult {
657        service: service_name.to_string(),
658        diff: diff_service(service_name).await?,
659        steps,
660        backup_dir: None,
661        planned_files: replanned.planned,
662        // A swap isn't visible as config drift (the new image/build lives behind
663        // the same quadlet), so force the apply just like the native rebuild path.
664        force_apply: true,
665    }))
666}
667
668/// Re-render the Caddy site block pointing at the idle color and splice it into
669/// the existing Caddyfile. `None` when the install has no routed URL or no
670/// Caddyfile on disk (a loopback blue/green install swaps without Caddy).
671fn blue_green_caddy_rewrite(
672    service_name: &str,
673    def: &crate::registry::service_def::ServiceDef,
674    metadata: &Metadata,
675    target: Color,
676    target_port: u16,
677) -> Result<Option<Step>> {
678    let Some(url) = metadata.url.as_deref() else {
679        return Ok(None);
680    };
681    let caddyfile_path = caddy::caddyfile_path()?;
682    let Ok(existing) = std::fs::read_to_string(&caddyfile_path) else {
683        return Ok(None);
684    };
685    let parsed = url::Url::parse(url)
686        .map_err(|e| Error::Template(format!("invalid service URL '{url}': {e}")))?;
687    let domain = parsed
688        .host_str()
689        .ok_or_else(|| Error::Template(format!("service URL '{url}' has no host")))?;
690    let paths = crate::config::ConfigPaths::resolve()?;
691    let config = crate::config::load_or_default(&paths.config_file)?;
692    // Podman slots are containers on Caddy's shared network, reachable by name
693    // (`<svc>-<color>:<container_port>`). Native slots are host processes, so
694    // Caddy reaches them over the host bridge at the color's *host* port.
695    let (target_host, port) = match metadata.runtime {
696        Runtime::Podman => (
697            deploy::color_unit(service_name, target),
698            def.ports.first().map(|p| p.container_port).unwrap_or(80),
699        ),
700        Runtime::Native => ("host.containers.internal".to_string(), target_port),
701    };
702    let block = caddy::render_site_block(&caddy::CaddySiteParams {
703        service_name: service_name.to_string(),
704        target_host,
705        domain: domain.to_string(),
706        container_port: port,
707        https_port: crate::caddy_https_port(&config),
708        force_internal_tls: false,
709    });
710    let updated = caddy::add_route(&existing, service_name, &block);
711    Ok(Some(Step::WriteFile(GeneratedFile {
712        path: caddyfile_path,
713        content: updated,
714    })))
715}
716
717/// Plan an upgrade for an installed service.
718///
719/// Returns the steps to execute and the backup directory where displaced
720/// files will be copied. The backup dir is *also* baked into the steps
721/// (as `Step::CopyFile` entries placed before each `Step::WriteFile`).
722pub async fn upgrade_service(service_name: &str, force: bool) -> Result<UpgradeResult> {
723    // Blue/green services upgrade by a color swap, not an in-place restart, so
724    // they take a different plan entirely. `blue_green_swap` returns None for
725    // restart-strategy installs, falling through to the standard flow below.
726    if let Some(plan) = blue_green_swap(service_name).await? {
727        return Ok(plan);
728    }
729
730    let diff = diff_service(service_name).await?;
731
732    if !force {
733        let drifted = diff.drifted();
734        if !drifted.is_empty() {
735            return Err(Error::HandEditedFiles {
736                service: service_name.to_string(),
737                paths: drifted.iter().map(|e| e.path.clone()).collect(),
738            });
739        }
740    }
741
742    let Replanned {
743        result, planned, ..
744    } = replan(service_name).await?;
745    let manifest_file = manifest::manifest_path(service_name)?;
746    let env_file = service_home(service_name)?.join(".env");
747
748    // Hard-fail if `.env` is missing. Append-only env handling can't
749    // reconstruct generated secrets (mysql_root_password, jwt_key, etc.)
750    // and would silently produce a half-written file that fails on
751    // restart. Surface the real problem instead.
752    if !env_file.exists() {
753        return Err(Error::Template(format!(
754            "{service_name}: `.env` is missing at {} — upgrade can't reconstruct generated secrets. \
755             Restore the file from a backup or reinstall the service.",
756            env_file.display()
757        )));
758    }
759
760    // Decide the backup directory once per upgrade run. Used whenever any
761    // file would be overwritten *or* the existing service.manifest exists (the
762    // lock is always backed up so `ryra revert` can reconstruct the
763    // pre-upgrade state). Empty when neither holds — keeps
764    // `~/.local/state/ryra/` from accumulating no-op dirs.
765    let backup_dir = backup_directory(service_name)?;
766    let needs_backup: BTreeSet<PathBuf> = diff
767        .entries
768        .iter()
769        .filter(|e| {
770            matches!(
771                e.kind,
772                DiffKind::Modified | DiffKind::Drift | DiffKind::Removed
773            )
774        })
775        .map(|e| e.path.clone())
776        .collect();
777    let manifest_will_be_backed_up = manifest_file.exists();
778    let backup_used = !needs_backup.is_empty() || manifest_will_be_backed_up;
779
780    // Filter the planned step list down to what an upgrade should actually do.
781    // - WriteFile for `.env` is dropped (preserve secrets).
782    // - PullImage stays (idempotent if cached, fetches new tag if registry bumped).
783    // - StartService is replaced with RestartService at the very end.
784    // - CreateDir / Symlink stay (idempotent and may be needed for new files).
785    // - DaemonReload stays.
786    // - CopyFile stays (vendored binaries; rare to upgrade but handled the same).
787    // - TailscaleSetup / TailscaleEnable were already gated out by PlanMode::Upgrade.
788    let mut steps: Vec<Step> = Vec::new();
789    if backup_used {
790        steps.push(Step::CreateDir(backup_dir.clone()));
791    }
792    let unchanged: BTreeSet<PathBuf> = diff
793        .entries
794        .iter()
795        .filter(|e| matches!(e.kind, DiffKind::Unchanged))
796        .map(|e| e.path.clone())
797        .collect();
798
799    let env_filename = std::ffi::OsStr::new(".env");
800    for step in result.steps {
801        match step {
802            // .env stays untouched on upgrade — generated secrets in the
803            // running service must not be regenerated.
804            Step::WriteFile(GeneratedFile { ref path, .. })
805                if path.file_name() == Some(env_filename) =>
806            {
807                continue;
808            }
809            // Identical content already on disk — skip the write entirely
810            // so the file's mtime stays put and `sha256sum -c` stays clean
811            // for unchanged entries.
812            Step::WriteFile(GeneratedFile { ref path, .. }) if unchanged.contains(path) => {
813                // The manifest is special: even if "unchanged" by content, we
814                // re-emit it because path-level adds/removes mean its content
815                // has changed and we need the new hashes recorded.
816                if path == &manifest_file {
817                    steps.push(step);
818                }
819                continue;
820            }
821            Step::WriteFile(ref file) => {
822                // Always back up the existing service.manifest too, even though
823                // it's filtered out of the diff. `ryra revert` reads the
824                // backed-up lock to know which files were Added during the
825                // upgrade (current lock − pre-upgrade lock) so it can delete
826                // them on revert. Without this, revert would leave
827                // upgrade-added files orphaned.
828                let should_backup = (needs_backup.contains(&file.path)
829                    || file.path == manifest_file)
830                    && file.path.exists();
831                if should_backup {
832                    let rel = backup_relpath(&file.path);
833                    let dst = backup_dir.join(rel);
834                    if let Some(parent) = dst.parent() {
835                        steps.push(Step::CreateDir(parent.to_path_buf()));
836                    }
837                    steps.push(Step::CopyFile {
838                        src: file.path.clone(),
839                        dst,
840                    });
841                }
842                steps.push(step);
843            }
844            // The replanned step list always ends with StartService; we
845            // strip it and append a RestartService at the very end so the
846            // unit picks up the new quadlet.
847            Step::StartService { .. } => continue,
848            other => steps.push(other),
849        }
850    }
851
852    // Removed files: back them up then delete.
853    for entry in &diff.entries {
854        if !matches!(entry.kind, DiffKind::Removed) {
855            continue;
856        }
857        if entry.path.exists() {
858            let rel = backup_relpath(&entry.path);
859            let dst = backup_dir.join(rel);
860            if let Some(parent) = dst.parent() {
861                steps.push(Step::CreateDir(parent.to_path_buf()));
862            }
863            steps.push(Step::CopyFile {
864                src: entry.path.clone(),
865                dst,
866            });
867        }
868        steps.push(Step::RemoveFile(entry.path.clone()));
869    }
870
871    // Env additions: append registry-required static env vars that the
872    // user's .env doesn't have. Append-only — we never rewrite the
873    // existing .env (that would clobber rotated secrets and any manual
874    // edits) and we never remove keys (the user might have added their
875    // own that the registry happens not to ship). The .env is
876    // intentionally NOT backed up: it only ever gains lines and the
877    // pre-existing content survives unchanged.
878    if !diff.env_additions.is_empty() {
879        let mut content = match std::fs::read_to_string(&env_file) {
880            Ok(c) => c,
881            // Service installed but .env missing? Treat the add as a
882            // fresh write — odd state, but the right one to recover to.
883            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
884            Err(source) => {
885                return Err(Error::FileRead {
886                    path: env_file.clone(),
887                    source,
888                });
889            }
890        };
891        if !content.is_empty() && !content.ends_with('\n') {
892            content.push('\n');
893        }
894        for add in &diff.env_additions {
895            content.push_str(&format!("{}={}\n", add.key, add.value));
896        }
897        steps.push(Step::WriteFile(GeneratedFile {
898            path: env_file,
899            content,
900        }));
901    }
902
903    // Pick up the new quadlet by restarting. RestartService is enough to
904    // re-read the env file, re-run ExecStartPre/Post, and pull in any new
905    // ExecStartPost script (the seafile case).
906    steps.push(Step::RestartService {
907        unit: service_name.to_string(),
908    });
909
910    // Native services rebuild from source on upgrade (the `Build` step) and
911    // restart. A source change leaves the rendered config clean, so force the
912    // apply; otherwise the CLI would short-circuit on the clean diff and never
913    // rebuild. The plan already ends in RestartService.
914    let force_apply = matches!(
915        crate::metadata::load_metadata(service_name),
916        Ok(Some(m)) if m.runtime == crate::registry::service_def::Runtime::Native
917    );
918
919    Ok(UpgradeResult {
920        service: service_name.to_string(),
921        diff,
922        steps,
923        backup_dir: if backup_used { Some(backup_dir) } else { None },
924        // The replanned env content is irrelevant for upgrade (we don't
925        // write it), but expose the template-render context bag in case
926        // future callers need it. Keep it empty for now to avoid
927        // confusing consumers.
928        planned_files: planned,
929        force_apply,
930    })
931}
932
/// The plan produced by `upgrade_service` (or by the blue/green swap path it
/// delegates to): what changed, what to run, and where backups will land.
pub struct UpgradeResult {
    /// Name of the service this plan was built for.
    pub service: String,
    /// Per-file comparison of the installed service against the current render.
    pub diff: DiffResult,
    /// Ordered steps to execute to apply the upgrade.
    pub steps: Vec<Step>,
    /// Directory that displaced files are copied into before being replaced.
    /// `None` when nothing would be overwritten or removed *and* no
    /// `service.manifest` exists to back up; blue/green swap plans also set
    /// `None`.
    pub backup_dir: Option<PathBuf>,
    /// Planned file map returned by the replan, keyed by destination path
    /// (presumably path → rendered content — confirm against `replan`).
    pub planned_files: BTreeMap<PathBuf, String>,
    /// Apply even when the config diff is clean. True for native services: a
    /// source rebuild isn't visible in the rendered config, so the plan must
    /// still run (the `SyncBinary` step then no-ops if the binary is unchanged).
    /// Also true for blue/green swaps, where the new image/build sits behind
    /// the same quadlet and so is likewise invisible as config drift.
    pub force_apply: bool,
}
945
/// One available backup snapshot for a service: the per-service directory
/// inside a timestamped backup run.
#[derive(Debug, Clone)]
pub struct BackupSnapshot {
    /// Filesystem path: `~/.local/state/ryra/backups/<timestamp>/<service>/`.
    pub path: PathBuf,
    /// `YYYY-MM-DDTHH-MM-SSZ` timestamp from the parent dir name. Sorting
    /// these strings lexically orders snapshots chronologically.
    pub timestamp: String,
}
954
/// The plan produced by `revert_service`: which snapshot is being restored
/// and what will change on disk.
pub struct RevertResult {
    /// Name of the service being reverted.
    pub service: String,
    /// The backup snapshot the plan restores from.
    pub snapshot: BackupSnapshot,
    /// Ordered steps: copy each backed-up file back into place, remove
    /// upgrade-added files, then daemon-reload and restart the unit.
    pub steps: Vec<Step>,
    /// Files to be copied from backup back to their original locations.
    pub files_to_restore: Vec<PathBuf>,
    /// Files added by the upgrade that didn't exist before — will be
    /// removed by revert. Empty when the snapshot pre-dates the manifest
    /// feature (we can't reconstruct what was added without it).
    pub files_to_delete: Vec<PathBuf>,
}
966
/// How many backup snapshots `ryra upgrade` retains per service before
/// auto-pruning. Each snapshot is small (~tens of KB — config files +
/// the manifest) so the cap is more about mental clutter than disk; 5
/// is enough to revert a few iterations back without filling the
/// `~/.local/state/ryra/backups/` tree with dead snapshots from years
/// of upgrades.
pub const DEFAULT_BACKUP_KEEP: usize = 5;
976
977/// Drop snapshots older than the most recent `keep` for this service.
978/// Returns the paths that were removed (newest-first within the
979/// removed set; the kept set keeps the same order). The shared
980/// timestamp dir is also removed when this was the last service-
981/// scoped subdir under it (multi-service upgrade runs share a
982/// timestamp dir; we don't want to nuke other services' state).
983pub fn prune_backups(service_name: &str, keep: usize) -> Result<Vec<PathBuf>> {
984    let backups_root = state_dir()?.join("backups");
985    prune_backups_in(&backups_root, service_name, keep)
986}
987
988/// Pure inner that operates on an explicit `<state>/backups/` root.
989/// Split out so tests can drive it against a tmp tree without touching
990/// the real XDG state dir.
991fn prune_backups_in(
992    backups_root: &std::path::Path,
993    service_name: &str,
994    keep: usize,
995) -> Result<Vec<PathBuf>> {
996    let snapshots = list_backups_in(backups_root, service_name)?;
997    if snapshots.len() <= keep {
998        return Ok(Vec::new());
999    }
1000    let mut removed: Vec<PathBuf> = Vec::new();
1001    for snap in snapshots.into_iter().skip(keep) {
1002        if let Err(e) = std::fs::remove_dir_all(&snap.path) {
1003            eprintln!(
1004                "warning: failed to prune backup {}: {e}",
1005                snap.path.display()
1006            );
1007            continue;
1008        }
1009        removed.push(snap.path.clone());
1010        if let Some(parent) = snap.path.parent()
1011            && let Ok(mut entries) = std::fs::read_dir(parent)
1012            && entries.next().is_none()
1013        {
1014            let _ = std::fs::remove_dir(parent);
1015        }
1016    }
1017    Ok(removed)
1018}
1019
1020pub fn list_backups(service_name: &str) -> Result<Vec<BackupSnapshot>> {
1021    let backups_root = state_dir()?.join("backups");
1022    list_backups_in(&backups_root, service_name)
1023}
1024
1025fn list_backups_in(
1026    backups_root: &std::path::Path,
1027    service_name: &str,
1028) -> Result<Vec<BackupSnapshot>> {
1029    if !backups_root.is_dir() {
1030        return Ok(Vec::new());
1031    }
1032    let mut snapshots: Vec<BackupSnapshot> = Vec::new();
1033    let entries = std::fs::read_dir(backups_root).map_err(|source| Error::FileRead {
1034        path: backups_root.to_path_buf(),
1035        source,
1036    })?;
1037    for entry in entries.flatten() {
1038        let stamp_dir = entry.path();
1039        if !stamp_dir.is_dir() {
1040            continue;
1041        }
1042        let svc_dir = stamp_dir.join(service_name);
1043        if !svc_dir.is_dir() {
1044            continue;
1045        }
1046        let Some(stamp) = stamp_dir.file_name().and_then(|n| n.to_str()) else {
1047            continue;
1048        };
1049        snapshots.push(BackupSnapshot {
1050            path: svc_dir,
1051            timestamp: stamp.to_string(),
1052        });
1053    }
1054    // Newest first: timestamp is `YYYY-MM-DDTHH-MM-SSZ`, lexical-descending == reverse-chronological.
1055    snapshots.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
1056    Ok(snapshots)
1057}
1058
1059/// Plan a revert for an installed service.
1060///
1061/// `at` selects a specific backup timestamp; `None` picks the most recent.
1062/// The returned plan: restore every file from the backup tree to its
1063/// original location, delete files added by the upgrade, daemon-reload,
1064/// restart the unit.
1065pub fn revert_service(service_name: &str, at: Option<&str>) -> Result<RevertResult> {
1066    if !is_service_installed(service_name) {
1067        return Err(Error::ServiceNotInstalled(service_name.to_string()));
1068    }
1069    let snapshot = pick_snapshot(service_name, at)?;
1070
1071    // Files to restore: walk the backup tree and reconstruct the original
1072    // absolute path for each one. The backup mirrors absolute paths under
1073    // `<snapshot>/<original-path-without-leading-slash>`, so the inverse is
1074    // simply prefixing `/` to each path-relative-to-snapshot.
1075    let mut files_to_restore: Vec<PathBuf> = Vec::new();
1076    walk_backup_files(&snapshot.path, &mut files_to_restore)?;
1077
1078    // Files to delete: anything in the *current* lock that isn't in the
1079    // *backed-up* lock was added by the upgrade and should disappear on
1080    // revert. If either lock is absent, leave the delete set empty —
1081    // safest no-op for snapshots that pre-date this feature.
1082    let backup_manifest_file =
1083        absolute_to_backup_path(&snapshot.path, &manifest::manifest_path(service_name)?);
1084    let (backup_manifest_entries, _) = read_manifest_at(&backup_manifest_file)?;
1085    let (current_manifest_entries, _) = manifest::load(service_name)?.unwrap_or_default();
1086
1087    let backup_manifest_set: BTreeSet<PathBuf> = backup_manifest_entries
1088        .iter()
1089        .map(|e| e.path.clone())
1090        .collect();
1091    let mut files_to_delete: Vec<PathBuf> = if backup_manifest_entries.is_empty() {
1092        // Pre-feature snapshot: no way to know what was added.
1093        Vec::new()
1094    } else {
1095        current_manifest_entries
1096            .iter()
1097            .map(|e| e.path.clone())
1098            .filter(|p| !backup_manifest_set.contains(p))
1099            .collect()
1100    };
1101    files_to_delete.sort();
1102
1103    // Build the step list.
1104    let mut steps: Vec<Step> = Vec::new();
1105    // Restore: backup → original. CopyFile creates parents itself, so no
1106    // CreateDir needed.
1107    for backup_path in &files_to_restore {
1108        let original = backup_to_absolute_path(&snapshot.path, backup_path);
1109        steps.push(Step::CopyFile {
1110            src: backup_path.clone(),
1111            dst: original,
1112        });
1113    }
1114    // Delete: each Added file, plus any orphan symlink in the quadlet dir
1115    // that pointed at it (only the actual file is in the lock; the
1116    // companion symlink in `~/.config/containers/systemd/` is not).
1117    let qd = crate::quadlet_dir()?;
1118    for path in &files_to_delete {
1119        if path.exists() {
1120            steps.push(Step::RemoveFile(path.clone()));
1121        }
1122        if let Some(name) = path.file_name() {
1123            let symlink = qd.join(name);
1124            if std::fs::symlink_metadata(&symlink).is_ok() {
1125                steps.push(Step::RemoveFile(symlink));
1126            }
1127        }
1128    }
1129    steps.push(Step::DaemonReload);
1130    steps.push(Step::RestartService {
1131        unit: service_name.to_string(),
1132    });
1133
1134    let files_to_restore_orig: Vec<PathBuf> = files_to_restore
1135        .iter()
1136        .map(|p| backup_to_absolute_path(&snapshot.path, p))
1137        .collect();
1138    Ok(RevertResult {
1139        service: service_name.to_string(),
1140        snapshot,
1141        steps,
1142        files_to_restore: files_to_restore_orig,
1143        files_to_delete,
1144    })
1145}
1146
1147/// Resolve the snapshot to revert to. `at` is a timestamp string (e.g.
1148/// `2026-05-05T13-33-50Z`); when absent, the most recent snapshot wins.
1149fn pick_snapshot(service_name: &str, at: Option<&str>) -> Result<BackupSnapshot> {
1150    let snapshots = list_backups(service_name)?;
1151    if snapshots.is_empty() {
1152        return Err(Error::NoBackup(service_name.to_string()));
1153    }
1154    match at {
1155        None => Ok(snapshots
1156            .into_iter()
1157            .next()
1158            .expect("non-empty checked above")),
1159        Some(stamp) => snapshots
1160            .into_iter()
1161            .find(|s| s.timestamp == stamp)
1162            .ok_or_else(|| Error::BackupNotFound {
1163                service: service_name.to_string(),
1164                stamp: stamp.to_string(),
1165            }),
1166    }
1167}
1168
1169/// Recursively collect every regular file under `root` into `out`. Symlinks
1170/// are followed; we don't expect any in a backup tree (we always copied
1171/// targets, never link entries).
1172fn walk_backup_files(root: &std::path::Path, out: &mut Vec<PathBuf>) -> Result<()> {
1173    let entries = std::fs::read_dir(root).map_err(|source| Error::FileRead {
1174        path: root.to_path_buf(),
1175        source,
1176    })?;
1177    for entry in entries.flatten() {
1178        let path = entry.path();
1179        let meta = match entry.metadata() {
1180            Ok(m) => m,
1181            Err(_) => continue,
1182        };
1183        if meta.is_dir() {
1184            walk_backup_files(&path, out)?;
1185        } else if meta.is_file() {
1186            out.push(path);
1187        }
1188    }
1189    Ok(())
1190}
1191
/// Inverse of `backup_relpath`: a backup path `<root>/home/user/foo`
/// maps back to `/home/user/foo`. A `backup` that does not live under `root`
/// is joined onto `/` unchanged.
fn backup_to_absolute_path(root: &std::path::Path, backup: &std::path::Path) -> PathBuf {
    let relative = match backup.strip_prefix(root) {
        Ok(inside) => inside,
        Err(_) => backup,
    };
    std::path::Path::new("/").join(relative)
}
1198
/// Forward mapping into a snapshot: `<root>` + `/home/user/foo` →
/// `<root>/home/user/foo`. All leading slashes are dropped so the join stays
/// inside `root`.
fn absolute_to_backup_path(root: &std::path::Path, abs: &std::path::Path) -> PathBuf {
    root.join(abs.to_string_lossy().trim_start_matches('/'))
}
1205
1206/// Read a manifest at the given path. Missing-file is treated as an empty
1207/// list — pre-feature backups simply have no lock to reference.
1208fn read_manifest_at(
1209    path: &std::path::Path,
1210) -> Result<(Vec<manifest::ManifestEntry>, Vec<manifest::EnvEntry>)> {
1211    if !path.exists() {
1212        return Ok((Vec::new(), Vec::new()));
1213    }
1214    let content = std::fs::read_to_string(path).map_err(|source| Error::FileRead {
1215        path: path.to_path_buf(),
1216        source,
1217    })?;
1218    manifest::parse(&content)
1219}
1220
1221/// `~/.local/state/ryra/backups/<timestamp>/<service>/`. Timestamp uses an
1222/// ISO-8601-ish form that sorts lexically (no colons — Windows-friendly,
1223/// not that it matters today, but the cost is zero).
1224fn backup_directory(service_name: &str) -> Result<PathBuf> {
1225    let state = state_dir()?;
1226    let now = std::time::SystemTime::now()
1227        .duration_since(std::time::UNIX_EPOCH)
1228        .map_err(|e| Error::Template(format!("system clock before UNIX epoch: {e}")))?
1229        .as_secs();
1230    let stamp = format_timestamp(now);
1231    Ok(state.join("backups").join(stamp).join(service_name))
1232}
1233
1234/// XDG state dir under `ryra/`. Created on demand by the CreateDir step.
1235fn state_dir() -> Result<PathBuf> {
1236    let base = dirs::state_dir()
1237        .or_else(|| dirs::home_dir().map(|h| h.join(".local").join("state")))
1238        .ok_or(Error::HomeDirNotFound)?;
1239    Ok(base.join("ryra"))
1240}
1241
1242/// Format a UNIX epoch into `YYYY-MM-DDTHH-MM-SSZ`. Avoids the chrono
1243/// dependency — we just need stable lexical sort.
1244fn format_timestamp(secs: u64) -> String {
1245    // Days from 1970-01-01.
1246    const SECS_PER_DAY: u64 = 86_400;
1247    let days = secs / SECS_PER_DAY;
1248    let time_of_day = secs % SECS_PER_DAY;
1249    let h = time_of_day / 3600;
1250    let m = (time_of_day % 3600) / 60;
1251    let s = time_of_day % 60;
1252    let (y, mo, d) = ymd_from_days(days);
1253    format!("{y:04}-{mo:02}-{d:02}T{h:02}-{m:02}-{s:02}Z")
1254}
1255
/// Turn a count of days since 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)`, with month in `1..=12` and day in `1..=31`.
///
/// This is the civil-from-days algorithm from Howard Hinnant's date library
/// (MIT), inlined so backup naming doesn't need a chrono/time dependency.
fn ymd_from_days(days: u64) -> (i64, u32, u32) {
    // A 400-year Gregorian cycle ("era") has a fixed number of days.
    const DAYS_PER_ERA: i64 = 146_097;
    // Shift the origin from 1970-01-01 to 0000-03-01, so years start in March
    // and the leap day falls at the very end of the year.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days as i64 + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]
    let shifted_month = (5 * day_of_year + 2) / 153; // [0, 11], 0 = March

    let day = (day_of_year - (153 * shifted_month + 2) / 5 + 1) as u32;
    let month = match shifted_month {
        mp if mp < 10 => (mp + 3) as u32,
        mp => (mp - 9) as u32,
    };
    // January and February belong to the following civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day)
}
1272
/// Turn an absolute path into its location relative to a backup dir by
/// dropping every leading `/`, so joining it onto the backup dir cannot
/// escape it. The rest of the path is kept verbatim, which lets the user
/// `diff -r` the backup against the original location.
fn backup_relpath(path: &std::path::Path) -> PathBuf {
    let text = path.to_string_lossy();
    PathBuf::from(text.trim_start_matches('/'))
}
1279
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh path under the system temp dir, made unique by the process id
    /// and a nanosecond timestamp. Nothing is created on disk.
    fn unique_tmp(prefix: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "{prefix}-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ))
    }

    #[test]
    fn timestamp_round_numbers() {
        // Known values end-to-end through the calendar conversion.
        let s = format_timestamp(0);
        assert_eq!(s, "1970-01-01T00-00-00Z");
        let s = format_timestamp(86_400);
        assert_eq!(s, "1970-01-02T00-00-00Z");
        let s = format_timestamp(31_536_000); // not a leap year (1970)
        assert_eq!(s, "1971-01-01T00-00-00Z");
        // 2000-02-29 is day 11016 from the epoch: a leap day in a
        // divisible-by-400 year, plus a non-zero time of day.
        let s = format_timestamp(11_016 * 86_400 + 12 * 3600 + 34 * 60 + 56);
        assert_eq!(s, "2000-02-29T12-34-56Z");
        // 2026-01-01 is day 20454 from the epoch (56 years incl. 14 leap
        // days), i.e. 20454 * 86400 = 1767225600 seconds.
        let s = format_timestamp(1_767_225_600);
        assert_eq!(s, "2026-01-01T00-00-00Z");
    }

    #[test]
    fn backup_relpath_strips_leading_slash() {
        let p = backup_relpath(std::path::Path::new("/home/user/foo/bar"));
        assert_eq!(p, PathBuf::from("home/user/foo/bar"));
    }

    /// Stand up a tmp backups tree with the given timestamps and a
    /// service subdir under each, then run `prune_backups_in` against it.
    /// Returns (kept timestamps newest-first, removed paths). Hermetic:
    /// no env vars touched, no shared global state.
    fn setup_and_prune(stamps: &[&str], keep: usize) -> (Vec<String>, Vec<PathBuf>) {
        let tmp = unique_tmp("ryra-prune-test");
        let backups_root = tmp.join("backups");
        for s in stamps {
            std::fs::create_dir_all(backups_root.join(s).join("svc")).unwrap();
        }
        let removed = prune_backups_in(&backups_root, "svc", keep).unwrap();
        let mut kept: Vec<String> = std::fs::read_dir(&backups_root)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter_map(|e| e.file_name().into_string().ok())
            .collect();
        kept.sort();
        kept.reverse();
        let _ = std::fs::remove_dir_all(&tmp);
        (kept, removed)
    }

    #[test]
    fn prune_keeps_newest_n() {
        // Five timestamps, keep=3 — the two oldest (lex-smallest) should go.
        let (kept, removed) = setup_and_prune(
            &[
                "2026-01-01T00-00-00Z",
                "2026-02-01T00-00-00Z",
                "2026-03-01T00-00-00Z",
                "2026-04-01T00-00-00Z",
                "2026-05-01T00-00-00Z",
            ],
            3,
        );
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[0], "2026-05-01T00-00-00Z");
        assert_eq!(kept[1], "2026-04-01T00-00-00Z");
        assert_eq!(kept[2], "2026-03-01T00-00-00Z");
        // Removed paths are the service subdirs, newest of the pruned first.
        assert_eq!(removed.len(), 2);
        assert!(removed[0].ends_with("2026-02-01T00-00-00Z/svc"));
        assert!(removed[1].ends_with("2026-01-01T00-00-00Z/svc"));
    }

    #[test]
    fn prune_no_op_when_under_keep() {
        let (kept, removed) = setup_and_prune(&["2026-01-01T00-00-00Z", "2026-02-01T00-00-00Z"], 5);
        assert_eq!(kept.len(), 2);
        assert!(removed.is_empty());
    }

    #[test]
    fn source_staleness_ignores_build_and_dotdirs() {
        use std::time::Duration;

        let tmp = unique_tmp("ryra-stale");
        std::fs::create_dir_all(tmp.join("src")).unwrap();
        std::fs::create_dir_all(tmp.join("target")).unwrap();
        std::fs::create_dir_all(tmp.join(".git")).unwrap();
        std::fs::write(tmp.join("src/main.rs"), "fn main(){}").unwrap();
        std::fs::write(tmp.join("target/app"), "bin").unwrap();
        std::fs::write(tmp.join(".git/HEAD"), "ref").unwrap();

        // Baseline after everything we wrote: nothing is newer.
        assert!(!any_file_newer_than(
            &tmp,
            SystemTime::now() + Duration::from_secs(3600)
        ));
        // Baseline before everything: the source file trips staleness.
        assert!(any_file_newer_than(
            &tmp,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        // When only ignored dirs hold newer files, staleness stays false.
        let ignored_only = unique_tmp("ryra-stale-ign");
        std::fs::create_dir_all(ignored_only.join("node_modules")).unwrap();
        std::fs::write(ignored_only.join("node_modules/x.js"), "x").unwrap();
        assert!(!any_file_newer_than(
            &ignored_only,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        let _ = std::fs::remove_dir_all(&tmp);
        let _ = std::fs::remove_dir_all(&ignored_only);
    }

    #[test]
    fn should_skip_path_excludes_env_and_manifest() {
        let lock = PathBuf::from("/svc/service.manifest");
        assert!(should_skip_path(&PathBuf::from("/svc/.env"), &lock));
        assert!(should_skip_path(&lock, &lock));
        assert!(!should_skip_path(
            &PathBuf::from("/svc/configs/x.sh"),
            &lock
        ));
    }
}