Skip to main content

ryra_core/
upgrade.rs

1//! Diff and upgrade flows for already-installed services.
2//!
3//! "Upgrade" means: re-render an installed service's quadlet + configs
4//! against the current registry, replace any files whose content changed,
5//! and restart the unit. The render path is shared with `add_service`
6//! (driven via [`PlanMode::Upgrade`]); the side-effect steps differ.
7//!
8//! Drift detection is grounded in `service.manifest` — the per-install render
9//! manifest written by `ryra add`. Each tracked file is in one of these
10//! states:
11//!
12//! - **Unchanged**: on-disk content matches what the registry would render.
13//! - **Modified**: registry rendered output differs, but on-disk hash still
14//!   matches the manifest, so we know the file is ours and can be safely
15//!   overwritten.
16//! - **Drift**: on-disk hash matches *neither* the manifest nor the planned
17//!   content — i.e. the user hand-edited it. Refused without `--force`.
18//! - **Added**: file is in the planned set but not in the manifest (registry
19//!   added it).
20//! - **Removed**: file is in the manifest but not in the planned set (registry
21//!   stopped shipping it).
22//!
23//! `.env` is excluded throughout: it carries generated secrets that legitimately
24//! drift across restarts, and re-rendering it on upgrade would clobber rotated
25//! credentials. Its absence from the manifest is the source of truth for that.
26
27use std::collections::{BTreeMap, BTreeSet};
28use std::path::{Path, PathBuf};
29use std::time::SystemTime;
30
31use crate::error::{Error, Result};
32use crate::exposure::Exposure;
33use crate::generate::GeneratedFile;
34use crate::manifest;
35use crate::metadata::{Metadata, load_metadata};
36use crate::registry::resolve::ServiceRef;
37use crate::registry::service_def::{Color, DeployStrategy, Runtime};
38use crate::{
39    AddResult, PlanMode, REGISTRY_DEFAULT, Step, add_service, caddy, deploy, is_service_installed,
40    paths::metadata_path, resolve_registry_dir, service_home,
41};
42
43// --- Native source-staleness ("a rebuild would pick up new code") ----------
44//
// Config drift is detected by `diff_service` (below). But a `runtime =
46// "native"` service can change *without* its rendered config changing: you
47// edit the source and a `cargo build` / `bun install` / restart would ship it.
48// `service.toml` is unchanged, so the diff is clean and the service still looks
49// up to date. This module fills that gap with a language-agnostic signal: did
50// any source file change since the running process last started?
51//
52// The signal is the running process's own start time (no state is written
53// anywhere): we ask systemd for the unit's MainPID and read its start time from
54// `/proc/<pid>/stat`, then flag staleness when any source file is newer. That
55// works for *anything* systemd can run (bash, Python, Node, Rust, C++, ...) --
56// we never inspect a toolchain or look for a "binary". It's a *hint*, not a
57// gate: the remedy is always an idempotent `ryra upgrade`, and the comparison
58// is read-only, so a false positive just costs a needless rebuild.
59
/// Directory names that are never descended into when scanning for changed
/// source files: the usual build-output and dependency directories across
/// ecosystems. Dot-directories (`.git`, editor/tool state) are excluded
/// separately by the walker itself. The list is best-effort -- staleness is
/// only a hint, so a missing entry at worst produces a spurious "upgrade
/// available" that an idempotent `ryra upgrade` clears.
const IGNORED_DIRS: &[&str] = &[
    "__pycache__",
    "build",
    "dist",
    "node_modules",
    "out",
    "target",
    "vendor",
    "venv",
];
75
76/// True if any regular file under `dir` (skipping [`IGNORED_DIRS`] and dotdirs)
77/// was modified after `since`. Stops at the first newer file; symlinks are not
78/// followed. Unreadable dirs/files are skipped (a hint, not a hard check).
79fn any_file_newer_than(dir: &Path, since: SystemTime) -> bool {
80    let Ok(entries) = std::fs::read_dir(dir) else {
81        return false;
82    };
83    for entry in entries.flatten() {
84        let Ok(file_type) = entry.file_type() else {
85            continue;
86        };
87        let path = entry.path();
88        if file_type.is_dir() {
89            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
90            if name.starts_with('.') || IGNORED_DIRS.contains(&name) {
91                continue;
92            }
93            if any_file_newer_than(&path, since) {
94                return true;
95            }
96        } else if file_type.is_file()
97            && let Ok(mtime) = entry.metadata().and_then(|m| m.modified())
98            && mtime > since
99        {
100            return true;
101        }
102    }
103    false
104}
105
106/// Rebuild the `ServiceRef` we stashed at install time (mirrors `replan`), so
107/// the source dir can be resolved the same way an upgrade would.
108fn service_ref_for(metadata: &Metadata, service_name: &str) -> ServiceRef {
109    if metadata.registry.is_empty() || metadata.registry == REGISTRY_DEFAULT {
110        ServiceRef::Default(service_name.to_string())
111    } else if crate::registry::resolve::is_path_like(&metadata.registry) {
112        ServiceRef::Path {
113            dir: PathBuf::from(&metadata.registry),
114            name: service_name.to_string(),
115        }
116    } else {
117        ServiceRef::Custom {
118            registry: metadata.registry.clone(),
119            service: service_name.to_string(),
120        }
121    }
122}
123
/// Ask `systemctl --user show <service_name>.service -p MainPID --value` for
/// the unit's main PID.
///
/// Returns `None` when the command can't be run, exits unsuccessfully, prints
/// something that isn't a `u32`, or reports PID `0` (no main process, i.e. the
/// service is stopped).
fn unit_main_pid(service_name: &str) -> Option<u32> {
    let unit = format!("{service_name}.service");
    let output = std::process::Command::new("systemctl")
        .arg("--user")
        .arg("show")
        .arg(&unit)
        .args(["-p", "MainPID", "--value"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    String::from_utf8_lossy(&output.stdout)
        .trim()
        .parse::<u32>()
        .ok()
        .filter(|&pid| pid != 0)
}
144
/// Wall-clock start time of `pid`, from `/proc/<pid>/stat` field 22 (starttime,
/// in clock ticks since boot) plus `/proc/stat`'s `btime` (boot epoch, whole
/// seconds).
///
/// The tick count is converted with millisecond precision rather than being
/// truncated to whole seconds: truncating would place the start up to a second
/// early, so a file written just before the process launched would look
/// "newer than the running process".
///
/// Returns `None` if the process is gone, `/proc` can't be read or parsed, or
/// the resulting timestamp is not representable.
fn process_start_time(pid: u32) -> Option<SystemTime> {
    // USER_HZ: the kernel's /proc clock-tick rate. Fixed at 100 on every
    // mainstream Linux (the value is baked into the ABI, not the runtime CPU
    // tick), so hardcoding it avoids a libc/sysconf dependency.
    const USER_HZ: u64 = 100;

    let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?;
    // comm (field 2) is parenthesised and may itself contain spaces or `)`, so
    // the numeric fields resume only after the LAST `)`. field 3 (state) is the
    // first token there, making starttime (field 22) the 20th -> index 19.
    let after_comm = stat.rsplit_once(')')?.1;
    let starttime_ticks: u64 = after_comm.split_whitespace().nth(19)?.parse().ok()?;

    let proc_stat = std::fs::read_to_string("/proc/stat").ok()?;
    let btime: u64 = proc_stat
        .lines()
        .find_map(|l| l.strip_prefix("btime ")?.trim().parse().ok())?;

    // Checked arithmetic throughout: nonsensical input yields `None` instead
    // of an overflow panic, matching the documented contract.
    let since_boot = std::time::Duration::from_millis(starttime_ticks.checked_mul(1000)? / USER_HZ);
    std::time::UNIX_EPOCH
        .checked_add(std::time::Duration::from_secs(btime))?
        .checked_add(since_boot)
}
168
/// How a single tracked file compares across three sources: the freshly
/// planned render, the content on disk, and the hash recorded in the manifest.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffKind {
    /// The file on disk already equals the planned render; no action needed.
    Unchanged,
    /// The planned render differs from disk, and disk still matches the
    /// manifest hash -- the file is ryra-owned and safe to overwrite. Also
    /// used when a manifest-tracked file is missing from disk.
    Modified,
    /// Disk matches neither the manifest nor the planned render, i.e. the
    /// user hand-edited the file. Upgrade refuses without `--force`.
    /// A file with no manifest entry to compare against (installed before the
    /// manifest feature) is conservatively classified here too, until the
    /// user confirms with `--force`.
    Drift,
    /// Present in the planned render, absent from the manifest: the registry
    /// started shipping it.
    Added,
    /// Present in the manifest, absent from the planned render: the registry
    /// stopped shipping it. Upgrade deletes it.
    Removed,
}
190
191#[derive(Debug, Clone)]
192pub struct DiffEntry {
193    pub path: PathBuf,
194    pub kind: DiffKind,
195}
196
197/// One env var the registry expects in `.env` that the user's `.env`
198/// doesn't have. By design env tracking is *append-only* — we never flag
199/// a present-but-different value as drift, and we never propose
200/// removing a key. Users may have manually edited values or added their
201/// own keys; clobbering those would be the larger harm.
202///
203/// `kind` and `prompt` come straight from the registry's `EnvVar`
204/// definition, so the CLI can route Prompted / Required additions
205/// through the same interactive prompt that `ryra add` uses, while
206/// silently appending Default ones.
207#[derive(Debug, Clone)]
208pub struct EnvAddition {
209    pub key: String,
210    pub value: String,
211    pub kind: crate::registry::service_def::EnvKind,
212    pub prompt: Option<String>,
213}
214
215/// Result of comparing the registry's render to what's on disk.
216#[derive(Debug, Clone)]
217pub struct DiffResult {
218    pub service: String,
219    pub entries: Vec<DiffEntry>,
220    /// Static env vars the registry expects but the user's `.env` is
221    /// missing. Empty when the `.env` already covers everything tracked.
222    pub env_additions: Vec<EnvAddition>,
223    /// `runtime = "native"` only: the source changed since the running process
224    /// started, so a rebuild/restart would ship new code even though the
225    /// rendered config is unchanged. Always `false` for podman services and
226    /// stopped natives. Orthogonal to [`Self::is_clean`] (which is config-only)
227    /// -- a service is upgradable when the diff is dirty *or* this is set.
228    pub source_stale: bool,
229}
230
231impl DiffResult {
232    /// True when nothing about the install would change — neither files
233    /// nor env vars.
234    pub fn is_clean(&self) -> bool {
235        self.entries
236            .iter()
237            .all(|e| matches!(e.kind, DiffKind::Unchanged))
238            && self.env_additions.is_empty()
239    }
240
241    /// Files the user hand-edited. Upgrade must refuse to overwrite these
242    /// without `--force`.
243    pub fn drifted(&self) -> Vec<&DiffEntry> {
244        self.entries
245            .iter()
246            .filter(|e| matches!(e.kind, DiffKind::Drift))
247            .collect()
248    }
249}
250
251/// Reconstruct the planning inputs we stashed at install time and feed them
252/// back through `add_service` in upgrade mode. Returns the planned step
253/// list and the planned-file content map (path → content). The richer
254/// per-env metadata lives on `AddResult.tracked_envs`.
255async fn replan(service_name: &str) -> Result<Replanned> {
256    if !is_service_installed(service_name) {
257        return Err(Error::ServiceNotInstalled(service_name.to_string()));
258    }
259    let metadata = load_metadata(service_name)?
260        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
261
262    let exposure = match metadata.url.as_deref() {
263        Some(url) => Exposure::from_url(url),
264        None => Exposure::Loopback,
265    };
266
267    let service_ref = service_ref_for(&metadata, service_name);
268    let repo_dir = resolve_registry_dir(&service_ref).await?;
269    // The service's own dir under the resolved registry (where a native build/
270    // run happens). Surfaced so callers — the source-staleness check below —
271    // reuse this single resolution instead of resolving again.
272    let source_dir = crate::registry::find_service(&repo_dir, service_name)?.service_dir;
273    let native = matches!(metadata.runtime, Runtime::Native);
274
275    // Recover existing host ports from the install's `.env` so the
276    // re-render lands on the same numbers. Without this every dynamically
277    // allocated port shifts because `port_in_use` reports them taken.
278    let port_overrides = read_existing_ports(service_name)?;
279
280    // Trivial port-in-use closure: the upgrade caller pins every port via
281    // `port_overrides`, so the closure is never consulted. Returning false
282    // unconditionally is safe — no allocation runs.
283    let port_in_use = |_p: u16| false;
284
285    let enabled_groups: BTreeSet<String> = metadata.enabled_groups.iter().cloned().collect();
286    let selected_choices = metadata.selected_choices.clone();
287    let no_env_overrides = BTreeMap::new();
288    let result = add_service(crate::AddServiceParams {
289        service_name,
290        exposure: &exposure,
291        auth: match metadata.auth.clone() {
292            Some(kind) => crate::AuthChoice::Native(kind),
293            None => crate::AuthChoice::None,
294        },
295        // SMTP and backup enablement are per-install state — persisted by
296        // `ryra add` and `ryra configure`. Upgrade preserves whatever the
297        // user picked.
298        enable_smtp: metadata.smtp_enabled,
299        enable_backup: metadata.backup_enabled,
300        env_overrides: &no_env_overrides,
301        enabled_groups: &enabled_groups,
302        selected_choices: &selected_choices,
303        registry_name: &metadata.registry,
304        repo_dir: &repo_dir,
305        pre_built_ctx: None,
306        port_in_use: &port_in_use,
307        // ACME mode is only consumed when adding the reverse proxy itself;
308        // upgrade never needs to seed the TLS snippet.
309        acme_mode: None,
310        mode: PlanMode::Upgrade,
311        port_overrides: &port_overrides,
312    })?;
313
314    let mut planned: BTreeMap<PathBuf, String> = BTreeMap::new();
315    for step in &result.steps {
316        if let Step::WriteFile(file) = step {
317            planned.insert(file.path.clone(), file.content.clone());
318        }
319    }
320    Ok(Replanned {
321        result,
322        planned,
323        source_dir,
324        native,
325    })
326}
327
328/// Output of [`replan`]: the re-rendered plan plus the resolved source
329/// location, so callers don't resolve the registry a second time.
330struct Replanned {
331    result: AddResult,
332    planned: BTreeMap<PathBuf, String>,
333    /// The service's source dir (where a native build/run happens).
334    source_dir: PathBuf,
335    /// Whether this is a `runtime = "native"` install.
336    native: bool,
337}
338
339/// Parse the on-disk `.env` for a service into a key→value map. Lines
340/// without `=`, comments, and blanks are skipped. Returns an empty map if
341/// the file is absent — caller decides whether that's a soft error.
342fn read_existing_env_keys(service_name: &str) -> Result<BTreeMap<String, String>> {
343    let env_path = service_home(service_name)?.join(".env");
344    let mut out: BTreeMap<String, String> = BTreeMap::new();
345    let content = match std::fs::read_to_string(&env_path) {
346        Ok(c) => c,
347        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out),
348        Err(source) => {
349            return Err(Error::FileRead {
350                path: env_path,
351                source,
352            });
353        }
354    };
355    for line in content.lines() {
356        let line = line.trim();
357        if line.is_empty() || line.starts_with('#') {
358            continue;
359        }
360        if let Some((k, v)) = line.split_once('=') {
361            out.insert(k.trim().to_string(), v.to_string());
362        }
363    }
364    Ok(out)
365}
366
367/// Parse `SERVICE_PORT_<NAME>=<port>` lines out of an installed service's
368/// `.env`. Returns a name → port map (lowercased name, matching the
369/// `[[ports]]` definition in service.toml). Also used by the metrics
370/// bridge to resolve host-network scrape targets retroactively.
371pub(crate) fn read_existing_ports(service_name: &str) -> Result<BTreeMap<String, u16>> {
372    let env_path = service_home(service_name)?.join(".env");
373    let mut overrides = BTreeMap::new();
374    let content = match std::fs::read_to_string(&env_path) {
375        Ok(c) => c,
376        // No .env yet means a half-installed service; let the planner
377        // re-allocate. (`add_service` will then surface a richer error if
378        // the install is genuinely broken.)
379        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(overrides),
380        Err(source) => {
381            return Err(Error::FileRead {
382                path: env_path,
383                source,
384            });
385        }
386    };
387    for line in content.lines() {
388        let line = line.trim();
389        if line.is_empty() || line.starts_with('#') {
390            continue;
391        }
392        let Some((key, value)) = line.split_once('=') else {
393            continue;
394        };
395        let Some(name) = key.strip_prefix("SERVICE_PORT_") else {
396            continue;
397        };
398        if let Ok(port) = value.trim().parse::<u16>() {
399            overrides.insert(name.to_ascii_lowercase(), port);
400        }
401    }
402    Ok(overrides)
403}
404
/// Whether `path` must be left out of the diff entirely.
///
/// Two kinds of path qualify: any file named `.env` (it carries generated
/// secrets that rotate at runtime) and the manifest file itself (it is the
/// record, not a tracked file). Excluding both keeps them from showing up as
/// Added/Removed.
fn should_skip_path(path: &std::path::Path, manifest_file: &std::path::Path) -> bool {
    let is_env_file = path.file_name() == Some(std::ffi::OsStr::new(".env"));
    is_env_file || path == manifest_file
}
415
416/// Compute the diff between the registry's render and what's on disk for an
417/// installed service.
418pub async fn diff_service(service_name: &str) -> Result<DiffResult> {
419    let Replanned {
420        result,
421        planned,
422        source_dir,
423        native,
424    } = replan(service_name).await?;
425
426    // Native source-staleness rides along with the diff (same resolution, no
427    // second registry lookup): has any source file changed since the running
428    // process started? See the module note above on why this is the signal.
429    let source_stale = native
430        && unit_main_pid(service_name)
431            .and_then(process_start_time)
432            .is_some_and(|started| any_file_newer_than(&source_dir, started));
433
434    let manifest_file = manifest::manifest_path(service_name)?;
435    let (manifest_entries, _manifest_envs) = manifest::load(service_name)?.unwrap_or_default();
436    let manifest_by_path: BTreeMap<PathBuf, String> = manifest_entries
437        .into_iter()
438        .map(|e| (e.path, e.sha256))
439        .collect();
440
441    // Env additions: registry-expected static keys missing from the user's
442    // `.env`. Append-only — we ignore present-but-different values
443    // (could be a manual override) and never propose removals (could be
444    // a key the user added themselves that the registry happens not to
445    // ship). The registry-side list comes from the freshly-rendered
446    // `tracked_envs` (which carries kind + prompt for the CLI), not the
447    // on-disk manifest — that's the source of truth.
448    let existing_env = read_existing_env_keys(service_name)?;
449    let env_additions: Vec<EnvAddition> = result
450        .tracked_envs
451        .iter()
452        .filter(|p| !existing_env.contains_key(&p.key))
453        .map(|p| EnvAddition {
454            key: p.key.clone(),
455            value: p.value.clone(),
456            kind: p.kind.clone(),
457            prompt: p.prompt.clone(),
458        })
459        .collect();
460
461    let mut entries: Vec<DiffEntry> = Vec::new();
462    let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
463
464    // Walk planned files first — Added / Modified / Drift / Unchanged.
465    for (path, content) in &planned {
466        if should_skip_path(path, &manifest_file) {
467            continue;
468        }
469        seen.insert(path.clone());
470        let planned_hash = manifest::hash_bytes(content.as_bytes());
471        let on_disk_hash = if path.exists() {
472            Some(manifest::hash_file(path)?)
473        } else {
474            None
475        };
476        let manifest_hash = manifest_by_path.get(path);
477
478        let kind = match (on_disk_hash.as_deref(), manifest_hash.map(String::as_str)) {
479            // File doesn't exist on disk.
480            (None, Some(_)) | (None, None) => match manifest_hash {
481                Some(_) => DiffKind::Modified, // we wrote it, user deleted it; restore
482                None => DiffKind::Added,       // registry adds it, fresh write
483            },
484            // On-disk content already matches what the registry would render.
485            (Some(d), _) if d == planned_hash => DiffKind::Unchanged,
486            // No manifest entry → can't tell if the user touched it.
487            // Conservative: treat as drift so --force is required once.
488            (Some(_), None) => DiffKind::Drift,
489            // On-disk matches the manifest but not the planned render →
490            // ryra-owned, safe to overwrite.
491            (Some(d), Some(l)) if d == l => DiffKind::Modified,
492            // On-disk matches neither lock nor plan → user hand-edited.
493            (Some(_), Some(_)) => DiffKind::Drift,
494        };
495        entries.push(DiffEntry {
496            path: path.clone(),
497            kind,
498        });
499    }
500
501    // Walk manifest entries that the planner no longer emits — Removed.
502    for path in manifest_by_path.keys() {
503        if seen.contains(path) {
504            continue;
505        }
506        if should_skip_path(path, &manifest_file) {
507            continue;
508        }
509        entries.push(DiffEntry {
510            path: path.clone(),
511            kind: DiffKind::Removed,
512        });
513    }
514
515    entries.sort_by(|a, b| a.path.cmp(&b.path));
516    Ok(DiffResult {
517        service: service_name.to_string(),
518        entries,
519        env_additions,
520        source_stale,
521    })
522}
523
524/// Plan a zero-downtime color swap for a `deploy = "blue-green"` install.
525///
526/// Returns `None` when the service isn't blue/green, so [`upgrade_service`] can
527/// fall through to its normal restart-based flow. Otherwise the plan:
528///   1. re-renders both color quadlets/units + reloads systemd (so the idle
529///      slot picks up any new image tag or config), keeping `.env` untouched;
530///   2. starts the *idle* slot and gates on its health endpoint;
531///   3. repoints the Caddy upstream at the idle slot and reloads gracefully;
532///   4. stops the old slot and flips `active_color` in metadata.
533///
534/// A health-gate timeout aborts before step 3, leaving the old slot live and
535/// routed — a failed deploy is a no-op, never an outage.
536pub async fn blue_green_swap(service_name: &str) -> Result<Option<UpgradeResult>> {
537    if !is_service_installed(service_name) {
538        return Err(Error::ServiceNotInstalled(service_name.to_string()));
539    }
540    let metadata = load_metadata(service_name)?
541        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
542
543    // Resolve the registry def to read the deploy strategy + health path.
544    let service_ref = service_ref_for(&metadata, service_name);
545    let repo_dir = resolve_registry_dir(&service_ref).await?;
546    let reg = crate::registry::find_service(&repo_dir, service_name)?;
547    let def = &reg.def;
548    if def.service.deploy != DeployStrategy::BlueGreen {
549        return Ok(None);
550    }
551    let health_check = def.service.health_check.clone().ok_or_else(|| {
552        Error::Template(format!(
553            "{service_name}: deploy = \"blue-green\" but no health_check — validation should have caught this"
554        ))
555    })?;
556
557    // Which slot is live, and which we're rolling onto.
558    let live = metadata.active_color.unwrap_or(Color::Blue);
559    let target = live.other();
560
561    // The idle slot's host port, from the install's `.env`
562    // (`SERVICE_PORT_HTTP_GREEN` etc., written by the blue/green add path).
563    let primary_port_name = def
564        .ports
565        .iter()
566        .find(|p| p.name.eq_ignore_ascii_case("http"))
567        .or_else(|| def.ports.first())
568        .map(|p| p.name.clone())
569        .ok_or_else(|| {
570            Error::Template(format!("{service_name}: blue/green needs a routable port"))
571        })?;
572    let existing_ports = read_existing_ports(service_name)?;
573    let target_key = format!("{}_{}", primary_port_name.to_ascii_lowercase(), target);
574    let target_port = existing_ports.get(&target_key).copied().ok_or_else(|| {
575        Error::Template(format!(
576            "{service_name}: missing {} in .env — reinstall to allocate the blue/green port pair",
577            deploy::color_port_var(
578                &format!("SERVICE_PORT_{}", primary_port_name.to_uppercase()),
579                target
580            )
581        ))
582    })?;
583    let health_url = format!("http://127.0.0.1:{target_port}{health_check}");
584
585    // Re-render the install (Upgrade mode): emits both color quadlets/units and
586    // pulls any new image. Keep those file writes + pulls + daemon-reload, but
587    // drop the add path's StartService/StopService (we orchestrate the swap
588    // ourselves), its `.env` write (preserve secrets), and its metadata write
589    // (we flip active_color below instead of resetting it to blue).
590    let replanned = replan(service_name).await?;
591    let env_filename = std::ffi::OsStr::new(".env");
592    let metadata_file = metadata_path(service_name)?;
593    // Never re-sync or rebuild the LIVE slot's working dir — that's the whole
594    // point of the isolation (an in-flight Python/Node process must not have its
595    // source mutated). Drop any SyncDir/Build that targets `colors/<live>`;
596    // keep the idle slot's. (Podman has no such steps — it re-pulls the image,
597    // which is harmless — so this is a native-only filter in practice.)
598    let live_slot = format!("colors/{live}");
599    let touches_live = |p: &std::path::Path| p.to_string_lossy().contains(&live_slot);
600    let mut steps: Vec<Step> = Vec::new();
601    for step in replanned.result.steps {
602        match step {
603            Step::StartService { .. } | Step::StopService { .. } => continue,
604            Step::WriteFile(GeneratedFile { ref path, .. })
605                if path.file_name() == Some(env_filename) || *path == metadata_file =>
606            {
607                continue;
608            }
609            Step::SyncDir { ref dst, .. } if touches_live(dst) => continue,
610            Step::Build { ref dir, .. } if touches_live(dir) => continue,
611            other => steps.push(other),
612        }
613    }
614
615    // Caddy: repoint the upstream at the idle slot. Only when the install has a
616    // routed URL and a Caddyfile exists (loopback installs swap without it).
617    let caddy_rewrite =
618        blue_green_caddy_rewrite(service_name, def, &metadata, target, target_port)?;
619
620    // The runtime-agnostic swap: start idle -> health-gate -> caddy reload ->
621    // stop old. Artifact prep (pull/build) already rode along in `steps` above.
622    steps.extend(deploy::color_swap_steps(deploy::ColorSwap {
623        service_name: service_name.to_string(),
624        live,
625        prepare: None,
626        health_url,
627        health_timeout_secs: def.service.health_timeout_secs(),
628        caddy_rewrite,
629    }));
630
631    // Flip active_color so the next deploy rolls back onto `live`.
632    let mut new_metadata = metadata.clone();
633    new_metadata.active_color = Some(target);
634    steps.push(Step::WriteFile(GeneratedFile {
635        path: metadata_file,
636        content: toml::to_string_pretty(&new_metadata)?,
637    }));
638
639    Ok(Some(UpgradeResult {
640        service: service_name.to_string(),
641        diff: diff_service(service_name).await?,
642        steps,
643        backup_dir: None,
644        planned_files: replanned.planned,
645        // A swap isn't visible as config drift (the new image/build lives behind
646        // the same quadlet), so force the apply just like the native rebuild path.
647        force_apply: true,
648    }))
649}
650
651/// Re-render the Caddy site block pointing at the idle color and splice it into
652/// the existing Caddyfile. `None` when the install has no routed URL or no
653/// Caddyfile on disk (a loopback blue/green install swaps without Caddy).
654fn blue_green_caddy_rewrite(
655    service_name: &str,
656    def: &crate::registry::service_def::ServiceDef,
657    metadata: &Metadata,
658    target: Color,
659    target_port: u16,
660) -> Result<Option<Step>> {
661    let Some(url) = metadata.url.as_deref() else {
662        return Ok(None);
663    };
664    let caddyfile_path = caddy::caddyfile_path()?;
665    let Ok(existing) = std::fs::read_to_string(&caddyfile_path) else {
666        return Ok(None);
667    };
668    let parsed = url::Url::parse(url)
669        .map_err(|e| Error::Template(format!("invalid service URL '{url}': {e}")))?;
670    let domain = parsed
671        .host_str()
672        .ok_or_else(|| Error::Template(format!("service URL '{url}' has no host")))?;
673    let paths = crate::config::ConfigPaths::resolve()?;
674    let config = crate::config::load_or_default(&paths.config_file)?;
675    // Podman slots are containers on Caddy's shared network, reachable by name
676    // (`<svc>-<color>:<container_port>`). Native slots are host processes, so
677    // Caddy reaches them over the host bridge at the color's *host* port.
678    let (target_host, port) = match metadata.runtime {
679        Runtime::Podman => (
680            deploy::color_unit(service_name, target),
681            def.ports.first().map(|p| p.container_port).unwrap_or(80),
682        ),
683        Runtime::Native => ("host.containers.internal".to_string(), target_port),
684    };
685    let block = caddy::render_site_block(&caddy::CaddySiteParams {
686        service_name: service_name.to_string(),
687        target_host,
688        domain: domain.to_string(),
689        container_port: port,
690        https_port: crate::caddy_https_port(&config),
691        force_internal_tls: false,
692    });
693    let updated = caddy::add_route(&existing, service_name, &block);
694    Ok(Some(Step::WriteFile(GeneratedFile {
695        path: caddyfile_path,
696        content: updated,
697    })))
698}
699
/// Plan an upgrade for an installed service.
///
/// Blue/green installs are delegated to `blue_green_swap`; everything else
/// goes through the in-place flow: diff the install, re-render it, and build
/// a filtered step list that ends in a `Step::RestartService`.
///
/// Returns the steps to execute and the backup directory where displaced
/// files will be copied. The backup dir is *also* baked into the steps
/// (as `Step::CopyFile` entries placed before the `Step::WriteFile` or
/// `Step::RemoveFile` that displaces each backed-up file).
///
/// # Errors
///
/// - `Error::HandEditedFiles` when the diff reports drifted files and
///   `force` is `false`.
/// - `Error::Template` when the service's `.env` does not exist.
/// - `Error::FileRead` when env additions are pending and `.env` cannot be
///   read for a reason other than "not found".
/// - Any error propagated from `blue_green_swap`, `diff_service`, `replan`,
///   `manifest::manifest_path`, `service_home` or `backup_directory`.
pub async fn upgrade_service(service_name: &str, force: bool) -> Result<UpgradeResult> {
    // Blue/green services upgrade by a color swap, not an in-place restart, so
    // they take a different plan entirely. `blue_green_swap` returns None for
    // restart-strategy installs, falling through to the standard flow below.
    if let Some(plan) = blue_green_swap(service_name).await? {
        return Ok(plan);
    }

    let diff = diff_service(service_name).await?;

    // Hand-edited (drifted) files block the upgrade unless the caller opted
    // in with `force`.
    if !force {
        let drifted = diff.drifted();
        if !drifted.is_empty() {
            return Err(Error::HandEditedFiles {
                service: service_name.to_string(),
                paths: drifted.iter().map(|e| e.path.clone()).collect(),
            });
        }
    }

    let Replanned {
        result, planned, ..
    } = replan(service_name).await?;
    let manifest_file = manifest::manifest_path(service_name)?;
    let env_file = service_home(service_name)?.join(".env");

    // Hard-fail if `.env` is missing. Append-only env handling can't
    // reconstruct generated secrets (mysql_root_password, jwt_key, etc.)
    // and would silently produce a half-written file that fails on
    // restart. Surface the real problem instead.
    if !env_file.exists() {
        return Err(Error::Template(format!(
            "{service_name}: `.env` is missing at {} — upgrade can't reconstruct generated secrets. \
             Restore the file from a backup or reinstall the service.",
            env_file.display()
        )));
    }

    // Decide the backup directory once per upgrade run. It is used whenever
    // any file would be overwritten or removed, *or* the existing
    // service.manifest exists (the manifest is always backed up so
    // `ryra revert` can reconstruct the pre-upgrade state). When neither
    // holds, no CreateDir step is emitted and the result reports `None` —
    // keeps `~/.local/state/ryra/` from accumulating no-op dirs.
    let backup_dir = backup_directory(service_name)?;
    let needs_backup: BTreeSet<PathBuf> = diff
        .entries
        .iter()
        .filter(|e| {
            matches!(
                e.kind,
                DiffKind::Modified | DiffKind::Drift | DiffKind::Removed
            )
        })
        .map(|e| e.path.clone())
        .collect();
    let manifest_will_be_backed_up = manifest_file.exists();
    let backup_used = !needs_backup.is_empty() || manifest_will_be_backed_up;

    // Filter the planned step list down to what an upgrade should actually do.
    // - WriteFile for `.env` is dropped (preserve secrets).
    // - PullImage stays (idempotent if cached, fetches new tag if registry bumped).
    // - StartService is replaced with RestartService at the very end.
    // - CreateDir / Symlink stay (idempotent and may be needed for new files).
    // - DaemonReload stays.
    // - CopyFile stays (vendored binaries; rare to upgrade but handled the same).
    // - TailscaleSetup / TailscaleEnable were already gated out by PlanMode::Upgrade.
    let mut steps: Vec<Step> = Vec::new();
    if backup_used {
        steps.push(Step::CreateDir(backup_dir.clone()));
    }
    let unchanged: BTreeSet<PathBuf> = diff
        .entries
        .iter()
        .filter(|e| matches!(e.kind, DiffKind::Unchanged))
        .map(|e| e.path.clone())
        .collect();

    let env_filename = std::ffi::OsStr::new(".env");
    for step in result.steps {
        match step {
            // .env stays untouched on upgrade — generated secrets in the
            // running service must not be regenerated.
            Step::WriteFile(GeneratedFile { ref path, .. })
                if path.file_name() == Some(env_filename) =>
            {
                continue;
            }
            // Identical content already on disk — skip the write entirely
            // so the file's mtime stays put and `sha256sum -c` stays clean
            // for unchanged entries.
            Step::WriteFile(GeneratedFile { ref path, .. }) if unchanged.contains(path) => {
                // The manifest is special: even if "unchanged" by content, we
                // re-emit it because path-level adds/removes mean its content
                // has changed and we need the new hashes recorded.
                if path == &manifest_file {
                    steps.push(step);
                }
                continue;
            }
            Step::WriteFile(ref file) => {
                // Always back up the existing service.manifest too, even though
                // it's filtered out of the diff. `ryra revert` reads the
                // backed-up manifest to know which files were Added during the
                // upgrade (current manifest − pre-upgrade manifest) so it can
                // delete them on revert. Without this, revert would leave
                // upgrade-added files orphaned.
                let should_backup = (needs_backup.contains(&file.path)
                    || file.path == manifest_file)
                    && file.path.exists();
                if should_backup {
                    // Mirror the file's absolute path under the backup dir,
                    // creating the mirrored parent directory first.
                    let rel = backup_relpath(&file.path);
                    let dst = backup_dir.join(rel);
                    if let Some(parent) = dst.parent() {
                        steps.push(Step::CreateDir(parent.to_path_buf()));
                    }
                    steps.push(Step::CopyFile {
                        src: file.path.clone(),
                        dst,
                    });
                }
                steps.push(step);
            }
            // The replanned step list always ends with StartService; we
            // strip it and append a RestartService at the very end so the
            // unit picks up the new quadlet.
            Step::StartService { .. } => continue,
            other => steps.push(other),
        }
    }

    // Removed files: back them up then delete. The RemoveFile step is
    // emitted even when the file is already gone from disk; only the backup
    // copy is conditional on it existing.
    for entry in &diff.entries {
        if !matches!(entry.kind, DiffKind::Removed) {
            continue;
        }
        if entry.path.exists() {
            let rel = backup_relpath(&entry.path);
            let dst = backup_dir.join(rel);
            if let Some(parent) = dst.parent() {
                steps.push(Step::CreateDir(parent.to_path_buf()));
            }
            steps.push(Step::CopyFile {
                src: entry.path.clone(),
                dst,
            });
        }
        steps.push(Step::RemoveFile(entry.path.clone()));
    }

    // Env additions: append registry-required static env vars that the
    // user's .env doesn't have. Append-only — we never rewrite the
    // existing .env (that would clobber rotated secrets and any manual
    // edits) and we never remove keys (the user might have added their
    // own that the registry happens not to ship). The .env is
    // intentionally NOT backed up: it only ever gains lines and the
    // pre-existing content survives unchanged.
    if !diff.env_additions.is_empty() {
        let mut content = match std::fs::read_to_string(&env_file) {
            Ok(c) => c,
            // The existence check above already rejected a missing `.env`,
            // so this arm is only reachable if the file disappears between
            // that check and this read. In that case the write below
            // contains just the additions.
            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
            Err(source) => {
                return Err(Error::FileRead {
                    path: env_file.clone(),
                    source,
                });
            }
        };
        // Make sure the appended keys start on their own line.
        if !content.is_empty() && !content.ends_with('\n') {
            content.push('\n');
        }
        for add in &diff.env_additions {
            content.push_str(&format!("{}={}\n", add.key, add.value));
        }
        steps.push(Step::WriteFile(GeneratedFile {
            path: env_file,
            content,
        }));
    }

    // Pick up the new quadlet by restarting. RestartService is enough to
    // re-read the env file, re-run ExecStartPre/Post, and pull in any new
    // ExecStartPost script (the seafile case).
    steps.push(Step::RestartService {
        unit: service_name.to_string(),
    });

    // Native services rebuild from source on upgrade (the `Build` step) and
    // restart. A source change leaves the rendered config clean, so force the
    // apply; otherwise the CLI would short-circuit on the clean diff and never
    // rebuild. The plan already ends in RestartService. A metadata load
    // failure or missing metadata simply leaves `force_apply` false.
    let force_apply = matches!(
        crate::metadata::load_metadata(service_name),
        Ok(Some(m)) if m.runtime == crate::registry::service_def::Runtime::Native
    );

    Ok(UpgradeResult {
        service: service_name.to_string(),
        diff,
        steps,
        backup_dir: if backup_used { Some(backup_dir) } else { None },
        // Pass the path → string map produced by `replan` through unchanged.
        // The upgrade itself never writes the replanned `.env` content (that
        // WriteFile step is dropped above).
        planned_files: planned,
        force_apply,
    })
}
915
/// An upgrade plan for one service, as returned by `upgrade_service`.
pub struct UpgradeResult {
    /// Name of the service the plan applies to.
    pub service: String,
    /// The diff the plan was derived from.
    pub diff: DiffResult,
    /// Steps to execute, in order.
    pub steps: Vec<Step>,
    /// Directory that displaced files are copied into. As built by the
    /// in-place upgrade flow this is `None` only when no file would be
    /// overwritten or removed *and* there is no existing manifest to back up.
    pub backup_dir: Option<PathBuf>,
    /// Path → string map handed back by the re-render.
    /// NOTE(review): presumably the rendered content per planned file —
    /// confirm against `replan`.
    pub planned_files: BTreeMap<PathBuf, String>,
    /// Apply even when the config diff is clean. True for native services: a
    /// source rebuild isn't visible in the rendered config, so the plan must
    /// still run (the `SyncBinary` step then no-ops if the binary is unchanged).
    pub force_apply: bool,
}
928
/// One available backup snapshot for a service: the service-scoped
/// directory inside a timestamped backup run.
#[derive(Debug, Clone)]
pub struct BackupSnapshot {
    /// Filesystem path: `~/.local/state/ryra/backups/<timestamp>/<service>/`.
    pub path: PathBuf,
    /// `YYYY-MM-DDTHH-MM-SSZ` timestamp from the parent dir name. Sorting
    /// these strings lexically orders snapshots chronologically.
    pub timestamp: String,
}
937
/// A revert plan for one service, as returned by `revert_service`.
pub struct RevertResult {
    /// Name of the service the plan applies to.
    pub service: String,
    /// The backup snapshot the plan restores from.
    pub snapshot: BackupSnapshot,
    /// Steps to execute, in order.
    pub steps: Vec<Step>,
    /// Files to be copied from backup back to their original locations
    /// (listed by their original, absolute paths).
    pub files_to_restore: Vec<PathBuf>,
    /// Files added by the upgrade that didn't exist before — will be
    /// removed by revert. Empty when the snapshot pre-dates the manifest
    /// feature (we can't reconstruct what was added without it).
    pub files_to_delete: Vec<PathBuf>,
}
949
/// How many backup snapshots `ryra upgrade` retains per service before
/// auto-pruning. Each snapshot is small (~tens of KB — config files +
/// the manifest) so the cap is more about mental clutter than disk; 5
/// is enough to revert a few iterations back without filling the
/// `~/.local/state/ryra/backups/` tree with dead snapshots from years
/// of upgrades.
pub const DEFAULT_BACKUP_KEEP: usize = 5;
959
960/// Drop snapshots older than the most recent `keep` for this service.
961/// Returns the paths that were removed (newest-first within the
962/// removed set; the kept set keeps the same order). The shared
963/// timestamp dir is also removed when this was the last service-
964/// scoped subdir under it (multi-service upgrade runs share a
965/// timestamp dir; we don't want to nuke other services' state).
966pub fn prune_backups(service_name: &str, keep: usize) -> Result<Vec<PathBuf>> {
967    let backups_root = state_dir()?.join("backups");
968    prune_backups_in(&backups_root, service_name, keep)
969}
970
971/// Pure inner that operates on an explicit `<state>/backups/` root.
972/// Split out so tests can drive it against a tmp tree without touching
973/// the real XDG state dir.
974fn prune_backups_in(
975    backups_root: &std::path::Path,
976    service_name: &str,
977    keep: usize,
978) -> Result<Vec<PathBuf>> {
979    let snapshots = list_backups_in(backups_root, service_name)?;
980    if snapshots.len() <= keep {
981        return Ok(Vec::new());
982    }
983    let mut removed: Vec<PathBuf> = Vec::new();
984    for snap in snapshots.into_iter().skip(keep) {
985        if let Err(e) = std::fs::remove_dir_all(&snap.path) {
986            eprintln!(
987                "warning: failed to prune backup {}: {e}",
988                snap.path.display()
989            );
990            continue;
991        }
992        removed.push(snap.path.clone());
993        if let Some(parent) = snap.path.parent()
994            && let Ok(mut entries) = std::fs::read_dir(parent)
995            && entries.next().is_none()
996        {
997            let _ = std::fs::remove_dir(parent);
998        }
999    }
1000    Ok(removed)
1001}
1002
1003pub fn list_backups(service_name: &str) -> Result<Vec<BackupSnapshot>> {
1004    let backups_root = state_dir()?.join("backups");
1005    list_backups_in(&backups_root, service_name)
1006}
1007
1008fn list_backups_in(
1009    backups_root: &std::path::Path,
1010    service_name: &str,
1011) -> Result<Vec<BackupSnapshot>> {
1012    if !backups_root.is_dir() {
1013        return Ok(Vec::new());
1014    }
1015    let mut snapshots: Vec<BackupSnapshot> = Vec::new();
1016    let entries = std::fs::read_dir(backups_root).map_err(|source| Error::FileRead {
1017        path: backups_root.to_path_buf(),
1018        source,
1019    })?;
1020    for entry in entries.flatten() {
1021        let stamp_dir = entry.path();
1022        if !stamp_dir.is_dir() {
1023            continue;
1024        }
1025        let svc_dir = stamp_dir.join(service_name);
1026        if !svc_dir.is_dir() {
1027            continue;
1028        }
1029        let Some(stamp) = stamp_dir.file_name().and_then(|n| n.to_str()) else {
1030            continue;
1031        };
1032        snapshots.push(BackupSnapshot {
1033            path: svc_dir,
1034            timestamp: stamp.to_string(),
1035        });
1036    }
1037    // Newest first: timestamp is `YYYY-MM-DDTHH-MM-SSZ`, lexical-descending == reverse-chronological.
1038    snapshots.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
1039    Ok(snapshots)
1040}
1041
1042/// Plan a revert for an installed service.
1043///
1044/// `at` selects a specific backup timestamp; `None` picks the most recent.
1045/// The returned plan: restore every file from the backup tree to its
1046/// original location, delete files added by the upgrade, daemon-reload,
1047/// restart the unit.
1048pub fn revert_service(service_name: &str, at: Option<&str>) -> Result<RevertResult> {
1049    if !is_service_installed(service_name) {
1050        return Err(Error::ServiceNotInstalled(service_name.to_string()));
1051    }
1052    let snapshot = pick_snapshot(service_name, at)?;
1053
1054    // Files to restore: walk the backup tree and reconstruct the original
1055    // absolute path for each one. The backup mirrors absolute paths under
1056    // `<snapshot>/<original-path-without-leading-slash>`, so the inverse is
1057    // simply prefixing `/` to each path-relative-to-snapshot.
1058    let mut files_to_restore: Vec<PathBuf> = Vec::new();
1059    walk_backup_files(&snapshot.path, &mut files_to_restore)?;
1060
1061    // Files to delete: anything in the *current* lock that isn't in the
1062    // *backed-up* lock was added by the upgrade and should disappear on
1063    // revert. If either lock is absent, leave the delete set empty —
1064    // safest no-op for snapshots that pre-date this feature.
1065    let backup_manifest_file =
1066        absolute_to_backup_path(&snapshot.path, &manifest::manifest_path(service_name)?);
1067    let (backup_manifest_entries, _) = read_manifest_at(&backup_manifest_file)?;
1068    let (current_manifest_entries, _) = manifest::load(service_name)?.unwrap_or_default();
1069
1070    let backup_manifest_set: BTreeSet<PathBuf> = backup_manifest_entries
1071        .iter()
1072        .map(|e| e.path.clone())
1073        .collect();
1074    let mut files_to_delete: Vec<PathBuf> = if backup_manifest_entries.is_empty() {
1075        // Pre-feature snapshot: no way to know what was added.
1076        Vec::new()
1077    } else {
1078        current_manifest_entries
1079            .iter()
1080            .map(|e| e.path.clone())
1081            .filter(|p| !backup_manifest_set.contains(p))
1082            .collect()
1083    };
1084    files_to_delete.sort();
1085
1086    // Build the step list.
1087    let mut steps: Vec<Step> = Vec::new();
1088    // Restore: backup → original. CopyFile creates parents itself, so no
1089    // CreateDir needed.
1090    for backup_path in &files_to_restore {
1091        let original = backup_to_absolute_path(&snapshot.path, backup_path);
1092        steps.push(Step::CopyFile {
1093            src: backup_path.clone(),
1094            dst: original,
1095        });
1096    }
1097    // Delete: each Added file, plus any orphan symlink in the quadlet dir
1098    // that pointed at it (only the actual file is in the lock; the
1099    // companion symlink in `~/.config/containers/systemd/` is not).
1100    let qd = crate::quadlet_dir()?;
1101    for path in &files_to_delete {
1102        if path.exists() {
1103            steps.push(Step::RemoveFile(path.clone()));
1104        }
1105        if let Some(name) = path.file_name() {
1106            let symlink = qd.join(name);
1107            if std::fs::symlink_metadata(&symlink).is_ok() {
1108                steps.push(Step::RemoveFile(symlink));
1109            }
1110        }
1111    }
1112    steps.push(Step::DaemonReload);
1113    steps.push(Step::RestartService {
1114        unit: service_name.to_string(),
1115    });
1116
1117    let files_to_restore_orig: Vec<PathBuf> = files_to_restore
1118        .iter()
1119        .map(|p| backup_to_absolute_path(&snapshot.path, p))
1120        .collect();
1121    Ok(RevertResult {
1122        service: service_name.to_string(),
1123        snapshot,
1124        steps,
1125        files_to_restore: files_to_restore_orig,
1126        files_to_delete,
1127    })
1128}
1129
1130/// Resolve the snapshot to revert to. `at` is a timestamp string (e.g.
1131/// `2026-05-05T13-33-50Z`); when absent, the most recent snapshot wins.
1132fn pick_snapshot(service_name: &str, at: Option<&str>) -> Result<BackupSnapshot> {
1133    let snapshots = list_backups(service_name)?;
1134    if snapshots.is_empty() {
1135        return Err(Error::NoBackup(service_name.to_string()));
1136    }
1137    match at {
1138        None => Ok(snapshots
1139            .into_iter()
1140            .next()
1141            .expect("non-empty checked above")),
1142        Some(stamp) => snapshots
1143            .into_iter()
1144            .find(|s| s.timestamp == stamp)
1145            .ok_or_else(|| Error::BackupNotFound {
1146                service: service_name.to_string(),
1147                stamp: stamp.to_string(),
1148            }),
1149    }
1150}
1151
1152/// Recursively collect every regular file under `root` into `out`. Symlinks
1153/// are followed; we don't expect any in a backup tree (we always copied
1154/// targets, never link entries).
1155fn walk_backup_files(root: &std::path::Path, out: &mut Vec<PathBuf>) -> Result<()> {
1156    let entries = std::fs::read_dir(root).map_err(|source| Error::FileRead {
1157        path: root.to_path_buf(),
1158        source,
1159    })?;
1160    for entry in entries.flatten() {
1161        let path = entry.path();
1162        let meta = match entry.metadata() {
1163            Ok(m) => m,
1164            Err(_) => continue,
1165        };
1166        if meta.is_dir() {
1167            walk_backup_files(&path, out)?;
1168        } else if meta.is_file() {
1169            out.push(path);
1170        }
1171    }
1172    Ok(())
1173}
1174
/// Map a file inside a snapshot back to the absolute path it mirrors:
/// `<root>/home/user/foo` becomes `/home/user/foo`. Inverse of
/// `backup_relpath`.
///
/// A `backup` path that does not live under `root` is joined onto `/`
/// as-is.
fn backup_to_absolute_path(root: &std::path::Path, backup: &std::path::Path) -> PathBuf {
    let relative = match backup.strip_prefix(root) {
        Ok(inside) => inside,
        Err(_) => backup,
    };
    std::path::Path::new("/").join(relative)
}
1181
/// Map an absolute path to its mirror inside a snapshot:
/// `<root>` + `/home/user/foo` → `<root>/home/user/foo`.
///
/// Leading slashes are stripped from the lossy string form of `abs` before
/// joining, so the result always stays beneath `root`.
fn absolute_to_backup_path(root: &std::path::Path, abs: &std::path::Path) -> PathBuf {
    let mut mirrored = root.to_path_buf();
    mirrored.push(abs.to_string_lossy().trim_start_matches('/'));
    mirrored
}
1188
1189/// Read a manifest at the given path. Missing-file is treated as an empty
1190/// list — pre-feature backups simply have no lock to reference.
1191fn read_manifest_at(
1192    path: &std::path::Path,
1193) -> Result<(Vec<manifest::ManifestEntry>, Vec<manifest::EnvEntry>)> {
1194    if !path.exists() {
1195        return Ok((Vec::new(), Vec::new()));
1196    }
1197    let content = std::fs::read_to_string(path).map_err(|source| Error::FileRead {
1198        path: path.to_path_buf(),
1199        source,
1200    })?;
1201    manifest::parse(&content)
1202}
1203
1204/// `~/.local/state/ryra/backups/<timestamp>/<service>/`. Timestamp uses an
1205/// ISO-8601-ish form that sorts lexically (no colons — Windows-friendly,
1206/// not that it matters today, but the cost is zero).
1207fn backup_directory(service_name: &str) -> Result<PathBuf> {
1208    let state = state_dir()?;
1209    let now = std::time::SystemTime::now()
1210        .duration_since(std::time::UNIX_EPOCH)
1211        .map_err(|e| Error::Template(format!("system clock before UNIX epoch: {e}")))?
1212        .as_secs();
1213    let stamp = format_timestamp(now);
1214    Ok(state.join("backups").join(stamp).join(service_name))
1215}
1216
1217/// XDG state dir under `ryra/`. Created on demand by the CreateDir step.
1218fn state_dir() -> Result<PathBuf> {
1219    let base = dirs::state_dir()
1220        .or_else(|| dirs::home_dir().map(|h| h.join(".local").join("state")))
1221        .ok_or(Error::HomeDirNotFound)?;
1222    Ok(base.join("ryra"))
1223}
1224
1225/// Format a UNIX epoch into `YYYY-MM-DDTHH-MM-SSZ`. Avoids the chrono
1226/// dependency — we just need stable lexical sort.
1227fn format_timestamp(secs: u64) -> String {
1228    // Days from 1970-01-01.
1229    const SECS_PER_DAY: u64 = 86_400;
1230    let days = secs / SECS_PER_DAY;
1231    let time_of_day = secs % SECS_PER_DAY;
1232    let h = time_of_day / 3600;
1233    let m = (time_of_day % 3600) / 60;
1234    let s = time_of_day % 60;
1235    let (y, mo, d) = ymd_from_days(days);
1236    format!("{y:04}-{mo:02}-{d:02}T{h:02}-{m:02}-{s:02}Z")
1237}
1238
/// Turn a count of days since 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)`, with `month` in `1..=12` and `day` in `1..=31`.
///
/// This is the civil-from-days algorithm from Howard Hinnant's date library
/// (MIT), kept in-tree so backup naming needs no chrono/time dependency.
/// It works in 400-year eras on a calendar whose year starts on 1 March,
/// which puts the leap day at the very end of the year.
fn ymd_from_days(days: u64) -> (i64, u32, u32) {
    /// Days in one 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;
    /// Days from 0000-03-01 to 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days as i64 + EPOCH_SHIFT;
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64; // [0, 146096]
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365; // [0, 399]
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100); // [0, 365]
    let month_index = (5 * day_of_year + 2) / 153; // 0 = March … 11 = February
    let day = (day_of_year - (153 * month_index + 2) / 5 + 1) as u32;
    let month = (if month_index < 10 {
        month_index + 3
    } else {
        month_index - 9
    }) as u32;
    // January and February belong to the next civil year.
    let march_based_year = year_of_era as i64 + era * 400;
    let year = march_based_year + i64::from(month <= 2);
    (year, month, day)
}
1255
/// Turn an absolute path into the relative path used inside a backup dir.
///
/// Leading slashes are removed so that joining the result onto the backup
/// dir cannot escape it; the rest of the path is kept verbatim, which lets
/// the user `diff -r` a backup against the original location.
fn backup_relpath(path: &std::path::Path) -> PathBuf {
    let lossy = path.to_string_lossy();
    lossy.trim_start_matches('/').into()
}
1262
#[cfg(test)]
mod tests {
    use super::*;

    /// Calendar conversion sanity check against hand-verifiable values.
    #[test]
    fn timestamp_round_numbers() {
        // Three known points: the epoch itself, exactly one day later, and
        // exactly 365 days later (1970 is not a leap year, so that lands on
        // 1971-01-01).
        let s = format_timestamp(0);
        assert_eq!(s, "1970-01-01T00-00-00Z");
        let s = format_timestamp(86_400);
        assert_eq!(s, "1970-01-02T00-00-00Z");
        let s = format_timestamp(31_536_000); // not a leap year (1970)
        assert_eq!(s, "1971-01-01T00-00-00Z");
    }

    /// The backup-relative form of an absolute path drops only the leading `/`.
    #[test]
    fn backup_relpath_strips_leading_slash() {
        let p = backup_relpath(std::path::Path::new("/home/user/foo/bar"));
        assert_eq!(p, PathBuf::from("home/user/foo/bar"));
    }

    /// Stand up a tmp backups tree with the given timestamps and a
    /// service subdir under each, then run `prune_backups_in` against it.
    /// Returns (kept timestamps newest-first, removed paths). Hermetic:
    /// no env vars touched, no shared global state.
    fn setup_and_prune(stamps: &[&str], keep: usize) -> (Vec<String>, Vec<PathBuf>) {
        // Process id + nanosecond clock keeps concurrent test runs apart.
        let tmp = std::env::temp_dir().join(format!(
            "ryra-prune-test-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ));
        let backups_root = tmp.join("backups");
        for s in stamps {
            std::fs::create_dir_all(backups_root.join(s).join("svc")).unwrap();
        }
        let removed = prune_backups_in(&backups_root, "svc", keep).unwrap();
        // Whatever timestamp dirs are still on disk are the kept ones.
        let mut kept: Vec<String> = std::fs::read_dir(&backups_root)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter_map(|e| e.file_name().into_string().ok())
            .collect();
        kept.sort();
        kept.reverse();
        let _ = std::fs::remove_dir_all(&tmp);
        (kept, removed)
    }

    /// Pruning past the cap removes the oldest snapshots and keeps the newest.
    #[test]
    fn prune_keeps_newest_n() {
        // Five timestamps, keep=3 — the two oldest (lex-smallest) should go.
        let (kept, removed) = setup_and_prune(
            &[
                "2026-01-01T00-00-00Z",
                "2026-02-01T00-00-00Z",
                "2026-03-01T00-00-00Z",
                "2026-04-01T00-00-00Z",
                "2026-05-01T00-00-00Z",
            ],
            3,
        );
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[0], "2026-05-01T00-00-00Z");
        assert_eq!(kept[2], "2026-03-01T00-00-00Z");
        assert_eq!(removed.len(), 2);
    }

    /// With fewer snapshots than the cap, nothing is removed.
    #[test]
    fn prune_no_op_when_under_keep() {
        let (kept, removed) = setup_and_prune(&["2026-01-01T00-00-00Z", "2026-02-01T00-00-00Z"], 5);
        assert_eq!(kept.len(), 2);
        assert!(removed.is_empty());
    }

    /// Build a per-run temp path from `prefix`, the process id and the
    /// current time in nanoseconds. The path is returned, not created.
    fn unique_tmp(prefix: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "{prefix}-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ))
    }

    /// Exercises `any_file_newer_than` (defined outside this module) on a
    /// tree that mixes a source file with build-output and dot directories.
    #[test]
    fn source_staleness_ignores_build_and_dotdirs() {
        use std::time::Duration;

        let tmp = unique_tmp("ryra-stale");
        std::fs::create_dir_all(tmp.join("src")).unwrap();
        std::fs::create_dir_all(tmp.join("target")).unwrap();
        std::fs::create_dir_all(tmp.join(".git")).unwrap();
        std::fs::write(tmp.join("src/main.rs"), "fn main(){}").unwrap();
        std::fs::write(tmp.join("target/app"), "bin").unwrap();
        std::fs::write(tmp.join(".git/HEAD"), "ref").unwrap();

        // Baseline after everything we wrote: nothing is newer.
        assert!(!any_file_newer_than(
            &tmp,
            SystemTime::now() + Duration::from_secs(3600)
        ));
        // Baseline before everything: the source file trips staleness.
        assert!(any_file_newer_than(
            &tmp,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        // When only ignored dirs hold newer files, staleness stays false.
        let ignored_only = unique_tmp("ryra-stale-ign");
        std::fs::create_dir_all(ignored_only.join("node_modules")).unwrap();
        std::fs::write(ignored_only.join("node_modules/x.js"), "x").unwrap();
        assert!(!any_file_newer_than(
            &ignored_only,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        let _ = std::fs::remove_dir_all(&tmp);
        let _ = std::fs::remove_dir_all(&ignored_only);
    }

    /// `should_skip_path` (defined outside this module) must skip `.env` and
    /// the manifest itself, but not ordinary config files.
    #[test]
    fn should_skip_path_excludes_env_and_manifest() {
        let lock = PathBuf::from("/svc/service.manifest");
        assert!(should_skip_path(&PathBuf::from("/svc/.env"), &lock));
        assert!(should_skip_path(&lock, &lock));
        assert!(!should_skip_path(
            &PathBuf::from("/svc/configs/x.sh"),
            &lock
        ));
    }
}