Skip to main content

ryra_core/
upgrade.rs

1//! Diff and upgrade flows for already-installed services.
2//!
3//! "Upgrade" means: re-render an installed service's quadlet + configs
4//! against the current registry, replace any files whose content changed,
5//! and restart the unit. The render path is shared with `add_service`
6//! (driven via [`PlanMode::Upgrade`]); the side-effect steps differ.
7//!
8//! Drift detection is grounded in `service.manifest` — the per-install render
9//! manifest written by `ryra add`. Each tracked file is in one of these
10//! states:
11//!
12//! - **Unchanged**: on-disk content matches what the registry would render.
13//! - **Modified**: registry rendered output differs, but on-disk hash still
14//!   matches the manifest, so we know the file is ours and can be safely
15//!   overwritten.
16//! - **Drift**: on-disk hash matches *neither* the manifest nor the planned
17//!   content — i.e. the user hand-edited it. Refused without `--force`.
18//! - **Added**: file is in the planned set but not in the manifest (registry
19//!   added it).
20//! - **Removed**: file is in the manifest but not in the planned set (registry
21//!   stopped shipping it).
22//!
23//! `.env` is excluded throughout: it carries generated secrets that legitimately
24//! drift across restarts, and re-rendering it on upgrade would clobber rotated
25//! credentials. Its absence from the manifest is the source of truth for that.
26
27use std::collections::{BTreeMap, BTreeSet};
28use std::path::{Path, PathBuf};
29use std::time::SystemTime;
30
31use crate::error::{Error, Result};
32use crate::exposure::Exposure;
33use crate::generate::GeneratedFile;
34use crate::manifest;
35use crate::metadata::{Metadata, load_metadata};
36use crate::registry::resolve::ServiceRef;
37use crate::registry::service_def::{Color, DeployStrategy, Runtime};
38use crate::{
39    AddResult, PlanMode, REGISTRY_DEFAULT, Step, add_service, caddy, deploy, is_service_installed,
40    paths::metadata_path, resolve_registry_dir, service_home,
41};
42
43// --- Native source-staleness ("a rebuild would pick up new code") ----------
44//
// Config drift is detected by `diff_service` (below). But a `runtime =
46// "native"` service can change *without* its rendered config changing: you
47// edit the source and a `cargo build` / `bun install` / restart would ship it.
48// `service.toml` is unchanged, so the diff is clean and the service still looks
49// up to date. This module fills that gap with a language-agnostic signal: did
50// any source file change since the running process last started?
51//
52// The signal is the running process's own start time (no state is written
53// anywhere): we ask systemd for the unit's MainPID and read its start time from
54// `/proc/<pid>/stat`, then flag staleness when any source file is newer. That
55// works for *anything* systemd can run (bash, Python, Node, Rust, C++, ...) --
56// we never inspect a toolchain or look for a "binary". It's a *hint*, not a
57// gate: the remedy is always an idempotent `ryra upgrade`, and the comparison
58// is read-only, so a false positive just costs a needless rebuild.
59
/// Directory names that are never descended into when scanning for changed
/// sources: the usual build-output and dependency directories across
/// ecosystems. Dot-directories (`.git`, editor/tool state) are excluded
/// separately by [`any_file_newer_than`]. The list is best-effort and
/// language-agnostic -- staleness is only a hint, so a missing entry at worst
/// yields a spurious "upgrade available" that an idempotent `ryra upgrade`
/// clears.
const IGNORED_DIRS: &[&str] = &[
    "target",
    "node_modules",
    "dist",
    "build",
    "out",
    "vendor",
    "__pycache__",
    "venv",
];

/// Reports whether any regular file below `dir` has a modification time
/// strictly later than `since`.
///
/// Sub-directories named in [`IGNORED_DIRS`] or starting with `.` are not
/// entered. Entries that are neither a directory nor a regular file (symlinks
/// included -- the entry type is taken without following them) are ignored.
/// Anything that cannot be read -- the directory listing, an entry's type, or
/// a file's mtime -- is skipped rather than reported, because this is a hint
/// and not a hard check. The walk stops at the first newer file.
fn any_file_newer_than(dir: &Path, since: SystemTime) -> bool {
    let Ok(listing) = std::fs::read_dir(dir) else {
        return false;
    };
    listing.flatten().any(|entry| {
        let kind = match entry.file_type() {
            Ok(kind) => kind,
            Err(_) => return false,
        };
        if kind.is_dir() {
            let child = entry.path();
            // A name that is not valid UTF-8 can't match the skip rules, so it
            // is walked like any other directory.
            let skipped = child
                .file_name()
                .and_then(|name| name.to_str())
                .is_some_and(|name| name.starts_with('.') || IGNORED_DIRS.contains(&name));
            !skipped && any_file_newer_than(&child, since)
        } else if kind.is_file() {
            entry
                .metadata()
                .and_then(|meta| meta.modified())
                .is_ok_and(|mtime| mtime > since)
        } else {
            false
        }
    })
}
105
106/// Rebuild the `ServiceRef` we stashed at install time (mirrors `replan`), so
107/// the source dir can be resolved the same way an upgrade would.
108fn service_ref_for(metadata: &Metadata, service_name: &str) -> ServiceRef {
109    if metadata.registry.is_empty() || metadata.registry == REGISTRY_DEFAULT {
110        ServiceRef::Default(service_name.to_string())
111    } else if crate::registry::resolve::is_path_like(&metadata.registry) {
112        ServiceRef::Path {
113            dir: PathBuf::from(&metadata.registry),
114            name: service_name.to_string(),
115        }
116    } else {
117        ServiceRef::Custom {
118            registry: metadata.registry.clone(),
119            service: service_name.to_string(),
120        }
121    }
122}
123
/// Asks the user systemd instance for the `MainPID` of `<service_name>.service`.
///
/// Returns `None` when `systemctl` cannot be spawned, exits unsuccessfully,
/// prints something that is not a `u32`, or reports PID 0 (the unit has no
/// main process, i.e. the service is stopped).
fn unit_main_pid(service_name: &str) -> Option<u32> {
    let unit = format!("{service_name}.service");
    let output = std::process::Command::new("systemctl")
        .arg("--user")
        .arg("show")
        .arg(&unit)
        .args(["-p", "MainPID", "--value"])
        .output()
        .ok()?;
    if !output.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&output.stdout);
    match stdout.trim().parse::<u32>() {
        // 0 is systemd's "no main process" value.
        Ok(0) | Err(_) => None,
        Ok(pid) => Some(pid),
    }
}
144
/// Wall-clock start time of `pid`, computed from `/proc/<pid>/stat` field 22
/// (`starttime`, in clock ticks since boot) plus the `btime` line of
/// `/proc/stat` (boot time in seconds since the Unix epoch).
///
/// The tick count is kept at its native 10 ms resolution instead of being
/// truncated to whole seconds: callers compare the result against file
/// mtimes, and dropping the sub-second part makes a file written in the same
/// second *before* the process started look newer than the process.
///
/// Returns `None` if the process is gone, `/proc` can't be read or parsed, or
/// the values are so large that the resulting time is not representable.
fn process_start_time(pid: u32) -> Option<SystemTime> {
    // USER_HZ: the kernel's /proc clock-tick rate. Fixed at 100 on every
    // mainstream Linux (the value is baked into the ABI, not the runtime CPU
    // tick), so hardcoding it avoids a libc/sysconf dependency.
    const USER_HZ: u64 = 100;
    const MILLIS_PER_TICK: u64 = 1000 / USER_HZ;

    let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?;
    // comm (field 2) is parenthesised and may itself contain spaces or `)`, so
    // the numeric fields resume only after the LAST `)`. Field 3 (state) is the
    // first token there, making starttime (field 22) the 20th -> index 19.
    let (_, after_comm) = stat.rsplit_once(')')?;
    let starttime_ticks: u64 = after_comm.split_whitespace().nth(19)?.parse().ok()?;

    let proc_stat = std::fs::read_to_string("/proc/stat").ok()?;
    let btime_secs: u64 = proc_stat
        .lines()
        .find_map(|line| line.strip_prefix("btime ")?.trim().parse().ok())?;

    // Checked arithmetic throughout: a corrupt or absurd value yields `None`
    // instead of a panic in what is only a best-effort hint.
    let since_boot =
        std::time::Duration::from_millis(starttime_ticks.checked_mul(MILLIS_PER_TICK)?);
    std::time::UNIX_EPOCH
        .checked_add(std::time::Duration::from_secs(btime_secs))?
        .checked_add(since_boot)
}
168
/// How a single tracked file compares across three versions: what is on disk,
/// what the manifest recorded at install time, and what the registry renders
/// now.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum DiffKind {
    /// The file on disk already equals the planned render; upgrade has
    /// nothing to do for it.
    Unchanged,
    /// The planned render differs from disk, and disk is still what the
    /// manifest recorded (or the file was deleted after install). The file is
    /// ryra-owned, so overwriting/restoring it is safe.
    Modified,
    /// The file on disk matches neither the manifest nor the planned render,
    /// i.e. it was hand-edited. Upgrade refuses to touch it without
    /// `--force`. A file that exists on disk but has no manifest entry at all
    /// (installed before manifests existed) is also classified here, as the
    /// conservative choice.
    Drift,
    /// The registry now renders this file, but neither the manifest nor the
    /// disk has it yet.
    Added,
    /// The manifest lists this file but the registry no longer renders it.
    /// Upgrade deletes it.
    Removed,
}
190
191#[derive(Debug, Clone)]
192pub struct DiffEntry {
193    pub path: PathBuf,
194    pub kind: DiffKind,
195}
196
197/// One env var the registry expects in `.env` that the user's `.env`
198/// doesn't have. By design env tracking is *append-only* — we never flag
199/// a present-but-different value as drift, and we never propose
200/// removing a key. Users may have manually edited values or added their
201/// own keys; clobbering those would be the larger harm.
202///
203/// `kind` and `prompt` come straight from the registry's `EnvVar`
204/// definition, so the CLI can route Prompted / Required additions
205/// through the same interactive prompt that `ryra add` uses, while
206/// silently appending Default ones.
207#[derive(Debug, Clone)]
208pub struct EnvAddition {
209    pub key: String,
210    pub value: String,
211    pub kind: crate::registry::service_def::EnvKind,
212    pub prompt: Option<String>,
213}
214
215/// Result of comparing the registry's render to what's on disk.
216#[derive(Debug, Clone)]
217pub struct DiffResult {
218    pub service: String,
219    pub entries: Vec<DiffEntry>,
220    /// Static env vars the registry expects but the user's `.env` is
221    /// missing. Empty when the `.env` already covers everything tracked.
222    pub env_additions: Vec<EnvAddition>,
223    /// `runtime = "native"` only: the source changed since the running process
224    /// started, so a rebuild/restart would ship new code even though the
225    /// rendered config is unchanged. Always `false` for podman services and
226    /// stopped natives. Orthogonal to [`Self::is_clean`] (which is config-only)
227    /// -- a service is upgradable when the diff is dirty *or* this is set.
228    pub source_stale: bool,
229}
230
231impl DiffResult {
232    /// True when nothing about the install would change — neither files
233    /// nor env vars.
234    pub fn is_clean(&self) -> bool {
235        self.entries
236            .iter()
237            .all(|e| matches!(e.kind, DiffKind::Unchanged))
238            && self.env_additions.is_empty()
239    }
240
241    /// Files the user hand-edited. Upgrade must refuse to overwrite these
242    /// without `--force`.
243    pub fn drifted(&self) -> Vec<&DiffEntry> {
244        self.entries
245            .iter()
246            .filter(|e| matches!(e.kind, DiffKind::Drift))
247            .collect()
248    }
249}
250
251/// Reconstruct the planning inputs we stashed at install time and feed them
252/// back through `add_service` in upgrade mode. Returns the planned step
253/// list and the planned-file content map (path → content). The richer
254/// per-env metadata lives on `AddResult.tracked_envs`.
255async fn replan(service_name: &str) -> Result<Replanned> {
256    if !is_service_installed(service_name) {
257        return Err(Error::ServiceNotInstalled(service_name.to_string()));
258    }
259    let metadata = load_metadata(service_name)?
260        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
261
262    let exposure = match metadata.url.as_deref() {
263        Some(url) => Exposure::from_url(url),
264        None => Exposure::Loopback,
265    };
266
267    let service_ref = service_ref_for(&metadata, service_name);
268    let repo_dir = resolve_registry_dir(&service_ref).await?;
269    // The service's own dir under the resolved registry (where a native build/
270    // run happens). Surfaced so callers — the source-staleness check below —
271    // reuse this single resolution instead of resolving again.
272    let source_dir = crate::registry::find_service(&repo_dir, service_name)?.service_dir;
273    let native = matches!(metadata.runtime, Runtime::Native);
274
275    // Recover existing host ports from the install's `.env` so the
276    // re-render lands on the same numbers. Without this every dynamically
277    // allocated port shifts because `port_in_use` reports them taken.
278    let port_overrides = read_existing_ports(service_name)?;
279
280    // Trivial port-in-use closure: the upgrade caller pins every port via
281    // `port_overrides`, so the closure is never consulted. Returning false
282    // unconditionally is safe — no allocation runs.
283    let port_in_use = |_p: u16| false;
284
285    let enabled_groups: BTreeSet<String> = metadata.enabled_groups.iter().cloned().collect();
286    let selected_choices = metadata.selected_choices.clone();
287    // Recover the install's existing `.env` values so a re-render reuses what's
288    // already configured instead of re-demanding it. A required choice/group
289    // member (e.g. an `external` database's `DATABASE_URL`) is provided once at
290    // install and lives in the `.env`; without seeding it here the render treats
291    // it as "no value" and the upgrade/diff errors on a service that's running
292    // fine. Same rationale as `port_overrides` above: upgrade re-renders against
293    // the existing install, it doesn't re-ask for what's already set.
294    let env_overrides = read_existing_env_keys(service_name)?;
295    let result = add_service(crate::AddServiceParams {
296        service_name,
297        exposure: &exposure,
298        auth: match metadata.auth.clone() {
299            Some(kind) => crate::AuthChoice::Native(kind),
300            None => crate::AuthChoice::None,
301        },
302        // SMTP and backup enablement are per-install state — persisted by
303        // `ryra add` and `ryra config`. Upgrade preserves whatever the
304        // user picked.
305        enable_smtp: metadata.smtp_enabled,
306        enable_backup: metadata.backup_enabled,
307        env_overrides: &env_overrides,
308        enabled_groups: &enabled_groups,
309        selected_choices: &selected_choices,
310        registry_name: &metadata.registry,
311        repo_dir: &repo_dir,
312        pre_built_ctx: None,
313        port_in_use: &port_in_use,
314        // ACME mode is only consumed when adding the reverse proxy itself;
315        // upgrade never needs to seed the TLS snippet.
316        acme_mode: None,
317        mode: PlanMode::Upgrade,
318        port_overrides: &port_overrides,
319        // Upgrade preserves the on-disk `.env` via the append-only env_additions
320        // path (it skips this plan's `.env` WriteFile), so it doesn't seed the
321        // merge here; and a re-render never relaxes required-var validation.
322        existing_env_file: None,
323        allow_unset_required: false,
324    })?;
325
326    let mut planned: BTreeMap<PathBuf, String> = BTreeMap::new();
327    for step in &result.steps {
328        if let Step::WriteFile(file) = step {
329            planned.insert(file.path.clone(), file.content.clone());
330        }
331    }
332    Ok(Replanned {
333        result,
334        planned,
335        source_dir,
336        native,
337    })
338}
339
340/// Output of [`replan`]: the re-rendered plan plus the resolved source
341/// location, so callers don't resolve the registry a second time.
342struct Replanned {
343    result: AddResult,
344    planned: BTreeMap<PathBuf, String>,
345    /// The service's source dir (where a native build/run happens).
346    source_dir: PathBuf,
347    /// Whether this is a `runtime = "native"` install.
348    native: bool,
349}
350
351/// Parse the on-disk `.env` for a service into a key→value map. Lines
352/// without `=`, comments, and blanks are skipped. Returns an empty map if
353/// the file is absent — caller decides whether that's a soft error.
354fn read_existing_env_keys(service_name: &str) -> Result<BTreeMap<String, String>> {
355    let env_path = service_home(service_name)?.join(".env");
356    let mut out: BTreeMap<String, String> = BTreeMap::new();
357    let content = match std::fs::read_to_string(&env_path) {
358        Ok(c) => c,
359        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out),
360        Err(source) => {
361            return Err(Error::FileRead {
362                path: env_path,
363                source,
364            });
365        }
366    };
367    for line in content.lines() {
368        let line = line.trim();
369        if line.is_empty() || line.starts_with('#') {
370            continue;
371        }
372        if let Some((k, v)) = line.split_once('=') {
373            out.insert(k.trim().to_string(), v.to_string());
374        }
375    }
376    Ok(out)
377}
378
379/// Parse `SERVICE_PORT_<NAME>=<port>` lines out of an installed service's
380/// `.env`. Returns a name → port map (lowercased name, matching the
381/// `[[ports]]` definition in service.toml). Also used by the metrics
382/// bridge to resolve host-network scrape targets retroactively.
383pub(crate) fn read_existing_ports(service_name: &str) -> Result<BTreeMap<String, u16>> {
384    let env_path = service_home(service_name)?.join(".env");
385    let mut overrides = BTreeMap::new();
386    let content = match std::fs::read_to_string(&env_path) {
387        Ok(c) => c,
388        // No .env yet means a half-installed service; let the planner
389        // re-allocate. (`add_service` will then surface a richer error if
390        // the install is genuinely broken.)
391        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(overrides),
392        Err(source) => {
393            return Err(Error::FileRead {
394                path: env_path,
395                source,
396            });
397        }
398    };
399    for line in content.lines() {
400        let line = line.trim();
401        if line.is_empty() || line.starts_with('#') {
402            continue;
403        }
404        let Some((key, value)) = line.split_once('=') else {
405            continue;
406        };
407        let Some(name) = key.strip_prefix("SERVICE_PORT_") else {
408            continue;
409        };
410        if let Ok(port) = value.trim().parse::<u16>() {
411            overrides.insert(name.to_ascii_lowercase(), port);
412        }
413    }
414    Ok(overrides)
415}
416
/// Returns `true` for paths that must never take part in the diff.
///
/// Two kinds of file are excluded so they never show up as Added / Removed /
/// Drift: the manifest file itself (`manifest_file`, compared by full path),
/// and any file named `.env`, which carries generated secrets that rotate at
/// runtime.
fn should_skip_path(path: &std::path::Path, manifest_file: &std::path::Path) -> bool {
    let is_manifest = path == manifest_file;
    let is_env_file = path.file_name() == Some(std::ffi::OsStr::new(".env"));
    is_manifest || is_env_file
}
427
428/// Compute the diff between the registry's render and what's on disk for an
429/// installed service.
430pub async fn diff_service(service_name: &str) -> Result<DiffResult> {
431    let Replanned {
432        result,
433        planned,
434        source_dir,
435        native,
436    } = replan(service_name).await?;
437
438    // Native source-staleness rides along with the diff (same resolution, no
439    // second registry lookup): has any source file changed since the running
440    // process started? See the module note above on why this is the signal.
441    let source_stale = native
442        && unit_main_pid(service_name)
443            .and_then(process_start_time)
444            .is_some_and(|started| any_file_newer_than(&source_dir, started));
445
446    let manifest_file = manifest::manifest_path(service_name)?;
447    let (manifest_entries, _manifest_envs) = manifest::load(service_name)?.unwrap_or_default();
448    let manifest_by_path: BTreeMap<PathBuf, String> = manifest_entries
449        .into_iter()
450        .map(|e| (e.path, e.sha256))
451        .collect();
452
453    // Env additions: registry-expected static keys missing from the user's
454    // `.env`. Append-only — we ignore present-but-different values
455    // (could be a manual override) and never propose removals (could be
456    // a key the user added themselves that the registry happens not to
457    // ship). The registry-side list comes from the freshly-rendered
458    // `tracked_envs` (which carries kind + prompt for the CLI), not the
459    // on-disk manifest — that's the source of truth.
460    let existing_env = read_existing_env_keys(service_name)?;
461    let env_additions: Vec<EnvAddition> = result
462        .tracked_envs
463        .iter()
464        .filter(|p| !existing_env.contains_key(&p.key))
465        .map(|p| EnvAddition {
466            key: p.key.clone(),
467            value: p.value.clone(),
468            kind: p.kind.clone(),
469            prompt: p.prompt.clone(),
470        })
471        .collect();
472
473    let mut entries: Vec<DiffEntry> = Vec::new();
474    let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
475
476    // Walk planned files first — Added / Modified / Drift / Unchanged.
477    for (path, content) in &planned {
478        if should_skip_path(path, &manifest_file) {
479            continue;
480        }
481        seen.insert(path.clone());
482        let planned_hash = manifest::hash_bytes(content.as_bytes());
483        let on_disk_hash = if path.exists() {
484            Some(manifest::hash_file(path)?)
485        } else {
486            None
487        };
488        let manifest_hash = manifest_by_path.get(path);
489
490        let kind = match (on_disk_hash.as_deref(), manifest_hash.map(String::as_str)) {
491            // File doesn't exist on disk.
492            (None, Some(_)) | (None, None) => match manifest_hash {
493                Some(_) => DiffKind::Modified, // we wrote it, user deleted it; restore
494                None => DiffKind::Added,       // registry adds it, fresh write
495            },
496            // On-disk content already matches what the registry would render.
497            (Some(d), _) if d == planned_hash => DiffKind::Unchanged,
498            // No manifest entry → can't tell if the user touched it.
499            // Conservative: treat as drift so --force is required once.
500            (Some(_), None) => DiffKind::Drift,
501            // On-disk matches the manifest but not the planned render →
502            // ryra-owned, safe to overwrite.
503            (Some(d), Some(l)) if d == l => DiffKind::Modified,
504            // On-disk matches neither lock nor plan → user hand-edited.
505            (Some(_), Some(_)) => DiffKind::Drift,
506        };
507        entries.push(DiffEntry {
508            path: path.clone(),
509            kind,
510        });
511    }
512
513    // Walk manifest entries that the planner no longer emits — Removed.
514    for path in manifest_by_path.keys() {
515        if seen.contains(path) {
516            continue;
517        }
518        if should_skip_path(path, &manifest_file) {
519            continue;
520        }
521        entries.push(DiffEntry {
522            path: path.clone(),
523            kind: DiffKind::Removed,
524        });
525    }
526
527    entries.sort_by(|a, b| a.path.cmp(&b.path));
528    Ok(DiffResult {
529        service: service_name.to_string(),
530        entries,
531        env_additions,
532        source_stale,
533    })
534}
535
536/// Plan a zero-downtime color swap for a `deploy = "blue-green"` install.
537///
538/// Returns `None` when the service isn't blue/green, so [`upgrade_service`] can
539/// fall through to its normal restart-based flow. Otherwise the plan:
540///   1. re-renders both color quadlets/units + reloads systemd (so the idle
541///      slot picks up any new image tag or config), keeping `.env` untouched;
542///   2. starts the *idle* slot and gates on its health endpoint;
543///   3. repoints the Caddy upstream at the idle slot and reloads gracefully;
544///   4. stops the old slot and flips `active_color` in metadata.
545///
546/// A health-gate timeout aborts before step 3, leaving the old slot live and
547/// routed — a failed deploy is a no-op, never an outage.
548pub async fn blue_green_swap(service_name: &str) -> Result<Option<UpgradeResult>> {
549    if !is_service_installed(service_name) {
550        return Err(Error::ServiceNotInstalled(service_name.to_string()));
551    }
552    let metadata = load_metadata(service_name)?
553        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
554
555    // Resolve the registry def to read the deploy strategy + health path.
556    let service_ref = service_ref_for(&metadata, service_name);
557    let repo_dir = resolve_registry_dir(&service_ref).await?;
558    let reg = crate::registry::find_service(&repo_dir, service_name)?;
559    let def = &reg.def;
560    if def.service.deploy != DeployStrategy::BlueGreen {
561        return Ok(None);
562    }
563    let health_check = def.service.health_check.clone().ok_or_else(|| {
564        Error::Template(format!(
565            "{service_name}: deploy = \"blue-green\" but no health_check — validation should have caught this"
566        ))
567    })?;
568
569    // Which slot is live, and which we're rolling onto.
570    let live = metadata.active_color.unwrap_or(Color::Blue);
571    let target = live.other();
572
573    // The idle slot's host port, from the install's `.env`
574    // (`SERVICE_PORT_HTTP_GREEN` etc., written by the blue/green add path).
575    let primary_port_name = def
576        .ports
577        .iter()
578        .find(|p| p.name.eq_ignore_ascii_case("http"))
579        .or_else(|| def.ports.first())
580        .map(|p| p.name.clone())
581        .ok_or_else(|| {
582            Error::Template(format!("{service_name}: blue/green needs a routable port"))
583        })?;
584    let existing_ports = read_existing_ports(service_name)?;
585    let target_key = format!("{}_{}", primary_port_name.to_ascii_lowercase(), target);
586    let target_port = existing_ports.get(&target_key).copied().ok_or_else(|| {
587        Error::Template(format!(
588            "{service_name}: missing {} in .env — reinstall to allocate the blue/green port pair",
589            deploy::color_port_var(
590                &format!("SERVICE_PORT_{}", primary_port_name.to_uppercase()),
591                target
592            )
593        ))
594    })?;
595    let health_url = format!("http://127.0.0.1:{target_port}{health_check}");
596
597    // Re-render the install (Upgrade mode): emits both color quadlets/units and
598    // pulls any new image. Keep those file writes + pulls + daemon-reload, but
599    // drop the add path's StartService/StopService (we orchestrate the swap
600    // ourselves), its `.env` write (preserve secrets), and its metadata write
601    // (we flip active_color below instead of resetting it to blue).
602    let replanned = replan(service_name).await?;
603    let env_filename = std::ffi::OsStr::new(".env");
604    let metadata_file = metadata_path(service_name)?;
605    // Never re-sync or rebuild the LIVE slot's working dir — that's the whole
606    // point of the isolation (an in-flight Python/Node process must not have its
607    // source mutated). Drop any SyncDir/Build that targets `colors/<live>`;
608    // keep the idle slot's. (Podman has no such steps — it re-pulls the image,
609    // which is harmless — so this is a native-only filter in practice.)
610    let live_slot = format!("colors/{live}");
611    let touches_live = |p: &std::path::Path| p.to_string_lossy().contains(&live_slot);
612    let mut steps: Vec<Step> = Vec::new();
613    for step in replanned.result.steps {
614        match step {
615            Step::StartService { .. } | Step::StopService { .. } => continue,
616            Step::WriteFile(GeneratedFile { ref path, .. })
617                if path.file_name() == Some(env_filename) || *path == metadata_file =>
618            {
619                continue;
620            }
621            Step::SyncDir { ref dst, .. } if touches_live(dst) => continue,
622            Step::Build { ref dir, .. } if touches_live(dir) => continue,
623            other => steps.push(other),
624        }
625    }
626
627    // Caddy: repoint the upstream at the idle slot. Only when the install has a
628    // routed URL and a Caddyfile exists (loopback installs swap without it).
629    let caddy_rewrite =
630        blue_green_caddy_rewrite(service_name, def, &metadata, target, target_port)?;
631
632    // The runtime-agnostic swap: start idle -> health-gate -> caddy reload ->
633    // stop old. Artifact prep (pull/build) already rode along in `steps` above.
634    steps.extend(deploy::color_swap_steps(deploy::ColorSwap {
635        service_name: service_name.to_string(),
636        live,
637        prepare: None,
638        health_url,
639        health_timeout_secs: def.service.health_timeout_secs(),
640        caddy_rewrite,
641    }));
642
643    // Flip active_color so the next deploy rolls back onto `live`.
644    let mut new_metadata = metadata.clone();
645    new_metadata.active_color = Some(target);
646    steps.push(Step::WriteFile(GeneratedFile {
647        path: metadata_file,
648        content: toml::to_string_pretty(&new_metadata)?,
649    }));
650
651    Ok(Some(UpgradeResult {
652        service: service_name.to_string(),
653        diff: diff_service(service_name).await?,
654        steps,
655        backup_dir: None,
656        planned_files: replanned.planned,
657        // A swap isn't visible as config drift (the new image/build lives behind
658        // the same quadlet), so force the apply just like the native rebuild path.
659        force_apply: true,
660    }))
661}
662
663/// Re-render the Caddy site block pointing at the idle color and splice it into
664/// the existing Caddyfile. `None` when the install has no routed URL or no
665/// Caddyfile on disk (a loopback blue/green install swaps without Caddy).
666fn blue_green_caddy_rewrite(
667    service_name: &str,
668    def: &crate::registry::service_def::ServiceDef,
669    metadata: &Metadata,
670    target: Color,
671    target_port: u16,
672) -> Result<Option<Step>> {
673    let Some(url) = metadata.url.as_deref() else {
674        return Ok(None);
675    };
676    let caddyfile_path = caddy::caddyfile_path()?;
677    let Ok(existing) = std::fs::read_to_string(&caddyfile_path) else {
678        return Ok(None);
679    };
680    let parsed = url::Url::parse(url)
681        .map_err(|e| Error::Template(format!("invalid service URL '{url}': {e}")))?;
682    let domain = parsed
683        .host_str()
684        .ok_or_else(|| Error::Template(format!("service URL '{url}' has no host")))?;
685    let paths = crate::config::ConfigPaths::resolve()?;
686    let config = crate::config::load_or_default(&paths.config_file)?;
687    // Podman slots are containers on Caddy's shared network, reachable by name
688    // (`<svc>-<color>:<container_port>`). Native slots are host processes, so
689    // Caddy reaches them over the host bridge at the color's *host* port.
690    let (target_host, port) = match metadata.runtime {
691        Runtime::Podman => (
692            deploy::color_unit(service_name, target),
693            def.ports.first().map(|p| p.container_port).unwrap_or(80),
694        ),
695        Runtime::Native => ("host.containers.internal".to_string(), target_port),
696    };
697    let block = caddy::render_site_block(&caddy::CaddySiteParams {
698        service_name: service_name.to_string(),
699        target_host,
700        domain: domain.to_string(),
701        container_port: port,
702        https_port: crate::caddy_https_port(&config),
703        force_internal_tls: false,
704    });
705    let updated = caddy::add_route(&existing, service_name, &block);
706    Ok(Some(Step::WriteFile(GeneratedFile {
707        path: caddyfile_path,
708        content: updated,
709    })))
710}
711
712/// Plan an upgrade for an installed service.
713///
714/// Returns the steps to execute and the backup directory where displaced
715/// files will be copied. The backup dir is *also* baked into the steps
716/// (as `Step::CopyFile` entries placed before each `Step::WriteFile`).
717pub async fn upgrade_service(service_name: &str, force: bool) -> Result<UpgradeResult> {
718    // Blue/green services upgrade by a color swap, not an in-place restart, so
719    // they take a different plan entirely. `blue_green_swap` returns None for
720    // restart-strategy installs, falling through to the standard flow below.
721    if let Some(plan) = blue_green_swap(service_name).await? {
722        return Ok(plan);
723    }
724
725    let diff = diff_service(service_name).await?;
726
727    if !force {
728        let drifted = diff.drifted();
729        if !drifted.is_empty() {
730            return Err(Error::HandEditedFiles {
731                service: service_name.to_string(),
732                paths: drifted.iter().map(|e| e.path.clone()).collect(),
733            });
734        }
735    }
736
737    let Replanned {
738        result, planned, ..
739    } = replan(service_name).await?;
740    let manifest_file = manifest::manifest_path(service_name)?;
741    let env_file = service_home(service_name)?.join(".env");
742
743    // Hard-fail if `.env` is missing. Append-only env handling can't
744    // reconstruct generated secrets (mysql_root_password, jwt_key, etc.)
745    // and would silently produce a half-written file that fails on
746    // restart. Surface the real problem instead.
747    if !env_file.exists() {
748        return Err(Error::Template(format!(
749            "{service_name}: `.env` is missing at {} — upgrade can't reconstruct generated secrets. \
750             Restore the file from a backup or reinstall the service.",
751            env_file.display()
752        )));
753    }
754
755    // Decide the backup directory once per upgrade run. Used whenever any
756    // file would be overwritten *or* the existing service.manifest exists (the
757    // lock is always backed up so `ryra revert` can reconstruct the
758    // pre-upgrade state). Empty when neither holds — keeps
759    // `~/.local/state/ryra/` from accumulating no-op dirs.
760    let backup_dir = backup_directory(service_name)?;
761    let needs_backup: BTreeSet<PathBuf> = diff
762        .entries
763        .iter()
764        .filter(|e| {
765            matches!(
766                e.kind,
767                DiffKind::Modified | DiffKind::Drift | DiffKind::Removed
768            )
769        })
770        .map(|e| e.path.clone())
771        .collect();
772    let manifest_will_be_backed_up = manifest_file.exists();
773    let backup_used = !needs_backup.is_empty() || manifest_will_be_backed_up;
774
775    // Filter the planned step list down to what an upgrade should actually do.
776    // - WriteFile for `.env` is dropped (preserve secrets).
777    // - PullImage stays (idempotent if cached, fetches new tag if registry bumped).
778    // - StartService is replaced with RestartService at the very end.
779    // - CreateDir / Symlink stay (idempotent and may be needed for new files).
780    // - DaemonReload stays.
781    // - CopyFile stays (vendored binaries; rare to upgrade but handled the same).
782    // - TailscaleSetup / TailscaleEnable were already gated out by PlanMode::Upgrade.
783    let mut steps: Vec<Step> = Vec::new();
784    if backup_used {
785        steps.push(Step::CreateDir(backup_dir.clone()));
786    }
787    let unchanged: BTreeSet<PathBuf> = diff
788        .entries
789        .iter()
790        .filter(|e| matches!(e.kind, DiffKind::Unchanged))
791        .map(|e| e.path.clone())
792        .collect();
793
794    let env_filename = std::ffi::OsStr::new(".env");
795    for step in result.steps {
796        match step {
797            // .env stays untouched on upgrade — generated secrets in the
798            // running service must not be regenerated.
799            Step::WriteFile(GeneratedFile { ref path, .. })
800                if path.file_name() == Some(env_filename) =>
801            {
802                continue;
803            }
804            // Identical content already on disk — skip the write entirely
805            // so the file's mtime stays put and `sha256sum -c` stays clean
806            // for unchanged entries.
807            Step::WriteFile(GeneratedFile { ref path, .. }) if unchanged.contains(path) => {
808                // The manifest is special: even if "unchanged" by content, we
809                // re-emit it because path-level adds/removes mean its content
810                // has changed and we need the new hashes recorded.
811                if path == &manifest_file {
812                    steps.push(step);
813                }
814                continue;
815            }
816            Step::WriteFile(ref file) => {
817                // Always back up the existing service.manifest too, even though
818                // it's filtered out of the diff. `ryra revert` reads the
819                // backed-up lock to know which files were Added during the
820                // upgrade (current lock − pre-upgrade lock) so it can delete
821                // them on revert. Without this, revert would leave
822                // upgrade-added files orphaned.
823                let should_backup = (needs_backup.contains(&file.path)
824                    || file.path == manifest_file)
825                    && file.path.exists();
826                if should_backup {
827                    let rel = backup_relpath(&file.path);
828                    let dst = backup_dir.join(rel);
829                    if let Some(parent) = dst.parent() {
830                        steps.push(Step::CreateDir(parent.to_path_buf()));
831                    }
832                    steps.push(Step::CopyFile {
833                        src: file.path.clone(),
834                        dst,
835                    });
836                }
837                steps.push(step);
838            }
839            // The replanned step list always ends with StartService; we
840            // strip it and append a RestartService at the very end so the
841            // unit picks up the new quadlet.
842            Step::StartService { .. } => continue,
843            other => steps.push(other),
844        }
845    }
846
847    // Removed files: back them up then delete.
848    for entry in &diff.entries {
849        if !matches!(entry.kind, DiffKind::Removed) {
850            continue;
851        }
852        if entry.path.exists() {
853            let rel = backup_relpath(&entry.path);
854            let dst = backup_dir.join(rel);
855            if let Some(parent) = dst.parent() {
856                steps.push(Step::CreateDir(parent.to_path_buf()));
857            }
858            steps.push(Step::CopyFile {
859                src: entry.path.clone(),
860                dst,
861            });
862        }
863        steps.push(Step::RemoveFile(entry.path.clone()));
864    }
865
866    // Env additions: append registry-required static env vars that the
867    // user's .env doesn't have. Append-only — we never rewrite the
868    // existing .env (that would clobber rotated secrets and any manual
869    // edits) and we never remove keys (the user might have added their
870    // own that the registry happens not to ship). The .env is
871    // intentionally NOT backed up: it only ever gains lines and the
872    // pre-existing content survives unchanged.
873    if !diff.env_additions.is_empty() {
874        let mut content = match std::fs::read_to_string(&env_file) {
875            Ok(c) => c,
876            // Service installed but .env missing? Treat the add as a
877            // fresh write — odd state, but the right one to recover to.
878            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
879            Err(source) => {
880                return Err(Error::FileRead {
881                    path: env_file.clone(),
882                    source,
883                });
884            }
885        };
886        if !content.is_empty() && !content.ends_with('\n') {
887            content.push('\n');
888        }
889        for add in &diff.env_additions {
890            content.push_str(&format!("{}={}\n", add.key, add.value));
891        }
892        steps.push(Step::WriteFile(GeneratedFile {
893            path: env_file,
894            content,
895        }));
896    }
897
898    // Pick up the new quadlet by restarting. RestartService is enough to
899    // re-read the env file, re-run ExecStartPre/Post, and pull in any new
900    // ExecStartPost script (the seafile case).
901    steps.push(Step::RestartService {
902        unit: service_name.to_string(),
903    });
904
905    // Native services rebuild from source on upgrade (the `Build` step) and
906    // restart. A source change leaves the rendered config clean, so force the
907    // apply; otherwise the CLI would short-circuit on the clean diff and never
908    // rebuild. The plan already ends in RestartService.
909    let force_apply = matches!(
910        crate::metadata::load_metadata(service_name),
911        Ok(Some(m)) if m.runtime == crate::registry::service_def::Runtime::Native
912    );
913
914    Ok(UpgradeResult {
915        service: service_name.to_string(),
916        diff,
917        steps,
918        backup_dir: if backup_used { Some(backup_dir) } else { None },
919        // The replanned env content is irrelevant for upgrade (we don't
920        // write it), but expose the template-render context bag in case
921        // future callers need it. Keep it empty for now to avoid
922        // confusing consumers.
923        planned_files: planned,
924        force_apply,
925    })
926}
927
/// A planned upgrade: the diff it was derived from plus the ordered steps
/// that apply it.
pub struct UpgradeResult {
    /// Name of the service the plan belongs to.
    pub service: String,
    /// Per-file comparison (and pending env additions) the plan was built from.
    pub diff: DiffResult,
    /// Steps to execute, in order.
    pub steps: Vec<Step>,
    /// Snapshot directory the plan copies displaced files into. In the
    /// in-place flow of `upgrade_service` this is `Some` when a tracked file
    /// is modified, drifted or removed, *or* when a `service.manifest`
    /// already exists (the manifest is always backed up); `None` only when
    /// neither holds.
    pub backup_dir: Option<PathBuf>,
    /// File map handed back from the re-render, keyed by path. Presumably the
    /// values are the rendered contents — confirm against `replan`.
    pub planned_files: BTreeMap<PathBuf, String>,
    /// Apply even when the config diff is clean. True for native services: a
    /// source rebuild isn't visible in the rendered config, so the plan must
    /// still run (the `SyncBinary` step then no-ops if the binary is unchanged).
    pub force_apply: bool,
}
940
/// One available backup snapshot for a service, as discovered on disk by
/// `list_backups`.
#[derive(Debug, Clone)]
pub struct BackupSnapshot {
    /// Filesystem path: `~/.local/state/ryra/backups/<timestamp>/<service>/`.
    pub path: PathBuf,
    /// `YYYY-MM-DDTHH-MM-SSZ` timestamp, taken from the name of the directory
    /// that contains `path`. Sorts lexically in chronological order.
    pub timestamp: String,
}
949
/// A planned revert: the snapshot it restores from plus the ordered steps
/// that apply it.
pub struct RevertResult {
    /// Name of the service the plan belongs to.
    pub service: String,
    /// The backup snapshot the revert restores from.
    pub snapshot: BackupSnapshot,
    /// Steps to execute, in order.
    pub steps: Vec<Step>,
    /// Original (absolute) locations that will be overwritten with their
    /// backed-up copies.
    pub files_to_restore: Vec<PathBuf>,
    /// Files added by the upgrade that didn't exist before — will be
    /// removed by revert. Empty when the snapshot pre-dates the manifest
    /// feature (we can't reconstruct what was added without it).
    pub files_to_delete: Vec<PathBuf>,
}
961
/// How many backup snapshots `ryra upgrade` retains per service before
/// auto-pruning. Each snapshot is small (~tens of KB — config files +
/// the manifest) so the cap is more about mental clutter than disk; 5
/// is enough to revert a few iterations back without filling the
/// `~/.local/state/ryra/backups/` tree with dead snapshots from years
/// of upgrades.
pub const DEFAULT_BACKUP_KEEP: usize = 5;
971
972/// Drop snapshots older than the most recent `keep` for this service.
973/// Returns the paths that were removed (newest-first within the
974/// removed set; the kept set keeps the same order). The shared
975/// timestamp dir is also removed when this was the last service-
976/// scoped subdir under it (multi-service upgrade runs share a
977/// timestamp dir; we don't want to nuke other services' state).
978pub fn prune_backups(service_name: &str, keep: usize) -> Result<Vec<PathBuf>> {
979    let backups_root = state_dir()?.join("backups");
980    prune_backups_in(&backups_root, service_name, keep)
981}
982
983/// Pure inner that operates on an explicit `<state>/backups/` root.
984/// Split out so tests can drive it against a tmp tree without touching
985/// the real XDG state dir.
986fn prune_backups_in(
987    backups_root: &std::path::Path,
988    service_name: &str,
989    keep: usize,
990) -> Result<Vec<PathBuf>> {
991    let snapshots = list_backups_in(backups_root, service_name)?;
992    if snapshots.len() <= keep {
993        return Ok(Vec::new());
994    }
995    let mut removed: Vec<PathBuf> = Vec::new();
996    for snap in snapshots.into_iter().skip(keep) {
997        if let Err(e) = std::fs::remove_dir_all(&snap.path) {
998            eprintln!(
999                "warning: failed to prune backup {}: {e}",
1000                snap.path.display()
1001            );
1002            continue;
1003        }
1004        removed.push(snap.path.clone());
1005        if let Some(parent) = snap.path.parent()
1006            && let Ok(mut entries) = std::fs::read_dir(parent)
1007            && entries.next().is_none()
1008        {
1009            let _ = std::fs::remove_dir(parent);
1010        }
1011    }
1012    Ok(removed)
1013}
1014
1015pub fn list_backups(service_name: &str) -> Result<Vec<BackupSnapshot>> {
1016    let backups_root = state_dir()?.join("backups");
1017    list_backups_in(&backups_root, service_name)
1018}
1019
1020fn list_backups_in(
1021    backups_root: &std::path::Path,
1022    service_name: &str,
1023) -> Result<Vec<BackupSnapshot>> {
1024    if !backups_root.is_dir() {
1025        return Ok(Vec::new());
1026    }
1027    let mut snapshots: Vec<BackupSnapshot> = Vec::new();
1028    let entries = std::fs::read_dir(backups_root).map_err(|source| Error::FileRead {
1029        path: backups_root.to_path_buf(),
1030        source,
1031    })?;
1032    for entry in entries.flatten() {
1033        let stamp_dir = entry.path();
1034        if !stamp_dir.is_dir() {
1035            continue;
1036        }
1037        let svc_dir = stamp_dir.join(service_name);
1038        if !svc_dir.is_dir() {
1039            continue;
1040        }
1041        let Some(stamp) = stamp_dir.file_name().and_then(|n| n.to_str()) else {
1042            continue;
1043        };
1044        snapshots.push(BackupSnapshot {
1045            path: svc_dir,
1046            timestamp: stamp.to_string(),
1047        });
1048    }
1049    // Newest first: timestamp is `YYYY-MM-DDTHH-MM-SSZ`, lexical-descending == reverse-chronological.
1050    snapshots.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
1051    Ok(snapshots)
1052}
1053
1054/// Plan a revert for an installed service.
1055///
1056/// `at` selects a specific backup timestamp; `None` picks the most recent.
1057/// The returned plan: restore every file from the backup tree to its
1058/// original location, delete files added by the upgrade, daemon-reload,
1059/// restart the unit.
1060pub fn revert_service(service_name: &str, at: Option<&str>) -> Result<RevertResult> {
1061    if !is_service_installed(service_name) {
1062        return Err(Error::ServiceNotInstalled(service_name.to_string()));
1063    }
1064    let snapshot = pick_snapshot(service_name, at)?;
1065
1066    // Files to restore: walk the backup tree and reconstruct the original
1067    // absolute path for each one. The backup mirrors absolute paths under
1068    // `<snapshot>/<original-path-without-leading-slash>`, so the inverse is
1069    // simply prefixing `/` to each path-relative-to-snapshot.
1070    let mut files_to_restore: Vec<PathBuf> = Vec::new();
1071    walk_backup_files(&snapshot.path, &mut files_to_restore)?;
1072
1073    // Files to delete: anything in the *current* lock that isn't in the
1074    // *backed-up* lock was added by the upgrade and should disappear on
1075    // revert. If either lock is absent, leave the delete set empty —
1076    // safest no-op for snapshots that pre-date this feature.
1077    let backup_manifest_file =
1078        absolute_to_backup_path(&snapshot.path, &manifest::manifest_path(service_name)?);
1079    let (backup_manifest_entries, _) = read_manifest_at(&backup_manifest_file)?;
1080    let (current_manifest_entries, _) = manifest::load(service_name)?.unwrap_or_default();
1081
1082    let backup_manifest_set: BTreeSet<PathBuf> = backup_manifest_entries
1083        .iter()
1084        .map(|e| e.path.clone())
1085        .collect();
1086    let mut files_to_delete: Vec<PathBuf> = if backup_manifest_entries.is_empty() {
1087        // Pre-feature snapshot: no way to know what was added.
1088        Vec::new()
1089    } else {
1090        current_manifest_entries
1091            .iter()
1092            .map(|e| e.path.clone())
1093            .filter(|p| !backup_manifest_set.contains(p))
1094            .collect()
1095    };
1096    files_to_delete.sort();
1097
1098    // Build the step list.
1099    let mut steps: Vec<Step> = Vec::new();
1100    // Restore: backup → original. CopyFile creates parents itself, so no
1101    // CreateDir needed.
1102    for backup_path in &files_to_restore {
1103        let original = backup_to_absolute_path(&snapshot.path, backup_path);
1104        steps.push(Step::CopyFile {
1105            src: backup_path.clone(),
1106            dst: original,
1107        });
1108    }
1109    // Delete: each Added file, plus any orphan symlink in the quadlet dir
1110    // that pointed at it (only the actual file is in the lock; the
1111    // companion symlink in `~/.config/containers/systemd/` is not).
1112    let qd = crate::quadlet_dir()?;
1113    for path in &files_to_delete {
1114        if path.exists() {
1115            steps.push(Step::RemoveFile(path.clone()));
1116        }
1117        if let Some(name) = path.file_name() {
1118            let symlink = qd.join(name);
1119            if std::fs::symlink_metadata(&symlink).is_ok() {
1120                steps.push(Step::RemoveFile(symlink));
1121            }
1122        }
1123    }
1124    steps.push(Step::DaemonReload);
1125    steps.push(Step::RestartService {
1126        unit: service_name.to_string(),
1127    });
1128
1129    let files_to_restore_orig: Vec<PathBuf> = files_to_restore
1130        .iter()
1131        .map(|p| backup_to_absolute_path(&snapshot.path, p))
1132        .collect();
1133    Ok(RevertResult {
1134        service: service_name.to_string(),
1135        snapshot,
1136        steps,
1137        files_to_restore: files_to_restore_orig,
1138        files_to_delete,
1139    })
1140}
1141
1142/// Resolve the snapshot to revert to. `at` is a timestamp string (e.g.
1143/// `2026-05-05T13-33-50Z`); when absent, the most recent snapshot wins.
1144fn pick_snapshot(service_name: &str, at: Option<&str>) -> Result<BackupSnapshot> {
1145    let snapshots = list_backups(service_name)?;
1146    if snapshots.is_empty() {
1147        return Err(Error::NoBackup(service_name.to_string()));
1148    }
1149    match at {
1150        None => Ok(snapshots
1151            .into_iter()
1152            .next()
1153            .expect("non-empty checked above")),
1154        Some(stamp) => snapshots
1155            .into_iter()
1156            .find(|s| s.timestamp == stamp)
1157            .ok_or_else(|| Error::BackupNotFound {
1158                service: service_name.to_string(),
1159                stamp: stamp.to_string(),
1160            }),
1161    }
1162}
1163
1164/// Recursively collect every regular file under `root` into `out`. Symlinks
1165/// are followed; we don't expect any in a backup tree (we always copied
1166/// targets, never link entries).
1167fn walk_backup_files(root: &std::path::Path, out: &mut Vec<PathBuf>) -> Result<()> {
1168    let entries = std::fs::read_dir(root).map_err(|source| Error::FileRead {
1169        path: root.to_path_buf(),
1170        source,
1171    })?;
1172    for entry in entries.flatten() {
1173        let path = entry.path();
1174        let meta = match entry.metadata() {
1175            Ok(m) => m,
1176            Err(_) => continue,
1177        };
1178        if meta.is_dir() {
1179            walk_backup_files(&path, out)?;
1180        } else if meta.is_file() {
1181            out.push(path);
1182        }
1183    }
1184    Ok(())
1185}
1186
/// Map a file inside a snapshot back to the location it was copied from:
/// with `root` = `<snapshot>`, `<snapshot>/home/user/foo` becomes
/// `/home/user/foo`. This is the inverse of `backup_relpath`.
///
/// A `backup` path that does not live under `root` is re-rooted at `/` as it
/// stands (an absolute path therefore comes back unchanged).
fn backup_to_absolute_path(root: &std::path::Path, backup: &std::path::Path) -> PathBuf {
    let relative = match backup.strip_prefix(root) {
        Ok(inside_snapshot) => inside_snapshot,
        Err(_) => backup,
    };
    let mut original = PathBuf::from("/");
    original.push(relative);
    original
}
1193
/// Locate an absolute path inside a snapshot: `<root>` + `/home/user/foo`
/// becomes `<root>/home/user/foo`.
///
/// The leading `/` is removed with `Path::strip_prefix` rather than through a
/// lossy string conversion, so path components that are not valid UTF-8 keep
/// their exact bytes and the mapping stays invertible. A path that is already
/// relative is joined onto `root` unchanged.
fn absolute_to_backup_path(root: &std::path::Path, abs: &std::path::Path) -> PathBuf {
    root.join(abs.strip_prefix("/").unwrap_or(abs))
}
1200
1201/// Read a manifest at the given path. Missing-file is treated as an empty
1202/// list — pre-feature backups simply have no lock to reference.
1203fn read_manifest_at(
1204    path: &std::path::Path,
1205) -> Result<(Vec<manifest::ManifestEntry>, Vec<manifest::EnvEntry>)> {
1206    if !path.exists() {
1207        return Ok((Vec::new(), Vec::new()));
1208    }
1209    let content = std::fs::read_to_string(path).map_err(|source| Error::FileRead {
1210        path: path.to_path_buf(),
1211        source,
1212    })?;
1213    manifest::parse(&content)
1214}
1215
1216/// `~/.local/state/ryra/backups/<timestamp>/<service>/`. Timestamp uses an
1217/// ISO-8601-ish form that sorts lexically (no colons — Windows-friendly,
1218/// not that it matters today, but the cost is zero).
1219fn backup_directory(service_name: &str) -> Result<PathBuf> {
1220    let state = state_dir()?;
1221    let now = std::time::SystemTime::now()
1222        .duration_since(std::time::UNIX_EPOCH)
1223        .map_err(|e| Error::Template(format!("system clock before UNIX epoch: {e}")))?
1224        .as_secs();
1225    let stamp = format_timestamp(now);
1226    Ok(state.join("backups").join(stamp).join(service_name))
1227}
1228
1229/// XDG state dir under `ryra/`. Created on demand by the CreateDir step.
1230fn state_dir() -> Result<PathBuf> {
1231    let base = dirs::state_dir()
1232        .or_else(|| dirs::home_dir().map(|h| h.join(".local").join("state")))
1233        .ok_or(Error::HomeDirNotFound)?;
1234    Ok(base.join("ryra"))
1235}
1236
1237/// Format a UNIX epoch into `YYYY-MM-DDTHH-MM-SSZ`. Avoids the chrono
1238/// dependency — we just need stable lexical sort.
1239fn format_timestamp(secs: u64) -> String {
1240    // Days from 1970-01-01.
1241    const SECS_PER_DAY: u64 = 86_400;
1242    let days = secs / SECS_PER_DAY;
1243    let time_of_day = secs % SECS_PER_DAY;
1244    let h = time_of_day / 3600;
1245    let m = (time_of_day % 3600) / 60;
1246    let s = time_of_day % 60;
1247    let (y, mo, d) = ymd_from_days(days);
1248    format!("{y:04}-{mo:02}-{d:02}T{h:02}-{m:02}-{s:02}Z")
1249}
1250
/// Turn a day count since 1970-01-01 into a proleptic Gregorian
/// `(year, month, day)` with `month` in `1..=12` and `day` in `1..=31`.
///
/// This is the civil-from-days algorithm from Howard Hinnant's date library
/// (MIT), kept local so backup naming needs no chrono/time dependency. It
/// works on 400-year eras of 146 097 days, with the year temporarily starting
/// on 1 March so the leap day falls at the end of the year.
fn ymd_from_days(days: u64) -> (i64, u32, u32) {
    const DAYS_PER_ERA: i64 = 146_097;
    // Days between 0000-03-01 and 1970-01-01.
    const EPOCH_SHIFT: i64 = 719_468;

    let shifted = days as i64 + EPOCH_SHIFT;
    // Floor division and remainder, so the era maths also holds for a
    // negative `shifted`.
    let era = shifted.div_euclid(DAYS_PER_ERA);
    let day_of_era = shifted.rem_euclid(DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March, ..., 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * march_based_month + 2) / 5 + 1) as u32;
    let month = (if march_based_month < 10 {
        march_based_month + 3
    } else {
        march_based_month - 9
    }) as u32;
    // January and February belong to the following civil year.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day)
}
1267
/// Map an absolute path to its location relative to a backup directory.
///
/// The leading `/` is dropped so that joining the result onto the backup
/// directory cannot escape it; everything else is kept verbatim so the user
/// can `diff -r` the backup against the original location. The root is
/// removed with `Path::strip_prefix` rather than a lossy string conversion,
/// so components that are not valid UTF-8 keep their exact bytes. A path that
/// is already relative is returned unchanged.
fn backup_relpath(path: &std::path::Path) -> PathBuf {
    path.strip_prefix("/").unwrap_or(path).to_path_buf()
}
1274
#[cfg(test)]
mod tests {
    use super::*;

    /// A fresh path under the system temp dir. Process id, wall-clock
    /// nanoseconds and a per-process counter together keep concurrent tests
    /// apart even when the clock is too coarse to tell two calls apart.
    /// Nothing is created on disk.
    fn unique_tmp(prefix: &str) -> PathBuf {
        use std::sync::atomic::{AtomicUsize, Ordering};
        static NEXT: AtomicUsize = AtomicUsize::new(0);

        let nanos = std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap()
            .as_nanos();
        std::env::temp_dir().join(format!(
            "{prefix}-{}-{nanos}-{}",
            std::process::id(),
            NEXT.fetch_add(1, Ordering::Relaxed)
        ))
    }

    #[test]
    fn timestamp_round_numbers() {
        assert_eq!(format_timestamp(0), "1970-01-01T00-00-00Z");
        assert_eq!(format_timestamp(86_400), "1970-01-02T00-00-00Z");
        // 1970 is not a leap year, so 365 days later is 1971-01-01.
        assert_eq!(format_timestamp(31_536_000), "1971-01-01T00-00-00Z");
    }

    #[test]
    fn timestamp_modern_dates() {
        // 2026-01-01 is 20_454 days after the epoch (56 years, 14 of them
        // leap years): 20_454 * 86_400 = 1_767_225_600.
        assert_eq!(format_timestamp(1_767_225_600), "2026-01-01T00-00-00Z");
        assert_eq!(
            format_timestamp(1_767_225_600 + 13 * 3600 + 33 * 60 + 50),
            "2026-01-01T13-33-50Z"
        );
        // Leap day: 19_782 days after the epoch.
        assert_eq!(format_timestamp(1_709_164_800), "2024-02-29T00-00-00Z");
    }

    #[test]
    fn backup_relpath_strips_leading_slash() {
        let p = backup_relpath(std::path::Path::new("/home/user/foo/bar"));
        assert_eq!(p, PathBuf::from("home/user/foo/bar"));
    }

    #[test]
    fn backup_path_mapping_round_trips() {
        let root = std::path::Path::new("/state/ryra/backups/2026-05-05T13-33-50Z/svc");
        let original = std::path::Path::new("/home/user/.config/svc/app.conf");
        let in_backup = absolute_to_backup_path(root, original);
        assert_eq!(in_backup, root.join(backup_relpath(original)));
        assert_eq!(
            backup_to_absolute_path(root, &in_backup),
            original.to_path_buf()
        );
    }

    /// Stand up a tmp backups tree with the given timestamps and a
    /// service subdir under each, then run `prune_backups_in` against it.
    /// Returns (kept timestamps newest-first, removed paths). Hermetic:
    /// no env vars touched, no shared global state.
    fn setup_and_prune(stamps: &[&str], keep: usize) -> (Vec<String>, Vec<PathBuf>) {
        let tmp = unique_tmp("ryra-prune-test");
        let backups_root = tmp.join("backups");
        for s in stamps {
            std::fs::create_dir_all(backups_root.join(s).join("svc")).unwrap();
        }
        let removed = prune_backups_in(&backups_root, "svc", keep).unwrap();
        let mut kept: Vec<String> = std::fs::read_dir(&backups_root)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter_map(|e| e.file_name().into_string().ok())
            .collect();
        kept.sort();
        kept.reverse();
        let _ = std::fs::remove_dir_all(&tmp);
        (kept, removed)
    }

    #[test]
    fn prune_keeps_newest_n() {
        // Five timestamps, keep=3 — the two oldest (lex-smallest) should go.
        let (kept, removed) = setup_and_prune(
            &[
                "2026-01-01T00-00-00Z",
                "2026-02-01T00-00-00Z",
                "2026-03-01T00-00-00Z",
                "2026-04-01T00-00-00Z",
                "2026-05-01T00-00-00Z",
            ],
            3,
        );
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[0], "2026-05-01T00-00-00Z");
        assert_eq!(kept[2], "2026-03-01T00-00-00Z");
        // Removed snapshots are reported newest first.
        assert_eq!(removed.len(), 2);
        assert!(removed[0].ends_with("2026-02-01T00-00-00Z/svc"));
        assert!(removed[1].ends_with("2026-01-01T00-00-00Z/svc"));
    }

    #[test]
    fn prune_no_op_when_under_keep() {
        let (kept, removed) = setup_and_prune(&["2026-01-01T00-00-00Z", "2026-02-01T00-00-00Z"], 5);
        assert_eq!(kept.len(), 2);
        assert!(removed.is_empty());
    }

    #[test]
    fn prune_leaves_timestamp_dir_shared_with_other_service() {
        let tmp = unique_tmp("ryra-prune-shared");
        let backups_root = tmp.join("backups");
        let old = backups_root.join("2026-01-01T00-00-00Z");
        let new = backups_root.join("2026-02-01T00-00-00Z");
        std::fs::create_dir_all(old.join("svc")).unwrap();
        std::fs::create_dir_all(old.join("other")).unwrap();
        std::fs::create_dir_all(new.join("svc")).unwrap();

        let removed = prune_backups_in(&backups_root, "svc", 1).unwrap();

        // Capture the on-disk state before cleaning up, so a failing
        // assertion does not leave the tmp tree behind.
        let pruned_gone = !old.join("svc").exists();
        let other_kept = old.join("other").is_dir();
        let newest_kept = new.join("svc").is_dir();
        let _ = std::fs::remove_dir_all(&tmp);

        assert_eq!(removed, vec![old.join("svc")]);
        assert!(pruned_gone);
        assert!(other_kept);
        assert!(newest_kept);
    }

    #[test]
    fn source_staleness_ignores_build_and_dotdirs() {
        use std::time::Duration;

        let tmp = unique_tmp("ryra-stale");
        std::fs::create_dir_all(tmp.join("src")).unwrap();
        std::fs::create_dir_all(tmp.join("target")).unwrap();
        std::fs::create_dir_all(tmp.join(".git")).unwrap();
        std::fs::write(tmp.join("src/main.rs"), "fn main(){}").unwrap();
        std::fs::write(tmp.join("target/app"), "bin").unwrap();
        std::fs::write(tmp.join(".git/HEAD"), "ref").unwrap();

        // Baseline after everything we wrote: nothing is newer.
        assert!(!any_file_newer_than(
            &tmp,
            SystemTime::now() + Duration::from_secs(3600)
        ));
        // Baseline before everything: the source file trips staleness.
        assert!(any_file_newer_than(
            &tmp,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        // When only ignored dirs hold newer files, staleness stays false.
        let ignored_only = unique_tmp("ryra-stale-ign");
        std::fs::create_dir_all(ignored_only.join("node_modules")).unwrap();
        std::fs::write(ignored_only.join("node_modules/x.js"), "x").unwrap();
        assert!(!any_file_newer_than(
            &ignored_only,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        let _ = std::fs::remove_dir_all(&tmp);
        let _ = std::fs::remove_dir_all(&ignored_only);
    }

    #[test]
    fn should_skip_path_excludes_env_and_manifest() {
        let lock = PathBuf::from("/svc/service.manifest");
        assert!(should_skip_path(&PathBuf::from("/svc/.env"), &lock));
        assert!(should_skip_path(&lock, &lock));
        assert!(!should_skip_path(
            &PathBuf::from("/svc/configs/x.sh"),
            &lock
        ));
    }
}