Skip to main content

ryra_core/
upgrade.rs

1//! Diff and upgrade flows for already-installed services.
2//!
3//! "Upgrade" means: re-render an installed service's quadlet + configs
4//! against the current registry, replace any files whose content changed,
5//! and restart the unit. The render path is shared with `add_service`
6//! (driven via [`PlanMode::Upgrade`]); the side-effect steps differ.
7//!
8//! Drift detection is grounded in `service.manifest` — the per-install render
9//! manifest written by `ryra add`. Each tracked file is in one of these
10//! states:
11//!
12//! - **Unchanged**: on-disk content matches what the registry would render.
13//! - **Modified**: registry rendered output differs, but on-disk hash still
14//!   matches the manifest, so we know the file is ours and can be safely
15//!   overwritten.
16//! - **Drift**: on-disk hash matches *neither* the manifest nor the planned
17//!   content — i.e. the user hand-edited it. Refused without `--force`.
18//! - **Added**: file is in the planned set but not in the manifest (registry
19//!   added it).
20//! - **Removed**: file is in the manifest but not in the planned set (registry
21//!   stopped shipping it).
22//!
23//! `.env` is excluded throughout: it carries generated secrets that legitimately
24//! drift across restarts, and re-rendering it on upgrade would clobber rotated
25//! credentials. Its absence from the manifest is the source of truth for that.
26
27use std::collections::{BTreeMap, BTreeSet};
28use std::path::{Path, PathBuf};
29use std::time::SystemTime;
30
31use crate::error::{Error, Result};
32use crate::exposure::Exposure;
33use crate::generate::GeneratedFile;
34use crate::manifest;
35use crate::metadata::{Metadata, load_metadata};
36use crate::registry::resolve::ServiceRef;
37use crate::registry::service_def::{Color, DeployStrategy, Runtime};
38use crate::{
39    AddResult, PlanMode, REGISTRY_DEFAULT, Step, add_service, caddy, deploy, is_service_installed,
40    paths::metadata_path, resolve_registry_dir, service_home,
41};
42
43// --- Native source-staleness ("a rebuild would pick up new code") ----------
44//
// Config drift is detected by `diff_service` (below). But a `runtime =
46// "native"` service can change *without* its rendered config changing: you
47// edit the source and a `cargo build` / `bun install` / restart would ship it.
48// `service.toml` is unchanged, so the diff is clean and the service still looks
49// up to date. This module fills that gap with a language-agnostic signal: did
50// any source file change since the running process last started?
51//
52// The signal is the running process's own start time (no state is written
53// anywhere): we ask systemd for the unit's MainPID and read its start time from
54// `/proc/<pid>/stat`, then flag staleness when any source file is newer. That
55// works for *anything* systemd can run (bash, Python, Node, Rust, C++, ...) --
56// we never inspect a toolchain or look for a "binary". It's a *hint*, not a
57// gate: the remedy is always an idempotent `ryra upgrade`, and the comparison
58// is read-only, so a false positive just costs a needless rebuild.
59
/// Directory names never treated as source inputs: the usual build-output /
/// dependency dirs across ecosystems. Dot-directories (`.git`, editor/tool
/// state) are deliberately *not* listed here: `any_file_newer_than` skips any
/// directory whose name starts with `.` on its own, in addition to this list.
/// Entries are compared against a directory's own name (not its full path), so
/// they apply at every depth of the walk. Best-effort and language-agnostic --
/// staleness is a hint, so a missed exclusion at worst shows a spurious
/// "upgrade available" that an idempotent `ryra upgrade` clears.
const IGNORED_DIRS: &[&str] = &[
    "target",
    "node_modules",
    "dist",
    "build",
    "out",
    "vendor",
    "__pycache__",
    "venv",
];
75
76/// True if any regular file under `dir` (skipping [`IGNORED_DIRS`] and dotdirs)
77/// was modified after `since`. Stops at the first newer file; symlinks are not
78/// followed. Unreadable dirs/files are skipped (a hint, not a hard check).
79fn any_file_newer_than(dir: &Path, since: SystemTime) -> bool {
80    let Ok(entries) = std::fs::read_dir(dir) else {
81        return false;
82    };
83    for entry in entries.flatten() {
84        let Ok(file_type) = entry.file_type() else {
85            continue;
86        };
87        let path = entry.path();
88        if file_type.is_dir() {
89            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
90            if name.starts_with('.') || IGNORED_DIRS.contains(&name) {
91                continue;
92            }
93            if any_file_newer_than(&path, since) {
94                return true;
95            }
96        } else if file_type.is_file()
97            && let Ok(mtime) = entry.metadata().and_then(|m| m.modified())
98            && mtime > since
99        {
100            return true;
101        }
102    }
103    false
104}
105
106/// Rebuild the `ServiceRef` we stashed at install time (mirrors `replan`), so
107/// the source dir can be resolved the same way an upgrade would.
108fn service_ref_for(metadata: &Metadata, service_name: &str) -> ServiceRef {
109    if metadata.registry.is_empty() || metadata.registry == REGISTRY_DEFAULT {
110        ServiceRef::Default(service_name.to_string())
111    } else if crate::registry::resolve::is_path_like(&metadata.registry) {
112        ServiceRef::Path {
113            dir: PathBuf::from(&metadata.registry),
114            name: service_name.to_string(),
115        }
116    } else {
117        ServiceRef::Custom {
118            registry: metadata.registry.clone(),
119            service: service_name.to_string(),
120        }
121    }
122}
123
/// Asks the user-level systemd instance for `<service_name>.service`'s MainPID.
///
/// Returns `None` when `systemctl` cannot be spawned, exits unsuccessfully,
/// prints something that is not a `u32`, or reports MainPID `0` (which is how
/// systemd reports a unit with no main process, i.e. a stopped service).
fn unit_main_pid(service_name: &str) -> Option<u32> {
    let unit = format!("{service_name}.service");
    let mut query = std::process::Command::new("systemctl");
    query
        .arg("--user")
        .arg("show")
        .arg(&unit)
        .args(["-p", "MainPID", "--value"]);

    let output = query.output().ok()?;
    if !output.status.success() {
        return None;
    }

    let raw = String::from_utf8_lossy(&output.stdout);
    match raw.trim().parse::<u32>() {
        Ok(0) | Err(_) => None,
        Ok(pid) => Some(pid),
    }
}
144
/// Wall-clock start time of `pid`, from `/proc/<pid>/stat` field 22 (starttime,
/// in clock ticks since boot) plus `/proc/stat`'s `btime` (boot epoch, whole
/// seconds).
///
/// The sub-second part of `starttime` is kept (tick resolution) rather than
/// truncated to whole seconds: rounding the start time down by up to a second
/// would make a file written *just before* the process started look newer
/// than the process and raise a false "source stale" hint. Because `btime` is
/// itself second-granular, the result can still be slightly early -- never
/// late -- so this only ever errs toward a spurious hint.
///
/// Returns `None` if the process is gone, `/proc` can't be read or parsed, or
/// the computed instant does not fit in a `SystemTime`.
fn process_start_time(pid: u32) -> Option<SystemTime> {
    // USER_HZ: the kernel's /proc clock-tick rate. Fixed at 100 on every
    // mainstream Linux (the value is baked into the ABI, not the runtime CPU
    // tick), so hardcoding it avoids a libc/sysconf dependency.
    const USER_HZ: u64 = 100;
    const NANOS_PER_TICK: u64 = 1_000_000_000 / USER_HZ;

    let stat = std::fs::read_to_string(format!("/proc/{pid}/stat")).ok()?;
    // comm (field 2) is parenthesised and may itself contain spaces or `)`, so
    // the numeric fields resume only after the LAST `)`. field 3 (state) is the
    // first token there, making starttime (field 22) the 20th -> index 19.
    let (_, after_comm) = stat.rsplit_once(')')?;
    let starttime_ticks: u64 = after_comm.split_whitespace().nth(19)?.parse().ok()?;

    let proc_stat = std::fs::read_to_string("/proc/stat").ok()?;
    let btime: u64 = proc_stat
        .lines()
        .find_map(|l| l.strip_prefix("btime ")?.trim().parse().ok())?;

    // Checked arithmetic: a corrupt /proc value yields `None`, not a panic.
    let secs = btime.checked_add(starttime_ticks / USER_HZ)?;
    // The remainder is < USER_HZ, so this is always < 1_000_000_000.
    let nanos = u32::try_from((starttime_ticks % USER_HZ) * NANOS_PER_TICK).ok()?;
    std::time::UNIX_EPOCH.checked_add(std::time::Duration::new(secs, nanos))
}
168
/// Per-file diff classification, as assigned by `diff_service` from three
/// inputs: the planned render, the manifest hash, and the on-disk hash.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DiffKind {
    /// On-disk content matches the planned render. Nothing to do.
    /// This holds whether or not the file has a manifest entry.
    Unchanged,
    /// Registry now renders different content. On-disk hash still matches
    /// the manifest, so the file is ryra-owned and safe to overwrite.
    /// Also assigned when the manifest lists the file but it no longer exists
    /// on disk (ryra wrote it, the user deleted it) so the upgrade restores it.
    Modified,
    /// On-disk hash differs from both the manifest and the planned render —
    /// the user hand-edited this file. Upgrade refuses without `--force`.
    /// Includes the case where there is no manifest entry to compare against
    /// (service installed before the manifest feature; treated conservatively
    /// as drift until the user confirms with `--force`).
    Drift,
    /// File is in the planned render but absent from the manifest — registry
    /// added it. Only assigned when the file is also absent on disk; a file
    /// that exists on disk without a manifest entry is `Unchanged` or `Drift`.
    Added,
    /// File is in the manifest but no longer rendered by the registry —
    /// registry stopped shipping it. Upgrade deletes it.
    Removed,
}
190
/// One tracked file together with its classification inside a [`DiffResult`].
#[derive(Debug, Clone)]
pub struct DiffEntry {
    /// The file this entry is about: the planned write target, or — for
    /// `DiffKind::Removed` — the path recorded in the manifest.
    pub path: PathBuf,
    /// How the file compares across planned render, manifest, and disk.
    pub kind: DiffKind,
}
196
/// One env var the registry expects in `.env` that the user's `.env`
/// doesn't have. By design env tracking is *append-only* — we never flag
/// a present-but-different value as drift, and we never propose
/// removing a key. Users may have manually edited values or added their
/// own keys; clobbering those would be the larger harm.
///
/// `kind` and `prompt` come straight from the registry's `EnvVar`
/// definition, so the CLI can route Prompted / Required additions
/// through the same interactive prompt that `ryra add` uses, while
/// silently appending Default ones.
#[derive(Debug, Clone)]
pub struct EnvAddition {
    /// Name of the missing variable (the key looked up in the existing `.env`).
    pub key: String,
    /// Value the fresh render produced for this key (copied from the
    /// render's tracked env entry).
    pub value: String,
    /// Registry-declared kind of the variable; drives how the CLI handles it.
    pub kind: crate::registry::service_def::EnvKind,
    /// Registry-declared prompt text, if any.
    pub prompt: Option<String>,
}
214
/// Result of comparing the registry's render to what's on disk.
#[derive(Debug, Clone)]
pub struct DiffResult {
    /// Name of the service this diff was computed for.
    pub service: String,
    /// One entry per tracked file, sorted by path. `.env` and the manifest
    /// file itself are excluded.
    pub entries: Vec<DiffEntry>,
    /// Static env vars the registry expects but the user's `.env` is
    /// missing. Empty when the `.env` already covers everything tracked.
    pub env_additions: Vec<EnvAddition>,
    /// `runtime = "native"` only: the source changed since the running process
    /// started, so a rebuild/restart would ship new code even though the
    /// rendered config is unchanged. Always `false` for podman services and
    /// stopped natives. Orthogonal to [`Self::is_clean`] (which is config-only)
    /// -- a service is upgradable when the diff is dirty *or* this is set.
    pub source_stale: bool,
}
230
231impl DiffResult {
232    /// True when nothing about the install would change — neither files
233    /// nor env vars.
234    pub fn is_clean(&self) -> bool {
235        self.entries
236            .iter()
237            .all(|e| matches!(e.kind, DiffKind::Unchanged))
238            && self.env_additions.is_empty()
239    }
240
241    /// Files the user hand-edited. Upgrade must refuse to overwrite these
242    /// without `--force`.
243    pub fn drifted(&self) -> Vec<&DiffEntry> {
244        self.entries
245            .iter()
246            .filter(|e| matches!(e.kind, DiffKind::Drift))
247            .collect()
248    }
249}
250
251/// Reconstruct the planning inputs we stashed at install time and feed them
252/// back through `add_service` in upgrade mode. Returns the planned step
253/// list and the planned-file content map (path → content). The richer
254/// per-env metadata lives on `AddResult.tracked_envs`.
255async fn replan(service_name: &str) -> Result<Replanned> {
256    if !is_service_installed(service_name) {
257        return Err(Error::ServiceNotInstalled(service_name.to_string()));
258    }
259    let metadata = load_metadata(service_name)?
260        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;
261
262    let exposure = match metadata.url.as_deref() {
263        Some(url) => Exposure::from_url(url),
264        None => Exposure::Loopback,
265    };
266
267    let service_ref = service_ref_for(&metadata, service_name);
268    let repo_dir = resolve_registry_dir(&service_ref).await?;
269    // The service's own dir under the resolved registry (where a native build/
270    // run happens). Surfaced so callers — the source-staleness check below —
271    // reuse this single resolution instead of resolving again.
272    let source_dir = crate::registry::find_service(&repo_dir, service_name)?.service_dir;
273    let native = matches!(metadata.runtime, Runtime::Native);
274
275    // Recover existing host ports from the install's `.env` so the
276    // re-render lands on the same numbers. Without this every dynamically
277    // allocated port shifts because `port_in_use` reports them taken.
278    let port_overrides = read_existing_ports(service_name)?;
279
280    // Trivial port-in-use closure: the upgrade caller pins every port via
281    // `port_overrides`, so the closure is never consulted. Returning false
282    // unconditionally is safe — no allocation runs.
283    let port_in_use = |_p: u16| false;
284
285    let enabled_groups: BTreeSet<String> = metadata.enabled_groups.iter().cloned().collect();
286    let selected_choices = metadata.selected_choices.clone();
287    // Recover the install's existing `.env` values so a re-render reuses what's
288    // already configured instead of re-demanding it. A required choice/group
289    // member (e.g. an `external` database's `DATABASE_URL`) is provided once at
290    // install and lives in the `.env`; without seeding it here the render treats
291    // it as "no value" and the upgrade/diff errors on a service that's running
292    // fine. Same rationale as `port_overrides` above: upgrade re-renders against
293    // the existing install, it doesn't re-ask for what's already set.
294    let env_overrides = read_existing_env_keys(service_name)?;
295    let result = add_service(crate::AddServiceParams {
296        service_name,
297        exposure: &exposure,
298        auth: match metadata.auth.clone() {
299            Some(kind) => crate::AuthChoice::Native(kind),
300            None => crate::AuthChoice::None,
301        },
302        // SMTP and backup enablement are per-install state — persisted by
303        // `ryra add` and `ryra configure`. Upgrade preserves whatever the
304        // user picked.
305        enable_smtp: metadata.smtp_enabled,
306        enable_backup: metadata.backup_enabled,
307        env_overrides: &env_overrides,
308        enabled_groups: &enabled_groups,
309        selected_choices: &selected_choices,
310        registry_name: &metadata.registry,
311        repo_dir: &repo_dir,
312        pre_built_ctx: None,
313        port_in_use: &port_in_use,
314        // ACME mode is only consumed when adding the reverse proxy itself;
315        // upgrade never needs to seed the TLS snippet.
316        acme_mode: None,
317        mode: PlanMode::Upgrade,
318        port_overrides: &port_overrides,
319    })?;
320
321    let mut planned: BTreeMap<PathBuf, String> = BTreeMap::new();
322    for step in &result.steps {
323        if let Step::WriteFile(file) = step {
324            planned.insert(file.path.clone(), file.content.clone());
325        }
326    }
327    Ok(Replanned {
328        result,
329        planned,
330        source_dir,
331        native,
332    })
333}
334
/// Output of [`replan`]: the re-rendered plan plus the resolved source
/// location, so callers don't resolve the registry a second time.
struct Replanned {
    /// The planner's full result for the upgrade-mode render (steps, tracked
    /// envs, ...), exactly as returned by `add_service`.
    result: AddResult,
    /// Path → content for every `Step::WriteFile` in `result.steps`.
    planned: BTreeMap<PathBuf, String>,
    /// The service's source dir (where a native build/run happens).
    source_dir: PathBuf,
    /// Whether this is a `runtime = "native"` install.
    native: bool,
}
345
346/// Parse the on-disk `.env` for a service into a key→value map. Lines
347/// without `=`, comments, and blanks are skipped. Returns an empty map if
348/// the file is absent — caller decides whether that's a soft error.
349fn read_existing_env_keys(service_name: &str) -> Result<BTreeMap<String, String>> {
350    let env_path = service_home(service_name)?.join(".env");
351    let mut out: BTreeMap<String, String> = BTreeMap::new();
352    let content = match std::fs::read_to_string(&env_path) {
353        Ok(c) => c,
354        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(out),
355        Err(source) => {
356            return Err(Error::FileRead {
357                path: env_path,
358                source,
359            });
360        }
361    };
362    for line in content.lines() {
363        let line = line.trim();
364        if line.is_empty() || line.starts_with('#') {
365            continue;
366        }
367        if let Some((k, v)) = line.split_once('=') {
368            out.insert(k.trim().to_string(), v.to_string());
369        }
370    }
371    Ok(out)
372}
373
374/// Parse `SERVICE_PORT_<NAME>=<port>` lines out of an installed service's
375/// `.env`. Returns a name → port map (lowercased name, matching the
376/// `[[ports]]` definition in service.toml). Also used by the metrics
377/// bridge to resolve host-network scrape targets retroactively.
378pub(crate) fn read_existing_ports(service_name: &str) -> Result<BTreeMap<String, u16>> {
379    let env_path = service_home(service_name)?.join(".env");
380    let mut overrides = BTreeMap::new();
381    let content = match std::fs::read_to_string(&env_path) {
382        Ok(c) => c,
383        // No .env yet means a half-installed service; let the planner
384        // re-allocate. (`add_service` will then surface a richer error if
385        // the install is genuinely broken.)
386        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(overrides),
387        Err(source) => {
388            return Err(Error::FileRead {
389                path: env_path,
390                source,
391            });
392        }
393    };
394    for line in content.lines() {
395        let line = line.trim();
396        if line.is_empty() || line.starts_with('#') {
397            continue;
398        }
399        let Some((key, value)) = line.split_once('=') else {
400            continue;
401        };
402        let Some(name) = key.strip_prefix("SERVICE_PORT_") else {
403            continue;
404        };
405        if let Ok(port) = value.trim().parse::<u16>() {
406            overrides.insert(name.to_ascii_lowercase(), port);
407        }
408    }
409    Ok(overrides)
410}
411
/// Whether `path` must be left out of diffing entirely.
///
/// Two files qualify: the manifest file itself (`manifest_file` — it *is* the
/// manifest, not a tracked file) and any file named exactly `.env` (it carries
/// generated secrets that rotate at runtime). Excluding both keeps them from
/// ever showing up as Added/Removed/Drift.
fn should_skip_path(path: &std::path::Path, manifest_file: &std::path::Path) -> bool {
    let is_env_file = path.file_name() == Some(std::ffi::OsStr::new(".env"));
    path == manifest_file || is_env_file
}
422
423/// Compute the diff between the registry's render and what's on disk for an
424/// installed service.
425pub async fn diff_service(service_name: &str) -> Result<DiffResult> {
426    let Replanned {
427        result,
428        planned,
429        source_dir,
430        native,
431    } = replan(service_name).await?;
432
433    // Native source-staleness rides along with the diff (same resolution, no
434    // second registry lookup): has any source file changed since the running
435    // process started? See the module note above on why this is the signal.
436    let source_stale = native
437        && unit_main_pid(service_name)
438            .and_then(process_start_time)
439            .is_some_and(|started| any_file_newer_than(&source_dir, started));
440
441    let manifest_file = manifest::manifest_path(service_name)?;
442    let (manifest_entries, _manifest_envs) = manifest::load(service_name)?.unwrap_or_default();
443    let manifest_by_path: BTreeMap<PathBuf, String> = manifest_entries
444        .into_iter()
445        .map(|e| (e.path, e.sha256))
446        .collect();
447
448    // Env additions: registry-expected static keys missing from the user's
449    // `.env`. Append-only — we ignore present-but-different values
450    // (could be a manual override) and never propose removals (could be
451    // a key the user added themselves that the registry happens not to
452    // ship). The registry-side list comes from the freshly-rendered
453    // `tracked_envs` (which carries kind + prompt for the CLI), not the
454    // on-disk manifest — that's the source of truth.
455    let existing_env = read_existing_env_keys(service_name)?;
456    let env_additions: Vec<EnvAddition> = result
457        .tracked_envs
458        .iter()
459        .filter(|p| !existing_env.contains_key(&p.key))
460        .map(|p| EnvAddition {
461            key: p.key.clone(),
462            value: p.value.clone(),
463            kind: p.kind.clone(),
464            prompt: p.prompt.clone(),
465        })
466        .collect();
467
468    let mut entries: Vec<DiffEntry> = Vec::new();
469    let mut seen: BTreeSet<PathBuf> = BTreeSet::new();
470
471    // Walk planned files first — Added / Modified / Drift / Unchanged.
472    for (path, content) in &planned {
473        if should_skip_path(path, &manifest_file) {
474            continue;
475        }
476        seen.insert(path.clone());
477        let planned_hash = manifest::hash_bytes(content.as_bytes());
478        let on_disk_hash = if path.exists() {
479            Some(manifest::hash_file(path)?)
480        } else {
481            None
482        };
483        let manifest_hash = manifest_by_path.get(path);
484
485        let kind = match (on_disk_hash.as_deref(), manifest_hash.map(String::as_str)) {
486            // File doesn't exist on disk.
487            (None, Some(_)) | (None, None) => match manifest_hash {
488                Some(_) => DiffKind::Modified, // we wrote it, user deleted it; restore
489                None => DiffKind::Added,       // registry adds it, fresh write
490            },
491            // On-disk content already matches what the registry would render.
492            (Some(d), _) if d == planned_hash => DiffKind::Unchanged,
493            // No manifest entry → can't tell if the user touched it.
494            // Conservative: treat as drift so --force is required once.
495            (Some(_), None) => DiffKind::Drift,
496            // On-disk matches the manifest but not the planned render →
497            // ryra-owned, safe to overwrite.
498            (Some(d), Some(l)) if d == l => DiffKind::Modified,
499            // On-disk matches neither lock nor plan → user hand-edited.
500            (Some(_), Some(_)) => DiffKind::Drift,
501        };
502        entries.push(DiffEntry {
503            path: path.clone(),
504            kind,
505        });
506    }
507
508    // Walk manifest entries that the planner no longer emits — Removed.
509    for path in manifest_by_path.keys() {
510        if seen.contains(path) {
511            continue;
512        }
513        if should_skip_path(path, &manifest_file) {
514            continue;
515        }
516        entries.push(DiffEntry {
517            path: path.clone(),
518            kind: DiffKind::Removed,
519        });
520    }
521
522    entries.sort_by(|a, b| a.path.cmp(&b.path));
523    Ok(DiffResult {
524        service: service_name.to_string(),
525        entries,
526        env_additions,
527        source_stale,
528    })
529}
530
/// Plan a zero-downtime color swap for a `deploy = "blue-green"` install.
///
/// Returns `None` when the service isn't blue/green, so [`upgrade_service`] can
/// fall through to its normal restart-based flow. Otherwise the plan:
///   1. re-renders both color quadlets/units + reloads systemd (so the idle
///      slot picks up any new image tag or config), keeping `.env` untouched;
///   2. starts the *idle* slot and gates on its health endpoint;
///   3. repoints the Caddy upstream at the idle slot and reloads gracefully;
///   4. stops the old slot and flips `active_color` in metadata.
///
/// A health-gate timeout aborts before step 3, leaving the old slot live and
/// routed — a failed deploy is a no-op, never an outage.
///
/// This function only *builds* the step list; nothing here starts, stops, or
/// rewrites anything (it does read metadata, the registry, `.env`, and the
/// Caddyfile).
///
/// # Errors
/// - `Error::ServiceNotInstalled` if the service or its metadata is missing.
/// - `Error::Template` if the definition has no `health_check`, no port at
///   all, or the idle color's port is absent from `.env`.
/// - Anything propagated from registry resolution, `replan`, `diff_service`,
///   the Caddy rewrite, or metadata serialization.
pub async fn blue_green_swap(service_name: &str) -> Result<Option<UpgradeResult>> {
    if !is_service_installed(service_name) {
        return Err(Error::ServiceNotInstalled(service_name.to_string()));
    }
    let metadata = load_metadata(service_name)?
        .ok_or_else(|| Error::ServiceNotInstalled(service_name.to_string()))?;

    // Resolve the registry def to read the deploy strategy + health path.
    let service_ref = service_ref_for(&metadata, service_name);
    let repo_dir = resolve_registry_dir(&service_ref).await?;
    let reg = crate::registry::find_service(&repo_dir, service_name)?;
    let def = &reg.def;
    if def.service.deploy != DeployStrategy::BlueGreen {
        return Ok(None);
    }
    let health_check = def.service.health_check.clone().ok_or_else(|| {
        Error::Template(format!(
            "{service_name}: deploy = \"blue-green\" but no health_check — validation should have caught this"
        ))
    })?;

    // Which slot is live, and which we're rolling onto. An install with no
    // recorded `active_color` is treated as having blue live.
    let live = metadata.active_color.unwrap_or(Color::Blue);
    let target = live.other();

    // The idle slot's host port, from the install's `.env`
    // (`SERVICE_PORT_HTTP_GREEN` etc., written by the blue/green add path).
    // The routable port is the one named "http" (case-insensitive), else the
    // first declared port.
    let primary_port_name = def
        .ports
        .iter()
        .find(|p| p.name.eq_ignore_ascii_case("http"))
        .or_else(|| def.ports.first())
        .map(|p| p.name.clone())
        .ok_or_else(|| {
            Error::Template(format!("{service_name}: blue/green needs a routable port"))
        })?;
    let existing_ports = read_existing_ports(service_name)?;
    // `read_existing_ports` lowercases names, so the lookup key is
    // `<port>_<color>` in lowercase.
    // NOTE(review): this relies on `Color`'s `Display` output being lowercase
    // — confirm against the `Color` definition.
    let target_key = format!("{}_{}", primary_port_name.to_ascii_lowercase(), target);
    let target_port = existing_ports.get(&target_key).copied().ok_or_else(|| {
        Error::Template(format!(
            "{service_name}: missing {} in .env — reinstall to allocate the blue/green port pair",
            deploy::color_port_var(
                &format!("SERVICE_PORT_{}", primary_port_name.to_uppercase()),
                target
            )
        ))
    })?;
    // The health gate probes the idle slot directly on loopback, not via Caddy.
    let health_url = format!("http://127.0.0.1:{target_port}{health_check}");

    // Re-render the install (Upgrade mode): emits both color quadlets/units and
    // pulls any new image. Keep those file writes + pulls + daemon-reload, but
    // drop the add path's StartService/StopService (we orchestrate the swap
    // ourselves), its `.env` write (preserve secrets), and its metadata write
    // (we flip active_color below instead of resetting it to blue).
    let replanned = replan(service_name).await?;
    let env_filename = std::ffi::OsStr::new(".env");
    let metadata_file = metadata_path(service_name)?;
    // Never re-sync or rebuild the LIVE slot's working dir — that's the whole
    // point of the isolation (an in-flight Python/Node process must not have its
    // source mutated). Drop any SyncDir/Build that targets `colors/<live>`;
    // keep the idle slot's. (Podman has no such steps — it re-pulls the image,
    // which is harmless — so this is a native-only filter in practice.)
    let live_slot = format!("colors/{live}");
    // NOTE(review): this is a substring test on the lossy path string, so any
    // path that merely contains `colors/<live>` somewhere is filtered too.
    let touches_live = |p: &std::path::Path| p.to_string_lossy().contains(&live_slot);
    let mut steps: Vec<Step> = Vec::new();
    for step in replanned.result.steps {
        match step {
            Step::StartService { .. } | Step::StopService { .. } => continue,
            Step::WriteFile(GeneratedFile { ref path, .. })
                if path.file_name() == Some(env_filename) || *path == metadata_file =>
            {
                continue;
            }
            Step::SyncDir { ref dst, .. } if touches_live(dst) => continue,
            Step::Build { ref dir, .. } if touches_live(dir) => continue,
            other => steps.push(other),
        }
    }

    // Caddy: repoint the upstream at the idle slot. Only when the install has a
    // routed URL and a Caddyfile exists (loopback installs swap without it).
    let caddy_rewrite =
        blue_green_caddy_rewrite(service_name, def, &metadata, target, target_port)?;

    // The runtime-agnostic swap: start idle -> health-gate -> caddy reload ->
    // stop old. Artifact prep (pull/build) already rode along in `steps` above.
    steps.extend(deploy::color_swap_steps(deploy::ColorSwap {
        service_name: service_name.to_string(),
        live,
        prepare: None,
        health_url,
        health_timeout_secs: def.service.health_timeout_secs(),
        caddy_rewrite,
    }));

    // Flip active_color so the next deploy rolls back onto `live`. This write
    // is the last step, after the swap steps above.
    let mut new_metadata = metadata.clone();
    new_metadata.active_color = Some(target);
    steps.push(Step::WriteFile(GeneratedFile {
        path: metadata_file,
        content: toml::to_string_pretty(&new_metadata)?,
    }));

    Ok(Some(UpgradeResult {
        service: service_name.to_string(),
        // NOTE(review): `diff_service` runs its own `replan`, so the registry
        // is resolved and the plan rendered a second time here.
        diff: diff_service(service_name).await?,
        steps,
        backup_dir: None,
        planned_files: replanned.planned,
        // A swap isn't visible as config drift (the new image/build lives behind
        // the same quadlet), so force the apply just like the native rebuild path.
        force_apply: true,
    }))
}
657
658/// Re-render the Caddy site block pointing at the idle color and splice it into
659/// the existing Caddyfile. `None` when the install has no routed URL or no
660/// Caddyfile on disk (a loopback blue/green install swaps without Caddy).
661fn blue_green_caddy_rewrite(
662    service_name: &str,
663    def: &crate::registry::service_def::ServiceDef,
664    metadata: &Metadata,
665    target: Color,
666    target_port: u16,
667) -> Result<Option<Step>> {
668    let Some(url) = metadata.url.as_deref() else {
669        return Ok(None);
670    };
671    let caddyfile_path = caddy::caddyfile_path()?;
672    let Ok(existing) = std::fs::read_to_string(&caddyfile_path) else {
673        return Ok(None);
674    };
675    let parsed = url::Url::parse(url)
676        .map_err(|e| Error::Template(format!("invalid service URL '{url}': {e}")))?;
677    let domain = parsed
678        .host_str()
679        .ok_or_else(|| Error::Template(format!("service URL '{url}' has no host")))?;
680    let paths = crate::config::ConfigPaths::resolve()?;
681    let config = crate::config::load_or_default(&paths.config_file)?;
682    // Podman slots are containers on Caddy's shared network, reachable by name
683    // (`<svc>-<color>:<container_port>`). Native slots are host processes, so
684    // Caddy reaches them over the host bridge at the color's *host* port.
685    let (target_host, port) = match metadata.runtime {
686        Runtime::Podman => (
687            deploy::color_unit(service_name, target),
688            def.ports.first().map(|p| p.container_port).unwrap_or(80),
689        ),
690        Runtime::Native => ("host.containers.internal".to_string(), target_port),
691    };
692    let block = caddy::render_site_block(&caddy::CaddySiteParams {
693        service_name: service_name.to_string(),
694        target_host,
695        domain: domain.to_string(),
696        container_port: port,
697        https_port: crate::caddy_https_port(&config),
698        force_internal_tls: false,
699    });
700    let updated = caddy::add_route(&existing, service_name, &block);
701    Ok(Some(Step::WriteFile(GeneratedFile {
702        path: caddyfile_path,
703        content: updated,
704    })))
705}
706
707/// Plan an upgrade for an installed service.
708///
709/// Returns the steps to execute and the backup directory where displaced
710/// files will be copied. The backup dir is *also* baked into the steps
711/// (as `Step::CopyFile` entries placed before each `Step::WriteFile`).
712pub async fn upgrade_service(service_name: &str, force: bool) -> Result<UpgradeResult> {
713    // Blue/green services upgrade by a color swap, not an in-place restart, so
714    // they take a different plan entirely. `blue_green_swap` returns None for
715    // restart-strategy installs, falling through to the standard flow below.
716    if let Some(plan) = blue_green_swap(service_name).await? {
717        return Ok(plan);
718    }
719
720    let diff = diff_service(service_name).await?;
721
722    if !force {
723        let drifted = diff.drifted();
724        if !drifted.is_empty() {
725            return Err(Error::HandEditedFiles {
726                service: service_name.to_string(),
727                paths: drifted.iter().map(|e| e.path.clone()).collect(),
728            });
729        }
730    }
731
732    let Replanned {
733        result, planned, ..
734    } = replan(service_name).await?;
735    let manifest_file = manifest::manifest_path(service_name)?;
736    let env_file = service_home(service_name)?.join(".env");
737
738    // Hard-fail if `.env` is missing. Append-only env handling can't
739    // reconstruct generated secrets (mysql_root_password, jwt_key, etc.)
740    // and would silently produce a half-written file that fails on
741    // restart. Surface the real problem instead.
742    if !env_file.exists() {
743        return Err(Error::Template(format!(
744            "{service_name}: `.env` is missing at {} — upgrade can't reconstruct generated secrets. \
745             Restore the file from a backup or reinstall the service.",
746            env_file.display()
747        )));
748    }
749
750    // Decide the backup directory once per upgrade run. Used whenever any
751    // file would be overwritten *or* the existing service.manifest exists (the
752    // lock is always backed up so `ryra revert` can reconstruct the
753    // pre-upgrade state). Empty when neither holds — keeps
754    // `~/.local/state/ryra/` from accumulating no-op dirs.
755    let backup_dir = backup_directory(service_name)?;
756    let needs_backup: BTreeSet<PathBuf> = diff
757        .entries
758        .iter()
759        .filter(|e| {
760            matches!(
761                e.kind,
762                DiffKind::Modified | DiffKind::Drift | DiffKind::Removed
763            )
764        })
765        .map(|e| e.path.clone())
766        .collect();
767    let manifest_will_be_backed_up = manifest_file.exists();
768    let backup_used = !needs_backup.is_empty() || manifest_will_be_backed_up;
769
770    // Filter the planned step list down to what an upgrade should actually do.
771    // - WriteFile for `.env` is dropped (preserve secrets).
772    // - PullImage stays (idempotent if cached, fetches new tag if registry bumped).
773    // - StartService is replaced with RestartService at the very end.
774    // - CreateDir / Symlink stay (idempotent and may be needed for new files).
775    // - DaemonReload stays.
776    // - CopyFile stays (vendored binaries; rare to upgrade but handled the same).
777    // - TailscaleSetup / TailscaleEnable were already gated out by PlanMode::Upgrade.
778    let mut steps: Vec<Step> = Vec::new();
779    if backup_used {
780        steps.push(Step::CreateDir(backup_dir.clone()));
781    }
782    let unchanged: BTreeSet<PathBuf> = diff
783        .entries
784        .iter()
785        .filter(|e| matches!(e.kind, DiffKind::Unchanged))
786        .map(|e| e.path.clone())
787        .collect();
788
789    let env_filename = std::ffi::OsStr::new(".env");
790    for step in result.steps {
791        match step {
792            // .env stays untouched on upgrade — generated secrets in the
793            // running service must not be regenerated.
794            Step::WriteFile(GeneratedFile { ref path, .. })
795                if path.file_name() == Some(env_filename) =>
796            {
797                continue;
798            }
799            // Identical content already on disk — skip the write entirely
800            // so the file's mtime stays put and `sha256sum -c` stays clean
801            // for unchanged entries.
802            Step::WriteFile(GeneratedFile { ref path, .. }) if unchanged.contains(path) => {
803                // The manifest is special: even if "unchanged" by content, we
804                // re-emit it because path-level adds/removes mean its content
805                // has changed and we need the new hashes recorded.
806                if path == &manifest_file {
807                    steps.push(step);
808                }
809                continue;
810            }
811            Step::WriteFile(ref file) => {
812                // Always back up the existing service.manifest too, even though
813                // it's filtered out of the diff. `ryra revert` reads the
814                // backed-up lock to know which files were Added during the
815                // upgrade (current lock − pre-upgrade lock) so it can delete
816                // them on revert. Without this, revert would leave
817                // upgrade-added files orphaned.
818                let should_backup = (needs_backup.contains(&file.path)
819                    || file.path == manifest_file)
820                    && file.path.exists();
821                if should_backup {
822                    let rel = backup_relpath(&file.path);
823                    let dst = backup_dir.join(rel);
824                    if let Some(parent) = dst.parent() {
825                        steps.push(Step::CreateDir(parent.to_path_buf()));
826                    }
827                    steps.push(Step::CopyFile {
828                        src: file.path.clone(),
829                        dst,
830                    });
831                }
832                steps.push(step);
833            }
834            // The replanned step list always ends with StartService; we
835            // strip it and append a RestartService at the very end so the
836            // unit picks up the new quadlet.
837            Step::StartService { .. } => continue,
838            other => steps.push(other),
839        }
840    }
841
842    // Removed files: back them up then delete.
843    for entry in &diff.entries {
844        if !matches!(entry.kind, DiffKind::Removed) {
845            continue;
846        }
847        if entry.path.exists() {
848            let rel = backup_relpath(&entry.path);
849            let dst = backup_dir.join(rel);
850            if let Some(parent) = dst.parent() {
851                steps.push(Step::CreateDir(parent.to_path_buf()));
852            }
853            steps.push(Step::CopyFile {
854                src: entry.path.clone(),
855                dst,
856            });
857        }
858        steps.push(Step::RemoveFile(entry.path.clone()));
859    }
860
861    // Env additions: append registry-required static env vars that the
862    // user's .env doesn't have. Append-only — we never rewrite the
863    // existing .env (that would clobber rotated secrets and any manual
864    // edits) and we never remove keys (the user might have added their
865    // own that the registry happens not to ship). The .env is
866    // intentionally NOT backed up: it only ever gains lines and the
867    // pre-existing content survives unchanged.
868    if !diff.env_additions.is_empty() {
869        let mut content = match std::fs::read_to_string(&env_file) {
870            Ok(c) => c,
871            // Service installed but .env missing? Treat the add as a
872            // fresh write — odd state, but the right one to recover to.
873            Err(e) if e.kind() == std::io::ErrorKind::NotFound => String::new(),
874            Err(source) => {
875                return Err(Error::FileRead {
876                    path: env_file.clone(),
877                    source,
878                });
879            }
880        };
881        if !content.is_empty() && !content.ends_with('\n') {
882            content.push('\n');
883        }
884        for add in &diff.env_additions {
885            content.push_str(&format!("{}={}\n", add.key, add.value));
886        }
887        steps.push(Step::WriteFile(GeneratedFile {
888            path: env_file,
889            content,
890        }));
891    }
892
893    // Pick up the new quadlet by restarting. RestartService is enough to
894    // re-read the env file, re-run ExecStartPre/Post, and pull in any new
895    // ExecStartPost script (the seafile case).
896    steps.push(Step::RestartService {
897        unit: service_name.to_string(),
898    });
899
900    // Native services rebuild from source on upgrade (the `Build` step) and
901    // restart. A source change leaves the rendered config clean, so force the
902    // apply; otherwise the CLI would short-circuit on the clean diff and never
903    // rebuild. The plan already ends in RestartService.
904    let force_apply = matches!(
905        crate::metadata::load_metadata(service_name),
906        Ok(Some(m)) if m.runtime == crate::registry::service_def::Runtime::Native
907    );
908
909    Ok(UpgradeResult {
910        service: service_name.to_string(),
911        diff,
912        steps,
913        backup_dir: if backup_used { Some(backup_dir) } else { None },
914        // The replanned env content is irrelevant for upgrade (we don't
915        // write it), but expose the template-render context bag in case
916        // future callers need it. Keep it empty for now to avoid
917        // confusing consumers.
918        planned_files: planned,
919        force_apply,
920    })
921}
922
923pub struct UpgradeResult {
924    pub service: String,
925    pub diff: DiffResult,
926    pub steps: Vec<Step>,
927    /// `None` when no files would be overwritten or removed.
928    pub backup_dir: Option<PathBuf>,
929    pub planned_files: BTreeMap<PathBuf, String>,
930    /// Apply even when the config diff is clean. True for native services: a
931    /// source rebuild isn't visible in the rendered config, so the plan must
932    /// still run (the `SyncBinary` step then no-ops if the binary is unchanged).
933    pub force_apply: bool,
934}
935
/// A single backup snapshot that a service can be reverted to.
#[derive(Debug, Clone)]
pub struct BackupSnapshot {
    /// Service-scoped snapshot directory on disk:
    /// `~/.local/state/ryra/backups/<timestamp>/<service>/`.
    pub path: PathBuf,
    /// Name of the enclosing timestamp directory, in the
    /// `YYYY-MM-DDTHH-MM-SSZ` form.
    pub timestamp: String,
}
944
945pub struct RevertResult {
946    pub service: String,
947    pub snapshot: BackupSnapshot,
948    pub steps: Vec<Step>,
949    /// Files to be copied from backup back to their original locations.
950    pub files_to_restore: Vec<PathBuf>,
951    /// Files added by the upgrade that didn't exist before — will be
952    /// removed by revert. Empty when the snapshot pre-dates the manifest
953    /// feature (we can't reconstruct what was added without it).
954    pub files_to_delete: Vec<PathBuf>,
955}
956
/// How many backup snapshots `ryra upgrade` retains per service before
/// auto-pruning. Each snapshot is small (~tens of KB — config files +
/// the manifest) so the cap is more about mental clutter than disk; 5
/// is enough to revert a few iterations back without filling the
/// `~/.local/state/ryra/backups/` tree with dead snapshots from years
/// of upgrades.
pub const DEFAULT_BACKUP_KEEP: usize = 5;
966
967/// Drop snapshots older than the most recent `keep` for this service.
968/// Returns the paths that were removed (newest-first within the
969/// removed set; the kept set keeps the same order). The shared
970/// timestamp dir is also removed when this was the last service-
971/// scoped subdir under it (multi-service upgrade runs share a
972/// timestamp dir; we don't want to nuke other services' state).
973pub fn prune_backups(service_name: &str, keep: usize) -> Result<Vec<PathBuf>> {
974    let backups_root = state_dir()?.join("backups");
975    prune_backups_in(&backups_root, service_name, keep)
976}
977
978/// Pure inner that operates on an explicit `<state>/backups/` root.
979/// Split out so tests can drive it against a tmp tree without touching
980/// the real XDG state dir.
981fn prune_backups_in(
982    backups_root: &std::path::Path,
983    service_name: &str,
984    keep: usize,
985) -> Result<Vec<PathBuf>> {
986    let snapshots = list_backups_in(backups_root, service_name)?;
987    if snapshots.len() <= keep {
988        return Ok(Vec::new());
989    }
990    let mut removed: Vec<PathBuf> = Vec::new();
991    for snap in snapshots.into_iter().skip(keep) {
992        if let Err(e) = std::fs::remove_dir_all(&snap.path) {
993            eprintln!(
994                "warning: failed to prune backup {}: {e}",
995                snap.path.display()
996            );
997            continue;
998        }
999        removed.push(snap.path.clone());
1000        if let Some(parent) = snap.path.parent()
1001            && let Ok(mut entries) = std::fs::read_dir(parent)
1002            && entries.next().is_none()
1003        {
1004            let _ = std::fs::remove_dir(parent);
1005        }
1006    }
1007    Ok(removed)
1008}
1009
1010pub fn list_backups(service_name: &str) -> Result<Vec<BackupSnapshot>> {
1011    let backups_root = state_dir()?.join("backups");
1012    list_backups_in(&backups_root, service_name)
1013}
1014
1015fn list_backups_in(
1016    backups_root: &std::path::Path,
1017    service_name: &str,
1018) -> Result<Vec<BackupSnapshot>> {
1019    if !backups_root.is_dir() {
1020        return Ok(Vec::new());
1021    }
1022    let mut snapshots: Vec<BackupSnapshot> = Vec::new();
1023    let entries = std::fs::read_dir(backups_root).map_err(|source| Error::FileRead {
1024        path: backups_root.to_path_buf(),
1025        source,
1026    })?;
1027    for entry in entries.flatten() {
1028        let stamp_dir = entry.path();
1029        if !stamp_dir.is_dir() {
1030            continue;
1031        }
1032        let svc_dir = stamp_dir.join(service_name);
1033        if !svc_dir.is_dir() {
1034            continue;
1035        }
1036        let Some(stamp) = stamp_dir.file_name().and_then(|n| n.to_str()) else {
1037            continue;
1038        };
1039        snapshots.push(BackupSnapshot {
1040            path: svc_dir,
1041            timestamp: stamp.to_string(),
1042        });
1043    }
1044    // Newest first: timestamp is `YYYY-MM-DDTHH-MM-SSZ`, lexical-descending == reverse-chronological.
1045    snapshots.sort_by(|a, b| b.timestamp.cmp(&a.timestamp));
1046    Ok(snapshots)
1047}
1048
1049/// Plan a revert for an installed service.
1050///
1051/// `at` selects a specific backup timestamp; `None` picks the most recent.
1052/// The returned plan: restore every file from the backup tree to its
1053/// original location, delete files added by the upgrade, daemon-reload,
1054/// restart the unit.
1055pub fn revert_service(service_name: &str, at: Option<&str>) -> Result<RevertResult> {
1056    if !is_service_installed(service_name) {
1057        return Err(Error::ServiceNotInstalled(service_name.to_string()));
1058    }
1059    let snapshot = pick_snapshot(service_name, at)?;
1060
1061    // Files to restore: walk the backup tree and reconstruct the original
1062    // absolute path for each one. The backup mirrors absolute paths under
1063    // `<snapshot>/<original-path-without-leading-slash>`, so the inverse is
1064    // simply prefixing `/` to each path-relative-to-snapshot.
1065    let mut files_to_restore: Vec<PathBuf> = Vec::new();
1066    walk_backup_files(&snapshot.path, &mut files_to_restore)?;
1067
1068    // Files to delete: anything in the *current* lock that isn't in the
1069    // *backed-up* lock was added by the upgrade and should disappear on
1070    // revert. If either lock is absent, leave the delete set empty —
1071    // safest no-op for snapshots that pre-date this feature.
1072    let backup_manifest_file =
1073        absolute_to_backup_path(&snapshot.path, &manifest::manifest_path(service_name)?);
1074    let (backup_manifest_entries, _) = read_manifest_at(&backup_manifest_file)?;
1075    let (current_manifest_entries, _) = manifest::load(service_name)?.unwrap_or_default();
1076
1077    let backup_manifest_set: BTreeSet<PathBuf> = backup_manifest_entries
1078        .iter()
1079        .map(|e| e.path.clone())
1080        .collect();
1081    let mut files_to_delete: Vec<PathBuf> = if backup_manifest_entries.is_empty() {
1082        // Pre-feature snapshot: no way to know what was added.
1083        Vec::new()
1084    } else {
1085        current_manifest_entries
1086            .iter()
1087            .map(|e| e.path.clone())
1088            .filter(|p| !backup_manifest_set.contains(p))
1089            .collect()
1090    };
1091    files_to_delete.sort();
1092
1093    // Build the step list.
1094    let mut steps: Vec<Step> = Vec::new();
1095    // Restore: backup → original. CopyFile creates parents itself, so no
1096    // CreateDir needed.
1097    for backup_path in &files_to_restore {
1098        let original = backup_to_absolute_path(&snapshot.path, backup_path);
1099        steps.push(Step::CopyFile {
1100            src: backup_path.clone(),
1101            dst: original,
1102        });
1103    }
1104    // Delete: each Added file, plus any orphan symlink in the quadlet dir
1105    // that pointed at it (only the actual file is in the lock; the
1106    // companion symlink in `~/.config/containers/systemd/` is not).
1107    let qd = crate::quadlet_dir()?;
1108    for path in &files_to_delete {
1109        if path.exists() {
1110            steps.push(Step::RemoveFile(path.clone()));
1111        }
1112        if let Some(name) = path.file_name() {
1113            let symlink = qd.join(name);
1114            if std::fs::symlink_metadata(&symlink).is_ok() {
1115                steps.push(Step::RemoveFile(symlink));
1116            }
1117        }
1118    }
1119    steps.push(Step::DaemonReload);
1120    steps.push(Step::RestartService {
1121        unit: service_name.to_string(),
1122    });
1123
1124    let files_to_restore_orig: Vec<PathBuf> = files_to_restore
1125        .iter()
1126        .map(|p| backup_to_absolute_path(&snapshot.path, p))
1127        .collect();
1128    Ok(RevertResult {
1129        service: service_name.to_string(),
1130        snapshot,
1131        steps,
1132        files_to_restore: files_to_restore_orig,
1133        files_to_delete,
1134    })
1135}
1136
1137/// Resolve the snapshot to revert to. `at` is a timestamp string (e.g.
1138/// `2026-05-05T13-33-50Z`); when absent, the most recent snapshot wins.
1139fn pick_snapshot(service_name: &str, at: Option<&str>) -> Result<BackupSnapshot> {
1140    let snapshots = list_backups(service_name)?;
1141    if snapshots.is_empty() {
1142        return Err(Error::NoBackup(service_name.to_string()));
1143    }
1144    match at {
1145        None => Ok(snapshots
1146            .into_iter()
1147            .next()
1148            .expect("non-empty checked above")),
1149        Some(stamp) => snapshots
1150            .into_iter()
1151            .find(|s| s.timestamp == stamp)
1152            .ok_or_else(|| Error::BackupNotFound {
1153                service: service_name.to_string(),
1154                stamp: stamp.to_string(),
1155            }),
1156    }
1157}
1158
1159/// Recursively collect every regular file under `root` into `out`. Symlinks
1160/// are followed; we don't expect any in a backup tree (we always copied
1161/// targets, never link entries).
1162fn walk_backup_files(root: &std::path::Path, out: &mut Vec<PathBuf>) -> Result<()> {
1163    let entries = std::fs::read_dir(root).map_err(|source| Error::FileRead {
1164        path: root.to_path_buf(),
1165        source,
1166    })?;
1167    for entry in entries.flatten() {
1168        let path = entry.path();
1169        let meta = match entry.metadata() {
1170            Ok(m) => m,
1171            Err(_) => continue,
1172        };
1173        if meta.is_dir() {
1174            walk_backup_files(&path, out)?;
1175        } else if meta.is_file() {
1176            out.push(path);
1177        }
1178    }
1179    Ok(())
1180}
1181
/// Undo `backup_relpath`: map a file inside the snapshot back to where it
/// originally lived, e.g. `<root>/home/user/foo` → `/home/user/foo`.
///
/// A `backup` path that does not sit under `root` is joined onto `/`
/// unchanged.
fn backup_to_absolute_path(root: &std::path::Path, backup: &std::path::Path) -> PathBuf {
    let relative = match backup.strip_prefix(root) {
        Ok(rest) => rest,
        Err(_) => backup,
    };
    let mut absolute = PathBuf::from("/");
    absolute.push(relative);
    absolute
}
1188
/// Forward variant: `<root>` + `/home/user/foo` → `<root>/home/user/foo`.
///
/// The leading root is stripped with path operations rather than a lossy
/// string round-trip, so path components that are not valid UTF-8 come
/// through unchanged. A path that is already relative is joined as-is.
fn absolute_to_backup_path(root: &std::path::Path, abs: &std::path::Path) -> PathBuf {
    root.join(abs.strip_prefix("/").unwrap_or(abs))
}
1195
1196/// Read a manifest at the given path. Missing-file is treated as an empty
1197/// list — pre-feature backups simply have no lock to reference.
1198fn read_manifest_at(
1199    path: &std::path::Path,
1200) -> Result<(Vec<manifest::ManifestEntry>, Vec<manifest::EnvEntry>)> {
1201    if !path.exists() {
1202        return Ok((Vec::new(), Vec::new()));
1203    }
1204    let content = std::fs::read_to_string(path).map_err(|source| Error::FileRead {
1205        path: path.to_path_buf(),
1206        source,
1207    })?;
1208    manifest::parse(&content)
1209}
1210
1211/// `~/.local/state/ryra/backups/<timestamp>/<service>/`. Timestamp uses an
1212/// ISO-8601-ish form that sorts lexically (no colons — Windows-friendly,
1213/// not that it matters today, but the cost is zero).
1214fn backup_directory(service_name: &str) -> Result<PathBuf> {
1215    let state = state_dir()?;
1216    let now = std::time::SystemTime::now()
1217        .duration_since(std::time::UNIX_EPOCH)
1218        .map_err(|e| Error::Template(format!("system clock before UNIX epoch: {e}")))?
1219        .as_secs();
1220    let stamp = format_timestamp(now);
1221    Ok(state.join("backups").join(stamp).join(service_name))
1222}
1223
1224/// XDG state dir under `ryra/`. Created on demand by the CreateDir step.
1225fn state_dir() -> Result<PathBuf> {
1226    let base = dirs::state_dir()
1227        .or_else(|| dirs::home_dir().map(|h| h.join(".local").join("state")))
1228        .ok_or(Error::HomeDirNotFound)?;
1229    Ok(base.join("ryra"))
1230}
1231
1232/// Format a UNIX epoch into `YYYY-MM-DDTHH-MM-SSZ`. Avoids the chrono
1233/// dependency — we just need stable lexical sort.
1234fn format_timestamp(secs: u64) -> String {
1235    // Days from 1970-01-01.
1236    const SECS_PER_DAY: u64 = 86_400;
1237    let days = secs / SECS_PER_DAY;
1238    let time_of_day = secs % SECS_PER_DAY;
1239    let h = time_of_day / 3600;
1240    let m = (time_of_day % 3600) / 60;
1241    let s = time_of_day % 60;
1242    let (y, mo, d) = ymd_from_days(days);
1243    format!("{y:04}-{mo:02}-{d:02}T{h:02}-{m:02}-{s:02}Z")
1244}
1245
/// Turn a count of days since 1970-01-01 into a proleptic-Gregorian
/// `(year, month, day)` triple, following the civil-from-days algorithm
/// from Howard Hinnant's date library (MIT). Kept local so backup naming
/// doesn't pull in a chrono/time dependency.
fn ymd_from_days(days: u64) -> (i64, u32, u32) {
    // One "era" is a full 400-year Gregorian cycle.
    const DAYS_PER_ERA: i64 = 146_097;

    // Shift the origin from 1970-01-01 to 0000-03-01 so that the leap day
    // falls at the very end of each shifted year.
    let shifted = days as i64 + 719_468;
    let era_numerator = if shifted >= 0 {
        shifted
    } else {
        shifted - (DAYS_PER_ERA - 1)
    };
    let era = era_numerator / DAYS_PER_ERA;
    let day_of_era = (shifted - era * DAYS_PER_ERA) as u64;
    let year_of_era =
        (day_of_era - day_of_era / 1460 + day_of_era / 36_524 - day_of_era / 146_096) / 365;
    let day_of_year = day_of_era - (365 * year_of_era + year_of_era / 4 - year_of_era / 100);
    // Month index counted from March (0 = March … 11 = February).
    let march_based_month = (5 * day_of_year + 2) / 153;
    let day = (day_of_year - (153 * march_based_month + 2) / 5 + 1) as u32;
    let month = if march_based_month < 10 {
        (march_based_month + 3) as u32
    } else {
        (march_based_month - 9) as u32
    };
    // January and February belong to the next civil year in the shifted
    // calendar.
    let year = year_of_era as i64 + era * 400 + i64::from(month <= 2);
    (year, month, day)
}
1262
/// Map an absolute path into the backup tree. We strip the leading `/` so the
/// joined path doesn't escape the backup dir; everything else is preserved
/// so the user can `diff -r` across the original location.
///
/// The root is stripped with path operations rather than a lossy string
/// round-trip, so components that are not valid UTF-8 are kept intact. A
/// path that is already relative is returned unchanged.
fn backup_relpath(path: &std::path::Path) -> PathBuf {
    path.strip_prefix("/").unwrap_or(path).to_path_buf()
}
1269
#[cfg(test)]
mod tests {
    use super::*;

    /// Scratch path under the system temp dir, made unique by the process
    /// id and a nanosecond timestamp. The path is only computed — nothing
    /// is created on disk.
    fn unique_tmp(prefix: &str) -> PathBuf {
        std::env::temp_dir().join(format!(
            "{prefix}-{}-{}",
            std::process::id(),
            std::time::SystemTime::now()
                .duration_since(std::time::UNIX_EPOCH)
                .unwrap()
                .as_nanos()
        ))
    }

    #[test]
    fn timestamp_round_numbers() {
        // Known values end-to-end through the calendar conversion.
        let s = format_timestamp(0);
        assert_eq!(s, "1970-01-01T00-00-00Z");
        let s = format_timestamp(86_400);
        assert_eq!(s, "1970-01-02T00-00-00Z");
        let s = format_timestamp(31_536_000); // not a leap year (1970)
        assert_eq!(s, "1971-01-01T00-00-00Z");
        // 2026-01-01: 56 years incl. 14 leap days = 20454 days from the
        // epoch, i.e. 1767225600 seconds.
        let s = format_timestamp(20_454 * 86_400);
        assert_eq!(s, "2026-01-01T00-00-00Z");
    }

    #[test]
    fn backup_relpath_strips_leading_slash() {
        let p = backup_relpath(std::path::Path::new("/home/user/foo/bar"));
        assert_eq!(p, PathBuf::from("home/user/foo/bar"));
    }

    /// Stand up a tmp backups tree with the given timestamps and a
    /// service subdir under each, then run `prune_backups_in` against it.
    /// Returns (kept timestamps newest-first, removed paths). Hermetic:
    /// no env vars touched, no shared global state.
    fn setup_and_prune(stamps: &[&str], keep: usize) -> (Vec<String>, Vec<PathBuf>) {
        let tmp = unique_tmp("ryra-prune-test");
        let backups_root = tmp.join("backups");
        for s in stamps {
            std::fs::create_dir_all(backups_root.join(s).join("svc")).unwrap();
        }
        let removed = prune_backups_in(&backups_root, "svc", keep).unwrap();
        let mut kept: Vec<String> = std::fs::read_dir(&backups_root)
            .unwrap()
            .filter_map(|e| e.ok())
            .filter_map(|e| e.file_name().into_string().ok())
            .collect();
        kept.sort();
        kept.reverse();
        let _ = std::fs::remove_dir_all(&tmp);
        (kept, removed)
    }

    #[test]
    fn prune_keeps_newest_n() {
        // Five timestamps, keep=3 — the two oldest (lex-smallest) should go.
        let (kept, removed) = setup_and_prune(
            &[
                "2026-01-01T00-00-00Z",
                "2026-02-01T00-00-00Z",
                "2026-03-01T00-00-00Z",
                "2026-04-01T00-00-00Z",
                "2026-05-01T00-00-00Z",
            ],
            3,
        );
        assert_eq!(kept.len(), 3);
        assert_eq!(kept[0], "2026-05-01T00-00-00Z");
        assert_eq!(kept[2], "2026-03-01T00-00-00Z");
        assert_eq!(removed.len(), 2);
    }

    #[test]
    fn prune_no_op_when_under_keep() {
        let (kept, removed) = setup_and_prune(&["2026-01-01T00-00-00Z", "2026-02-01T00-00-00Z"], 5);
        assert_eq!(kept.len(), 2);
        assert!(removed.is_empty());
    }

    #[test]
    fn source_staleness_ignores_build_and_dotdirs() {
        use std::time::Duration;

        let tmp = unique_tmp("ryra-stale");
        std::fs::create_dir_all(tmp.join("src")).unwrap();
        std::fs::create_dir_all(tmp.join("target")).unwrap();
        std::fs::create_dir_all(tmp.join(".git")).unwrap();
        std::fs::write(tmp.join("src/main.rs"), "fn main(){}").unwrap();
        std::fs::write(tmp.join("target/app"), "bin").unwrap();
        std::fs::write(tmp.join(".git/HEAD"), "ref").unwrap();

        // Baseline after everything we wrote: nothing is newer.
        assert!(!any_file_newer_than(
            &tmp,
            SystemTime::now() + Duration::from_secs(3600)
        ));
        // Baseline before everything: the source file trips staleness.
        assert!(any_file_newer_than(
            &tmp,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        // When only ignored dirs hold newer files, staleness stays false.
        let ignored_only = unique_tmp("ryra-stale-ign");
        std::fs::create_dir_all(ignored_only.join("node_modules")).unwrap();
        std::fs::write(ignored_only.join("node_modules/x.js"), "x").unwrap();
        assert!(!any_file_newer_than(
            &ignored_only,
            SystemTime::now() - Duration::from_secs(3600)
        ));

        let _ = std::fs::remove_dir_all(&tmp);
        let _ = std::fs::remove_dir_all(&ignored_only);
    }

    #[test]
    fn should_skip_path_excludes_env_and_manifest() {
        let lock = PathBuf::from("/svc/service.manifest");
        assert!(should_skip_path(&PathBuf::from("/svc/.env"), &lock));
        assert!(should_skip_path(&lock, &lock));
        assert!(!should_skip_path(
            &PathBuf::from("/svc/configs/x.sh"),
            &lock
        ));
    }
}