Skip to main content

purple_ssh/
vault_ssh.rs

1use anyhow::{Context, Result};
2use log::{debug, error, info};
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5use std::process::Command;
6use std::time::{Instant, SystemTime};
7
8use crate::ssh_config::model::HostEntry;
9
/// A host that has been matched to a Vault SSH role and is queued for bulk
/// signing.
///
/// `Debug` is implemented by hand elsewhere in this file (not derived) so the
/// Vault address is never printed verbatim.
#[derive(Clone, PartialEq)]
pub struct VaultSignTarget {
    /// Host alias the certificate is issued for.
    pub alias: String,
    /// Vault SSH role used to sign the key.
    pub role: String,
    /// Certificate file setting carried along for this host.
    pub certificate_file: String,
    /// Public key that will be submitted for signing.
    pub pubkey: PathBuf,
    /// Vault address to use for this host, if one was resolved.
    pub vault_addr: Option<String>,
}
19
20/// Manual `Debug` so `vault_addr` (a Vault server hostname revealing
21/// infrastructure topology) never appears unredacted in `{:?}` output.
22impl std::fmt::Debug for VaultSignTarget {
23    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
24        f.debug_struct("VaultSignTarget")
25            .field("alias", &self.alias)
26            .field("role", &self.role)
27            .field("certificate_file", &self.certificate_file)
28            .field("pubkey", &self.pubkey)
29            .field(
30                "vault_addr",
31                &self.vault_addr.as_ref().map(|_| "<redacted>"),
32            )
33            .finish()
34    }
35}
36
/// Outcome of a successful certificate signing call.
#[derive(Debug)]
pub struct SignResult {
    /// Location of the certificate file that was written.
    pub cert_path: std::path::PathBuf,
}
42
/// Validity state of an on-disk SSH certificate.
#[derive(Clone, Debug, PartialEq)]
pub enum CertStatus {
    /// The certificate is still within its validity window.
    Valid {
        /// Timestamp (seconds) at which the certificate stops being valid.
        expires_at: i64,
        /// Seconds left until expiry.
        remaining_secs: i64,
        /// Full length of the validity window in seconds (to - from). The UI
        /// uses it to derive proportional freshness thresholds.
        total_secs: i64,
    },
    /// The validity window has elapsed.
    Expired,
    /// No certificate file exists.
    Missing,
    /// The certificate could not be inspected; the payload says why.
    Invalid(String),
}
57
/// A certificate with fewer than this many seconds left is due for renewal
/// (5 minutes).
pub const RENEWAL_THRESHOLD_SECS: i64 = 5 * 60;

/// How long (in seconds) an entry in the in-memory cert status cache is
/// trusted before `ssh-keygen -L` is run again on the on-disk certificate.
///
/// Not to be confused with `RENEWAL_THRESHOLD_SECS`: this one governs how
/// often validity is *re-checked*; the other is the remaining lifetime below
/// which a new signature is actually requested from Vault.
pub const CERT_STATUS_CACHE_TTL_SECS: u64 = 5 * 60;

/// Reduced cache TTL (in seconds) for `CertStatus::Invalid` entries that came
/// from a failed check, such as an unresolvable cert path. Using this backoff
/// in place of the 5-minute re-check TTL lets transient errors clear quickly
/// while still keeping the background check thread from being hit on every
/// poll tick.
pub const CERT_ERROR_BACKOFF_SECS: u64 = 30;
73
/// Report whether `s` is an acceptable Vault SSH role path.
///
/// A role must be 1 to 128 bytes long and consist solely of ASCII letters,
/// ASCII digits, `/`, `_` and `-`.
pub fn is_valid_role(s: &str) -> bool {
    if !(1..=128).contains(&s.len()) {
        return false;
    }
    // Every accepted character is single-byte ASCII, so a byte-wise scan is
    // enough: any byte of a multi-byte character fails the test.
    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'/' | b'_' | b'-'))
}
82
/// Report whether `s` may be handed to the Vault CLI as `VAULT_ADDR`.
///
/// The check is deliberately shallow. After trimming surrounding whitespace
/// the value must be non-empty, at most 512 bytes, and free of control
/// characters and interior whitespace. No URL parsing is attempted: a typo
/// simply surfaces as a Vault CLI error, which is acceptable. The 512-byte cap
/// stops a pathological config line from bloating the environment block.
pub fn is_valid_vault_addr(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.is_empty() || candidate.len() > 512 {
        return false;
    }
    candidate
        .chars()
        .all(|c| !c.is_control() && !c.is_whitespace())
}
95
/// Turn a user-supplied Vault address into a full URL, so that bare IPs and
/// hostnames can be used.
///
/// * Anything that already carries a scheme (`http://`, `https://`, or any
///   other `scheme://`) is returned trimmed but otherwise untouched. The port
///   the user wrote, or left out, is kept so the HTTP client applies the
///   scheme default; appending a redundant `:443` breaks strict `Host`-header
///   ACLs on HAProxy and similar proxies once they drop to HTTP/1.1.
/// * A bare host gets `https://` prepended. If it names no port, Vault's
///   default `:8200` is inserted after the authority, matching the local dev
///   pattern where `vault.local` or `192.168.1.10` points at a stock Vault
///   server. An explicit port (`host:9200`) is kept.
pub fn normalize_vault_addr(s: &str) -> String {
    let addr = s.trim();
    // Covers `http://` and `https://` in any letter case as well as any other
    // scheme: all of them contain `://`.
    if addr.contains("://") {
        return addr.to_string();
    }

    // Authority is everything before the first `/`; the rest (if any) is path.
    let (authority, path) = match addr.find('/') {
        Some(slash) => addr.split_at(slash),
        None => (addr, ""),
    };
    // For a bracketed IPv6 literal only a colon after the closing bracket
    // marks a port; colons inside the brackets belong to the address.
    let port_region = match authority.rfind(']') {
        Some(bracket) => &authority[bracket..],
        None => authority,
    };

    if port_region.contains(':') {
        format!("https://{addr}")
    } else {
        format!("https://{authority}:8200{path}")
    }
}
138
/// Reduce raw Vault CLI stderr to something safe to show the user.
///
/// Any line mentioning a credential-like marker (`token`, `secret`,
/// `x-vault-`, `cookie`, `authorization`, matched case-insensitively) is
/// dropped. The surviving lines are joined with single spaces and cut to 200
/// characters, with `...` appended when truncated. If nothing survives, a
/// generic failure message is returned instead.
pub fn scrub_vault_stderr(raw: &str) -> String {
    const SENSITIVE_MARKERS: [&str; 5] = ["token", "secret", "x-vault-", "cookie", "authorization"];
    const MAX_CHARS: usize = 200;

    let safe_lines: Vec<&str> = raw
        .lines()
        .filter(|line| {
            let lowered = line.to_ascii_lowercase();
            !SENSITIVE_MARKERS
                .iter()
                .any(|marker| lowered.contains(marker))
        })
        .collect();
    let joined = safe_lines.join(" ");
    let message = joined.trim();

    if message.is_empty() {
        return "Vault SSH signing failed. Check vault status and policy".to_string();
    }
    // A character at index MAX_CHARS exists only when the message is longer
    // than the limit; its byte offset is where to cut.
    match message.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", &message[..cut]),
        None => message.to_string(),
    }
}
165
166/// Return the certificate path for a given alias: `~/.purple/certs/<alias>-cert.pub`
167pub fn cert_path_for(paths: Option<&crate::runtime::env::Paths>, alias: &str) -> Result<PathBuf> {
168    anyhow::ensure!(
169        !alias.is_empty()
170            && !alias.contains('/')
171            && !alias.contains('\\')
172            && !alias.contains(':')
173            && !alias.contains('\0')
174            && !alias.contains(".."),
175        "Invalid alias for cert path: '{}'",
176        alias
177    );
178    paths
179        .map(|p| p.cert_for(alias))
180        .context("Could not determine home directory")
181}
182
183/// Resolve the actual certificate file path for a host.
184/// Priority: CertificateFile directive > purple's default cert path.
185pub fn resolve_cert_path(
186    paths: Option<&crate::runtime::env::Paths>,
187    alias: &str,
188    certificate_file: &str,
189) -> Result<PathBuf> {
190    if !certificate_file.is_empty() {
191        let expanded = if let Some(rest) = certificate_file.strip_prefix("~/") {
192            if let Some(p) = paths {
193                p.home().join(rest)
194            } else {
195                PathBuf::from(certificate_file)
196            }
197        } else {
198            PathBuf::from(certificate_file)
199        };
200        Ok(expanded)
201    } else {
202        cert_path_for(paths, alias)
203    }
204}
205
206/// Sign an SSH public key via Vault SSH secrets engine.
207/// Runs: `vault write -field=signed_key <role> public_key=@<pubkey_path>`
208/// Writes the signed certificate to `~/.purple/certs/<alias>-cert.pub`.
209///
210/// When `vault_addr` is `Some`, it is set as the `VAULT_ADDR` env var on the
211/// `vault` subprocess, overriding whatever the parent shell has configured.
212/// When `None`, the subprocess inherits the parent's env (current behavior).
213/// This lets purple users configure Vault address at the provider or host
214/// level without needing to launch purple from a pre-exported shell.
215pub fn sign_certificate(
216    env: &crate::runtime::env::Env,
217    role: &str,
218    pubkey_path: &Path,
219    alias: &str,
220    vault_addr: Option<&str>,
221) -> Result<SignResult> {
222    if !pubkey_path.exists() {
223        anyhow::bail!(
224            "Public key not found: {}. Set IdentityFile on the host or ensure ~/.ssh/id_ed25519.pub exists.",
225            pubkey_path.display()
226        );
227    }
228
229    if !is_valid_role(role) {
230        anyhow::bail!("Invalid Vault SSH role: '{}'", role);
231    }
232
233    let cert_dest = cert_path_for(env.paths(), alias)?;
234
235    if let Some(parent) = cert_dest.parent() {
236        std::fs::create_dir_all(parent)
237            .with_context(|| crate::messages::vault_create_dir_failed(&parent.display()))?;
238    }
239
240    // The Vault CLI receives the public key path as a UTF-8 argument. `Path::display()`
241    // is lossy on non-UTF8 paths and could produce a mangled path Vault would then fail
242    // to read. Require a valid UTF-8 path and fail fast with a clear message.
243    let pubkey_str = pubkey_path.to_str().context(
244        "public key path contains non-UTF8 bytes; vault CLI requires a valid UTF-8 path",
245    )?;
246    // The Vault CLI parses arguments as `key=value` KV pairs. A path containing
247    // `=` would be split mid-argument and produce a cryptic parse error. The
248    // check runs on the already-resolved (tilde-expanded) path because that is
249    // exactly the byte sequence the CLI will see. A user with a `$HOME` path
250    // that itself contains `=` will hit this early; the error message reports
251    // the expanded path so they can rename the offending directory.
252    if pubkey_str.contains('=') {
253        anyhow::bail!(
254            "Public key path '{}' contains '=' which is not supported by the Vault CLI argument format. Rename the key file or directory.",
255            pubkey_str
256        );
257    }
258    let pubkey_arg = format!("public_key=@{}", pubkey_str);
259    debug!(
260        "[external] Vault sign request: addr={} role={}",
261        vault_addr.unwrap_or("<env>"),
262        role
263    );
264    let mut cmd = env.command("vault");
265    cmd.args(["write", "-field=signed_key", role, &pubkey_arg]);
266    // Override VAULT_ADDR for this subprocess only when a value was resolved
267    // from config. Otherwise leave the env untouched so `vault` keeps using
268    // whatever the parent shell (or `~/.vault-token`) provides. The caller
269    // (typically `resolve_vault_addr`) is expected to have validated and
270    // trimmed the value already — re-checking here is cheap belt-and-braces
271    // for callers that construct the `Option<&str>` manually.
272    if let Some(addr) = vault_addr {
273        anyhow::ensure!(
274            is_valid_vault_addr(addr),
275            "Invalid VAULT_ADDR '{}' for role '{}'. Check the Vault SSH Address field.",
276            addr,
277            role
278        );
279        cmd.env("VAULT_ADDR", addr);
280    }
281    let mut child = cmd
282        .stdout(std::process::Stdio::piped())
283        .stderr(std::process::Stdio::piped())
284        .spawn()
285        .context("Failed to run vault CLI. Is vault installed and in PATH?")?;
286
287    // Drain both pipes on background threads to prevent pipe-buffer deadlock.
288    // Without this, the vault CLI can block writing to a full stderr pipe
289    // (64 KB) while we poll try_wait, causing a false timeout.
290    let stdout_handle = child.stdout.take();
291    let stderr_handle = child.stderr.take();
292    let stdout_thread = std::thread::spawn(move || -> Vec<u8> {
293        let mut buf = Vec::new();
294        if let Some(mut h) = stdout_handle {
295            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
296                log::warn!("[external] Failed to read vault stdout pipe: {e}");
297            }
298        }
299        buf
300    });
301    let stderr_thread = std::thread::spawn(move || -> Vec<u8> {
302        let mut buf = Vec::new();
303        if let Some(mut h) = stderr_handle {
304            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
305                log::warn!("[external] Failed to read vault stderr pipe: {e}");
306            }
307        }
308        buf
309    });
310
311    // Wait up to 30 seconds for the vault CLI to complete. Without a timeout
312    // the thread blocks indefinitely when the Vault server is unreachable
313    // (e.g. wrong address, firewall, TLS handshake hanging).
314    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
315    let status = loop {
316        match child.try_wait() {
317            Ok(Some(s)) => break s,
318            Ok(None) => {
319                if std::time::Instant::now() >= deadline {
320                    let _ = child.kill();
321                    let _ = child.wait();
322                    // The pipe-drain threads (stdout_thread, stderr_thread)
323                    // are dropped without joining here. This is intentional:
324                    // kill() closes the child's pipe ends, so read_to_end
325                    // returns immediately and the threads self-terminate.
326                    error!(
327                        "[external] Vault unreachable: {}: timed out after 30s",
328                        vault_addr.unwrap_or("<env>")
329                    );
330                    anyhow::bail!("Vault SSH timed out. Server unreachable.");
331                }
332                std::thread::sleep(std::time::Duration::from_millis(100));
333            }
334            Err(e) => {
335                let _ = child.kill();
336                let _ = child.wait();
337                anyhow::bail!("Failed to wait for vault CLI: {}", e);
338            }
339        }
340    };
341
342    let stdout_bytes = stdout_thread.join().unwrap_or_default();
343    let stderr_bytes = stderr_thread.join().unwrap_or_default();
344    let output = std::process::Output {
345        status,
346        stdout: stdout_bytes,
347        stderr: stderr_bytes,
348    };
349
350    if !output.status.success() {
351        let stderr = String::from_utf8_lossy(&output.stderr);
352        if stderr.contains("permission denied") || stderr.contains("403") {
353            error!(
354                "[external] Vault auth failed: permission denied (role={} addr={})",
355                role,
356                vault_addr.unwrap_or("<env>")
357            );
358            anyhow::bail!("Vault SSH permission denied. Check token and policy.");
359        }
360        if stderr.contains("missing client token") || stderr.contains("token expired") {
361            error!(
362                "[external] Vault auth failed: token missing or expired (role={} addr={})",
363                role,
364                vault_addr.unwrap_or("<env>")
365            );
366            anyhow::bail!("Vault SSH token missing or expired. Run `vault login`.");
367        }
368        // Check "connection refused" before "dial tcp" because Go's
369        // refused-connection error contains both substrings.
370        if stderr.contains("connection refused") {
371            error!(
372                "[external] Vault unreachable: {}: connection refused",
373                vault_addr.unwrap_or("<env>")
374            );
375            anyhow::bail!("Vault SSH connection refused.");
376        }
377        if stderr.contains("i/o timeout") || stderr.contains("dial tcp") {
378            error!(
379                "[external] Vault unreachable: {}: connection timed out",
380                vault_addr.unwrap_or("<env>")
381            );
382            anyhow::bail!("Vault SSH connection timed out.");
383        }
384        if stderr.contains("no such host") {
385            error!(
386                "[external] Vault unreachable: {}: no such host",
387                vault_addr.unwrap_or("<env>")
388            );
389            anyhow::bail!("Vault SSH host not found.");
390        }
391        if stderr.contains("server gave HTTP response to HTTPS client") {
392            error!(
393                "[external] Vault unreachable: {}: server returned HTTP on HTTPS connection",
394                vault_addr.unwrap_or("<env>")
395            );
396            anyhow::bail!("Vault SSH server uses HTTP, not HTTPS. Set address to http://.");
397        }
398        if stderr.contains("certificate signed by unknown authority")
399            || stderr.contains("tls:")
400            || stderr.contains("x509:")
401        {
402            error!(
403                "[external] Vault unreachable: {}: TLS error",
404                vault_addr.unwrap_or("<env>")
405            );
406            anyhow::bail!("Vault SSH TLS error. Check certificate or use http://.");
407        }
408        error!(
409            "[external] Vault SSH signing failed: {}",
410            scrub_vault_stderr(&stderr)
411        );
412        anyhow::bail!("Vault SSH failed: {}", scrub_vault_stderr(&stderr));
413    }
414
415    let signed_key = String::from_utf8_lossy(&output.stdout).trim().to_string();
416    if signed_key.is_empty() {
417        anyhow::bail!("Vault returned empty certificate for role '{}'", role);
418    }
419
420    crate::fs_util::atomic_write(&cert_dest, signed_key.as_bytes())
421        .with_context(|| crate::messages::vault_write_cert_failed(&cert_dest.display()))?;
422
423    info!("Vault SSH certificate signed for {}", alias);
424    Ok(SignResult {
425        cert_path: cert_dest,
426    })
427}
428
429/// Check the validity of an SSH certificate file via `ssh-keygen -L`.
430///
431/// Timezone note: `ssh-keygen -L` outputs local civil time, which `parse_ssh_datetime`
432/// converts to pseudo-epoch seconds. Rather than comparing against UTC `now` (which would
433/// be wrong in non-UTC zones), we compute the TTL from the parsed from/to difference
434/// (timezone-independent) and measure elapsed time since the cert file was written (UTC
435/// file mtime vs UTC now). This keeps both sides in the same reference frame.
436pub fn check_cert_validity(env: &crate::runtime::env::Env, cert_path: &Path) -> CertStatus {
437    if !cert_path.exists() {
438        return CertStatus::Missing;
439    }
440
441    let output = match env
442        .command("ssh-keygen")
443        .args(["-L", "-f"])
444        .arg(cert_path)
445        .output()
446    {
447        Ok(o) => o,
448        Err(e) => return CertStatus::Invalid(crate::messages::vault_ssh_keygen_run_failed(&e)),
449    };
450
451    if !output.status.success() {
452        return CertStatus::Invalid("ssh-keygen could not read certificate".to_string());
453    }
454
455    let stdout = String::from_utf8_lossy(&output.stdout);
456
457    // Handle certificates signed with no expiration ("Valid: forever").
458    for line in stdout.lines() {
459        let t = line.trim();
460        if t == "Valid: forever" || t.starts_with("Valid: from ") && t.ends_with(" to forever") {
461            return CertStatus::Valid {
462                expires_at: i64::MAX,
463                remaining_secs: i64::MAX,
464                total_secs: i64::MAX,
465            };
466        }
467    }
468
469    for line in stdout.lines() {
470        if let Some((from, to)) = parse_valid_line(line) {
471            let ttl = to - from; // Correct regardless of timezone
472            // Defensive: a cert with to < from is malformed. Treat as Invalid
473            // rather than propagating a negative ttl into the cache and the
474            // renewal threshold calculation.
475            if ttl <= 0 {
476                return CertStatus::Invalid(
477                    "certificate has non-positive validity window".to_string(),
478                );
479            }
480
481            // Use file modification time as the signing timestamp (UTC)
482            let signed_at = match std::fs::metadata(cert_path)
483                .and_then(|m| m.modified())
484                .ok()
485                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
486            {
487                Some(d) => d.as_secs() as i64,
488                None => {
489                    // Cannot determine file age. Treat as needing renewal.
490                    return CertStatus::Expired;
491                }
492            };
493
494            let now = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
495                Ok(d) => d.as_secs() as i64,
496                Err(_) => {
497                    return CertStatus::Invalid("system clock before unix epoch".to_string());
498                }
499            };
500
501            let elapsed = now - signed_at;
502            let remaining = ttl - elapsed;
503
504            if remaining <= 0 {
505                return CertStatus::Expired;
506            }
507            let expires_at = now + remaining;
508            return CertStatus::Valid {
509                expires_at,
510                remaining_secs: remaining,
511                total_secs: ttl,
512            };
513        }
514    }
515
516    CertStatus::Invalid("No Valid: line found in certificate".to_string())
517}
518
519/// Parse "Valid: from YYYY-MM-DDTHH:MM:SS to YYYY-MM-DDTHH:MM:SS" from ssh-keygen -L.
520fn parse_valid_line(line: &str) -> Option<(i64, i64)> {
521    let trimmed = line.trim();
522    let rest = trimmed.strip_prefix("Valid:")?;
523    let rest = rest.trim();
524    let rest = rest.strip_prefix("from ")?;
525    let (from_str, rest) = rest.split_once(" to ")?;
526    let to_str = rest.trim();
527
528    let from = parse_ssh_datetime(from_str)?;
529    let to = parse_ssh_datetime(to_str)?;
530    Some((from, to))
531}
532
/// Parse a `YYYY-MM-DDTHH:MM:SS` timestamp into seconds since the Unix epoch,
/// interpreting the civil time as if it were UTC.
///
/// `ssh-keygen` prints local time, so the absolute value returned here is off
/// by the local UTC offset. Callers should therefore only rely on the
/// *difference* between two values parsed by this function, not compare a
/// result against the real clock.
///
/// Surrounding whitespace is ignored and anything after the 19th byte is not
/// inspected. Returns `None` when the input is too short, a separator is
/// wrong, a field contains anything other than ASCII digits (so a signed field
/// such as `+1` is rejected), or a field is out of range (month 1-12, day
/// 1-31, hour 0-23, minute and second 0-59). The day is not checked against
/// the length of the month.
fn parse_ssh_datetime(s: &str) -> Option<i64> {
    let bytes = s.trim().as_bytes();
    if bytes.len() < 19 {
        return None;
    }

    let separators_ok = bytes.get(4) == Some(&b'-')
        && bytes.get(7) == Some(&b'-')
        && bytes.get(10) == Some(&b'T')
        && bytes.get(13) == Some(&b':')
        && bytes.get(16) == Some(&b':');
    if !separators_ok {
        return None;
    }

    // Strict decimal field: digits only. `str::parse::<i64>` would also accept
    // a leading `+` or `-`, letting malformed stamps such as `2024-+1-01...`
    // through.
    let field = |range: std::ops::Range<usize>| -> Option<i64> {
        bytes.get(range)?.iter().try_fold(0_i64, |acc, b| {
            b.is_ascii_digit()
                .then(|| acc * 10 + i64::from(*b - b'0'))
        })
    };

    let year = field(0..4)?;
    let month = field(5..7)?;
    let day = field(8..10)?;
    let hour = field(11..13)?;
    let min = field(14..16)?;
    let sec = field(17..19)?;

    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }
    if !(0..=23).contains(&hour) || !(0..=59).contains(&min) || !(0..=59).contains(&sec) {
        return None;
    }

    // Civil date to days since the epoch. The year is shifted so that it
    // starts in March, which puts the leap day at the end of the year.
    let (y, m) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };
    let era = (if y >= 0 { y } else { y - 399 }) / 400;
    let yoe = y - era * 400;
    let doy = (153 * m + 2) / 5 + day - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    let days = era * 146097 + doe - 719468;

    Some(days * 86400 + hour * 3600 + min * 60 + sec)
}
581
582/// Check if a certificate needs renewal.
583///
584/// For certificates whose total validity window is shorter than
585/// `RENEWAL_THRESHOLD_SECS`, the fixed 5-minute threshold would flag a freshly
586/// signed cert as needing renewal immediately, causing an infinite re-sign loop.
587/// In that case we fall back to a proportional threshold (half the total).
588pub fn needs_renewal(status: &CertStatus) -> bool {
589    match status {
590        CertStatus::Missing | CertStatus::Expired | CertStatus::Invalid(_) => true,
591        CertStatus::Valid {
592            remaining_secs,
593            total_secs,
594            ..
595        } => {
596            let threshold = if *total_secs > 0 && *total_secs <= RENEWAL_THRESHOLD_SECS {
597                *total_secs / 2
598            } else {
599                RENEWAL_THRESHOLD_SECS
600            };
601            *remaining_secs < threshold
602        }
603    }
604}
605
606/// Ensure a valid certificate exists for a host. Signs a new one if needed.
607/// Checks at the CertificateFile path (or purple's default) before signing.
608pub fn ensure_cert(
609    env: &crate::runtime::env::Env,
610    role: &str,
611    pubkey_path: &Path,
612    alias: &str,
613    certificate_file: &str,
614    vault_addr: Option<&str>,
615) -> Result<PathBuf> {
616    let check_path = resolve_cert_path(env.paths(), alias, certificate_file)?;
617    let status = check_cert_validity(env, &check_path);
618
619    if !needs_renewal(&status) {
620        info!(
621            "Vault SSH certificate cache hit: alias={} role={} path={}",
622            alias,
623            role,
624            check_path.display()
625        );
626        return Ok(check_path);
627    }
628
629    log::debug!(
630        "Vault SSH certificate cache miss: alias={} role={} status={:?} -> signing",
631        alias,
632        role,
633        status
634    );
635    let result = sign_certificate(env, role, pubkey_path, alias, vault_addr)?;
636    Ok(result.cert_path)
637}
638
639/// Resolve the public key path for signing.
640/// Priority: host IdentityFile + ".pub" > ~/.ssh/id_ed25519.pub fallback.
641/// Returns an error when the user's home directory cannot be determined. Any
642/// IdentityFile pointing outside `$HOME` is rejected and falls back to the
643/// default `~/.ssh/id_ed25519.pub` to prevent reading arbitrary filesystem
644/// locations via a crafted IdentityFile directive.
645pub fn resolve_pubkey_path(
646    paths: Option<&crate::runtime::env::Paths>,
647    identity_file: &str,
648) -> Result<PathBuf> {
649    let home = paths
650        .context("Could not determine home directory")?
651        .home()
652        .to_path_buf();
653    let fallback = home.join(".ssh/id_ed25519.pub");
654
655    if identity_file.is_empty() {
656        return Ok(fallback);
657    }
658
659    let expanded = if let Some(rest) = identity_file.strip_prefix("~/") {
660        home.join(rest)
661    } else {
662        PathBuf::from(identity_file)
663    };
664
665    // A purely lexical `starts_with(&home)` check can be bypassed by a symlink inside
666    // $HOME pointing to a path outside $HOME (e.g. ~/evil -> /etc). Canonicalize both
667    // sides so symlinks are resolved, then compare. If the expanded path does not yet
668    // exist (or canonicalize fails for any reason) we cannot safely reason about where
669    // it actually points, so fall back to the default key path.
670    let canonical_home = match std::fs::canonicalize(&home) {
671        Ok(p) => p,
672        Err(_) => return Ok(fallback),
673    };
674    if expanded.exists() {
675        match std::fs::canonicalize(&expanded) {
676            Ok(canonical) if canonical.starts_with(&canonical_home) => {}
677            _ => return Ok(fallback),
678        }
679    } else if !expanded.starts_with(&home) {
680        return Ok(fallback);
681    }
682
683    if expanded.extension().is_some_and(|ext| ext == "pub") {
684        Ok(expanded)
685    } else {
686        let mut s = expanded.into_os_string();
687        s.push(".pub");
688        Ok(PathBuf::from(s))
689    }
690}
691
692/// Resolve the effective vault role for a host.
693/// Priority: host-level vault_ssh > provider-level vault_role > None.
694///
695/// `provider_label` selects between multiple labeled configs of the same
696/// provider. None means a bare config (legacy 2-segment marker).
697pub fn resolve_vault_role(
698    host_vault_ssh: Option<&str>,
699    provider_name: Option<&str>,
700    provider_label: Option<&str>,
701    provider_config: &crate::providers::config::ProviderConfig,
702) -> Option<String> {
703    if let Some(role) = host_vault_ssh {
704        if !role.is_empty() {
705            return Some(role.to_string());
706        }
707    }
708
709    if let Some(name) = provider_name {
710        let id = crate::providers::config::ProviderConfigId {
711            provider: name.to_string(),
712            label: provider_label.map(|s| s.to_string()),
713        };
714        let section = provider_config
715            .section_by_id(&id)
716            .or_else(|| provider_config.section(name));
717        if let Some(section) = section {
718            if !section.vault_role.is_empty() {
719                return Some(section.vault_role.clone());
720            }
721        }
722    }
723
724    None
725}
726
727/// Resolve the effective Vault address for a host.
728///
729/// Precedence (highest wins): per-host `# purple:vault-addr` comment,
730/// provider `vault_addr=` setting, else None (caller falls back to the
731/// `vault` CLI's own env resolution).
732///
733/// Both layers are re-validated with `is_valid_vault_addr` even though the
734/// parser paths (`HostBlock::vault_addr()` and `ProviderConfig::parse`)
735/// already drop invalid values. This is defensive: a future caller that
736/// constructs a `HostEntry` or `ProviderSection` in-memory (tests, migration
737/// code, a new feature) won't be able to smuggle a malformed `VAULT_ADDR`
738/// into `sign_certificate` through this resolver.
739pub fn resolve_vault_addr(
740    host_vault_addr: Option<&str>,
741    provider_name: Option<&str>,
742    provider_label: Option<&str>,
743    provider_config: &crate::providers::config::ProviderConfig,
744) -> Option<String> {
745    if let Some(addr) = host_vault_addr {
746        let trimmed = addr.trim();
747        if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
748            return Some(normalize_vault_addr(trimmed));
749        }
750    }
751
752    if let Some(name) = provider_name {
753        let id = crate::providers::config::ProviderConfigId {
754            provider: name.to_string(),
755            label: provider_label.map(|s| s.to_string()),
756        };
757        let section = provider_config
758            .section_by_id(&id)
759            .or_else(|| provider_config.section(name));
760        if let Some(section) = section {
761            let trimmed = section.vault_addr.trim();
762            if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
763                return Some(normalize_vault_addr(trimmed));
764            }
765        }
766    }
767
768    None
769}
770
771/// Resolve the effective ProxyJump chain for an alias by asking ssh itself.
772///
773/// Uses `ssh -G -F <config> <alias>` so wildcard patterns and `Match` blocks
774/// contribute the same way they do at connect time. Without this, a host that
775/// inherits ProxyJump from a wildcard (e.g. `Host *prod*  ProxyJump bastion`)
776/// would look like it has no proxy when read from its own block alone.
777///
778/// Returns aliases in dependency order: proxies first, the target last. The
779/// target is always present, even when ssh resolution yields nothing. Cycles
780/// are broken with a visited set. Hosts referenced via ProxyJump that have no
781/// matching `Host` block in the config still appear in the chain so callers
782/// can decide what to do with them; existence is verified by the caller.
783pub fn resolve_proxy_chain(config_path: &Path, alias: &str) -> Vec<String> {
784    let mut chain: Vec<String> = Vec::new();
785    let mut visited: HashSet<String> = HashSet::new();
786    let mut queue: Vec<String> = vec![alias.to_string()];
787
788    while let Some(current) = queue.pop() {
789        if !visited.insert(current.clone()) {
790            continue;
791        }
792        chain.push(current.clone());
793
794        let output = Command::new("ssh")
795            .args(["-G", "-F"])
796            .arg(config_path)
797            .arg("--")
798            .arg(&current)
799            .output();
800
801        let Ok(output) = output else {
802            debug!("[external] ssh -G failed for {}: spawn error", current);
803            continue;
804        };
805        if !output.status.success() {
806            debug!(
807                "[external] ssh -G non-zero exit for {} (code {:?})",
808                current,
809                output.status.code()
810            );
811            continue;
812        }
813
814        let stdout = String::from_utf8_lossy(&output.stdout);
815        for line in stdout.lines() {
816            let lower = line.to_ascii_lowercase();
817            let Some(rest) = lower.strip_prefix("proxyjump ") else {
818                continue;
819            };
820            // ssh -G emits literal "none" when no proxy is configured.
821            if rest.trim() == "none" {
822                continue;
823            }
824            // Use the original-case slice for the value; ssh prints the
825            // proxyjump value verbatim after the lower-cased key.
826            // strip_prefix already guarantees line.len() >= "proxyjump ".len().
827            let value = &line["proxyjump ".len()..];
828            for jump in value.split(',') {
829                let host = parse_proxy_jump_host(jump.trim());
830                if !host.is_empty() {
831                    queue.push(host.to_string());
832                }
833            }
834        }
835    }
836
837    chain.reverse();
838    chain
839}
840
/// Extract the host portion from a single ProxyJump entry.
///
/// Both spellings accepted by ssh's `ProxyJump` are understood:
/// `[user@]host[:port]` and the URI form `ssh://[user@]host[:port]`.
/// Bracketed IPv6 hosts such as `[::1]:22` are returned without the brackets.
///
/// Surrounding whitespace is ignored. The returned slice borrows from `jump`
/// and is empty when the entry contains no host.
fn parse_proxy_jump_host(jump: &str) -> &str {
    let entry = jump.trim();

    // URI form: drop the scheme and anything from the first '/' onward so the
    // scheme's own ':' is not mistaken for the host/port separator below.
    let entry = match entry.strip_prefix("ssh://") {
        Some(authority) => authority.split('/').next().unwrap_or(authority),
        None => entry,
    };

    // Everything up to the last '@' is the user part.
    let after_user = entry.rsplit_once('@').map_or(entry, |(_, host)| host);

    // Bracketed hosts (IPv6 literals) contain ':' themselves, so take the text
    // between the brackets instead of splitting on ':'.
    if let Some(rest) = after_user.strip_prefix('[') {
        if let Some(end) = rest.find(']') {
            return &rest[..end];
        }
    }

    // Plain host: cut off an optional ":port".
    after_user.split(':').next().unwrap_or(after_user)
}
853
/// A single entry rendered in the Vault SSH strip on the Keys tab.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ActiveCert {
    /// Alias of the host this certificate belongs to.
    pub alias: String,
    /// Vault SSH role, as given by the host's `# purple:vault-ssh <role>`
    /// marker.
    pub role: String,
    /// Time left before the certificate expires, in seconds.
    pub remaining_secs: i64,
    /// Length of the signed certificate's full validity window, in seconds.
    /// The gauge uses `remaining/total` as its fill ratio.
    pub total_secs: i64,
}
867
868/// True iff a host has any purple-managed Vault context: either an
869/// explicit `# purple:vault-ssh` role marker, or a `CertificateFile`
870/// directive pointing into `~/.purple/certs/`. The second branch covers
871/// users who sign certs directly with the `vault` CLI and wire them in
872/// via `CertificateFile` without setting the role marker.
873pub fn has_purple_vault_context(host: &HostEntry) -> bool {
874    host.vault_ssh.is_some() || cert_file_in_purple_dir(&host.certificate_file)
875}
876
/// Whether a `CertificateFile` value looks like a purple-managed certificate.
///
/// The path qualifies when it mentions the per-user `.purple/certs/`
/// directory. This is a substring match, so tilde-prefixed and absolute
/// paths are treated alike. An empty path never qualifies.
pub fn cert_file_in_purple_dir(cert_file: &str) -> bool {
    const PURPLE_CERTS_SEGMENT: &str = "/.purple/certs/";
    // An empty string cannot contain the segment, so emptiness needs no
    // separate check.
    cert_file.contains(PURPLE_CERTS_SEGMENT)
}
884
885/// True when any host has a purple-managed Vault context. The Keys-tab
886/// strip renders iff this returns true. Even hosts whose cert is not
887/// yet cached count, so the strip appears the moment the user
888/// configures their first Vault role or sets a cert path.
889pub fn vault_ssh_in_use(hosts: &[HostEntry]) -> bool {
890    hosts.iter().any(has_purple_vault_context)
891}
892
893/// Build the strip's row list from the cert cache. Hosts that have a
894/// configured role (or a purple-managed cert path) but no cached
895/// `Valid` status are omitted; the gauge has nothing to fill until the
896/// lazy cert check populates the cache. Sort: longest remaining first
897/// so the user sees healthy certs at the top and expiring ones at the
898/// bottom.
899pub fn active_certs_for_strip(
900    hosts: &[HostEntry],
901    cache: &HashMap<String, (Instant, CertStatus, Option<SystemTime>)>,
902) -> Vec<ActiveCert> {
903    // Recompute `remaining_secs` against the current wall clock instead
904    // of using the cached snapshot. The cached number was correct only
905    // at the moment the check ran; the strip is redrawn on every event
906    // tick (~20× per second), so deriving from `expires_at - now` gives
907    // a per-second countdown without re-running the cert validation.
908    let now = SystemTime::now()
909        .duration_since(SystemTime::UNIX_EPOCH)
910        .map(|d| d.as_secs() as i64)
911        .unwrap_or(0);
912    let mut rows: Vec<ActiveCert> = hosts
913        .iter()
914        .filter(|h| has_purple_vault_context(h))
915        .filter_map(|h| {
916            let role = h.vault_ssh.clone().unwrap_or_default();
917            match cache.get(&h.alias) {
918                Some((
919                    _,
920                    CertStatus::Valid {
921                        expires_at,
922                        remaining_secs,
923                        total_secs,
924                    },
925                    _,
926                )) => {
927                    // `expires_at == 0` is the demo sentinel for "no
928                    // wall clock"; fall back to the static cached value
929                    // so visual fixtures stay byte-deterministic.
930                    let live_remaining = if *expires_at == 0 {
931                        *remaining_secs
932                    } else {
933                        (*expires_at - now).max(0)
934                    };
935                    Some(ActiveCert {
936                        alias: h.alias.clone(),
937                        role,
938                        remaining_secs: live_remaining,
939                        total_secs: *total_secs,
940                    })
941                }
942                _ => None,
943            }
944        })
945        .collect();
946    rows.sort_by_key(|r| std::cmp::Reverse(r.remaining_secs));
947    rows
948}
949
/// Fill ratio in `0.0..=1.0` for a Vault SSH cert TTL gauge.
///
/// * `0.0` when either value is zero or negative.
/// * `1.0` when `total_secs` is `i64::MAX` (a "Valid: forever" cert) or when
///   `remaining_secs` is at least `total_secs` (e.g. renewal overlap).
/// * `remaining_secs / total_secs` otherwise, clamped to the unit range.
pub fn cert_fill_ratio(remaining_secs: i64, total_secs: i64) -> f32 {
    match (remaining_secs, total_secs) {
        (remaining, total) if remaining <= 0 || total <= 0 => 0.0,
        (_, i64::MAX) => 1.0,
        (remaining, total) if remaining >= total => 1.0,
        (remaining, total) => (remaining as f32 / total as f32).clamp(0.0, 1.0),
    }
}
962
/// Render the remaining lifetime of a certificate for display.
///
/// Zero or negative input yields `"expired"`. Otherwise the result is
/// `"<h>h <m>m"` when at least one full hour remains and `"<m>m"` when not.
/// Seconds are truncated, so anything under a minute shows as `"0m"`.
pub fn format_remaining(remaining_secs: i64) -> String {
    if remaining_secs <= 0 {
        return String::from("expired");
    }
    let whole_minutes = remaining_secs / 60;
    match (whole_minutes / 60, whole_minutes % 60) {
        (0, mins) => format!("{mins}m"),
        (hours, mins) => format!("{hours}h {mins}m"),
    }
}
976
977#[cfg(test)]
978#[path = "vault_ssh_tests.rs"]
979mod tests;