Skip to main content

purple_ssh/
vault_ssh.rs

1use anyhow::{Context, Result};
2use log::{debug, error, info};
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5use std::process::Command;
6use std::time::{Instant, SystemTime};
7
8use crate::ssh_config::model::HostEntry;
9
/// Outcome of a successful certificate signing operation.
#[derive(Debug)]
pub struct SignResult {
    /// Location on disk where the freshly signed certificate was written.
    pub cert_path: PathBuf,
}
15
/// Validity status of an on-disk SSH certificate.
///
/// Every payload type (`i64`, `String`) is totally comparable, so the enum
/// derives `Eq` alongside `PartialEq`. This lets it be used in contexts that
/// require full equality (e.g. as part of a type deriving `Eq`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum CertStatus {
    /// The certificate is currently usable.
    Valid {
        /// Expiry instant in seconds since the Unix epoch (`i64::MAX` for
        /// certificates that never expire).
        expires_at: i64,
        /// Seconds left before the certificate expires.
        remaining_secs: i64,
        /// Total certificate validity window in seconds (to - from), used by
        /// the UI to compute proportional freshness thresholds.
        total_secs: i64,
    },
    /// The certificate's validity window has elapsed.
    Expired,
    /// No certificate file exists at the checked path.
    Missing,
    /// The certificate could not be inspected; carries a human-readable reason.
    Invalid(String),
}
30
/// A certificate with fewer than this many seconds of validity left is due
/// for renewal (5 minutes).
pub const RENEWAL_THRESHOLD_SECS: i64 = 5 * 60;
33
/// How long (in seconds) an in-memory cert status stays cached before
/// `ssh-keygen -L` is run again against the on-disk certificate.
///
/// Not to be confused with `RENEWAL_THRESHOLD_SECS`: this value governs how
/// often a cert's validity is *re-checked*, whereas `RENEWAL_THRESHOLD_SECS`
/// is the minimum remaining lifetime below which a new signature is actually
/// requested from Vault.
pub const CERT_STATUS_CACHE_TTL_SECS: u64 = 5 * 60;
40
/// Cache lifetime (in seconds) for `CertStatus::Invalid` entries that result
/// from a failed check (e.g. an unresolvable cert path).
///
/// Deliberately shorter than the regular 5-minute re-check TTL: transient
/// errors clear up quickly, yet the background check thread is not hammered
/// on every poll tick.
pub const CERT_ERROR_BACKOFF_SECS: u64 = 30;
46
/// Check whether `s` is an acceptable Vault SSH role path.
///
/// A role is accepted when it is non-empty, at most 128 bytes long and made
/// up solely of ASCII letters, ASCII digits, `/`, `_` and `-`.
pub fn is_valid_role(s: &str) -> bool {
    if s.is_empty() || s.len() > 128 {
        return false;
    }
    // Any non-ASCII character is encoded with bytes >= 0x80, none of which
    // pass the check below, so a byte-wise scan is sufficient.
    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'/' | b'_' | b'-'))
}
55
/// Check whether `s` may be handed to the Vault CLI as `VAULT_ADDR`.
///
/// The check is deliberately shallow. After trimming surrounding whitespace
/// the value must be non-empty, at most 512 bytes long, and free of control
/// characters and whitespace. No URL parsing is attempted: a typo simply
/// surfaces as a Vault CLI error. The length cap keeps a pathological config
/// line from bloating the subprocess environment block.
pub fn is_valid_vault_addr(s: &str) -> bool {
    let addr = s.trim();
    if addr.is_empty() || addr.len() > 512 {
        return false;
    }
    addr.chars()
        .all(|c| !c.is_control() && !c.is_whitespace())
}
68
/// Normalize a Vault address so bare IPs and hostnames work.
///
/// * No scheme: `https://` is prepended (production Vault always uses TLS;
///   dev-mode users can type `http://` explicitly) and, when no port is
///   given, Vault's default port `:8200` is appended.
/// * Explicit `http://` / `https://` (matched case-insensitively): the
///   scheme's standard port (`:80` / `:443`) is appended when none is given.
/// * Any other scheme (`ftp://`, ...): the trimmed input is returned as-is so
///   the Vault CLI reports the error.
///
/// The port is inserted right after the authority component, which ends at
/// the first `/`, `?` or `#`. A `user[:password]@` prefix is skipped when
/// looking for an existing port, and a colon inside `[...]` (IPv6 literal) is
/// not treated as a port separator.
pub fn normalize_vault_addr(s: &str) -> String {
    let trimmed = s.trim();
    // ASCII lower-casing keeps byte offsets intact, so prefix lengths found
    // on `lower` are valid for `trimmed` as well.
    let lower = trimmed.to_ascii_lowercase();

    let (with_scheme, scheme_len, default_port) = if lower.starts_with("https://") {
        (trimmed.to_string(), "https://".len(), 443)
    } else if lower.starts_with("http://") {
        (trimmed.to_string(), "http://".len(), 80)
    } else if trimmed.contains("://") {
        // Unknown scheme (ftp://, etc.) — return as-is, let the CLI error.
        return trimmed.to_string();
    } else {
        // Bare host: default to TLS and Vault's own default port.
        (format!("https://{}", trimmed), "https://".len(), 8200)
    };

    // The authority (`[userinfo@]host[:port]`) runs until the first path,
    // query or fragment delimiter.
    let after_scheme = &with_scheme[scheme_len..];
    let authority_len = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_len];

    // A colon inside the userinfo part is a password separator, not a port.
    let host_port = authority.rsplit_once('@').map_or(authority, |(_, hp)| hp);
    // IPv6 addresses use [::1]:port syntax. A colon inside brackets is not a
    // port separator, so only look after the closing bracket.
    let has_port = match host_port.rfind(']') {
        Some(bracket_end) => host_port[bracket_end..].contains(':'),
        None => host_port.contains(':'),
    };
    if has_port {
        return with_scheme;
    }

    let (head, tail) = with_scheme.split_at(scheme_len + authority_len);
    format!("{}:{}{}", head, default_port, tail)
}
120
/// Sanitize raw Vault CLI stderr so it is safe to display.
///
/// Any line mentioning a credential-like keyword (`token`, `secret`,
/// `x-vault-`, `cookie`, `authorization`; matched ASCII case-insensitively)
/// is dropped. The surviving lines are joined with single spaces, trimmed,
/// and cut to 200 characters followed by `...` when longer. If nothing
/// survives, a generic failure message is returned instead.
pub fn scrub_vault_stderr(raw: &str) -> String {
    const SENSITIVE: [&str; 5] = ["token", "secret", "x-vault-", "cookie", "authorization"];
    const MAX_CHARS: usize = 200;

    let mut kept: Vec<&str> = Vec::new();
    for line in raw.lines() {
        let folded = line.to_ascii_lowercase();
        if SENSITIVE.iter().any(|needle| folded.contains(*needle)) {
            continue;
        }
        kept.push(line);
    }

    let joined = kept.join(" ");
    let message = joined.trim();
    if message.is_empty() {
        return "Vault SSH signing failed. Check vault status and policy".to_string();
    }

    // A character at index MAX_CHARS exists only when the message is longer
    // than the limit; its byte offset is where the text gets cut.
    match message.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", &message[..cut]),
        None => message.to_string(),
    }
}
147
148/// Return the certificate path for a given alias: `~/.purple/certs/<alias>-cert.pub`
149pub fn cert_path_for(alias: &str) -> Result<PathBuf> {
150    anyhow::ensure!(
151        !alias.is_empty()
152            && !alias.contains('/')
153            && !alias.contains('\\')
154            && !alias.contains(':')
155            && !alias.contains('\0')
156            && !alias.contains(".."),
157        "Invalid alias for cert path: '{}'",
158        alias
159    );
160    let dir = dirs::home_dir()
161        .context("Could not determine home directory")?
162        .join(".purple/certs");
163    Ok(dir.join(format!("{}-cert.pub", alias)))
164}
165
166/// Resolve the actual certificate file path for a host.
167/// Priority: CertificateFile directive > purple's default cert path.
168pub fn resolve_cert_path(alias: &str, certificate_file: &str) -> Result<PathBuf> {
169    if !certificate_file.is_empty() {
170        let expanded = if let Some(rest) = certificate_file.strip_prefix("~/") {
171            if let Some(home) = dirs::home_dir() {
172                home.join(rest)
173            } else {
174                PathBuf::from(certificate_file)
175            }
176        } else {
177            PathBuf::from(certificate_file)
178        };
179        Ok(expanded)
180    } else {
181        cert_path_for(alias)
182    }
183}
184
185/// Sign an SSH public key via Vault SSH secrets engine.
186/// Runs: `vault write -field=signed_key <role> public_key=@<pubkey_path>`
187/// Writes the signed certificate to `~/.purple/certs/<alias>-cert.pub`.
188///
189/// When `vault_addr` is `Some`, it is set as the `VAULT_ADDR` env var on the
190/// `vault` subprocess, overriding whatever the parent shell has configured.
191/// When `None`, the subprocess inherits the parent's env (current behavior).
192/// This lets purple users configure Vault address at the provider or host
193/// level without needing to launch purple from a pre-exported shell.
194pub fn sign_certificate(
195    role: &str,
196    pubkey_path: &Path,
197    alias: &str,
198    vault_addr: Option<&str>,
199) -> Result<SignResult> {
200    if !pubkey_path.exists() {
201        anyhow::bail!(
202            "Public key not found: {}. Set IdentityFile on the host or ensure ~/.ssh/id_ed25519.pub exists.",
203            pubkey_path.display()
204        );
205    }
206
207    if !is_valid_role(role) {
208        anyhow::bail!("Invalid Vault SSH role: '{}'", role);
209    }
210
211    let cert_dest = cert_path_for(alias)?;
212
213    if let Some(parent) = cert_dest.parent() {
214        std::fs::create_dir_all(parent)
215            .with_context(|| crate::messages::vault_create_dir_failed(&parent.display()))?;
216    }
217
218    // The Vault CLI receives the public key path as a UTF-8 argument. `Path::display()`
219    // is lossy on non-UTF8 paths and could produce a mangled path Vault would then fail
220    // to read. Require a valid UTF-8 path and fail fast with a clear message.
221    let pubkey_str = pubkey_path.to_str().context(
222        "public key path contains non-UTF8 bytes; vault CLI requires a valid UTF-8 path",
223    )?;
224    // The Vault CLI parses arguments as `key=value` KV pairs. A path containing
225    // `=` would be split mid-argument and produce a cryptic parse error. The
226    // check runs on the already-resolved (tilde-expanded) path because that is
227    // exactly the byte sequence the CLI will see. A user with a `$HOME` path
228    // that itself contains `=` will hit this early; the error message reports
229    // the expanded path so they can rename the offending directory.
230    if pubkey_str.contains('=') {
231        anyhow::bail!(
232            "Public key path '{}' contains '=' which is not supported by the Vault CLI argument format. Rename the key file or directory.",
233            pubkey_str
234        );
235    }
236    let pubkey_arg = format!("public_key=@{}", pubkey_str);
237    debug!(
238        "[external] Vault sign request: addr={} role={}",
239        vault_addr.unwrap_or("<env>"),
240        role
241    );
242    let mut cmd = Command::new("vault");
243    cmd.args(["write", "-field=signed_key", role, &pubkey_arg]);
244    // Override VAULT_ADDR for this subprocess only when a value was resolved
245    // from config. Otherwise leave the env untouched so `vault` keeps using
246    // whatever the parent shell (or `~/.vault-token`) provides. The caller
247    // (typically `resolve_vault_addr`) is expected to have validated and
248    // trimmed the value already — re-checking here is cheap belt-and-braces
249    // for callers that construct the `Option<&str>` manually.
250    if let Some(addr) = vault_addr {
251        anyhow::ensure!(
252            is_valid_vault_addr(addr),
253            "Invalid VAULT_ADDR '{}' for role '{}'. Check the Vault SSH Address field.",
254            addr,
255            role
256        );
257        cmd.env("VAULT_ADDR", addr);
258    }
259    let mut child = cmd
260        .stdout(std::process::Stdio::piped())
261        .stderr(std::process::Stdio::piped())
262        .spawn()
263        .context("Failed to run vault CLI. Is vault installed and in PATH?")?;
264
265    // Drain both pipes on background threads to prevent pipe-buffer deadlock.
266    // Without this, the vault CLI can block writing to a full stderr pipe
267    // (64 KB) while we poll try_wait, causing a false timeout.
268    let stdout_handle = child.stdout.take();
269    let stderr_handle = child.stderr.take();
270    let stdout_thread = std::thread::spawn(move || -> Vec<u8> {
271        let mut buf = Vec::new();
272        if let Some(mut h) = stdout_handle {
273            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
274                log::warn!("[external] Failed to read vault stdout pipe: {e}");
275            }
276        }
277        buf
278    });
279    let stderr_thread = std::thread::spawn(move || -> Vec<u8> {
280        let mut buf = Vec::new();
281        if let Some(mut h) = stderr_handle {
282            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
283                log::warn!("[external] Failed to read vault stderr pipe: {e}");
284            }
285        }
286        buf
287    });
288
289    // Wait up to 30 seconds for the vault CLI to complete. Without a timeout
290    // the thread blocks indefinitely when the Vault server is unreachable
291    // (e.g. wrong address, firewall, TLS handshake hanging).
292    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
293    let status = loop {
294        match child.try_wait() {
295            Ok(Some(s)) => break s,
296            Ok(None) => {
297                if std::time::Instant::now() >= deadline {
298                    let _ = child.kill();
299                    let _ = child.wait();
300                    // The pipe-drain threads (stdout_thread, stderr_thread)
301                    // are dropped without joining here. This is intentional:
302                    // kill() closes the child's pipe ends, so read_to_end
303                    // returns immediately and the threads self-terminate.
304                    error!(
305                        "[external] Vault unreachable: {}: timed out after 30s",
306                        vault_addr.unwrap_or("<env>")
307                    );
308                    anyhow::bail!("Vault SSH timed out. Server unreachable.");
309                }
310                std::thread::sleep(std::time::Duration::from_millis(100));
311            }
312            Err(e) => {
313                let _ = child.kill();
314                let _ = child.wait();
315                anyhow::bail!("Failed to wait for vault CLI: {}", e);
316            }
317        }
318    };
319
320    let stdout_bytes = stdout_thread.join().unwrap_or_default();
321    let stderr_bytes = stderr_thread.join().unwrap_or_default();
322    let output = std::process::Output {
323        status,
324        stdout: stdout_bytes,
325        stderr: stderr_bytes,
326    };
327
328    if !output.status.success() {
329        let stderr = String::from_utf8_lossy(&output.stderr);
330        if stderr.contains("permission denied") || stderr.contains("403") {
331            error!(
332                "[external] Vault auth failed: permission denied (role={} addr={})",
333                role,
334                vault_addr.unwrap_or("<env>")
335            );
336            anyhow::bail!("Vault SSH permission denied. Check token and policy.");
337        }
338        if stderr.contains("missing client token") || stderr.contains("token expired") {
339            error!(
340                "[external] Vault auth failed: token missing or expired (role={} addr={})",
341                role,
342                vault_addr.unwrap_or("<env>")
343            );
344            anyhow::bail!("Vault SSH token missing or expired. Run `vault login`.");
345        }
346        // Check "connection refused" before "dial tcp" because Go's
347        // refused-connection error contains both substrings.
348        if stderr.contains("connection refused") {
349            error!(
350                "[external] Vault unreachable: {}: connection refused",
351                vault_addr.unwrap_or("<env>")
352            );
353            anyhow::bail!("Vault SSH connection refused.");
354        }
355        if stderr.contains("i/o timeout") || stderr.contains("dial tcp") {
356            error!(
357                "[external] Vault unreachable: {}: connection timed out",
358                vault_addr.unwrap_or("<env>")
359            );
360            anyhow::bail!("Vault SSH connection timed out.");
361        }
362        if stderr.contains("no such host") {
363            error!(
364                "[external] Vault unreachable: {}: no such host",
365                vault_addr.unwrap_or("<env>")
366            );
367            anyhow::bail!("Vault SSH host not found.");
368        }
369        if stderr.contains("server gave HTTP response to HTTPS client") {
370            error!(
371                "[external] Vault unreachable: {}: server returned HTTP on HTTPS connection",
372                vault_addr.unwrap_or("<env>")
373            );
374            anyhow::bail!("Vault SSH server uses HTTP, not HTTPS. Set address to http://.");
375        }
376        if stderr.contains("certificate signed by unknown authority")
377            || stderr.contains("tls:")
378            || stderr.contains("x509:")
379        {
380            error!(
381                "[external] Vault unreachable: {}: TLS error",
382                vault_addr.unwrap_or("<env>")
383            );
384            anyhow::bail!("Vault SSH TLS error. Check certificate or use http://.");
385        }
386        error!(
387            "[external] Vault SSH signing failed: {}",
388            scrub_vault_stderr(&stderr)
389        );
390        anyhow::bail!("Vault SSH failed: {}", scrub_vault_stderr(&stderr));
391    }
392
393    let signed_key = String::from_utf8_lossy(&output.stdout).trim().to_string();
394    if signed_key.is_empty() {
395        anyhow::bail!("Vault returned empty certificate for role '{}'", role);
396    }
397
398    crate::fs_util::atomic_write(&cert_dest, signed_key.as_bytes())
399        .with_context(|| crate::messages::vault_write_cert_failed(&cert_dest.display()))?;
400
401    info!("Vault SSH certificate signed for {}", alias);
402    Ok(SignResult {
403        cert_path: cert_dest,
404    })
405}
406
407/// Check the validity of an SSH certificate file via `ssh-keygen -L`.
408///
409/// Timezone note: `ssh-keygen -L` outputs local civil time, which `parse_ssh_datetime`
410/// converts to pseudo-epoch seconds. Rather than comparing against UTC `now` (which would
411/// be wrong in non-UTC zones), we compute the TTL from the parsed from/to difference
412/// (timezone-independent) and measure elapsed time since the cert file was written (UTC
413/// file mtime vs UTC now). This keeps both sides in the same reference frame.
414pub fn check_cert_validity(cert_path: &Path) -> CertStatus {
415    if !cert_path.exists() {
416        return CertStatus::Missing;
417    }
418
419    let output = match Command::new("ssh-keygen")
420        .args(["-L", "-f"])
421        .arg(cert_path)
422        .output()
423    {
424        Ok(o) => o,
425        Err(e) => return CertStatus::Invalid(crate::messages::vault_ssh_keygen_run_failed(&e)),
426    };
427
428    if !output.status.success() {
429        return CertStatus::Invalid("ssh-keygen could not read certificate".to_string());
430    }
431
432    let stdout = String::from_utf8_lossy(&output.stdout);
433
434    // Handle certificates signed with no expiration ("Valid: forever").
435    for line in stdout.lines() {
436        let t = line.trim();
437        if t == "Valid: forever" || t.starts_with("Valid: from ") && t.ends_with(" to forever") {
438            return CertStatus::Valid {
439                expires_at: i64::MAX,
440                remaining_secs: i64::MAX,
441                total_secs: i64::MAX,
442            };
443        }
444    }
445
446    for line in stdout.lines() {
447        if let Some((from, to)) = parse_valid_line(line) {
448            let ttl = to - from; // Correct regardless of timezone
449            // Defensive: a cert with to < from is malformed. Treat as Invalid
450            // rather than propagating a negative ttl into the cache and the
451            // renewal threshold calculation.
452            if ttl <= 0 {
453                return CertStatus::Invalid(
454                    "certificate has non-positive validity window".to_string(),
455                );
456            }
457
458            // Use file modification time as the signing timestamp (UTC)
459            let signed_at = match std::fs::metadata(cert_path)
460                .and_then(|m| m.modified())
461                .ok()
462                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
463            {
464                Some(d) => d.as_secs() as i64,
465                None => {
466                    // Cannot determine file age. Treat as needing renewal.
467                    return CertStatus::Expired;
468                }
469            };
470
471            let now = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
472                Ok(d) => d.as_secs() as i64,
473                Err(_) => {
474                    return CertStatus::Invalid("system clock before unix epoch".to_string());
475                }
476            };
477
478            let elapsed = now - signed_at;
479            let remaining = ttl - elapsed;
480
481            if remaining <= 0 {
482                return CertStatus::Expired;
483            }
484            let expires_at = now + remaining;
485            return CertStatus::Valid {
486                expires_at,
487                remaining_secs: remaining,
488                total_secs: ttl,
489            };
490        }
491    }
492
493    CertStatus::Invalid("No Valid: line found in certificate".to_string())
494}
495
496/// Parse "Valid: from YYYY-MM-DDTHH:MM:SS to YYYY-MM-DDTHH:MM:SS" from ssh-keygen -L.
497fn parse_valid_line(line: &str) -> Option<(i64, i64)> {
498    let trimmed = line.trim();
499    let rest = trimmed.strip_prefix("Valid:")?;
500    let rest = rest.trim();
501    let rest = rest.strip_prefix("from ")?;
502    let (from_str, rest) = rest.split_once(" to ")?;
503    let to_str = rest.trim();
504
505    let from = parse_ssh_datetime(from_str)?;
506    let to = parse_ssh_datetime(to_str)?;
507    Some((from, to))
508}
509
/// Convert a `YYYY-MM-DDTHH:MM:SS` timestamp into seconds since the Unix
/// epoch, interpreting the civil date/time as if it were UTC.
///
/// Anything after the first 19 bytes is ignored. Returns `None` when the
/// input is too short, a separator is wrong, a field is not numeric, or a
/// field is out of range (month 1-12, day 1-31, hour 0-23, minute and second
/// 0-59; the day is not checked against the month's length).
///
/// Note: no timezone conversion is applied, so for a timestamp printed in
/// local time the absolute result is off by the UTC offset. The difference
/// between two values parsed from the same output is unaffected.
fn parse_ssh_datetime(s: &str) -> Option<i64> {
    let s = s.trim();
    if s.len() < 19 {
        return None;
    }

    // Fixed-position separators of the `YYYY-MM-DDTHH:MM:SS` layout.
    let bytes = s.as_bytes();
    let separators = [(4, b'-'), (7, b'-'), (10, b'T'), (13, b':'), (16, b':')];
    if separators
        .iter()
        .any(|&(idx, expected)| bytes.get(idx) != Some(&expected))
    {
        return None;
    }

    let field = |range: std::ops::Range<usize>| -> Option<i64> { s.get(range)?.parse().ok() };
    let year = field(0..4)?;
    let month = field(5..7)?;
    let day = field(8..10)?;
    let hour = field(11..13)?;
    let min = field(14..16)?;
    let sec = field(17..19)?;

    let date_ok = (1..=12).contains(&month) && (1..=31).contains(&day);
    let time_ok =
        (0..=23).contains(&hour) && (0..=59).contains(&min) && (0..=59).contains(&sec);
    if !(date_ok && time_ok) {
        return None;
    }

    // Civil date to day count: the year is shifted to start in March so the
    // leap day falls at the end, then days are counted per 400-year era.
    let (y, shifted_month) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };
    let era = y.div_euclid(400);
    let year_of_era = y.rem_euclid(400);
    let day_of_year = (153 * shifted_month + 2) / 5 + day - 1;
    let day_of_era = year_of_era * 365 + year_of_era / 4 - year_of_era / 100 + day_of_year;
    let days = era * 146097 + day_of_era - 719468;

    Some(days * 86400 + hour * 3600 + min * 60 + sec)
}
558
559/// Check if a certificate needs renewal.
560///
561/// For certificates whose total validity window is shorter than
562/// `RENEWAL_THRESHOLD_SECS`, the fixed 5-minute threshold would flag a freshly
563/// signed cert as needing renewal immediately, causing an infinite re-sign loop.
564/// In that case we fall back to a proportional threshold (half the total).
565pub fn needs_renewal(status: &CertStatus) -> bool {
566    match status {
567        CertStatus::Missing | CertStatus::Expired | CertStatus::Invalid(_) => true,
568        CertStatus::Valid {
569            remaining_secs,
570            total_secs,
571            ..
572        } => {
573            let threshold = if *total_secs > 0 && *total_secs <= RENEWAL_THRESHOLD_SECS {
574                *total_secs / 2
575            } else {
576                RENEWAL_THRESHOLD_SECS
577            };
578            *remaining_secs < threshold
579        }
580    }
581}
582
583/// Ensure a valid certificate exists for a host. Signs a new one if needed.
584/// Checks at the CertificateFile path (or purple's default) before signing.
585pub fn ensure_cert(
586    role: &str,
587    pubkey_path: &Path,
588    alias: &str,
589    certificate_file: &str,
590    vault_addr: Option<&str>,
591) -> Result<PathBuf> {
592    let check_path = resolve_cert_path(alias, certificate_file)?;
593    let status = check_cert_validity(&check_path);
594
595    if !needs_renewal(&status) {
596        info!(
597            "Vault SSH certificate cache hit: alias={} role={} path={}",
598            alias,
599            role,
600            check_path.display()
601        );
602        return Ok(check_path);
603    }
604
605    log::debug!(
606        "Vault SSH certificate cache miss: alias={} role={} status={:?} -> signing",
607        alias,
608        role,
609        status
610    );
611    let result = sign_certificate(role, pubkey_path, alias, vault_addr)?;
612    Ok(result.cert_path)
613}
614
615/// Resolve the public key path for signing.
616/// Priority: host IdentityFile + ".pub" > ~/.ssh/id_ed25519.pub fallback.
617/// Returns an error when the user's home directory cannot be determined. Any
618/// IdentityFile pointing outside `$HOME` is rejected and falls back to the
619/// default `~/.ssh/id_ed25519.pub` to prevent reading arbitrary filesystem
620/// locations via a crafted IdentityFile directive.
621pub fn resolve_pubkey_path(identity_file: &str) -> Result<PathBuf> {
622    let home = dirs::home_dir().context("Could not determine home directory")?;
623    let fallback = home.join(".ssh/id_ed25519.pub");
624
625    if identity_file.is_empty() {
626        return Ok(fallback);
627    }
628
629    let expanded = if let Some(rest) = identity_file.strip_prefix("~/") {
630        home.join(rest)
631    } else {
632        PathBuf::from(identity_file)
633    };
634
635    // A purely lexical `starts_with(&home)` check can be bypassed by a symlink inside
636    // $HOME pointing to a path outside $HOME (e.g. ~/evil -> /etc). Canonicalize both
637    // sides so symlinks are resolved, then compare. If the expanded path does not yet
638    // exist (or canonicalize fails for any reason) we cannot safely reason about where
639    // it actually points, so fall back to the default key path.
640    let canonical_home = match std::fs::canonicalize(&home) {
641        Ok(p) => p,
642        Err(_) => return Ok(fallback),
643    };
644    if expanded.exists() {
645        match std::fs::canonicalize(&expanded) {
646            Ok(canonical) if canonical.starts_with(&canonical_home) => {}
647            _ => return Ok(fallback),
648        }
649    } else if !expanded.starts_with(&home) {
650        return Ok(fallback);
651    }
652
653    if expanded.extension().is_some_and(|ext| ext == "pub") {
654        Ok(expanded)
655    } else {
656        let mut s = expanded.into_os_string();
657        s.push(".pub");
658        Ok(PathBuf::from(s))
659    }
660}
661
662/// Resolve the effective vault role for a host.
663/// Priority: host-level vault_ssh > provider-level vault_role > None.
664///
665/// `provider_label` selects between multiple labeled configs of the same
666/// provider. None means a bare config (legacy 2-segment marker).
667pub fn resolve_vault_role(
668    host_vault_ssh: Option<&str>,
669    provider_name: Option<&str>,
670    provider_label: Option<&str>,
671    provider_config: &crate::providers::config::ProviderConfig,
672) -> Option<String> {
673    if let Some(role) = host_vault_ssh {
674        if !role.is_empty() {
675            return Some(role.to_string());
676        }
677    }
678
679    if let Some(name) = provider_name {
680        let id = crate::providers::config::ProviderConfigId {
681            provider: name.to_string(),
682            label: provider_label.map(|s| s.to_string()),
683        };
684        let section = provider_config
685            .section_by_id(&id)
686            .or_else(|| provider_config.section(name));
687        if let Some(section) = section {
688            if !section.vault_role.is_empty() {
689                return Some(section.vault_role.clone());
690            }
691        }
692    }
693
694    None
695}
696
697/// Resolve the effective Vault address for a host.
698///
699/// Precedence (highest wins): per-host `# purple:vault-addr` comment,
700/// provider `vault_addr=` setting, else None (caller falls back to the
701/// `vault` CLI's own env resolution).
702///
703/// Both layers are re-validated with `is_valid_vault_addr` even though the
704/// parser paths (`HostBlock::vault_addr()` and `ProviderConfig::parse`)
705/// already drop invalid values. This is defensive: a future caller that
706/// constructs a `HostEntry` or `ProviderSection` in-memory (tests, migration
707/// code, a new feature) won't be able to smuggle a malformed `VAULT_ADDR`
708/// into `sign_certificate` through this resolver.
709pub fn resolve_vault_addr(
710    host_vault_addr: Option<&str>,
711    provider_name: Option<&str>,
712    provider_label: Option<&str>,
713    provider_config: &crate::providers::config::ProviderConfig,
714) -> Option<String> {
715    if let Some(addr) = host_vault_addr {
716        let trimmed = addr.trim();
717        if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
718            return Some(normalize_vault_addr(trimmed));
719        }
720    }
721
722    if let Some(name) = provider_name {
723        let id = crate::providers::config::ProviderConfigId {
724            provider: name.to_string(),
725            label: provider_label.map(|s| s.to_string()),
726        };
727        let section = provider_config
728            .section_by_id(&id)
729            .or_else(|| provider_config.section(name));
730        if let Some(section) = section {
731            let trimmed = section.vault_addr.trim();
732            if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
733                return Some(normalize_vault_addr(trimmed));
734            }
735        }
736    }
737
738    None
739}
740
741/// Resolve the effective ProxyJump chain for an alias by asking ssh itself.
742///
743/// Uses `ssh -G -F <config> <alias>` so wildcard patterns and `Match` blocks
744/// contribute the same way they do at connect time. Without this, a host that
745/// inherits ProxyJump from a wildcard (e.g. `Host *prod*  ProxyJump bastion`)
746/// would look like it has no proxy when read from its own block alone.
747///
748/// Returns aliases in dependency order: proxies first, the target last. The
749/// target is always present, even when ssh resolution yields nothing. Cycles
750/// are broken with a visited set. Hosts referenced via ProxyJump that have no
751/// matching `Host` block in the config still appear in the chain so callers
752/// can decide what to do with them; existence is verified by the caller.
753pub fn resolve_proxy_chain(config_path: &Path, alias: &str) -> Vec<String> {
754    let mut chain: Vec<String> = Vec::new();
755    let mut visited: HashSet<String> = HashSet::new();
756    let mut queue: Vec<String> = vec![alias.to_string()];
757
758    while let Some(current) = queue.pop() {
759        if !visited.insert(current.clone()) {
760            continue;
761        }
762        chain.push(current.clone());
763
764        let output = Command::new("ssh")
765            .args(["-G", "-F"])
766            .arg(config_path)
767            .arg("--")
768            .arg(&current)
769            .output();
770
771        let Ok(output) = output else {
772            debug!("[external] ssh -G failed for {}: spawn error", current);
773            continue;
774        };
775        if !output.status.success() {
776            debug!(
777                "[external] ssh -G non-zero exit for {} (code {:?})",
778                current,
779                output.status.code()
780            );
781            continue;
782        }
783
784        let stdout = String::from_utf8_lossy(&output.stdout);
785        for line in stdout.lines() {
786            let lower = line.to_ascii_lowercase();
787            let Some(rest) = lower.strip_prefix("proxyjump ") else {
788                continue;
789            };
790            // ssh -G emits literal "none" when no proxy is configured.
791            if rest.trim() == "none" {
792                continue;
793            }
794            // Use the original-case slice for the value; ssh prints the
795            // proxyjump value verbatim after the lower-cased key.
796            // strip_prefix already guarantees line.len() >= "proxyjump ".len().
797            let value = &line["proxyjump ".len()..];
798            for jump in value.split(',') {
799                let host = parse_proxy_jump_host(jump.trim());
800                if !host.is_empty() {
801                    queue.push(host.to_string());
802                }
803            }
804        }
805    }
806
807    chain.reverse();
808    chain
809}
810
/// Extract the host portion from a single ProxyJump entry.
///
/// Accepts both forms ssh allows for a jump spec:
/// - `[user@]host[:port]`
/// - an ssh URI, `ssh://[user@]host[:port][/path]`
///
/// Bracketed hosts such as `[::1]:22` are returned without the brackets.
/// Surrounding whitespace is ignored. Returns an empty string when no host
/// can be extracted (e.g. empty input).
fn parse_proxy_jump_host(jump: &str) -> &str {
    let trimmed = jump.trim();

    // URI form: drop the scheme and anything after the authority. Without
    // this, `ssh://bastion:2222` would be split on the first `:` and yield
    // "ssh" as the host.
    let spec = match trimmed.strip_prefix("ssh://") {
        Some(rest) => rest.split('/').next().unwrap_or(rest),
        None => trimmed,
    };

    // Split on the last `@` so everything before it counts as the user part.
    let after_user = spec.rsplit_once('@').map_or(spec, |(_, host)| host);

    // Bracketed host: the host is whatever sits between `[` and `]`.
    if let Some(rest) = after_user.strip_prefix('[') {
        if let Some(end) = rest.find(']') {
            return &rest[..end];
        }
    }

    // Plain host: cut off an optional `:port` suffix.
    after_user.split(':').next().unwrap_or(after_user)
}
823
/// A single row of the Vault SSH strip shown on the Keys tab.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ActiveCert {
    /// Alias of the host this certificate belongs to.
    pub alias: String,
    /// Vault role taken from the host's `# purple:vault-ssh <role>` marker.
    pub role: String,
    /// Seconds of validity left on the certificate.
    pub remaining_secs: i64,
    /// Full validity window of the signed certificate, in seconds. The gauge
    /// divides `remaining_secs` by this to get its fill ratio.
    pub total_secs: i64,
}
837
838/// True iff a host has any purple-managed Vault context: either an
839/// explicit `# purple:vault-ssh` role marker, or a `CertificateFile`
840/// directive pointing into `~/.purple/certs/`. The second branch covers
841/// users who sign certs directly with the `vault` CLI and wire them in
842/// via `CertificateFile` without setting the role marker.
843pub fn has_purple_vault_context(host: &HostEntry) -> bool {
844    host.vault_ssh.is_some() || cert_file_in_purple_dir(&host.certificate_file)
845}
846
/// Reports whether a `CertificateFile` value looks like a purple-managed
/// cert, i.e. whether it references the per-user `.purple/certs/` directory.
///
/// This is a plain substring match, so it gives the same answer for
/// tilde-prefixed and absolute paths. An empty path is never a match.
pub fn cert_file_in_purple_dir(cert_file: &str) -> bool {
    const PURPLE_CERT_DIR: &str = "/.purple/certs/";
    // An empty string cannot contain the marker, so no separate emptiness
    // check is needed.
    cert_file.contains(PURPLE_CERT_DIR)
}
854
855/// True when any host has a purple-managed Vault context. The Keys-tab
856/// strip renders iff this returns true. Even hosts whose cert is not
857/// yet cached count, so the strip appears the moment the user
858/// configures their first Vault role or sets a cert path.
859pub fn vault_ssh_in_use(hosts: &[HostEntry]) -> bool {
860    hosts.iter().any(has_purple_vault_context)
861}
862
863/// Build the strip's row list from the cert cache. Hosts that have a
864/// configured role (or a purple-managed cert path) but no cached
865/// `Valid` status are omitted; the gauge has nothing to fill until the
866/// lazy cert check populates the cache. Sort: longest remaining first
867/// so the user sees healthy certs at the top and expiring ones at the
868/// bottom.
869pub fn active_certs_for_strip(
870    hosts: &[HostEntry],
871    cache: &HashMap<String, (Instant, CertStatus, Option<SystemTime>)>,
872) -> Vec<ActiveCert> {
873    // Recompute `remaining_secs` against the current wall clock instead
874    // of using the cached snapshot. The cached number was correct only
875    // at the moment the check ran; the strip is redrawn on every event
876    // tick (~20× per second), so deriving from `expires_at - now` gives
877    // a per-second countdown without re-running the cert validation.
878    let now = SystemTime::now()
879        .duration_since(SystemTime::UNIX_EPOCH)
880        .map(|d| d.as_secs() as i64)
881        .unwrap_or(0);
882    let mut rows: Vec<ActiveCert> = hosts
883        .iter()
884        .filter(|h| has_purple_vault_context(h))
885        .filter_map(|h| {
886            let role = h.vault_ssh.clone().unwrap_or_default();
887            match cache.get(&h.alias) {
888                Some((
889                    _,
890                    CertStatus::Valid {
891                        expires_at,
892                        remaining_secs,
893                        total_secs,
894                    },
895                    _,
896                )) => {
897                    // `expires_at == 0` is the demo sentinel for "no
898                    // wall clock"; fall back to the static cached value
899                    // so visual fixtures stay byte-deterministic.
900                    let live_remaining = if *expires_at == 0 {
901                        *remaining_secs
902                    } else {
903                        (*expires_at - now).max(0)
904                    };
905                    Some(ActiveCert {
906                        alias: h.alias.clone(),
907                        role,
908                        remaining_secs: live_remaining,
909                        total_secs: *total_secs,
910                    })
911                }
912                _ => None,
913            }
914        })
915        .collect();
916    rows.sort_by_key(|r| std::cmp::Reverse(r.remaining_secs));
917    rows
918}
919
/// Fill ratio in `0.0..=1.0` for a Vault SSH cert TTL gauge.
///
/// - Non-positive `remaining_secs` or `total_secs` gives `0.0`.
/// - A `total_secs` of `i64::MAX` ("Valid: forever"), or a remaining time at
///   or above the total (renewal overlap), gives `1.0`.
/// - Otherwise the result is `remaining_secs / total_secs`, clamped to the
///   unit interval.
///
/// The special cases are handled before the division, so the result is
/// never NaN.
pub fn cert_fill_ratio(remaining_secs: i64, total_secs: i64) -> f32 {
    match (remaining_secs, total_secs) {
        (left, total) if left <= 0 || total <= 0 => 0.0,
        (_, i64::MAX) => 1.0,
        (left, total) if left >= total => 1.0,
        (left, total) => (left as f32 / total as f32).clamp(0.0, 1.0),
    }
}
932
/// Render a certificate's remaining lifetime for display.
///
/// - Zero or negative input gives `"expired"`.
/// - Less than one hour gives `"<m>m"`; a positive value under one minute
///   shows as `"0m"`.
/// - Otherwise the result is `"<h>h <m>m"`.
///
/// Seconds are truncated, not rounded.
pub fn format_remaining(remaining_secs: i64) -> String {
    if remaining_secs <= 0 {
        return String::from("expired");
    }
    let whole_mins = remaining_secs / 60;
    match (whole_mins / 60, whole_mins % 60) {
        (0, mins) => format!("{}m", mins),
        (hours, mins) => format!("{}h {}m", hours, mins),
    }
}
946
947// Visible to sibling test modules (`main_tests.rs`) so they can share
948// `PATH_LOCK` and other process-global mocking helpers without spawning
949// a second lock that would race against this one.
950#[cfg(test)]
951#[path = "vault_ssh_tests.rs"]
952pub(crate) mod tests;