Skip to main content

purple_ssh/
vault_ssh.rs

1use anyhow::{Context, Result};
2use log::{debug, error, info};
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5use std::process::Command;
6use std::time::{Instant, SystemTime};
7
8use crate::ssh_config::model::HostEntry;
9
/// A single host paired with the Vault SSH role that signs its key, gathered
/// up front so several hosts can be signed in one batch.
#[derive(Clone, PartialEq)]
pub struct VaultSignTarget {
    /// Host alias the target refers to.
    pub alias: String,
    /// Vault SSH role to sign with.
    pub role: String,
    /// Certificate file setting for the host (may be empty).
    pub certificate_file: String,
    /// Public key that will be submitted for signing.
    pub pubkey: std::path::PathBuf,
    /// Optional Vault address; masked in `Debug` output.
    pub vault_addr: Option<String>,
}

/// Hand-written `Debug`: the Vault address is a server hostname that exposes
/// infrastructure topology, so `{:?}` prints a placeholder instead of the
/// real value. Whether an address is set (`Some`/`None`) stays visible.
impl std::fmt::Debug for VaultSignTarget {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Destructure so a newly added field cannot be forgotten silently.
        let Self {
            alias,
            role,
            certificate_file,
            pubkey,
            vault_addr,
        } = self;
        let masked_addr: Option<&str> = match vault_addr {
            Some(_) => Some("<redacted>"),
            None => None,
        };

        let mut out = f.debug_struct("VaultSignTarget");
        out.field("alias", alias);
        out.field("role", role);
        out.field("certificate_file", certificate_file);
        out.field("pubkey", pubkey);
        out.field("vault_addr", &masked_addr);
        out.finish()
    }
}
36
/// Result of a successful certificate signing operation.
#[derive(Debug)]
pub struct SignResult {
    /// Location of the signed certificate file that was written.
    pub cert_path: PathBuf,
}
42
/// Certificate validity status as reported by `check_cert_validity`.
#[derive(Debug, Clone, PartialEq)]
pub enum CertStatus {
    /// The certificate is still within its validity window. Certificates
    /// that never expire are reported with all three fields set to
    /// `i64::MAX`.
    Valid {
        /// Estimated expiry as Unix epoch seconds (`now + remaining_secs`).
        expires_at: i64,
        /// Seconds of validity left at the time of the check.
        remaining_secs: i64,
        /// Total certificate validity window in seconds (to - from), used by
        /// the UI to compute proportional freshness thresholds.
        total_secs: i64,
    },
    /// The validity window has elapsed, or the file's age could not be
    /// determined (treated as needing renewal).
    Expired,
    /// No certificate file exists at the checked path.
    Missing,
    /// The certificate could not be inspected or parsed; the payload is a
    /// human-readable reason.
    Invalid(String),
}
57
/// A certificate with less than this many seconds of validity left is
/// considered due for renewal (5 minutes).
pub const RENEWAL_THRESHOLD_SECS: i64 = 5 * 60;
60
/// How long (in seconds) an in-memory cert status entry is trusted before
/// `ssh-keygen -L` is run again against the on-disk certificate (5 minutes).
///
/// Not to be confused with `RENEWAL_THRESHOLD_SECS`: this value governs how
/// often a cert's validity is *re-checked*, whereas `RENEWAL_THRESHOLD_SECS`
/// is the minimum remaining lifetime below which a new signature is actually
/// requested from Vault.
pub const CERT_STATUS_CACHE_TTL_SECS: u64 = 5 * 60;
67
/// Shorter TTL (in seconds) for cached `CertStatus::Invalid` entries produced
/// by check failures (e.g. an unresolvable cert path). Error entries use this
/// backoff instead of the 5-minute re-check TTL so transient errors recover
/// quickly, while still not hammering the background check thread on every
/// poll tick.
pub const CERT_ERROR_BACKOFF_SECS: u64 = 30;
73
/// Check that `s` is an acceptable Vault SSH role path.
///
/// A role must be 1 to 128 bytes long and consist solely of ASCII letters,
/// ASCII digits, `/`, `_` and `-`.
pub fn is_valid_role(s: &str) -> bool {
    if s.is_empty() || s.len() > 128 {
        return false;
    }
    // Every allowed character is ASCII, so a byte-wise scan is sufficient:
    // any byte of a multi-byte character fails both tests.
    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'/' | b'_' | b'-'))
}
82
/// Sanity-check a `VAULT_ADDR` value before it is handed to the Vault CLI as
/// an environment variable.
///
/// Deliberately shallow: after trimming surrounding whitespace the value must
/// be non-empty, at most 512 bytes, and free of control characters and
/// whitespace. No URL parsing happens here — a typo simply surfaces as a
/// Vault CLI error. The length cap keeps a pathological config line from
/// bloating the environment block.
pub fn is_valid_vault_addr(s: &str) -> bool {
    let candidate = s.trim();
    if candidate.is_empty() || candidate.len() > 512 {
        return false;
    }
    candidate
        .chars()
        .all(|c| !c.is_control() && !c.is_whitespace())
}
95
/// Normalize a vault address so bare IPs and hostnames work.
///
/// * Prepends `https://` when no scheme is present. The default scheme is
///   `https://` because production Vault always uses TLS; dev-mode users can
///   set `http://` explicitly.
/// * Appends a default port when the authority has none: `:80` for
///   `http://`, `:443` for `https://`, `:8200` (Vault's default) for input
///   that had no scheme.
/// * Input with any other scheme (`ftp://`, ...) is returned trimmed but
///   otherwise untouched so the Vault CLI reports the error.
///
/// The port is inserted directly after the authority (`host[:port]`), i.e.
/// before any path, query (`?`) or fragment (`#`). Colons that appear after
/// the authority are never mistaken for a port separator.
pub fn normalize_vault_addr(s: &str) -> String {
    let trimmed = s.trim();
    // Case-insensitive scheme detection. ASCII lowercasing preserves byte
    // offsets, so scheme lengths measured on `lower` are valid for `trimmed`.
    let lower = trimmed.to_ascii_lowercase();
    let (with_scheme, scheme_len, default_port) = if lower.starts_with("https://") {
        (trimmed.to_string(), "https://".len(), 443)
    } else if lower.starts_with("http://") {
        (trimmed.to_string(), "http://".len(), 80)
    } else if trimmed.contains("://") {
        // Unknown scheme (ftp://, etc.) — return as-is, let the CLI error.
        return trimmed.to_string();
    } else {
        (format!("https://{}", trimmed), "https://".len(), 8200)
    };

    // The authority ends at the first path, query or fragment delimiter.
    let after_scheme = &with_scheme[scheme_len..];
    let authority_len = after_scheme
        .find(|c: char| matches!(c, '/' | '?' | '#'))
        .unwrap_or(after_scheme.len());
    let authority = &after_scheme[..authority_len];

    // IPv6 addresses use [::1]:port syntax. A colon inside brackets is not a
    // port separator, so only look at what follows the closing bracket.
    let has_port = match authority.rfind(']') {
        Some(bracket_end) => authority[bracket_end..].contains(':'),
        None => authority.contains(':'),
    };
    if has_port {
        return with_scheme;
    }

    let (head, tail) = with_scheme.split_at(scheme_len + authority_len);
    format!("{head}:{default_port}{tail}")
}
147
/// Turn raw Vault CLI stderr into a short message that is safe to display.
///
/// Any line mentioning a credential-like keyword (`token`, `secret`,
/// `x-vault-`, `cookie`, `authorization`; case-insensitive) is discarded.
/// The surviving lines are joined with single spaces and trimmed. If nothing
/// is left a generic failure message is returned; otherwise the text is cut
/// to its first 200 characters with `...` appended when it was longer.
pub fn scrub_vault_stderr(raw: &str) -> String {
    const SENSITIVE: [&str; 5] = ["token", "secret", "x-vault-", "cookie", "authorization"];
    const MAX_CHARS: usize = 200;

    let mut kept: Vec<&str> = Vec::new();
    for line in raw.lines() {
        let lowered = line.to_ascii_lowercase();
        if SENSITIVE.iter().any(|needle| lowered.contains(*needle)) {
            continue;
        }
        kept.push(line);
    }

    let joined = kept.join(" ");
    let message = joined.trim();
    if message.is_empty() {
        return "Vault SSH signing failed. Check vault status and policy".to_string();
    }

    // The byte offset of the 201st character (if any) is where to cut.
    match message.char_indices().nth(MAX_CHARS) {
        Some((cut, _)) => format!("{}...", &message[..cut]),
        None => message.to_string(),
    }
}
174
175/// Return the certificate path for a given alias: `~/.purple/certs/<alias>-cert.pub`
176pub fn cert_path_for(alias: &str) -> Result<PathBuf> {
177    anyhow::ensure!(
178        !alias.is_empty()
179            && !alias.contains('/')
180            && !alias.contains('\\')
181            && !alias.contains(':')
182            && !alias.contains('\0')
183            && !alias.contains(".."),
184        "Invalid alias for cert path: '{}'",
185        alias
186    );
187    let dir = dirs::home_dir()
188        .context("Could not determine home directory")?
189        .join(".purple/certs");
190    Ok(dir.join(format!("{}-cert.pub", alias)))
191}
192
193/// Resolve the actual certificate file path for a host.
194/// Priority: CertificateFile directive > purple's default cert path.
195pub fn resolve_cert_path(alias: &str, certificate_file: &str) -> Result<PathBuf> {
196    if !certificate_file.is_empty() {
197        let expanded = if let Some(rest) = certificate_file.strip_prefix("~/") {
198            if let Some(home) = dirs::home_dir() {
199                home.join(rest)
200            } else {
201                PathBuf::from(certificate_file)
202            }
203        } else {
204            PathBuf::from(certificate_file)
205        };
206        Ok(expanded)
207    } else {
208        cert_path_for(alias)
209    }
210}
211
212/// Sign an SSH public key via Vault SSH secrets engine.
213/// Runs: `vault write -field=signed_key <role> public_key=@<pubkey_path>`
214/// Writes the signed certificate to `~/.purple/certs/<alias>-cert.pub`.
215///
216/// When `vault_addr` is `Some`, it is set as the `VAULT_ADDR` env var on the
217/// `vault` subprocess, overriding whatever the parent shell has configured.
218/// When `None`, the subprocess inherits the parent's env (current behavior).
219/// This lets purple users configure Vault address at the provider or host
220/// level without needing to launch purple from a pre-exported shell.
221pub fn sign_certificate(
222    role: &str,
223    pubkey_path: &Path,
224    alias: &str,
225    vault_addr: Option<&str>,
226) -> Result<SignResult> {
227    if !pubkey_path.exists() {
228        anyhow::bail!(
229            "Public key not found: {}. Set IdentityFile on the host or ensure ~/.ssh/id_ed25519.pub exists.",
230            pubkey_path.display()
231        );
232    }
233
234    if !is_valid_role(role) {
235        anyhow::bail!("Invalid Vault SSH role: '{}'", role);
236    }
237
238    let cert_dest = cert_path_for(alias)?;
239
240    if let Some(parent) = cert_dest.parent() {
241        std::fs::create_dir_all(parent)
242            .with_context(|| crate::messages::vault_create_dir_failed(&parent.display()))?;
243    }
244
245    // The Vault CLI receives the public key path as a UTF-8 argument. `Path::display()`
246    // is lossy on non-UTF8 paths and could produce a mangled path Vault would then fail
247    // to read. Require a valid UTF-8 path and fail fast with a clear message.
248    let pubkey_str = pubkey_path.to_str().context(
249        "public key path contains non-UTF8 bytes; vault CLI requires a valid UTF-8 path",
250    )?;
251    // The Vault CLI parses arguments as `key=value` KV pairs. A path containing
252    // `=` would be split mid-argument and produce a cryptic parse error. The
253    // check runs on the already-resolved (tilde-expanded) path because that is
254    // exactly the byte sequence the CLI will see. A user with a `$HOME` path
255    // that itself contains `=` will hit this early; the error message reports
256    // the expanded path so they can rename the offending directory.
257    if pubkey_str.contains('=') {
258        anyhow::bail!(
259            "Public key path '{}' contains '=' which is not supported by the Vault CLI argument format. Rename the key file or directory.",
260            pubkey_str
261        );
262    }
263    let pubkey_arg = format!("public_key=@{}", pubkey_str);
264    debug!(
265        "[external] Vault sign request: addr={} role={}",
266        vault_addr.unwrap_or("<env>"),
267        role
268    );
269    let mut cmd = Command::new("vault");
270    cmd.args(["write", "-field=signed_key", role, &pubkey_arg]);
271    // Override VAULT_ADDR for this subprocess only when a value was resolved
272    // from config. Otherwise leave the env untouched so `vault` keeps using
273    // whatever the parent shell (or `~/.vault-token`) provides. The caller
274    // (typically `resolve_vault_addr`) is expected to have validated and
275    // trimmed the value already — re-checking here is cheap belt-and-braces
276    // for callers that construct the `Option<&str>` manually.
277    if let Some(addr) = vault_addr {
278        anyhow::ensure!(
279            is_valid_vault_addr(addr),
280            "Invalid VAULT_ADDR '{}' for role '{}'. Check the Vault SSH Address field.",
281            addr,
282            role
283        );
284        cmd.env("VAULT_ADDR", addr);
285    }
286    let mut child = cmd
287        .stdout(std::process::Stdio::piped())
288        .stderr(std::process::Stdio::piped())
289        .spawn()
290        .context("Failed to run vault CLI. Is vault installed and in PATH?")?;
291
292    // Drain both pipes on background threads to prevent pipe-buffer deadlock.
293    // Without this, the vault CLI can block writing to a full stderr pipe
294    // (64 KB) while we poll try_wait, causing a false timeout.
295    let stdout_handle = child.stdout.take();
296    let stderr_handle = child.stderr.take();
297    let stdout_thread = std::thread::spawn(move || -> Vec<u8> {
298        let mut buf = Vec::new();
299        if let Some(mut h) = stdout_handle {
300            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
301                log::warn!("[external] Failed to read vault stdout pipe: {e}");
302            }
303        }
304        buf
305    });
306    let stderr_thread = std::thread::spawn(move || -> Vec<u8> {
307        let mut buf = Vec::new();
308        if let Some(mut h) = stderr_handle {
309            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
310                log::warn!("[external] Failed to read vault stderr pipe: {e}");
311            }
312        }
313        buf
314    });
315
316    // Wait up to 30 seconds for the vault CLI to complete. Without a timeout
317    // the thread blocks indefinitely when the Vault server is unreachable
318    // (e.g. wrong address, firewall, TLS handshake hanging).
319    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
320    let status = loop {
321        match child.try_wait() {
322            Ok(Some(s)) => break s,
323            Ok(None) => {
324                if std::time::Instant::now() >= deadline {
325                    let _ = child.kill();
326                    let _ = child.wait();
327                    // The pipe-drain threads (stdout_thread, stderr_thread)
328                    // are dropped without joining here. This is intentional:
329                    // kill() closes the child's pipe ends, so read_to_end
330                    // returns immediately and the threads self-terminate.
331                    error!(
332                        "[external] Vault unreachable: {}: timed out after 30s",
333                        vault_addr.unwrap_or("<env>")
334                    );
335                    anyhow::bail!("Vault SSH timed out. Server unreachable.");
336                }
337                std::thread::sleep(std::time::Duration::from_millis(100));
338            }
339            Err(e) => {
340                let _ = child.kill();
341                let _ = child.wait();
342                anyhow::bail!("Failed to wait for vault CLI: {}", e);
343            }
344        }
345    };
346
347    let stdout_bytes = stdout_thread.join().unwrap_or_default();
348    let stderr_bytes = stderr_thread.join().unwrap_or_default();
349    let output = std::process::Output {
350        status,
351        stdout: stdout_bytes,
352        stderr: stderr_bytes,
353    };
354
355    if !output.status.success() {
356        let stderr = String::from_utf8_lossy(&output.stderr);
357        if stderr.contains("permission denied") || stderr.contains("403") {
358            error!(
359                "[external] Vault auth failed: permission denied (role={} addr={})",
360                role,
361                vault_addr.unwrap_or("<env>")
362            );
363            anyhow::bail!("Vault SSH permission denied. Check token and policy.");
364        }
365        if stderr.contains("missing client token") || stderr.contains("token expired") {
366            error!(
367                "[external] Vault auth failed: token missing or expired (role={} addr={})",
368                role,
369                vault_addr.unwrap_or("<env>")
370            );
371            anyhow::bail!("Vault SSH token missing or expired. Run `vault login`.");
372        }
373        // Check "connection refused" before "dial tcp" because Go's
374        // refused-connection error contains both substrings.
375        if stderr.contains("connection refused") {
376            error!(
377                "[external] Vault unreachable: {}: connection refused",
378                vault_addr.unwrap_or("<env>")
379            );
380            anyhow::bail!("Vault SSH connection refused.");
381        }
382        if stderr.contains("i/o timeout") || stderr.contains("dial tcp") {
383            error!(
384                "[external] Vault unreachable: {}: connection timed out",
385                vault_addr.unwrap_or("<env>")
386            );
387            anyhow::bail!("Vault SSH connection timed out.");
388        }
389        if stderr.contains("no such host") {
390            error!(
391                "[external] Vault unreachable: {}: no such host",
392                vault_addr.unwrap_or("<env>")
393            );
394            anyhow::bail!("Vault SSH host not found.");
395        }
396        if stderr.contains("server gave HTTP response to HTTPS client") {
397            error!(
398                "[external] Vault unreachable: {}: server returned HTTP on HTTPS connection",
399                vault_addr.unwrap_or("<env>")
400            );
401            anyhow::bail!("Vault SSH server uses HTTP, not HTTPS. Set address to http://.");
402        }
403        if stderr.contains("certificate signed by unknown authority")
404            || stderr.contains("tls:")
405            || stderr.contains("x509:")
406        {
407            error!(
408                "[external] Vault unreachable: {}: TLS error",
409                vault_addr.unwrap_or("<env>")
410            );
411            anyhow::bail!("Vault SSH TLS error. Check certificate or use http://.");
412        }
413        error!(
414            "[external] Vault SSH signing failed: {}",
415            scrub_vault_stderr(&stderr)
416        );
417        anyhow::bail!("Vault SSH failed: {}", scrub_vault_stderr(&stderr));
418    }
419
420    let signed_key = String::from_utf8_lossy(&output.stdout).trim().to_string();
421    if signed_key.is_empty() {
422        anyhow::bail!("Vault returned empty certificate for role '{}'", role);
423    }
424
425    crate::fs_util::atomic_write(&cert_dest, signed_key.as_bytes())
426        .with_context(|| crate::messages::vault_write_cert_failed(&cert_dest.display()))?;
427
428    info!("Vault SSH certificate signed for {}", alias);
429    Ok(SignResult {
430        cert_path: cert_dest,
431    })
432}
433
434/// Check the validity of an SSH certificate file via `ssh-keygen -L`.
435///
436/// Timezone note: `ssh-keygen -L` outputs local civil time, which `parse_ssh_datetime`
437/// converts to pseudo-epoch seconds. Rather than comparing against UTC `now` (which would
438/// be wrong in non-UTC zones), we compute the TTL from the parsed from/to difference
439/// (timezone-independent) and measure elapsed time since the cert file was written (UTC
440/// file mtime vs UTC now). This keeps both sides in the same reference frame.
441pub fn check_cert_validity(cert_path: &Path) -> CertStatus {
442    if !cert_path.exists() {
443        return CertStatus::Missing;
444    }
445
446    let output = match Command::new("ssh-keygen")
447        .args(["-L", "-f"])
448        .arg(cert_path)
449        .output()
450    {
451        Ok(o) => o,
452        Err(e) => return CertStatus::Invalid(crate::messages::vault_ssh_keygen_run_failed(&e)),
453    };
454
455    if !output.status.success() {
456        return CertStatus::Invalid("ssh-keygen could not read certificate".to_string());
457    }
458
459    let stdout = String::from_utf8_lossy(&output.stdout);
460
461    // Handle certificates signed with no expiration ("Valid: forever").
462    for line in stdout.lines() {
463        let t = line.trim();
464        if t == "Valid: forever" || t.starts_with("Valid: from ") && t.ends_with(" to forever") {
465            return CertStatus::Valid {
466                expires_at: i64::MAX,
467                remaining_secs: i64::MAX,
468                total_secs: i64::MAX,
469            };
470        }
471    }
472
473    for line in stdout.lines() {
474        if let Some((from, to)) = parse_valid_line(line) {
475            let ttl = to - from; // Correct regardless of timezone
476            // Defensive: a cert with to < from is malformed. Treat as Invalid
477            // rather than propagating a negative ttl into the cache and the
478            // renewal threshold calculation.
479            if ttl <= 0 {
480                return CertStatus::Invalid(
481                    "certificate has non-positive validity window".to_string(),
482                );
483            }
484
485            // Use file modification time as the signing timestamp (UTC)
486            let signed_at = match std::fs::metadata(cert_path)
487                .and_then(|m| m.modified())
488                .ok()
489                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
490            {
491                Some(d) => d.as_secs() as i64,
492                None => {
493                    // Cannot determine file age. Treat as needing renewal.
494                    return CertStatus::Expired;
495                }
496            };
497
498            let now = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
499                Ok(d) => d.as_secs() as i64,
500                Err(_) => {
501                    return CertStatus::Invalid("system clock before unix epoch".to_string());
502                }
503            };
504
505            let elapsed = now - signed_at;
506            let remaining = ttl - elapsed;
507
508            if remaining <= 0 {
509                return CertStatus::Expired;
510            }
511            let expires_at = now + remaining;
512            return CertStatus::Valid {
513                expires_at,
514                remaining_secs: remaining,
515                total_secs: ttl,
516            };
517        }
518    }
519
520    CertStatus::Invalid("No Valid: line found in certificate".to_string())
521}
522
523/// Parse "Valid: from YYYY-MM-DDTHH:MM:SS to YYYY-MM-DDTHH:MM:SS" from ssh-keygen -L.
524fn parse_valid_line(line: &str) -> Option<(i64, i64)> {
525    let trimmed = line.trim();
526    let rest = trimmed.strip_prefix("Valid:")?;
527    let rest = rest.trim();
528    let rest = rest.strip_prefix("from ")?;
529    let (from_str, rest) = rest.split_once(" to ")?;
530    let to_str = rest.trim();
531
532    let from = parse_ssh_datetime(from_str)?;
533    let to = parse_ssh_datetime(to_str)?;
534    Some((from, to))
535}
536
/// Parse a `YYYY-MM-DDTHH:MM:SS` timestamp into seconds since the Unix epoch.
///
/// The input is treated as a timezone-less civil time and no UTC offset is
/// applied. `ssh-keygen` prints local time, so the absolute value may be off
/// by the local UTC offset; the result is only meaningful as a *difference*
/// between two timestamps parsed the same way.
///
/// Surrounding whitespace is trimmed and anything after the 19th byte is
/// ignored. Returns `None` when the input is too short, a separator is
/// wrong, a field contains anything but ASCII digits (signs such as `+1` are
/// rejected), or a field is out of range — including days that do not exist
/// in the given month (e.g. Feb 30).
fn parse_ssh_datetime(s: &str) -> Option<i64> {
    let bytes = s.trim().as_bytes();
    if bytes.len() < 19 {
        return None;
    }

    let separators_ok = bytes[4] == b'-'
        && bytes[7] == b'-'
        && bytes[10] == b'T'
        && bytes[13] == b':'
        && bytes[16] == b':';
    if !separators_ok {
        return None;
    }

    // Fixed-width, digits-only field parser. `str::parse::<i64>` would also
    // accept a leading sign, which is never valid in this format.
    let field = |range: std::ops::Range<usize>| -> Option<i64> {
        bytes[range].iter().try_fold(0i64, |acc, &b| {
            if b.is_ascii_digit() {
                Some(acc * 10 + i64::from(b - b'0'))
            } else {
                None
            }
        })
    };
    let year = field(0..4)?;
    let month = field(5..7)?;
    let day = field(8..10)?;
    let hour = field(11..13)?;
    let min = field(14..16)?;
    let sec = field(17..19)?;

    let is_leap = year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
    let days_in_month = match month {
        2 if is_leap => 29,
        2 => 28,
        4 | 6 | 9 | 11 => 30,
        1 | 3 | 5 | 7 | 8 | 10 | 12 => 31,
        _ => return None,
    };
    if !(1..=days_in_month).contains(&day) {
        return None;
    }
    if !(0..=23).contains(&hour) || !(0..=59).contains(&min) || !(0..=59).contains(&sec) {
        return None;
    }

    // Civil date to days since the Unix epoch. The year is shifted to start
    // in March so the leap day falls at the end of the counted year.
    let (y, m) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };
    let era = (if y >= 0 { y } else { y - 399 }) / 400;
    let yoe = y - era * 400;
    let doy = (153 * m + 2) / 5 + day - 1;
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy;
    let days = era * 146097 + doe - 719468;

    Some(days * 86400 + hour * 3600 + min * 60 + sec)
}
585
586/// Check if a certificate needs renewal.
587///
588/// For certificates whose total validity window is shorter than
589/// `RENEWAL_THRESHOLD_SECS`, the fixed 5-minute threshold would flag a freshly
590/// signed cert as needing renewal immediately, causing an infinite re-sign loop.
591/// In that case we fall back to a proportional threshold (half the total).
592pub fn needs_renewal(status: &CertStatus) -> bool {
593    match status {
594        CertStatus::Missing | CertStatus::Expired | CertStatus::Invalid(_) => true,
595        CertStatus::Valid {
596            remaining_secs,
597            total_secs,
598            ..
599        } => {
600            let threshold = if *total_secs > 0 && *total_secs <= RENEWAL_THRESHOLD_SECS {
601                *total_secs / 2
602            } else {
603                RENEWAL_THRESHOLD_SECS
604            };
605            *remaining_secs < threshold
606        }
607    }
608}
609
610/// Ensure a valid certificate exists for a host. Signs a new one if needed.
611/// Checks at the CertificateFile path (or purple's default) before signing.
612pub fn ensure_cert(
613    role: &str,
614    pubkey_path: &Path,
615    alias: &str,
616    certificate_file: &str,
617    vault_addr: Option<&str>,
618) -> Result<PathBuf> {
619    let check_path = resolve_cert_path(alias, certificate_file)?;
620    let status = check_cert_validity(&check_path);
621
622    if !needs_renewal(&status) {
623        info!(
624            "Vault SSH certificate cache hit: alias={} role={} path={}",
625            alias,
626            role,
627            check_path.display()
628        );
629        return Ok(check_path);
630    }
631
632    log::debug!(
633        "Vault SSH certificate cache miss: alias={} role={} status={:?} -> signing",
634        alias,
635        role,
636        status
637    );
638    let result = sign_certificate(role, pubkey_path, alias, vault_addr)?;
639    Ok(result.cert_path)
640}
641
642/// Resolve the public key path for signing.
643/// Priority: host IdentityFile + ".pub" > ~/.ssh/id_ed25519.pub fallback.
644/// Returns an error when the user's home directory cannot be determined. Any
645/// IdentityFile pointing outside `$HOME` is rejected and falls back to the
646/// default `~/.ssh/id_ed25519.pub` to prevent reading arbitrary filesystem
647/// locations via a crafted IdentityFile directive.
648pub fn resolve_pubkey_path(identity_file: &str) -> Result<PathBuf> {
649    let home = dirs::home_dir().context("Could not determine home directory")?;
650    let fallback = home.join(".ssh/id_ed25519.pub");
651
652    if identity_file.is_empty() {
653        return Ok(fallback);
654    }
655
656    let expanded = if let Some(rest) = identity_file.strip_prefix("~/") {
657        home.join(rest)
658    } else {
659        PathBuf::from(identity_file)
660    };
661
662    // A purely lexical `starts_with(&home)` check can be bypassed by a symlink inside
663    // $HOME pointing to a path outside $HOME (e.g. ~/evil -> /etc). Canonicalize both
664    // sides so symlinks are resolved, then compare. If the expanded path does not yet
665    // exist (or canonicalize fails for any reason) we cannot safely reason about where
666    // it actually points, so fall back to the default key path.
667    let canonical_home = match std::fs::canonicalize(&home) {
668        Ok(p) => p,
669        Err(_) => return Ok(fallback),
670    };
671    if expanded.exists() {
672        match std::fs::canonicalize(&expanded) {
673            Ok(canonical) if canonical.starts_with(&canonical_home) => {}
674            _ => return Ok(fallback),
675        }
676    } else if !expanded.starts_with(&home) {
677        return Ok(fallback);
678    }
679
680    if expanded.extension().is_some_and(|ext| ext == "pub") {
681        Ok(expanded)
682    } else {
683        let mut s = expanded.into_os_string();
684        s.push(".pub");
685        Ok(PathBuf::from(s))
686    }
687}
688
689/// Resolve the effective vault role for a host.
690/// Priority: host-level vault_ssh > provider-level vault_role > None.
691///
692/// `provider_label` selects between multiple labeled configs of the same
693/// provider. None means a bare config (legacy 2-segment marker).
694pub fn resolve_vault_role(
695    host_vault_ssh: Option<&str>,
696    provider_name: Option<&str>,
697    provider_label: Option<&str>,
698    provider_config: &crate::providers::config::ProviderConfig,
699) -> Option<String> {
700    if let Some(role) = host_vault_ssh {
701        if !role.is_empty() {
702            return Some(role.to_string());
703        }
704    }
705
706    if let Some(name) = provider_name {
707        let id = crate::providers::config::ProviderConfigId {
708            provider: name.to_string(),
709            label: provider_label.map(|s| s.to_string()),
710        };
711        let section = provider_config
712            .section_by_id(&id)
713            .or_else(|| provider_config.section(name));
714        if let Some(section) = section {
715            if !section.vault_role.is_empty() {
716                return Some(section.vault_role.clone());
717            }
718        }
719    }
720
721    None
722}
723
724/// Resolve the effective Vault address for a host.
725///
726/// Precedence (highest wins): per-host `# purple:vault-addr` comment,
727/// provider `vault_addr=` setting, else None (caller falls back to the
728/// `vault` CLI's own env resolution).
729///
730/// Both layers are re-validated with `is_valid_vault_addr` even though the
731/// parser paths (`HostBlock::vault_addr()` and `ProviderConfig::parse`)
732/// already drop invalid values. This is defensive: a future caller that
733/// constructs a `HostEntry` or `ProviderSection` in-memory (tests, migration
734/// code, a new feature) won't be able to smuggle a malformed `VAULT_ADDR`
735/// into `sign_certificate` through this resolver.
736pub fn resolve_vault_addr(
737    host_vault_addr: Option<&str>,
738    provider_name: Option<&str>,
739    provider_label: Option<&str>,
740    provider_config: &crate::providers::config::ProviderConfig,
741) -> Option<String> {
742    if let Some(addr) = host_vault_addr {
743        let trimmed = addr.trim();
744        if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
745            return Some(normalize_vault_addr(trimmed));
746        }
747    }
748
749    if let Some(name) = provider_name {
750        let id = crate::providers::config::ProviderConfigId {
751            provider: name.to_string(),
752            label: provider_label.map(|s| s.to_string()),
753        };
754        let section = provider_config
755            .section_by_id(&id)
756            .or_else(|| provider_config.section(name));
757        if let Some(section) = section {
758            let trimmed = section.vault_addr.trim();
759            if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
760                return Some(normalize_vault_addr(trimmed));
761            }
762        }
763    }
764
765    None
766}
767
768/// Resolve the effective ProxyJump chain for an alias by asking ssh itself.
769///
770/// Uses `ssh -G -F <config> <alias>` so wildcard patterns and `Match` blocks
771/// contribute the same way they do at connect time. Without this, a host that
772/// inherits ProxyJump from a wildcard (e.g. `Host *prod*  ProxyJump bastion`)
773/// would look like it has no proxy when read from its own block alone.
774///
775/// Returns aliases in dependency order: proxies first, the target last. The
776/// target is always present, even when ssh resolution yields nothing. Cycles
777/// are broken with a visited set. Hosts referenced via ProxyJump that have no
778/// matching `Host` block in the config still appear in the chain so callers
779/// can decide what to do with them; existence is verified by the caller.
780pub fn resolve_proxy_chain(config_path: &Path, alias: &str) -> Vec<String> {
781    let mut chain: Vec<String> = Vec::new();
782    let mut visited: HashSet<String> = HashSet::new();
783    let mut queue: Vec<String> = vec![alias.to_string()];
784
785    while let Some(current) = queue.pop() {
786        if !visited.insert(current.clone()) {
787            continue;
788        }
789        chain.push(current.clone());
790
791        let output = Command::new("ssh")
792            .args(["-G", "-F"])
793            .arg(config_path)
794            .arg("--")
795            .arg(&current)
796            .output();
797
798        let Ok(output) = output else {
799            debug!("[external] ssh -G failed for {}: spawn error", current);
800            continue;
801        };
802        if !output.status.success() {
803            debug!(
804                "[external] ssh -G non-zero exit for {} (code {:?})",
805                current,
806                output.status.code()
807            );
808            continue;
809        }
810
811        let stdout = String::from_utf8_lossy(&output.stdout);
812        for line in stdout.lines() {
813            let lower = line.to_ascii_lowercase();
814            let Some(rest) = lower.strip_prefix("proxyjump ") else {
815                continue;
816            };
817            // ssh -G emits literal "none" when no proxy is configured.
818            if rest.trim() == "none" {
819                continue;
820            }
821            // Use the original-case slice for the value; ssh prints the
822            // proxyjump value verbatim after the lower-cased key.
823            // strip_prefix already guarantees line.len() >= "proxyjump ".len().
824            let value = &line["proxyjump ".len()..];
825            for jump in value.split(',') {
826                let host = parse_proxy_jump_host(jump.trim());
827                if !host.is_empty() {
828                    queue.push(host.to_string());
829                }
830            }
831        }
832    }
833
834    chain.reverse();
835    chain
836}
837
/// Extract the host portion from a single ProxyJump entry.
///
/// Two spellings are understood, matching what `ssh_config(5)` documents
/// for `ProxyJump`:
///
/// * `[user@]host[:port]`
/// * the URI form `ssh://[user@]host[:port]`
///
/// Bracketed hosts such as `[::1]:22` are returned without the brackets.
/// Surrounding whitespace is ignored. The result borrows from `jump` and is
/// empty when the entry has no host part (e.g. an empty string).
fn parse_proxy_jump_host(jump: &str) -> &str {
    let entry = jump.trim();

    // Drop the URI scheme first; otherwise `ssh://host:22` would be split
    // on the scheme's own ':' and yield "ssh" as the host.
    let entry = entry.strip_prefix("ssh://").unwrap_or(entry);

    // Everything up to the last '@' is the user part.
    let after_user = entry.rsplit_once('@').map_or(entry, |(_, host)| host);

    // Bracketed literal: the host is whatever sits between '[' and ']',
    // colons included.
    if let Some((host, _)) = after_user
        .strip_prefix('[')
        .and_then(|rest| rest.split_once(']'))
    {
        return host;
    }

    // Plain host: cut off an optional `:port` suffix.
    after_user.split(':').next().unwrap_or(after_user)
}
850
/// A single entry of the Vault SSH strip shown on the Keys tab.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ActiveCert {
    /// Alias of the host this certificate is for.
    pub alias: String,
    /// Vault role, as given by the host's `# purple:vault-ssh <role>` marker.
    pub role: String,
    /// How many seconds the certificate is still valid for.
    pub remaining_secs: i64,
    /// Length of the certificate's whole validity window, in seconds.
    /// The gauge fill is derived from `remaining_secs / total_secs`.
    pub total_secs: i64,
}
864
865/// True iff a host has any purple-managed Vault context: either an
866/// explicit `# purple:vault-ssh` role marker, or a `CertificateFile`
867/// directive pointing into `~/.purple/certs/`. The second branch covers
868/// users who sign certs directly with the `vault` CLI and wire them in
869/// via `CertificateFile` without setting the role marker.
870pub fn has_purple_vault_context(host: &HostEntry) -> bool {
871    host.vault_ssh.is_some() || cert_file_in_purple_dir(&host.certificate_file)
872}
873
/// Whether a `CertificateFile` value looks like a purple-managed cert.
///
/// The test is a plain substring match on `/.purple/certs/`, so it gives
/// the same answer for a tilde-prefixed path and for its expanded absolute
/// form. An empty path never matches.
pub fn cert_file_in_purple_dir(cert_file: &str) -> bool {
    /// Directory fragment shared by every purple-managed cert path.
    const PURPLE_CERTS_DIR: &str = "/.purple/certs/";

    // An empty string cannot contain the marker, so no separate emptiness
    // check is needed.
    cert_file.contains(PURPLE_CERTS_DIR)
}
881
882/// True when any host has a purple-managed Vault context. The Keys-tab
883/// strip renders iff this returns true. Even hosts whose cert is not
884/// yet cached count, so the strip appears the moment the user
885/// configures their first Vault role or sets a cert path.
886pub fn vault_ssh_in_use(hosts: &[HostEntry]) -> bool {
887    hosts.iter().any(has_purple_vault_context)
888}
889
890/// Build the strip's row list from the cert cache. Hosts that have a
891/// configured role (or a purple-managed cert path) but no cached
892/// `Valid` status are omitted; the gauge has nothing to fill until the
893/// lazy cert check populates the cache. Sort: longest remaining first
894/// so the user sees healthy certs at the top and expiring ones at the
895/// bottom.
896pub fn active_certs_for_strip(
897    hosts: &[HostEntry],
898    cache: &HashMap<String, (Instant, CertStatus, Option<SystemTime>)>,
899) -> Vec<ActiveCert> {
900    // Recompute `remaining_secs` against the current wall clock instead
901    // of using the cached snapshot. The cached number was correct only
902    // at the moment the check ran; the strip is redrawn on every event
903    // tick (~20× per second), so deriving from `expires_at - now` gives
904    // a per-second countdown without re-running the cert validation.
905    let now = SystemTime::now()
906        .duration_since(SystemTime::UNIX_EPOCH)
907        .map(|d| d.as_secs() as i64)
908        .unwrap_or(0);
909    let mut rows: Vec<ActiveCert> = hosts
910        .iter()
911        .filter(|h| has_purple_vault_context(h))
912        .filter_map(|h| {
913            let role = h.vault_ssh.clone().unwrap_or_default();
914            match cache.get(&h.alias) {
915                Some((
916                    _,
917                    CertStatus::Valid {
918                        expires_at,
919                        remaining_secs,
920                        total_secs,
921                    },
922                    _,
923                )) => {
924                    // `expires_at == 0` is the demo sentinel for "no
925                    // wall clock"; fall back to the static cached value
926                    // so visual fixtures stay byte-deterministic.
927                    let live_remaining = if *expires_at == 0 {
928                        *remaining_secs
929                    } else {
930                        (*expires_at - now).max(0)
931                    };
932                    Some(ActiveCert {
933                        alias: h.alias.clone(),
934                        role,
935                        remaining_secs: live_remaining,
936                        total_secs: *total_secs,
937                    })
938                }
939                _ => None,
940            }
941        })
942        .collect();
943    rows.sort_by_key(|r| std::cmp::Reverse(r.remaining_secs));
944    rows
945}
946
/// Fill ratio of a Vault SSH cert TTL gauge, always within `0.0..=1.0`.
///
/// * Non-positive `remaining_secs` or `total_secs` gives an empty gauge.
/// * A `total_secs` of `i64::MAX` ("Valid: forever") or a remaining time at
///   or above the total (renewal overlap) gives a full gauge.
/// * Otherwise the result is `remaining_secs / total_secs`.
///
/// Handling the edge cases up front keeps the division from ever producing
/// NaN or an out-of-range value.
pub fn cert_fill_ratio(remaining_secs: i64, total_secs: i64) -> f32 {
    match (remaining_secs, total_secs) {
        (left, total) if total <= 0 || left <= 0 => 0.0,
        (_, i64::MAX) => 1.0,
        (left, total) if left >= total => 1.0,
        (left, total) => (left as f32 / total as f32).clamp(0.0, 1.0),
    }
}
959
/// Render the remaining lifetime of a certificate for display.
///
/// Returns `"expired"` for zero or negative input, `"<h>h <m>m"` once at
/// least one full hour remains, and `"<m>m"` otherwise. Seconds are
/// truncated, so anything under a minute renders as `"0m"`.
pub fn format_remaining(remaining_secs: i64) -> String {
    if remaining_secs <= 0 {
        return "expired".to_string();
    }

    let whole_minutes = remaining_secs / 60;
    let (hours, mins) = (whole_minutes / 60, whole_minutes % 60);

    match hours {
        0 => format!("{}m", mins),
        _ => format!("{}h {}m", hours, mins),
    }
}
973
974// Visible to sibling test modules (`main_tests.rs`) so they can share
975// `ENV_LOCK` and other process-global mocking helpers without spawning
976// a second lock that would race against this one.
977#[cfg(test)]
978#[path = "vault_ssh_tests.rs"]
979pub(crate) mod tests;