// purple_ssh/vault_ssh.rs

1use anyhow::{Context, Result};
2use log::{debug, error, info};
3use std::collections::{HashMap, HashSet};
4use std::path::{Path, PathBuf};
5use std::process::Command;
6use std::time::{Instant, SystemTime};
7
8use crate::ssh_config::model::HostEntry;
9
/// A host that has been matched to a Vault SSH role and is ready to be
/// included in a bulk signing run.
#[derive(Clone, PartialEq)]
pub struct VaultSignTarget {
    /// Host alias the certificate is issued for.
    pub alias: String,
    /// Vault SSH role used to sign the key.
    pub role: String,
    /// Certificate file setting for the host.
    // NOTE(review): presumably the raw `CertificateFile` value, empty when
    // unset -- confirm against the code that builds these targets.
    pub certificate_file: String,
    /// Path of the public key to sign.
    pub pubkey: PathBuf,
    /// Vault address override; `None` when no override is set.
    pub vault_addr: Option<String>,
}

/// Hand-written `Debug`: the Vault server address reveals infrastructure
/// topology, so `{:?}` output only shows whether one is set, never its value.
impl std::fmt::Debug for VaultSignTarget {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Keep the Some/None shape but replace the payload with a placeholder.
        let masked_addr: Option<&str> = self.vault_addr.as_ref().map(|_| "<redacted>");

        let mut out = f.debug_struct("VaultSignTarget");
        out.field("alias", &self.alias);
        out.field("role", &self.role);
        out.field("certificate_file", &self.certificate_file);
        out.field("pubkey", &self.pubkey);
        out.field("vault_addr", &masked_addr);
        out.finish()
    }
}
36
/// Outcome of a successful certificate signing operation.
#[derive(Debug)]
pub struct SignResult {
    /// Location of the signed certificate on disk.
    pub cert_path: PathBuf,
}
42
/// Validity state of an on-disk SSH certificate.
#[derive(Debug, Clone, PartialEq)]
pub enum CertStatus {
    /// The certificate is still within its validity window.
    Valid {
        /// Estimated expiry time, in seconds.
        expires_at: i64,
        /// Seconds left before the certificate expires.
        remaining_secs: i64,
        /// Length of the whole validity window in seconds (to - from). The UI
        /// uses it to compute proportional freshness thresholds.
        total_secs: i64,
    },
    /// The validity window has elapsed.
    Expired,
    /// No certificate file exists at the checked path.
    Missing,
    /// The certificate could not be inspected; the payload says why.
    Invalid(String),
}
57
/// A certificate with fewer than this many seconds left is due for renewal
/// (5 minutes).
pub const RENEWAL_THRESHOLD_SECS: i64 = 5 * 60;

/// How long (in seconds) an entry in the in-memory cert status cache stays
/// fresh before `ssh-keygen -L` is run again on the on-disk certificate.
///
/// Not to be confused with `RENEWAL_THRESHOLD_SECS`: this value governs how
/// often a cert's validity is *re-checked*, whereas `RENEWAL_THRESHOLD_SECS`
/// is the minimum remaining lifetime below which a new signature is actually
/// requested from Vault.
pub const CERT_STATUS_CACHE_TTL_SECS: u64 = 5 * 60;

/// Cache lifetime (in seconds) for `CertStatus::Invalid` entries produced by
/// check failures such as an unresolvable cert path. It is shorter than the
/// 5-minute re-check TTL so transient errors recover quickly, yet long enough
/// that the background check thread is not hammered on every poll tick.
pub const CERT_ERROR_BACKOFF_SECS: u64 = 30;
73
/// Check that `s` is an acceptable Vault SSH role path.
///
/// A role is accepted when it is 1 to 128 bytes long and consists solely of
/// ASCII letters, ASCII digits, `/`, `_` and `-`.
pub fn is_valid_role(s: &str) -> bool {
    const MAX_ROLE_LEN: usize = 128;

    if s.is_empty() || s.len() > MAX_ROLE_LEN {
        return false;
    }
    // Every byte of a non-ASCII character is >= 0x80, so a byte-wise scan
    // rejects exactly the same inputs as a char-wise one.
    s.bytes()
        .all(|b| b.is_ascii_alphanumeric() || matches!(b, b'/' | b'_' | b'-'))
}
82
/// Sanity-check a `VAULT_ADDR` value before it is handed to the Vault CLI as
/// an environment variable.
///
/// The check is deliberately shallow. After trimming surrounding whitespace
/// the value must be non-empty, at most 512 bytes, and free of control
/// characters and whitespace. No URL parsing happens here: a typo simply
/// surfaces as a Vault CLI error, which is acceptable. The 512-byte cap keeps
/// a pathological config line from ballooning the environment block.
pub fn is_valid_vault_addr(s: &str) -> bool {
    const MAX_ADDR_LEN: usize = 512;

    let candidate = s.trim();
    if candidate.is_empty() || candidate.len() > MAX_ADDR_LEN {
        return false;
    }
    candidate
        .chars()
        .all(|c| !c.is_control() && !c.is_whitespace())
}
95
/// Turn a user-supplied Vault address into a full URL so bare IPs and
/// hostnames work.
///
/// * Anything that already carries a scheme (`http://`, `https://`, or any
///   other `scheme://`) is returned trimmed but otherwise untouched. The
///   user's port choice, including its absence, is honoured so the HTTP
///   client follows the scheme default. Appending a redundant `:443` breaks
///   strict `Host`-header ACLs on HAProxy and similar proxies once they drop
///   to HTTP/1.1.
/// * A bare host gets `https://` prepended. Without an explicit port it also
///   gets Vault's default `:8200`, matching the local-dev pattern where
///   `vault.local` or `192.168.1.10` points at a stock Vault server. With an
///   explicit port (`host:9200`) the user's port wins.
pub fn normalize_vault_addr(s: &str) -> String {
    let addr = s.trim();

    // `http://` and `https://` (in any letter case) both contain `://`, so a
    // single substring test covers every scheme-qualified input.
    if addr.contains("://") {
        return addr.to_string();
    }

    // The authority is everything before the first `/`; the rest is the path.
    let authority_len = addr.find('/').unwrap_or(addr.len());
    let (authority, path) = addr.split_at(authority_len);

    // For a bracketed IPv6 literal, colons inside the brackets belong to the
    // address itself; only a colon after the closing `]` marks a port.
    let port_zone = match authority.rfind(']') {
        Some(close) => &authority[close..],
        None => authority,
    };

    if port_zone.contains(':') {
        format!("https://{addr}")
    } else {
        format!("https://{authority}:8200{path}")
    }
}
138
/// Make raw Vault CLI stderr safe to show to the user.
///
/// Lines that mention a credential-like marker (`token`, `secret`,
/// `x-vault-`, `cookie`, `authorization`; matched case-insensitively) are
/// dropped. The remaining lines are joined with single spaces and trimmed.
/// If nothing is left, a generic failure message is returned. Output longer
/// than 200 characters is cut to 200 and suffixed with `...`.
pub fn scrub_vault_stderr(raw: &str) -> String {
    const SENSITIVE_MARKERS: [&str; 5] =
        ["token", "secret", "x-vault-", "cookie", "authorization"];
    const MAX_CHARS: usize = 200;

    let mut kept: Vec<&str> = Vec::new();
    for line in raw.lines() {
        let lowered = line.to_ascii_lowercase();
        let leaks = SENSITIVE_MARKERS
            .iter()
            .any(|marker| lowered.contains(*marker));
        if !leaks {
            kept.push(line);
        }
    }

    let joined = kept.join(" ");
    let message = joined.trim();
    if message.is_empty() {
        return "Vault SSH signing failed. Check vault status and policy".to_string();
    }

    // Take up to MAX_CHARS characters; anything left over means truncation.
    let mut rest = message.chars();
    let head: String = rest.by_ref().take(MAX_CHARS).collect();
    if rest.next().is_some() {
        head + "..."
    } else {
        head
    }
}
165
166/// Return the certificate path for a given alias: `~/.purple/certs/<alias>-cert.pub`
167pub fn cert_path_for(alias: &str) -> Result<PathBuf> {
168    anyhow::ensure!(
169        !alias.is_empty()
170            && !alias.contains('/')
171            && !alias.contains('\\')
172            && !alias.contains(':')
173            && !alias.contains('\0')
174            && !alias.contains(".."),
175        "Invalid alias for cert path: '{}'",
176        alias
177    );
178    let dir = dirs::home_dir()
179        .context("Could not determine home directory")?
180        .join(".purple/certs");
181    Ok(dir.join(format!("{}-cert.pub", alias)))
182}
183
184/// Resolve the actual certificate file path for a host.
185/// Priority: CertificateFile directive > purple's default cert path.
186pub fn resolve_cert_path(alias: &str, certificate_file: &str) -> Result<PathBuf> {
187    if !certificate_file.is_empty() {
188        let expanded = if let Some(rest) = certificate_file.strip_prefix("~/") {
189            if let Some(home) = dirs::home_dir() {
190                home.join(rest)
191            } else {
192                PathBuf::from(certificate_file)
193            }
194        } else {
195            PathBuf::from(certificate_file)
196        };
197        Ok(expanded)
198    } else {
199        cert_path_for(alias)
200    }
201}
202
203/// Sign an SSH public key via Vault SSH secrets engine.
204/// Runs: `vault write -field=signed_key <role> public_key=@<pubkey_path>`
205/// Writes the signed certificate to `~/.purple/certs/<alias>-cert.pub`.
206///
207/// When `vault_addr` is `Some`, it is set as the `VAULT_ADDR` env var on the
208/// `vault` subprocess, overriding whatever the parent shell has configured.
209/// When `None`, the subprocess inherits the parent's env (current behavior).
210/// This lets purple users configure Vault address at the provider or host
211/// level without needing to launch purple from a pre-exported shell.
212pub fn sign_certificate(
213    role: &str,
214    pubkey_path: &Path,
215    alias: &str,
216    vault_addr: Option<&str>,
217) -> Result<SignResult> {
218    if !pubkey_path.exists() {
219        anyhow::bail!(
220            "Public key not found: {}. Set IdentityFile on the host or ensure ~/.ssh/id_ed25519.pub exists.",
221            pubkey_path.display()
222        );
223    }
224
225    if !is_valid_role(role) {
226        anyhow::bail!("Invalid Vault SSH role: '{}'", role);
227    }
228
229    let cert_dest = cert_path_for(alias)?;
230
231    if let Some(parent) = cert_dest.parent() {
232        std::fs::create_dir_all(parent)
233            .with_context(|| crate::messages::vault_create_dir_failed(&parent.display()))?;
234    }
235
236    // The Vault CLI receives the public key path as a UTF-8 argument. `Path::display()`
237    // is lossy on non-UTF8 paths and could produce a mangled path Vault would then fail
238    // to read. Require a valid UTF-8 path and fail fast with a clear message.
239    let pubkey_str = pubkey_path.to_str().context(
240        "public key path contains non-UTF8 bytes; vault CLI requires a valid UTF-8 path",
241    )?;
242    // The Vault CLI parses arguments as `key=value` KV pairs. A path containing
243    // `=` would be split mid-argument and produce a cryptic parse error. The
244    // check runs on the already-resolved (tilde-expanded) path because that is
245    // exactly the byte sequence the CLI will see. A user with a `$HOME` path
246    // that itself contains `=` will hit this early; the error message reports
247    // the expanded path so they can rename the offending directory.
248    if pubkey_str.contains('=') {
249        anyhow::bail!(
250            "Public key path '{}' contains '=' which is not supported by the Vault CLI argument format. Rename the key file or directory.",
251            pubkey_str
252        );
253    }
254    let pubkey_arg = format!("public_key=@{}", pubkey_str);
255    debug!(
256        "[external] Vault sign request: addr={} role={}",
257        vault_addr.unwrap_or("<env>"),
258        role
259    );
260    let mut cmd = Command::new("vault");
261    cmd.args(["write", "-field=signed_key", role, &pubkey_arg]);
262    // Override VAULT_ADDR for this subprocess only when a value was resolved
263    // from config. Otherwise leave the env untouched so `vault` keeps using
264    // whatever the parent shell (or `~/.vault-token`) provides. The caller
265    // (typically `resolve_vault_addr`) is expected to have validated and
266    // trimmed the value already — re-checking here is cheap belt-and-braces
267    // for callers that construct the `Option<&str>` manually.
268    if let Some(addr) = vault_addr {
269        anyhow::ensure!(
270            is_valid_vault_addr(addr),
271            "Invalid VAULT_ADDR '{}' for role '{}'. Check the Vault SSH Address field.",
272            addr,
273            role
274        );
275        cmd.env("VAULT_ADDR", addr);
276    }
277    let mut child = cmd
278        .stdout(std::process::Stdio::piped())
279        .stderr(std::process::Stdio::piped())
280        .spawn()
281        .context("Failed to run vault CLI. Is vault installed and in PATH?")?;
282
283    // Drain both pipes on background threads to prevent pipe-buffer deadlock.
284    // Without this, the vault CLI can block writing to a full stderr pipe
285    // (64 KB) while we poll try_wait, causing a false timeout.
286    let stdout_handle = child.stdout.take();
287    let stderr_handle = child.stderr.take();
288    let stdout_thread = std::thread::spawn(move || -> Vec<u8> {
289        let mut buf = Vec::new();
290        if let Some(mut h) = stdout_handle {
291            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
292                log::warn!("[external] Failed to read vault stdout pipe: {e}");
293            }
294        }
295        buf
296    });
297    let stderr_thread = std::thread::spawn(move || -> Vec<u8> {
298        let mut buf = Vec::new();
299        if let Some(mut h) = stderr_handle {
300            if let Err(e) = std::io::Read::read_to_end(&mut h, &mut buf) {
301                log::warn!("[external] Failed to read vault stderr pipe: {e}");
302            }
303        }
304        buf
305    });
306
307    // Wait up to 30 seconds for the vault CLI to complete. Without a timeout
308    // the thread blocks indefinitely when the Vault server is unreachable
309    // (e.g. wrong address, firewall, TLS handshake hanging).
310    let deadline = std::time::Instant::now() + std::time::Duration::from_secs(30);
311    let status = loop {
312        match child.try_wait() {
313            Ok(Some(s)) => break s,
314            Ok(None) => {
315                if std::time::Instant::now() >= deadline {
316                    let _ = child.kill();
317                    let _ = child.wait();
318                    // The pipe-drain threads (stdout_thread, stderr_thread)
319                    // are dropped without joining here. This is intentional:
320                    // kill() closes the child's pipe ends, so read_to_end
321                    // returns immediately and the threads self-terminate.
322                    error!(
323                        "[external] Vault unreachable: {}: timed out after 30s",
324                        vault_addr.unwrap_or("<env>")
325                    );
326                    anyhow::bail!("Vault SSH timed out. Server unreachable.");
327                }
328                std::thread::sleep(std::time::Duration::from_millis(100));
329            }
330            Err(e) => {
331                let _ = child.kill();
332                let _ = child.wait();
333                anyhow::bail!("Failed to wait for vault CLI: {}", e);
334            }
335        }
336    };
337
338    let stdout_bytes = stdout_thread.join().unwrap_or_default();
339    let stderr_bytes = stderr_thread.join().unwrap_or_default();
340    let output = std::process::Output {
341        status,
342        stdout: stdout_bytes,
343        stderr: stderr_bytes,
344    };
345
346    if !output.status.success() {
347        let stderr = String::from_utf8_lossy(&output.stderr);
348        if stderr.contains("permission denied") || stderr.contains("403") {
349            error!(
350                "[external] Vault auth failed: permission denied (role={} addr={})",
351                role,
352                vault_addr.unwrap_or("<env>")
353            );
354            anyhow::bail!("Vault SSH permission denied. Check token and policy.");
355        }
356        if stderr.contains("missing client token") || stderr.contains("token expired") {
357            error!(
358                "[external] Vault auth failed: token missing or expired (role={} addr={})",
359                role,
360                vault_addr.unwrap_or("<env>")
361            );
362            anyhow::bail!("Vault SSH token missing or expired. Run `vault login`.");
363        }
364        // Check "connection refused" before "dial tcp" because Go's
365        // refused-connection error contains both substrings.
366        if stderr.contains("connection refused") {
367            error!(
368                "[external] Vault unreachable: {}: connection refused",
369                vault_addr.unwrap_or("<env>")
370            );
371            anyhow::bail!("Vault SSH connection refused.");
372        }
373        if stderr.contains("i/o timeout") || stderr.contains("dial tcp") {
374            error!(
375                "[external] Vault unreachable: {}: connection timed out",
376                vault_addr.unwrap_or("<env>")
377            );
378            anyhow::bail!("Vault SSH connection timed out.");
379        }
380        if stderr.contains("no such host") {
381            error!(
382                "[external] Vault unreachable: {}: no such host",
383                vault_addr.unwrap_or("<env>")
384            );
385            anyhow::bail!("Vault SSH host not found.");
386        }
387        if stderr.contains("server gave HTTP response to HTTPS client") {
388            error!(
389                "[external] Vault unreachable: {}: server returned HTTP on HTTPS connection",
390                vault_addr.unwrap_or("<env>")
391            );
392            anyhow::bail!("Vault SSH server uses HTTP, not HTTPS. Set address to http://.");
393        }
394        if stderr.contains("certificate signed by unknown authority")
395            || stderr.contains("tls:")
396            || stderr.contains("x509:")
397        {
398            error!(
399                "[external] Vault unreachable: {}: TLS error",
400                vault_addr.unwrap_or("<env>")
401            );
402            anyhow::bail!("Vault SSH TLS error. Check certificate or use http://.");
403        }
404        error!(
405            "[external] Vault SSH signing failed: {}",
406            scrub_vault_stderr(&stderr)
407        );
408        anyhow::bail!("Vault SSH failed: {}", scrub_vault_stderr(&stderr));
409    }
410
411    let signed_key = String::from_utf8_lossy(&output.stdout).trim().to_string();
412    if signed_key.is_empty() {
413        anyhow::bail!("Vault returned empty certificate for role '{}'", role);
414    }
415
416    crate::fs_util::atomic_write(&cert_dest, signed_key.as_bytes())
417        .with_context(|| crate::messages::vault_write_cert_failed(&cert_dest.display()))?;
418
419    info!("Vault SSH certificate signed for {}", alias);
420    Ok(SignResult {
421        cert_path: cert_dest,
422    })
423}
424
425/// Check the validity of an SSH certificate file via `ssh-keygen -L`.
426///
427/// Timezone note: `ssh-keygen -L` outputs local civil time, which `parse_ssh_datetime`
428/// converts to pseudo-epoch seconds. Rather than comparing against UTC `now` (which would
429/// be wrong in non-UTC zones), we compute the TTL from the parsed from/to difference
430/// (timezone-independent) and measure elapsed time since the cert file was written (UTC
431/// file mtime vs UTC now). This keeps both sides in the same reference frame.
432pub fn check_cert_validity(cert_path: &Path) -> CertStatus {
433    if !cert_path.exists() {
434        return CertStatus::Missing;
435    }
436
437    let output = match Command::new("ssh-keygen")
438        .args(["-L", "-f"])
439        .arg(cert_path)
440        .output()
441    {
442        Ok(o) => o,
443        Err(e) => return CertStatus::Invalid(crate::messages::vault_ssh_keygen_run_failed(&e)),
444    };
445
446    if !output.status.success() {
447        return CertStatus::Invalid("ssh-keygen could not read certificate".to_string());
448    }
449
450    let stdout = String::from_utf8_lossy(&output.stdout);
451
452    // Handle certificates signed with no expiration ("Valid: forever").
453    for line in stdout.lines() {
454        let t = line.trim();
455        if t == "Valid: forever" || t.starts_with("Valid: from ") && t.ends_with(" to forever") {
456            return CertStatus::Valid {
457                expires_at: i64::MAX,
458                remaining_secs: i64::MAX,
459                total_secs: i64::MAX,
460            };
461        }
462    }
463
464    for line in stdout.lines() {
465        if let Some((from, to)) = parse_valid_line(line) {
466            let ttl = to - from; // Correct regardless of timezone
467            // Defensive: a cert with to < from is malformed. Treat as Invalid
468            // rather than propagating a negative ttl into the cache and the
469            // renewal threshold calculation.
470            if ttl <= 0 {
471                return CertStatus::Invalid(
472                    "certificate has non-positive validity window".to_string(),
473                );
474            }
475
476            // Use file modification time as the signing timestamp (UTC)
477            let signed_at = match std::fs::metadata(cert_path)
478                .and_then(|m| m.modified())
479                .ok()
480                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
481            {
482                Some(d) => d.as_secs() as i64,
483                None => {
484                    // Cannot determine file age. Treat as needing renewal.
485                    return CertStatus::Expired;
486                }
487            };
488
489            let now = match std::time::SystemTime::now().duration_since(std::time::UNIX_EPOCH) {
490                Ok(d) => d.as_secs() as i64,
491                Err(_) => {
492                    return CertStatus::Invalid("system clock before unix epoch".to_string());
493                }
494            };
495
496            let elapsed = now - signed_at;
497            let remaining = ttl - elapsed;
498
499            if remaining <= 0 {
500                return CertStatus::Expired;
501            }
502            let expires_at = now + remaining;
503            return CertStatus::Valid {
504                expires_at,
505                remaining_secs: remaining,
506                total_secs: ttl,
507            };
508        }
509    }
510
511    CertStatus::Invalid("No Valid: line found in certificate".to_string())
512}
513
514/// Parse "Valid: from YYYY-MM-DDTHH:MM:SS to YYYY-MM-DDTHH:MM:SS" from ssh-keygen -L.
515fn parse_valid_line(line: &str) -> Option<(i64, i64)> {
516    let trimmed = line.trim();
517    let rest = trimmed.strip_prefix("Valid:")?;
518    let rest = rest.trim();
519    let rest = rest.strip_prefix("from ")?;
520    let (from_str, rest) = rest.split_once(" to ")?;
521    let to_str = rest.trim();
522
523    let from = parse_ssh_datetime(from_str)?;
524    let to = parse_ssh_datetime(to_str)?;
525    Some((from, to))
526}
527
/// Parse a `YYYY-MM-DDTHH:MM:SS` timestamp into seconds since 1970-01-01.
///
/// The timestamp is interpreted as plain civil time with no timezone offset
/// applied. `ssh-keygen -L` prints local time, so the returned value is a
/// pseudo-epoch: the *difference* between two parsed timestamps is meaningful,
/// but a single value may be off from real Unix time by the UTC offset.
///
/// Surrounding whitespace is ignored, and anything after the 19-byte
/// timestamp is ignored too. Returns `None` when the input is too short, a
/// separator is wrong, a field contains anything other than ASCII digits
/// (signs such as `+1` are rejected), or a field is out of range (month
/// 1-12, day 1-31, hour 0-23, minute and second 0-59).
fn parse_ssh_datetime(s: &str) -> Option<i64> {
    let bytes = s.trim().as_bytes();
    if bytes.len() < 19 {
        return None;
    }

    // Validate the fixed separators before touching the numeric fields.
    if bytes[4] != b'-'
        || bytes[7] != b'-'
        || bytes[10] != b'T'
        || bytes[13] != b':'
        || bytes[16] != b':'
    {
        return None;
    }

    // Strict digits-only field parser. `str::parse::<i64>` would also accept
    // a leading `+` or `-`, letting malformed input such as `2024-+1-01`
    // through.
    let field = |range: std::ops::Range<usize>| -> Option<i64> {
        bytes[range].iter().try_fold(0i64, |acc, &b| {
            b.is_ascii_digit().then(|| acc * 10 + i64::from(b - b'0'))
        })
    };

    let year = field(0..4)?;
    let month = field(5..7)?;
    let day = field(8..10)?;
    let hour = field(11..13)?;
    let min = field(14..16)?;
    let sec = field(17..19)?;

    if !(1..=12).contains(&month) || !(1..=31).contains(&day) {
        return None;
    }
    if !(0..=23).contains(&hour) || !(0..=59).contains(&min) || !(0..=59).contains(&sec) {
        return None;
    }

    // Civil date to day count. The year is shifted to start in March so the
    // leap day falls at the end of the shifted year.
    let (y, m) = if month <= 2 {
        (year - 1, month + 9)
    } else {
        (year, month - 3)
    };
    let era = (if y >= 0 { y } else { y - 399 }) / 400;
    let yoe = y - era * 400; // year of era, 0..=399
    let doy = (153 * m + 2) / 5 + day - 1; // day of shifted year
    let doe = yoe * 365 + yoe / 4 - yoe / 100 + doy; // day of era
    let days = era * 146097 + doe - 719468; // days since 1970-01-01

    Some(days * 86400 + hour * 3600 + min * 60 + sec)
}
576
577/// Check if a certificate needs renewal.
578///
579/// For certificates whose total validity window is shorter than
580/// `RENEWAL_THRESHOLD_SECS`, the fixed 5-minute threshold would flag a freshly
581/// signed cert as needing renewal immediately, causing an infinite re-sign loop.
582/// In that case we fall back to a proportional threshold (half the total).
583pub fn needs_renewal(status: &CertStatus) -> bool {
584    match status {
585        CertStatus::Missing | CertStatus::Expired | CertStatus::Invalid(_) => true,
586        CertStatus::Valid {
587            remaining_secs,
588            total_secs,
589            ..
590        } => {
591            let threshold = if *total_secs > 0 && *total_secs <= RENEWAL_THRESHOLD_SECS {
592                *total_secs / 2
593            } else {
594                RENEWAL_THRESHOLD_SECS
595            };
596            *remaining_secs < threshold
597        }
598    }
599}
600
601/// Ensure a valid certificate exists for a host. Signs a new one if needed.
602/// Checks at the CertificateFile path (or purple's default) before signing.
603pub fn ensure_cert(
604    role: &str,
605    pubkey_path: &Path,
606    alias: &str,
607    certificate_file: &str,
608    vault_addr: Option<&str>,
609) -> Result<PathBuf> {
610    let check_path = resolve_cert_path(alias, certificate_file)?;
611    let status = check_cert_validity(&check_path);
612
613    if !needs_renewal(&status) {
614        info!(
615            "Vault SSH certificate cache hit: alias={} role={} path={}",
616            alias,
617            role,
618            check_path.display()
619        );
620        return Ok(check_path);
621    }
622
623    log::debug!(
624        "Vault SSH certificate cache miss: alias={} role={} status={:?} -> signing",
625        alias,
626        role,
627        status
628    );
629    let result = sign_certificate(role, pubkey_path, alias, vault_addr)?;
630    Ok(result.cert_path)
631}
632
633/// Resolve the public key path for signing.
634/// Priority: host IdentityFile + ".pub" > ~/.ssh/id_ed25519.pub fallback.
635/// Returns an error when the user's home directory cannot be determined. Any
636/// IdentityFile pointing outside `$HOME` is rejected and falls back to the
637/// default `~/.ssh/id_ed25519.pub` to prevent reading arbitrary filesystem
638/// locations via a crafted IdentityFile directive.
639pub fn resolve_pubkey_path(identity_file: &str) -> Result<PathBuf> {
640    let home = dirs::home_dir().context("Could not determine home directory")?;
641    let fallback = home.join(".ssh/id_ed25519.pub");
642
643    if identity_file.is_empty() {
644        return Ok(fallback);
645    }
646
647    let expanded = if let Some(rest) = identity_file.strip_prefix("~/") {
648        home.join(rest)
649    } else {
650        PathBuf::from(identity_file)
651    };
652
653    // A purely lexical `starts_with(&home)` check can be bypassed by a symlink inside
654    // $HOME pointing to a path outside $HOME (e.g. ~/evil -> /etc). Canonicalize both
655    // sides so symlinks are resolved, then compare. If the expanded path does not yet
656    // exist (or canonicalize fails for any reason) we cannot safely reason about where
657    // it actually points, so fall back to the default key path.
658    let canonical_home = match std::fs::canonicalize(&home) {
659        Ok(p) => p,
660        Err(_) => return Ok(fallback),
661    };
662    if expanded.exists() {
663        match std::fs::canonicalize(&expanded) {
664            Ok(canonical) if canonical.starts_with(&canonical_home) => {}
665            _ => return Ok(fallback),
666        }
667    } else if !expanded.starts_with(&home) {
668        return Ok(fallback);
669    }
670
671    if expanded.extension().is_some_and(|ext| ext == "pub") {
672        Ok(expanded)
673    } else {
674        let mut s = expanded.into_os_string();
675        s.push(".pub");
676        Ok(PathBuf::from(s))
677    }
678}
679
680/// Resolve the effective vault role for a host.
681/// Priority: host-level vault_ssh > provider-level vault_role > None.
682///
683/// `provider_label` selects between multiple labeled configs of the same
684/// provider. None means a bare config (legacy 2-segment marker).
685pub fn resolve_vault_role(
686    host_vault_ssh: Option<&str>,
687    provider_name: Option<&str>,
688    provider_label: Option<&str>,
689    provider_config: &crate::providers::config::ProviderConfig,
690) -> Option<String> {
691    if let Some(role) = host_vault_ssh {
692        if !role.is_empty() {
693            return Some(role.to_string());
694        }
695    }
696
697    if let Some(name) = provider_name {
698        let id = crate::providers::config::ProviderConfigId {
699            provider: name.to_string(),
700            label: provider_label.map(|s| s.to_string()),
701        };
702        let section = provider_config
703            .section_by_id(&id)
704            .or_else(|| provider_config.section(name));
705        if let Some(section) = section {
706            if !section.vault_role.is_empty() {
707                return Some(section.vault_role.clone());
708            }
709        }
710    }
711
712    None
713}
714
715/// Resolve the effective Vault address for a host.
716///
717/// Precedence (highest wins): per-host `# purple:vault-addr` comment,
718/// provider `vault_addr=` setting, else None (caller falls back to the
719/// `vault` CLI's own env resolution).
720///
721/// Both layers are re-validated with `is_valid_vault_addr` even though the
722/// parser paths (`HostBlock::vault_addr()` and `ProviderConfig::parse`)
723/// already drop invalid values. This is defensive: a future caller that
724/// constructs a `HostEntry` or `ProviderSection` in-memory (tests, migration
725/// code, a new feature) won't be able to smuggle a malformed `VAULT_ADDR`
726/// into `sign_certificate` through this resolver.
727pub fn resolve_vault_addr(
728    host_vault_addr: Option<&str>,
729    provider_name: Option<&str>,
730    provider_label: Option<&str>,
731    provider_config: &crate::providers::config::ProviderConfig,
732) -> Option<String> {
733    if let Some(addr) = host_vault_addr {
734        let trimmed = addr.trim();
735        if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
736            return Some(normalize_vault_addr(trimmed));
737        }
738    }
739
740    if let Some(name) = provider_name {
741        let id = crate::providers::config::ProviderConfigId {
742            provider: name.to_string(),
743            label: provider_label.map(|s| s.to_string()),
744        };
745        let section = provider_config
746            .section_by_id(&id)
747            .or_else(|| provider_config.section(name));
748        if let Some(section) = section {
749            let trimmed = section.vault_addr.trim();
750            if !trimmed.is_empty() && is_valid_vault_addr(trimmed) {
751                return Some(normalize_vault_addr(trimmed));
752            }
753        }
754    }
755
756    None
757}
758
759/// Resolve the effective ProxyJump chain for an alias by asking ssh itself.
760///
761/// Uses `ssh -G -F <config> <alias>` so wildcard patterns and `Match` blocks
762/// contribute the same way they do at connect time. Without this, a host that
763/// inherits ProxyJump from a wildcard (e.g. `Host *prod*  ProxyJump bastion`)
764/// would look like it has no proxy when read from its own block alone.
765///
766/// Returns aliases in dependency order: proxies first, the target last. The
767/// target is always present, even when ssh resolution yields nothing. Cycles
768/// are broken with a visited set. Hosts referenced via ProxyJump that have no
769/// matching `Host` block in the config still appear in the chain so callers
770/// can decide what to do with them; existence is verified by the caller.
771pub fn resolve_proxy_chain(config_path: &Path, alias: &str) -> Vec<String> {
772    let mut chain: Vec<String> = Vec::new();
773    let mut visited: HashSet<String> = HashSet::new();
774    let mut queue: Vec<String> = vec![alias.to_string()];
775
776    while let Some(current) = queue.pop() {
777        if !visited.insert(current.clone()) {
778            continue;
779        }
780        chain.push(current.clone());
781
782        let output = Command::new("ssh")
783            .args(["-G", "-F"])
784            .arg(config_path)
785            .arg("--")
786            .arg(&current)
787            .output();
788
789        let Ok(output) = output else {
790            debug!("[external] ssh -G failed for {}: spawn error", current);
791            continue;
792        };
793        if !output.status.success() {
794            debug!(
795                "[external] ssh -G non-zero exit for {} (code {:?})",
796                current,
797                output.status.code()
798            );
799            continue;
800        }
801
802        let stdout = String::from_utf8_lossy(&output.stdout);
803        for line in stdout.lines() {
804            let lower = line.to_ascii_lowercase();
805            let Some(rest) = lower.strip_prefix("proxyjump ") else {
806                continue;
807            };
808            // ssh -G emits literal "none" when no proxy is configured.
809            if rest.trim() == "none" {
810                continue;
811            }
812            // Use the original-case slice for the value; ssh prints the
813            // proxyjump value verbatim after the lower-cased key.
814            // strip_prefix already guarantees line.len() >= "proxyjump ".len().
815            let value = &line["proxyjump ".len()..];
816            for jump in value.split(',') {
817                let host = parse_proxy_jump_host(jump.trim());
818                if !host.is_empty() {
819                    queue.push(host.to_string());
820                }
821            }
822        }
823    }
824
825    chain.reverse();
826    chain
827}
828
/// Extract the host portion from a single ProxyJump entry.
///
/// Accepts both forms permitted by ssh_config(5): the plain
/// `[user@]host[:port]` form and the URI form `ssh://[user@]host[:port]`.
/// Bracketed hosts such as `[::1]:22` are returned without the brackets.
///
/// The input is trimmed first. The result borrows from `jump` and is empty
/// when no host can be found (for example, for blank input).
fn parse_proxy_jump_host(jump: &str) -> &str {
    let entry = jump.trim();
    // Drop the URI scheme first; otherwise the ':' in "ssh://" would be
    // mistaken for the host/port separator and "ssh" returned as the host.
    let entry = entry.strip_prefix("ssh://").unwrap_or(entry);
    // Split on the LAST '@' so everything before it counts as the user.
    let after_user = entry.rsplit_once('@').map_or(entry, |(_, host)| host);
    // Bracketed host: everything between '[' and the first ']'.
    if let Some(rest) = after_user.strip_prefix('[') {
        if let Some(end) = rest.find(']') {
            return &rest[..end];
        }
    }
    // Plain host: everything up to the optional ':port' suffix.
    after_user.split(':').next().unwrap_or(after_user)
}
841
/// One row in the Keys-tab Vault SSH strip.
///
/// Plain data: every field is public and the type derives value equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ActiveCert {
    /// Host alias the cert belongs to.
    pub alias: String,
    /// Role name from `# purple:vault-ssh <role>`. Empty when the host has
    /// no role marker (`active_certs_for_strip` fills in the default string).
    pub role: String,
    /// Seconds remaining on the cert.
    pub remaining_secs: i64,
    /// Total signed-cert validity window in seconds. Used by the gauge
    /// to compute `remaining/total` for the fill ratio.
    pub total_secs: i64,
}
855
856/// True iff a host has any purple-managed Vault context: either an
857/// explicit `# purple:vault-ssh` role marker, or a `CertificateFile`
858/// directive pointing into `~/.purple/certs/`. The second branch covers
859/// users who sign certs directly with the `vault` CLI and wire them in
860/// via `CertificateFile` without setting the role marker.
861pub fn has_purple_vault_context(host: &HostEntry) -> bool {
862    host.vault_ssh.is_some() || cert_file_in_purple_dir(&host.certificate_file)
863}
864
/// Report whether a `CertificateFile` value looks like a purple-managed
/// cert, i.e. it references the per-user `.purple/certs/` directory.
///
/// This is a substring match, so it gives the same answer whether the
/// path is tilde-expanded or absolute. An empty value never matches.
pub fn cert_file_in_purple_dir(cert_file: &str) -> bool {
    const PURPLE_CERT_DIR_MARKER: &str = "/.purple/certs/";
    // An empty string cannot contain the non-empty marker, so no separate
    // emptiness check is needed.
    cert_file.contains(PURPLE_CERT_DIR_MARKER)
}
872
873/// True when any host has a purple-managed Vault context. The Keys-tab
874/// strip renders iff this returns true. Even hosts whose cert is not
875/// yet cached count, so the strip appears the moment the user
876/// configures their first Vault role or sets a cert path.
877pub fn vault_ssh_in_use(hosts: &[HostEntry]) -> bool {
878    hosts.iter().any(has_purple_vault_context)
879}
880
881/// Build the strip's row list from the cert cache. Hosts that have a
882/// configured role (or a purple-managed cert path) but no cached
883/// `Valid` status are omitted; the gauge has nothing to fill until the
884/// lazy cert check populates the cache. Sort: longest remaining first
885/// so the user sees healthy certs at the top and expiring ones at the
886/// bottom.
887pub fn active_certs_for_strip(
888    hosts: &[HostEntry],
889    cache: &HashMap<String, (Instant, CertStatus, Option<SystemTime>)>,
890) -> Vec<ActiveCert> {
891    // Recompute `remaining_secs` against the current wall clock instead
892    // of using the cached snapshot. The cached number was correct only
893    // at the moment the check ran; the strip is redrawn on every event
894    // tick (~20× per second), so deriving from `expires_at - now` gives
895    // a per-second countdown without re-running the cert validation.
896    let now = SystemTime::now()
897        .duration_since(SystemTime::UNIX_EPOCH)
898        .map(|d| d.as_secs() as i64)
899        .unwrap_or(0);
900    let mut rows: Vec<ActiveCert> = hosts
901        .iter()
902        .filter(|h| has_purple_vault_context(h))
903        .filter_map(|h| {
904            let role = h.vault_ssh.clone().unwrap_or_default();
905            match cache.get(&h.alias) {
906                Some((
907                    _,
908                    CertStatus::Valid {
909                        expires_at,
910                        remaining_secs,
911                        total_secs,
912                    },
913                    _,
914                )) => {
915                    // `expires_at == 0` is the demo sentinel for "no
916                    // wall clock"; fall back to the static cached value
917                    // so visual fixtures stay byte-deterministic.
918                    let live_remaining = if *expires_at == 0 {
919                        *remaining_secs
920                    } else {
921                        (*expires_at - now).max(0)
922                    };
923                    Some(ActiveCert {
924                        alias: h.alias.clone(),
925                        role,
926                        remaining_secs: live_remaining,
927                        total_secs: *total_secs,
928                    })
929                }
930                _ => None,
931            }
932        })
933        .collect();
934    rows.sort_by_key(|r| std::cmp::Reverse(r.remaining_secs));
935    rows
936}
937
/// Fill ratio (0.0..=1.0) for a Vault SSH cert TTL gauge.
///
/// - Returns `0.0` when either argument is zero or negative.
/// - Returns `1.0` when `total_secs` is `i64::MAX` ("Valid: forever") or
///   when `remaining_secs` is at least `total_secs` (renewal overlap).
/// - Otherwise returns `remaining_secs / total_secs`, clamped to the range.
///
/// The guards ensure the division never produces NaN.
pub fn cert_fill_ratio(remaining_secs: i64, total_secs: i64) -> f32 {
    let nothing_left = total_secs <= 0 || remaining_secs <= 0;
    let completely_full = total_secs == i64::MAX || remaining_secs >= total_secs;

    match (nothing_left, completely_full) {
        (true, _) => 0.0,
        (false, true) => 1.0,
        (false, false) => (remaining_secs as f32 / total_secs as f32).clamp(0.0, 1.0),
    }
}
950
/// Render a cert's remaining lifetime as a short display string.
///
/// - Zero or negative input yields `"expired"`.
/// - Less than an hour yields `"<M>m"`.
/// - Anything longer yields `"<H>h <M>m"`.
///
/// Seconds are truncated, so a value under one minute renders as `"0m"`.
pub fn format_remaining(remaining_secs: i64) -> String {
    if remaining_secs <= 0 {
        return String::from("expired");
    }
    let whole_minutes = remaining_secs / 60;
    match (whole_minutes / 60, whole_minutes % 60) {
        (0, mins) => format!("{}m", mins),
        (hours, mins) => format!("{}h {}m", hours, mins),
    }
}
964
// Unit tests for this module; the body lives in `vault_ssh_tests.rs` and
// is compiled only for test builds.
//
// Declared `pub(crate)` so sibling test modules (`main_tests.rs`) can
// share `ENV_LOCK` and other process-global mocking helpers without
// spawning a second lock that would race against this one.
#[cfg(test)]
#[path = "vault_ssh_tests.rs"]
pub(crate) mod tests;