envseal 0.3.10

Write-only secret vault with process-level access control — post-agent secret management
Documentation
//! Scan shell history files for secrets the developer has already
//! typed at least once. Powers `envseal init --from-history`.
//!
//! Each command line found in the history is fed through the
//! [`super::preexec`] detector; matches are deduplicated by `(env_var,
//! value)` and returned in the order first seen.
//!
//! Supported formats:
//! - **bash**: `~/.bash_history` — one command per line, optionally
//!   preceded by `#timestamp` lines from `HISTTIMEFORMAT`.
//! - **zsh**: `~/.zsh_history` — either plain (one command per line)
//!   or extended-history `: <ts>:<dur>;<cmd>` format.
//! - **fish**: `~/.local/share/fish/fish_history` — YAML-ish records
//!   with `- cmd:`, `  when:` keys.

use std::fs;
use std::path::{Path, PathBuf};

use crate::error::Error;

use super::preexec::{detect_in_command, DetectedSecret};

/// Which shell's history file is being scanned.
///
/// The variant selects both the default on-disk location (see
/// `HistoryKind::default_path`) and the parser that `scan_file` applies
/// to the file contents.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum HistoryKind {
    /// Bash history, by default `~/.bash_history`.
    Bash,
    /// Zsh history, by default `~/.zsh_history`.
    Zsh,
    /// Fish history; the default location is resolved by
    /// `fish_history_path` (e.g. `~/.local/share/fish/fish_history`).
    Fish,
}

impl HistoryKind {
    /// Conventional location of this shell's history file.
    ///
    /// The home directory is read from `HOME`, falling back to
    /// `USERPROFILE`; `None` is returned when neither variable is set.
    /// Bash and Zsh use a fixed dot-file directly under the home
    /// directory, while the Fish location is delegated to
    /// `fish_history_path`, which also consults `XDG_DATA_HOME`.
    pub fn default_path(&self) -> Option<PathBuf> {
        let home_dir = std::env::var_os("HOME")
            .or_else(|| std::env::var_os("USERPROFILE"))
            .map(PathBuf::from)?;

        let location = match self {
            Self::Bash => home_dir.join(".bash_history"),
            Self::Zsh => home_dir.join(".zsh_history"),
            Self::Fish => fish_history_path(&home_dir),
        };
        Some(location)
    }
}

/// Resolve the Fish history file path for the given home directory.
///
/// Resolution order:
/// 1. `$XDG_DATA_HOME/fish/fish_history` when `XDG_DATA_HOME` is set to
///    a non-empty value. An empty value is treated as unset (as the XDG
///    Base Directory specification requires); otherwise the result
///    would be the relative path `fish/fish_history`, resolved against
///    whatever the current working directory happens to be.
/// 2. On macOS: `<home>/Library/Application Support/fish/fish_history`.
/// 3. Everywhere else: `<home>/.local/share/fish/fish_history`.
///
/// The function only builds the path; it never touches the filesystem.
fn fish_history_path(home: &Path) -> PathBuf {
    let data_dir = match std::env::var_os("XDG_DATA_HOME") {
        Some(xdg) if !xdg.is_empty() => PathBuf::from(xdg),
        // Unset or empty: fall back to the per-platform default.
        _ => {
            // NOTE(review): upstream fish documentation appears to describe
            // `~/.local/share/fish` as the default on every platform,
            // macOS included — confirm the macOS branch is intentional.
            if cfg!(target_os = "macos") {
                home.join("Library").join("Application Support")
            } else {
                home.join(".local").join("share")
            }
        }
    };
    data_dir.join("fish").join("fish_history")
}

/// One distinct (`env_var`, value) candidate found in history, plus the
/// first history file/line we saw it in (for the user to confirm
/// "this is from your shell history at line N").
///
/// `scan_file` emits at most one `HistoryFinding` per distinct
/// (`env_var`, value) pair within a single file; later repeats of the
/// same pair are dropped, so `line_no` refers to the first occurrence.
#[derive(Debug, Clone)]
pub struct HistoryFinding {
    /// The classified detection result, as returned by the preexec
    /// detector for the command line.
    pub secret: DetectedSecret,
    /// Which kind of history file the line came from.
    pub source: HistoryKind,
    /// 1-based line number in the history file on which the command
    /// starts (best-effort: it counts physical lines, including
    /// timestamp/metadata lines that are not commands themselves).
    pub line_no: usize,
}

/// Run the preexec detector across every command line in `path`,
/// returning unique findings.
pub fn scan_file(kind: HistoryKind, path: &Path) -> Result<Vec<HistoryFinding>, Error> {
    let raw = match fs::read(path) {
        Ok(b) => b,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(Vec::new()),
        Err(e) => return Err(Error::StorageIo(e)),
    };
    // History files are usually UTF-8 but can carry mojibake — accept
    // anything by lossy decode rather than refusing.
    let content = String::from_utf8_lossy(&raw);

    let commands = match kind {
        HistoryKind::Bash => parse_bash(&content),
        HistoryKind::Zsh => parse_zsh(&content),
        HistoryKind::Fish => parse_fish(&content),
    };

    let mut findings = Vec::new();
    let mut seen = std::collections::BTreeSet::new();
    for (line_no, cmd) in commands {
        for det in detect_in_command(&cmd) {
            let key = (det.env_var.clone(), det.value.clone());
            if !seen.insert(key) {
                continue;
            }
            findings.push(HistoryFinding {
                secret: det,
                source: kind,
                line_no,
            });
        }
    }
    Ok(findings)
}

/// Scan the Bash, Zsh and Fish history files at their default
/// locations, in that order, and return all findings in one list.
///
/// A shell whose default path cannot be determined is skipped, and a
/// history file that does not exist contributes no findings. Results
/// are concatenated per file; no deduplication happens across files.
///
/// # Errors
///
/// Propagates the first error returned by [`scan_file`].
pub fn scan_all_default() -> Result<Vec<HistoryFinding>, Error> {
    let mut combined = Vec::new();
    for kind in [HistoryKind::Bash, HistoryKind::Zsh, HistoryKind::Fish] {
        match kind.default_path() {
            Some(path) => combined.extend(scan_file(kind, &path)?),
            None => continue,
        }
    }
    Ok(combined)
}

/// Split a Bash history file into `(line_number, command)` pairs.
///
/// Line numbers are 1-based and count every physical line. Blank lines
/// are dropped, as are `HISTTIMEFORMAT` timestamp markers: a `#`
/// followed only by ASCII digits (a bare `#` included).
fn parse_bash(content: &str) -> Vec<(usize, String)> {
    let mut commands = Vec::new();
    for (idx, raw) in content.lines().enumerate() {
        let line = raw.trim_end_matches(['\r', '\n']);
        if line.is_empty() {
            continue;
        }
        // `#1690000000`-style markers carry no command text.
        let is_timestamp = match line.strip_prefix('#') {
            Some(rest) => rest.chars().all(|c| c.is_ascii_digit()),
            None => false,
        };
        if is_timestamp {
            continue;
        }
        commands.push((idx + 1, line.to_string()));
    }
    commands
}

/// Split a Zsh history file into `(line_number, command)` pairs.
///
/// Handles both the plain format (one command per line) and the
/// extended-history format `: <ts>:<dur>;<cmd>`, in which case only
/// `<cmd>` is returned. Line numbers are 1-based and count every
/// physical line; blank lines are dropped.
///
/// The extended header is stripped only when it really has the
/// `<digits>:<digits>;` shape. A plain-format command that merely
/// starts with the `:` builtin (e.g. `: note; export X=1`) is kept
/// intact instead of being cut at its first `;`.
fn parse_zsh(content: &str) -> Vec<(usize, String)> {
    content
        .lines()
        .enumerate()
        .filter_map(|(i, line)| {
            let trimmed = line.trim_end_matches(['\r', '\n']);
            if trimmed.is_empty() {
                return None;
            }
            let cmd = strip_zsh_extended_prefix(trimmed).unwrap_or(trimmed);
            Some((i + 1, cmd.to_string()))
        })
        .collect()
}

/// Return the command part of an extended-history line
/// (`: <ts>:<dur>;<cmd>`), or `None` if `line` lacks that header.
fn strip_zsh_extended_prefix(line: &str) -> Option<&str> {
    fn all_digits(s: &str) -> bool {
        !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
    }

    let rest = line.strip_prefix(": ")?;
    // Only the first `;` ends the header; later ones belong to the command.
    let (meta, cmd) = rest.split_once(';')?;
    let (ts, dur) = meta.split_once(':')?;
    if all_digits(ts.trim()) && all_digits(dur) {
        Some(cmd)
    } else {
        None
    }
}

/// Extract `(line_number, command)` pairs from a Fish history file.
///
/// Only lines beginning with `- cmd: ` are considered; every other
/// line (such as `  when:` metadata) is ignored. Trailing whitespace is
/// removed and the remainder is decoded with `unescape_fish`. Line
/// numbers are 1-based and count every physical line.
fn parse_fish(content: &str) -> Vec<(usize, String)> {
    content
        .lines()
        .enumerate()
        .filter_map(|(idx, raw)| {
            raw.trim_end()
                .strip_prefix("- cmd: ")
                .map(|escaped| (idx + 1, unescape_fish(escaped)))
        })
        .collect()
}

/// Decode the backslash escapes found in a Fish history `cmd:` value.
///
/// `\n` becomes a newline, `\t` a tab and `\\` a single backslash. Any
/// other escaped character is kept verbatim together with its
/// backslash, and a lone trailing backslash is preserved as-is.
fn unescape_fish(s: &str) -> String {
    let mut decoded = String::with_capacity(s.len());
    let mut rest = s.chars();
    while let Some(ch) = rest.next() {
        if ch != '\\' {
            decoded.push(ch);
            continue;
        }
        // `ch` is a backslash: the following character decides its meaning.
        match rest.next() {
            Some('n') => decoded.push('\n'),
            Some('t') => decoded.push('\t'),
            // Escaped backslash, or a backslash at the very end of input.
            Some('\\') | None => decoded.push('\\'),
            // Unknown escape: keep both characters unchanged.
            Some(other) => {
                decoded.push('\\');
                decoded.push(other);
            }
        }
    }
    decoded
}

// Unit tests for the history parsers and for `scan_file`.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write as _;
    use tempfile::tempdir;

    // Create a fresh temporary directory containing one file `name` with
    // `content`. The `TempDir` guard is returned alongside the path so the
    // caller keeps the directory alive for as long as the file is needed.
    fn write_temp(name: &str, content: &str) -> (tempfile::TempDir, PathBuf) {
        let dir = tempdir().unwrap();
        let p = dir.path().join(name);
        let mut f = fs::File::create(&p).unwrap();
        f.write_all(content.as_bytes()).unwrap();
        (dir, p)
    }

    // `#<digits>` timestamp lines are skipped; only the two commands remain.
    #[test]
    fn parses_bash_with_timestamps() {
        let content = "#1690000000\nls -la\n#1690000010\nexport OPENAI_API_KEY=sk-abc123def456ghi789jklmnopqrs\n";
        let cmds = parse_bash(content);
        assert_eq!(cmds.len(), 2);
        assert!(cmds[1].1.contains("OPENAI_API_KEY"));
    }

    // The `: <ts>:<dur>;` header of extended-history lines is stripped.
    #[test]
    fn parses_zsh_extended() {
        let content = ": 1690000000:0;ls -la\n: 1690000010:0;export GITHUB_TOKEN=ghp_abcdefghijklmnopqrstuvwxyz0123456789\n";
        let cmds = parse_zsh(content);
        assert_eq!(cmds.len(), 2);
        assert_eq!(cmds[0].1, "ls -la");
        assert!(cmds[1].1.contains("GITHUB_TOKEN"));
    }

    // Plain-format zsh history yields one command per non-empty line.
    #[test]
    fn parses_zsh_plain() {
        let content = "ls -la\nexport KEY=ghp_abcdefghijklmnopqrstuvwxyz0123456789\n";
        let cmds = parse_zsh(content);
        assert_eq!(cmds.len(), 2);
    }

    // Only `- cmd: ` lines produce commands; `  when:` lines are ignored.
    #[test]
    fn parses_fish_history() {
        let content = "- cmd: ls -la\n  when: 1690000000\n- cmd: export OPENAI_API_KEY=sk-abc123def456ghi789jklmnopqrs\n  when: 1690000010\n";
        let cmds = parse_fish(content);
        assert_eq!(cmds.len(), 2);
        assert_eq!(cmds[0].1, "ls -la");
        assert!(cmds[1].1.contains("OPENAI_API_KEY"));
    }

    // End-to-end: a bash history file with one secret-bearing command
    // yields exactly one finding, attributed to the right variable.
    #[test]
    fn scan_file_detects_secrets() {
        let (_dir, p) = write_temp(
            ".bash_history",
            "ls -la\nexport OPENAI_API_KEY=sk-abc123def456ghi789jklmnopqrstu node app.js\n",
        );
        let findings = scan_file(HistoryKind::Bash, &p).unwrap();
        assert_eq!(findings.len(), 1);
        assert_eq!(findings[0].secret.env_var, "OPENAI_API_KEY");
    }

    // The same (variable, value) pair on two lines is reported once.
    #[test]
    fn scan_file_deduplicates() {
        let (_dir, p) = write_temp(
            ".bash_history",
            "export KEY=sk-abc123def456ghi789jklmnopqrstu\nexport KEY=sk-abc123def456ghi789jklmnopqrstu\n",
        );
        let findings = scan_file(HistoryKind::Bash, &p).unwrap();
        assert_eq!(findings.len(), 1);
    }

    // A non-existent history file is not an error; it yields no findings.
    #[test]
    fn missing_file_returns_empty() {
        let dir = tempdir().unwrap();
        let p = dir.path().join("does-not-exist");
        let findings = scan_file(HistoryKind::Bash, &p).unwrap();
        assert!(findings.is_empty());
    }

    // A literal backslash followed by `n` decodes to a real newline.
    #[test]
    fn fish_unescapes_backslash_n() {
        let unescaped = unescape_fish(r"echo hello\nworld");
        assert_eq!(unescaped, "echo hello\nworld");
    }
}