i-self 0.4.3

Personal developer-companion CLI: scans your repos, indexes code semantically, watches your activity, and moves AI-agent sessions between tools (Claude Code, Aider, Goose, OpenAI Codex CLI, Continue.dev, OpenCode).
//! Best-effort secret redaction for shared sessions.
//!
//! Catches common, high-recall token shapes: OpenAI / Anthropic / GitHub /
//! GitLab / Slack / Stripe / Google API keys, AWS access key + secret pairs,
//! `Authorization: Bearer` headers, generic JWTs, and basic `password=...`
//! strings. This is **not** a substitute for human review —
//! a session can still leak business logic, customer names, schema, etc.
//! The opt-in `--redact` CLI flag is for the user; the choice to share at
//! all is theirs.
//!
//! Each pattern replaces the match with `[REDACTED:<kind>]` so the reader
//! knows what was elided.

use regex::Regex;
use std::sync::LazyLock;

/// One redaction rule: a compiled regex paired with the label that appears
/// in the placeholder substituted for each match.
struct Pattern {
    /// Label interpolated into the `[REDACTED:<kind>]` placeholder.
    kind: &'static str,
    /// Regex whose matches are replaced by the placeholder.
    rx: Regex,
}

/// Redaction rules, compiled once on first use and applied in declaration
/// order by `redact_secrets`.
///
/// Every whole match (including any label such as `Authorization: Bearer` or
/// `password =`) is what gets replaced, so the capture groups inside the
/// regexes are informational only.
static PATTERNS: LazyLock<Vec<Pattern>> = LazyLock::new(|| {
    // All regexes below are compile-time literals, so a failure to compile is
    // a programming error; panic with the rule name to make it easy to find.
    let rule = |kind: &'static str, rx: &str| Pattern {
        kind,
        rx: Regex::new(rx)
            .unwrap_or_else(|err| panic!("built-in `{kind}` regex failed to compile: {err}")),
    };

    // Order matters: more-specific patterns first so they win the replace
    // race. e.g. "sk-ant-..." matches both anthropic and openai shapes; we
    // want "anthropic-key" in the placeholder, not "openai-key".
    vec![
        // Anthropic — listed before OpenAI because the OpenAI regex would
        // otherwise eat `sk-ant-...` strings.
        rule("anthropic-key", r"sk-ant-[A-Za-z0-9_\-]{20,}"),
        // OpenAI: `sk-` followed by 20+ chars (covers sk-, sk-proj-,
        // sk-svcacct-, sk-admin-).
        rule(
            "openai-key",
            r"sk-(?:proj-|svcacct-|admin-)?[A-Za-z0-9_\-]{20,}",
        ),
        // GitHub: classic / OAuth / user-to-server / server-to-server /
        // refresh tokens (`ghp_`, `gho_`, `ghu_`, `ghs_`, `ghr_`) plus
        // fine-grained personal access tokens (`github_pat_`).
        rule(
            "github-token",
            r"gh[pousr]_[A-Za-z0-9_]{30,}|github_pat_[A-Za-z0-9_]{30,}",
        ),
        // GitLab personal access token
        rule("gitlab-token", r"glpat-[A-Za-z0-9_\-]{20,}"),
        // Slack
        rule("slack-token", r"xox[abporsu]-[A-Za-z0-9\-]{10,}"),
        // Stripe (live + test)
        rule(
            "stripe-key",
            r"(?:sk|rk|pk)_(?:live|test)_[A-Za-z0-9]{20,}",
        ),
        // Google API key
        rule("google-api-key", r"AIza[0-9A-Za-z_\-]{35}"),
        // AWS access key id: long-term (AKIA) or temporary STS (ASIA) prefix
        // + 16 chars — the secret is paired separately below.
        rule("aws-access-key-id", r"(?:AKIA|ASIA)[0-9A-Z]{16}"),
        // AWS secret access key when labelled (avoids shotgun-redacting random base64 strings)
        rule(
            "aws-secret-key",
            r#"(?i)aws[_-]?secret[_-]?access[_-]?key[\s"=:]+([A-Za-z0-9/+=]{40})"#,
        ),
        // Generic Authorization: Bearer <token>. The character class follows
        // the RFC 6750 `b64token` shape (alnum, `-._~+/`, then `=` padding) so
        // the tail of a base64 token is not left behind in the output.
        rule(
            "bearer-token",
            r"(?i)Authorization:\s*Bearer\s+([A-Za-z0-9_\-\.~+/]+=*)",
        ),
        // JWTs (three base64 chunks separated by dots)
        rule(
            "jwt",
            r"eyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}",
        ),
        // password / passwd / pwd followed by `=` or `:` and a value of 4+
        // chars. Three value shapes, tried in this order:
        //   1. fully quoted on one line — may contain spaces, redacted up to
        //      the closing quote;
        //   2. opening quote + unbroken token (closing quote optional);
        //   3. unquoted token on the same line as the separator, e.g. a
        //      `.env` line `DB_PASSWORD=hunter2`.
        // Shape 3 deliberately favours recall: it can over-redact things like
        // a `password: String` type annotation, which is the safe direction
        // for a redactor to fail in.
        rule(
            "password-assignment",
            r#"(?i)(?:password|passwd|pwd)["']?\s*[:=](?:\s*(?:"[^"\r\n]{4,}"|'[^'\r\n]{4,}'|["'][^"'\s]{4,}["']?)|[ \t]*[^"'\s]{4,})"#,
        ),
    ]
});

/// Replace every secret-shaped substring of `text` with a tagged placeholder.
///
/// The rules in `PATTERNS` are applied one after another, each to the output
/// of the previous one, and every match is replaced by the literal text
/// `[REDACTED:<kind>]` where `<kind>` is the matching rule's label.
///
/// Returns a new `String`; input with no matches comes back unchanged.
pub fn redact_secrets(text: &str) -> String {
    let mut redacted = text.to_string();
    for p in PATTERNS.iter() {
        // Built once per rule instead of once per match. `NoExpand` makes the
        // regex crate insert it verbatim rather than scanning it for `$group`
        // references.
        let placeholder = format!("[REDACTED:{}]", p.kind);

        // `replace_all` yields `Cow::Borrowed` when nothing matched; in that
        // case keep the current buffer instead of copying the whole text.
        let rewritten = match p.rx.replace_all(&redacted, regex::NoExpand(&placeholder)) {
            std::borrow::Cow::Owned(new_text) => Some(new_text),
            std::borrow::Cow::Borrowed(_) => None,
        };
        if let Some(new_text) = rewritten {
            redacted = new_text;
        }
    }
    redacted
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A bare `sk-` key is replaced wholesale and labelled as an OpenAI key.
    #[test]
    fn redacts_openai_key() {
        let s = redact_secrets("token: sk-abc123def456ghi789jkl012mno345");
        assert!(s.contains("[REDACTED:openai-key]"));
        assert!(!s.contains("sk-abc"));
        // The tail of the key must be gone too, not just its prefix.
        assert!(!s.contains("mno345"), "got: {s}");
    }

    /// `sk-ant-...` must be labelled as Anthropic, never as OpenAI.
    #[test]
    fn redacts_anthropic_key_before_openai_pattern() {
        // sk-ant-... shouldn't match the openai pattern; the anthropic one wins.
        let s = redact_secrets("api_key='sk-ant-api03-XXXXXXXXXXXXXXXXXXXX'");
        assert!(s.contains("[REDACTED:anthropic-key]"));
        assert!(!s.contains("[REDACTED:openai-key]"), "got: {s}");
        assert!(!s.contains("sk-ant"), "got: {s}");
    }

    /// Each supported `gh?_` prefix is recognised and the token is removed.
    #[test]
    fn redacts_github_classic_and_fine_grained() {
        for tok in [
            "ghp_1234567890abcdefghijklmnopqrstuvwxyz",
            "ghs_1234567890abcdefghijklmnopqrstuvwxyz",
            "gho_1234567890abcdefghijklmnopqrstuvwxyz",
        ] {
            let s = redact_secrets(&format!("export TOKEN={tok}"));
            assert!(s.contains("[REDACTED:github-token]"), "missed: {tok}");
            assert!(!s.contains(tok), "leaked: {tok}");
        }
    }

    /// Both halves of an AWS credential pair are redacted independently.
    #[test]
    fn redacts_aws_pair() {
        let raw = "AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE\nAWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY";
        let s = redact_secrets(raw);
        assert!(s.contains("[REDACTED:aws-access-key-id]"));
        assert!(s.contains("[REDACTED:aws-secret-key]"));
        assert!(!s.contains("AKIAIOSF"));
        assert!(!s.contains("wJalrXUtn"));
    }

    /// A JWT sent as a bearer token is removed by whichever rule fires first.
    #[test]
    fn redacts_jwt_and_bearer() {
        let s = redact_secrets(
            "Authorization: Bearer eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
        );
        // Both bearer pattern AND JWT pattern apply; either replacement is fine.
        assert!(
            s.contains("[REDACTED:bearer-token]") || s.contains("[REDACTED:jwt]"),
            "got: {}",
            s
        );
        assert!(!s.contains("eyJhbGciOiJIUzI1NiJ9"));
    }

    /// Quoted password assignments are replaced and the value is removed.
    #[test]
    fn redacts_password_assignment() {
        let s = redact_secrets(r#"db_password = "hunter2" and pwd:"correct horse""#);
        assert!(s.contains("[REDACTED:password-assignment]"));
        assert!(!s.contains("hunter2"), "got: {s}");
    }

    /// Text with no secret-shaped substrings passes through untouched.
    #[test]
    fn leaves_plain_text_alone() {
        let plain = "the quick brown fox jumps over the lazy dog 12345";
        assert_eq!(redact_secrets(plain), plain);
    }
}