bctx-weave 0.1.22

bctx-weave — FilterMesh lens pipeline, CLI interception, domain compression
Documentation
use regex::Regex;

/// One detection rule: a compiled regular expression paired with the label
/// that is embedded in the `[REDACTED:<tag>]` placeholder when it matches.
struct Pattern {
    /// Compiled expression used to locate the secret in scanned text.
    re: Regex,
    /// Short identifier of the secret type, e.g. `"aws-access-key"`.
    tag: &'static str,
}

/// Scans compressed command output for credential patterns and replaces
/// each match with `[REDACTED:<type>]` before the content reaches the agent.
///
/// Apply after the Lens pipeline. Bypass with `BCTX_NO_REDACT=1` or set
/// `scan_secrets = false` in a Recipe TOML when sharing a secret intentionally.
/// NOTE(review): neither switch is read by this type itself, so the bypass is
/// presumably enforced by the caller that decides whether to run the scan.
///
/// The scanner holds only compiled patterns, so one instance can be built
/// once and reused for any number of scans.
pub struct SecretScanner {
    /// Detection rules in the order they are applied.
    patterns: Vec<Pattern>,
}

/// Lists the tags of the loaded patterns (not the regex sources) so the
/// scanner can be embedded in `#[derive(Debug)]` types and logged cheaply.
impl std::fmt::Debug for SecretScanner {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let tags: Vec<&str> = self.patterns.iter().map(|p| p.tag).collect();
        f.debug_struct("SecretScanner")
            .field("patterns", &tags)
            .finish()
    }
}

impl Default for SecretScanner {
    fn default() -> Self {
        Self::new()
    }
}

impl SecretScanner {
    /// Builds a scanner with every built-in credential pattern compiled.
    ///
    /// Patterns are kept in declaration order, which is also the order in
    /// which [`scan`](Self::scan) applies them. Narrow patterns (for example the
    /// `ANTHROPIC_API_KEY=` assignment) are listed before broader ones (the
    /// bare `sk-…` prefix) so the narrower tag is the one that ends up in the
    /// output.
    ///
    /// Each call recompiles all patterns; build one scanner and reuse it.
    ///
    /// # Panics
    ///
    /// Panics if one of the built-in expressions fails to compile. The
    /// expressions are fixed at compile time, so this indicates a bug in this
    /// file rather than a runtime condition.
    pub fn new() -> Self {
        // (regex source, redaction tag) pairs, in application order.
        let defs: &[(&str, &str)] = &[
            // AWS Access Key ID — "AKIA" followed by 16 upper-case/digit chars
            (r"AKIA[A-Z0-9]{16}", "aws-access-key"),
            // AWS Secret Access Key — 30–50 base64 chars after the field name
            (
                r#"(?i)(?:SecretAccessKey|secret_access_key|aws_secret_access_key)["'\s:=]+[A-Za-z0-9/+=]{30,50}"#,
                "aws-secret-key",
            ),
            // Anthropic API key assignment — full ANTHROPIC_API_KEY=... span
            (
                r"(?i)ANTHROPIC_API_KEY\s*[=:]\s*[A-Za-z0-9_\-]{30,}",
                "anthropic-key",
            ),
            // OpenAI / Anthropic / common sk-… prefixed API keys
            (r"sk-[A-Za-z0-9_\-]{20,}", "api-key"),
            // GitHub classic Personal Access Token (ghp_)
            (r"ghp_[A-Za-z0-9]{36}", "github-pat"),
            // GitHub fine-grained Personal Access Token (github_pat_)
            (r"github_pat_[A-Za-z0-9_]{82}", "github-fine-grained-pat"),
            // SSH / TLS / PGP private key in PEM-style armor. The header is always
            // redacted; when the matching END marker is present the whole block
            // (body included) is replaced, so the key material itself does not
            // leak. `ENCRYPTED PRIVATE KEY` and `PGP PRIVATE KEY BLOCK` are
            // covered in addition to the bare / RSA / EC / OPENSSH / DSA labels.
            // The tag keeps its historical name for compatibility.
            (
                r"-----BEGIN (?:RSA |EC |OPENSSH |DSA |ENCRYPTED |PGP )?PRIVATE KEY(?: BLOCK)?-----(?:(?s:.*?)-----END (?:RSA |EC |OPENSSH |DSA |ENCRYPTED |PGP )?PRIVATE KEY(?: BLOCK)?-----)?",
                "private-key-header",
            ),
            // Database DSN with embedded password: scheme://user:pass@host
            (
                r"(?i)(?:postgres|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:@\s]+:[^@\s]{8,}@",
                "db-dsn-password",
            ),
            // HTTP Authorization / X-Api-Key / X-Auth-Token header values.
            // Accepts the Bearer, Token and Basic schemes (or no scheme) and
            // includes `=` so base64 padding is consumed with the credential.
            (
                r"(?i)(?:Authorization|X-Api-Key|X-Auth-Token):\s*(?:(?:Bearer|Token|Basic)\s+)?[A-Za-z0-9._\-+/=]{20,}",
                "bearer-token",
            ),
            // Generic long secret value assigned to a password/secret/api_key field.
            // The 40-char minimum keeps ordinary short values from being redacted.
            (
                r#"(?i)(?:password|secret|api_key|apikey)\s*[=:]\s*["']?[A-Za-z0-9_\-\.+/]{40,}["']?"#,
                "generic-secret",
            ),
        ];

        let patterns = defs
            .iter()
            .map(|(pat, tag)| Pattern {
                re: Regex::new(pat)
                    .unwrap_or_else(|e| panic!("SecretScanner: invalid regex for {tag}: {e}")),
                tag,
            })
            .collect();

        Self { patterns }
    }

    /// Replace all detected secrets with `[REDACTED:<type>]`.
    ///
    /// Patterns are applied sequentially, each one on the output of the
    /// previous one, so later patterns see the placeholders rather than the
    /// secret text an earlier pattern already removed.
    ///
    /// Returns a new `String`; input without any match is returned unchanged.
    pub fn scan(&self, input: &str) -> String {
        let mut out = input.to_string();
        for p in &self.patterns {
            let replacement = format!("[REDACTED:{}]", p.tag);
            // `replace_all` yields `Cow::Borrowed` when nothing matched; only
            // take the new buffer when a replacement actually happened, so a
            // clean input is not re-copied once per pattern.
            let redacted = match p.re.replace_all(&out, replacement.as_str()) {
                std::borrow::Cow::Owned(changed) => Some(changed),
                std::borrow::Cow::Borrowed(_) => None,
            };
            if let Some(changed) = redacted {
                out = changed;
            }
        }
        out
    }

    /// Returns `true` if any pattern matches `input` — used in tests to check
    /// whether a string would be redacted without actually redacting it.
    ///
    /// Stops at the first matching pattern and does not allocate.
    pub fn has_secrets(&self, input: &str) -> bool {
        self.patterns.iter().any(|p| p.re.is_match(input))
    }
}

/// Unit tests for `SecretScanner`: one detection test per built-in pattern,
/// false-positive guards for ordinary tool output, and checks on idempotency,
/// `has_secrets`, edge cases and preservation of surrounding text.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh scanner with the built-in pattern set.
    fn s() -> SecretScanner {
        SecretScanner::new()
    }

    // ── Detection ──────────────────────────────────────────────────────────
    // Each test checks both that the secret text is gone and that the
    // expected tag was inserted.

    #[test]
    fn detects_aws_access_key() {
        let out = s().scan("AccessKeyId: AKIAIOSFODNN7EXAMPLE");
        assert!(!out.contains("AKIAIOSFODNN7EXAMPLE"), "{out}");
        assert!(out.contains("[REDACTED:aws-access-key]"), "{out}");
    }

    #[test]
    fn detects_aws_secret_key() {
        let out = s().scan("SecretAccessKey: wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY");
        assert!(!out.contains("wJalrXUtnFEMI"), "{out}");
        assert!(out.contains("[REDACTED:aws-secret-key]"), "{out}");
    }

    #[test]
    fn detects_anthropic_key_by_var_name() {
        let out = s().scan("ANTHROPIC_API_KEY=sk-ant-abc123def456ghi789jkl012mno");
        // The tag alone is not enough: the key value itself must be gone.
        assert!(!out.contains("sk-ant-"), "{out}");
        assert!(out.contains("[REDACTED:anthropic-key]"), "{out}");
    }

    #[test]
    fn detects_sk_api_key() {
        let out = s().scan("export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl");
        assert!(!out.contains("sk-proj"), "{out}");
        assert!(out.contains("[REDACTED:api-key]"), "{out}");
    }

    #[test]
    fn detects_github_pat() {
        let out = s().scan("token: ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij");
        assert!(!out.contains("ghp_"), "{out}");
        assert!(out.contains("[REDACTED:github-pat]"), "{out}");
    }

    #[test]
    fn detects_github_fine_grained_pat() {
        // Fine-grained PATs carry exactly 82 characters after the prefix.
        let token = format!("github_pat_{}", "a".repeat(82));
        let out = s().scan(&format!("token={token}"));
        assert!(!out.contains("github_pat_"), "{out}");
        assert!(out.contains("[REDACTED:github-fine-grained-pat]"), "{out}");
    }

    #[test]
    fn detects_private_key_header() {
        let out = s().scan("-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQ...");
        assert!(!out.contains("BEGIN RSA PRIVATE"), "{out}");
        assert!(out.contains("[REDACTED:private-key-header]"), "{out}");
    }

    #[test]
    fn detects_db_dsn_password() {
        let out = s().scan("postgres://admin:supersecretpassword@localhost:5432/mydb");
        assert!(!out.contains("supersecretpassword"), "{out}");
        assert!(out.contains("[REDACTED:db-dsn-password]"), "{out}");
    }

    #[test]
    fn detects_bearer_token() {
        let out = s().scan("Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9");
        assert!(!out.contains("eyJhbGci"), "{out}");
        assert!(out.contains("[REDACTED:bearer-token]"), "{out}");
    }

    #[test]
    fn detects_generic_secret() {
        let out = s().scan("api_key=abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJ");
        assert!(!out.contains("abcdefghijklmnopqrstuvwxyz"), "{out}");
        assert!(out.contains("[REDACTED:generic-secret]"), "{out}");
    }

    #[test]
    fn redacts_multiple_secrets_in_one_input() {
        // Two different secret types on one line: each gets its own tag and
        // the text around them is left alone.
        let out = s().scan("key=AKIAIOSFODNN7EXAMPLE token=ghp_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghij");
        assert_eq!(
            out,
            "key=[REDACTED:aws-access-key] token=[REDACTED:github-pat]"
        );
    }

    // ── False-positive guards ──────────────────────────────────────────────

    #[test]
    fn passes_short_password_value() {
        // Under 40 chars → generic-secret does not trigger
        let out = s().scan("password=short123");
        assert!(out.contains("password=short123"), "{out}");
    }

    #[test]
    fn passes_normal_git_output() {
        let raw = "commit abc1234\nAuthor: Alice <alice@example.com>\nDate:   Mon Jan 1 00:00:00 2026\n\n    Fix bug";
        let out = s().scan(raw);
        assert_eq!(out, raw);
    }

    #[test]
    fn passes_cargo_build_output() {
        let raw = "   Compiling mylib v0.1.0\n   Finished dev [unoptimized] target(s) in 1.23s";
        let out = s().scan(raw);
        assert!(!out.contains("[REDACTED:"), "{out}");
    }

    #[test]
    fn passes_empty_input() {
        // Edge case: nothing to scan means nothing to redact or report.
        let sc = s();
        assert_eq!(sc.scan(""), "");
        assert!(!sc.has_secrets(""));
    }

    // ── Idempotency ────────────────────────────────────────────────────────

    #[test]
    fn double_scan_is_idempotent() {
        // A placeholder produced by the first pass must not be matched again.
        let sc = s();
        let first = sc.scan("AccessKeyId: AKIAIOSFODNN7EXAMPLE");
        let second = sc.scan(&first);
        assert_eq!(first, second, "second scan changed output");
    }

    // ── has_secrets ────────────────────────────────────────────────────────

    #[test]
    fn has_secrets_true_for_aws_key() {
        assert!(s().has_secrets("AKIAIOSFODNN7EXAMPLE"));
    }

    #[test]
    fn has_secrets_false_for_clean_output() {
        assert!(!s().has_secrets("cargo build --release\nFinished in 1.23s"));
    }

    // ── Context preservation ───────────────────────────────────────────────

    #[test]
    fn preserves_surrounding_text() {
        let out = s().scan("Output: AKIAIOSFODNN7EXAMPLE done");
        assert!(out.contains("Output:"), "{out}");
        assert!(out.contains("done"), "{out}");
        assert!(!out.contains("AKIAIOSFODNN7EXAMPLE"), "{out}");
    }
}