envseal 0.3.5

Write-only secret vault with process-level access control — post-agent secret management
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
//! Detect API keys that the developer is about to type into a shell
//! command. Used by the bash/zsh/fish preexec hooks: every command
//! the user runs is parsed for `KEY=VALUE` style assignments, and any
//! match is offered to the GUI ("store this in envseal so you don't
//! have to paste it again?").
//!
//! Design constraints:
//! - **Fast.** Runs synchronously before every shell command. Must
//!   add no perceptible latency to interactive use; target < 1 ms
//!   per command for the typical case (no match).
//! - **Conservative.** The cost of a false positive is interrupting
//!   the developer with a prompt they don't want. Every detection
//!   has a stated [`Confidence`] tier, and the hook only prompts on
//!   `High`.
//! - **No regex engine dep.** The crate is intentionally regex-free
//!   to keep build size small and avoid pulling in `regex` for one
//!   use site. Detection runs on byte-string matchers.

use std::collections::BTreeSet;

/// One candidate secret found in a shell command.
///
/// Produced by [`detect_in_command`]: at most one per distinct
/// `(env_var, value)` assignment that classifies as a secret.
///
/// NOTE(review): `Debug` is derived and `value` holds the secret in
/// plaintext, so `{:?}` formatting prints it verbatim — avoid logging
/// instances of this type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct DetectedSecret {
    /// Environment variable name on the left side of `=`.
    pub env_var: String,
    /// Raw value (quotes stripped, no expansion attempted).
    pub value: String,
    /// Recognized provider, if a known prefix matched.
    /// `Some("JWT")` is used for JWT-shaped values; `None` for
    /// detections based only on the variable name and/or entropy.
    pub provider: Option<&'static str>,
    /// How confident we are this is a real secret.
    pub confidence: Confidence,
}

/// Detection-confidence tier. The shell hook prompts only on
/// [`Confidence::High`]; the others are reported by `envseal scan`
/// but not surfaced as live interruptions.
///
/// The derived `Ord`/`PartialOrd` follow declaration order, so
/// `High < Medium < Low`: the *strongest* tier compares as the
/// smallest value. Keep that in mind before using `min`/`max` or
/// sorting on this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Confidence {
    /// The value matches a published provider prefix (e.g. `sk-ant-…`,
    /// `ghp_…`, `AKIA…`). Almost certainly a secret.
    High,
    /// The env var name is a known secret-bearing name (`*_API_KEY`,
    /// `*_TOKEN`, `*_PASSWORD`, …) and the value has high entropy.
    Medium,
    /// Generic high-entropy string with no other signals.
    Low,
}

/// Provider prefix table. Each entry is `(prefix, provider_name,
/// minimum_length_after_prefix)`. Order is meaningful only for tie-
/// breaking; longest match wins so e.g. `sk-ant-` is reported as
/// Anthropic, not `OpenAI`.
///
/// The third field is a byte count: a prefix hit is only reported
/// when the value is at least `prefix.len() + minimum` bytes long.
/// Prefix comparison is case-sensitive.
const PROVIDER_PREFIXES: &[(&str, &str, usize)] = &[
    ("sk-ant-", "Anthropic", 20),
    ("sk-proj-", "OpenAI", 20),
    ("sk-", "OpenAI", 20),
    ("ghp_", "GitHub", 36),
    ("gho_", "GitHub", 36),
    ("ghu_", "GitHub", 36),
    ("ghs_", "GitHub", 36),
    ("ghr_", "GitHub", 36),
    ("github_pat_", "GitHub", 40),
    ("xoxa-", "Slack", 10),
    ("xoxb-", "Slack", 10),
    ("xoxp-", "Slack", 10),
    ("xoxr-", "Slack", 10),
    ("xoxs-", "Slack", 10),
    ("AKIA", "AWS", 16),
    ("ASIA", "AWS", 16),
    ("ANPA", "AWS", 16),
    ("AROA", "AWS", 16),
    ("AIDA", "AWS", 16),
    ("dp.st.", "Doppler", 20),
    ("dp.pt.", "Doppler", 20),
    ("dp.ct.", "Doppler", 20),
    ("dp.sa.", "Doppler", 20),
    ("glpat-", "GitLab", 20),
    ("sk_live_", "Stripe", 24),
    ("sk_test_", "Stripe", 24),
    ("rk_live_", "Stripe", 24),
    ("rk_test_", "Stripe", 24),
    ("psk_", "Postmark", 20),
    ("EAA", "Facebook", 20),
    ("ya29.", "Google OAuth", 20),
    ("AIza", "Google API", 35),
    ("hf_", "HuggingFace", 30),
];

/// Suffix patterns on env-var names that strongly suggest a secret
/// payload. We require the *value* to clear the entropy floor before
/// a name match alone produces a [`Confidence::Medium`] hit.
///
/// Entries are upper-case because the variable name is upper-cased
/// before the suffix comparison, making the match case-insensitive.
/// Every entry starts with `_`, so a bare name such as `TOKEN` (no
/// preceding segment) does not match.
const SECRET_NAME_SUFFIXES: &[&str] = &[
    "_API_KEY",
    "_APIKEY",
    "_TOKEN",
    "_SECRET",
    "_PASSWORD",
    "_PASSWD",
    "_PWD",
    "_KEY",
    "_AUTH",
    "_CREDENTIAL",
    "_CREDENTIALS",
];

/// Names that are NEVER worth prompting on, even if they end in
/// `_KEY` or `_TOKEN`. Reduces false positives on trivially-public
/// configuration knobs.
///
/// Matching rules, as applied by `name_suggests_secret` to the
/// upper-cased variable name:
/// - an entry ending in `_` is a *prefix* (`TF_VAR_` denies every
///   `TF_VAR_*` name);
/// - any other entry must equal the *whole* name, so `PUBLIC_KEY`
///   does not deny e.g. `SSH_PUBLIC_KEY`.
///
/// The denylist only suppresses the name-based signal; a value with a
/// known provider prefix is still reported regardless of its name.
const NAME_DENYLIST: &[&str] = &[
    "PUBLIC_KEY",
    "AWS_ACCESS_KEY_ID",
    "GPG_KEY_ID",
    "DEPLOY_KEY_ID",
    "TF_VAR_", // trailing `_` marks a prefix entry — matched with `starts_with`
];

/// Minimum entropy (bits/char, Shannon over byte alphabet) for the
/// value of a name-only match to be reported as [`Confidence::Medium`].
/// The comparison is inclusive (`>=`).
const MEDIUM_ENTROPY_FLOOR: f64 = 3.5;

/// Minimum entropy for a value to be reported as [`Confidence::Low`]
/// purely on entropy grounds (no name or prefix signal).
/// The comparison is inclusive (`>=`), and the value must additionally
/// be at least 24 bytes long.
const LOW_ENTROPY_FLOOR: f64 = 4.5;

/// Minimum value length we'll consider — anything shorter is almost
/// certainly not a secret in production use.
/// Measured in bytes (`str::len`) after quote stripping, and checked
/// before any classification runs.
const MIN_VALUE_LEN: usize = 16;

/// Scan one shell command line for assignments whose value looks
/// like an API key or other secret.
///
/// Only the leading assignments of `cmd` are considered (see
/// `extract_assignments`). Values shorter than [`MIN_VALUE_LEN`] bytes
/// are ignored, and an identical `(name, value)` pair is classified
/// at most once. Detections are returned in the order the
/// assignments appear in `cmd`.
pub fn detect_in_command(cmd: &str) -> Vec<DetectedSecret> {
    // Remembers every (name, value) pair already handled so repeated
    // assignments yield a single detection.
    let mut already_seen = BTreeSet::new();

    extract_assignments(cmd)
        .into_iter()
        .filter(|(_, value)| value.len() >= MIN_VALUE_LEN)
        .filter(|(name, value)| already_seen.insert((name.clone(), value.clone())))
        .filter_map(|(name, value)| classify(&name, &value))
        .collect()
}

fn classify(env_var: &str, value: &str) -> Option<DetectedSecret> {
    // 1. Provider-prefix match — strongest signal.
    if let Some((provider, prefix_len, min_after)) = match_provider_prefix(value) {
        if value.len() >= prefix_len + min_after {
            return Some(DetectedSecret {
                env_var: env_var.to_string(),
                value: value.to_string(),
                provider: Some(provider),
                confidence: Confidence::High,
            });
        }
    }

    // 2. JWT shape: three base64url segments separated by '.'
    if looks_like_jwt(value) {
        return Some(DetectedSecret {
            env_var: env_var.to_string(),
            value: value.to_string(),
            provider: Some("JWT"),
            confidence: Confidence::High,
        });
    }

    // 3. Name-suggests-secret + entropy floor
    if name_suggests_secret(env_var) && shannon_bits_per_char(value) >= MEDIUM_ENTROPY_FLOOR {
        return Some(DetectedSecret {
            env_var: env_var.to_string(),
            value: value.to_string(),
            provider: None,
            confidence: Confidence::Medium,
        });
    }

    // 4. Generic high-entropy fallback
    if shannon_bits_per_char(value) >= LOW_ENTROPY_FLOOR && value.len() >= 24 {
        return Some(DetectedSecret {
            env_var: env_var.to_string(),
            value: value.to_string(),
            provider: None,
            confidence: Confidence::Low,
        });
    }

    None
}

/// Find the longest entry of `PROVIDER_PREFIXES` that `value` starts
/// with.
///
/// Returns `(provider_name, prefix_len, min_len_after_prefix)` for
/// that entry, or `None` when no prefix matches. If two matching
/// entries have the same prefix length, the one listed first in the
/// table is kept. The length requirement is NOT checked here; the
/// caller compares it against `value.len()`.
fn match_provider_prefix(value: &str) -> Option<(&'static str, usize, usize)> {
    PROVIDER_PREFIXES
        .iter()
        .filter(|entry| value.starts_with(entry.0))
        .fold(
            None::<(&'static str, usize, usize)>,
            |longest, &(prefix, provider, min_after)| match longest {
                // An earlier entry that is at least as long stays.
                Some((_, longest_len, _)) if longest_len >= prefix.len() => longest,
                _ => Some((provider, prefix.len(), min_after)),
            },
        )
}

/// Report whether an environment-variable name, compared
/// case-insensitively, looks like it carries a secret.
///
/// A name qualifies when it ends with one of `SECRET_NAME_SUFFIXES`
/// and is not excluded by `NAME_DENYLIST`. Denylist entries ending in
/// `_` are treated as prefixes; all other entries must equal the
/// entire name.
fn name_suggests_secret(env_var: &str) -> bool {
    let name = env_var.to_ascii_uppercase();

    let denied = NAME_DENYLIST.iter().any(|entry| {
        let is_prefix_entry = entry.ends_with('_');
        (is_prefix_entry && name.starts_with(*entry)) || (!is_prefix_entry && name == **entry)
    });

    !denied
        && SECRET_NAME_SUFFIXES
            .iter()
            .any(|suffix| name.ends_with(*suffix))
}

/// Report whether `value` has the shape of a compact-serialized JWT.
///
/// Requirements:
/// - starts with `eyJ` (base64url of `{"`, the start of a JSON header),
/// - at least 40 bytes long,
/// - exactly three non-empty segments separated by `.`,
/// - every segment consists solely of base64url characters
///   (`A-Z a-z 0-9 - _`); trailing-style `=` padding is tolerated for
///   encoders that do not strip it.
///
/// The alphabet check keeps free-form text that merely starts with
/// `eyJ` and contains two dots from being reported as a
/// high-confidence secret. Segments are not decoded or verified.
fn looks_like_jwt(value: &str) -> bool {
    fn is_base64url_byte(b: u8) -> bool {
        b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'=')
    }

    if value.len() < 40 || !value.starts_with("eyJ") {
        return false;
    }

    // Walk the segments without collecting them; bail out as soon as
    // a fourth segment, an empty segment or a foreign byte shows up.
    let mut segment_count = 0usize;
    for segment in value.split('.') {
        segment_count += 1;
        if segment_count > 3
            || segment.is_empty()
            || !segment.bytes().all(is_base64url_byte)
        {
            return false;
        }
    }
    segment_count == 3
}

/// Shannon entropy of `s`, in bits per byte, computed over the
/// frequencies of its individual bytes.
///
/// Returns `0.0` for the empty string. Standard library only; a
/// single pass over the input plus a pass over a 256-slot histogram.
#[allow(clippy::cast_precision_loss)] // strings here are tens to hundreds of bytes; f64 loss only matters above 2^52
fn shannon_bits_per_char(s: &str) -> f64 {
    let bytes = s.as_bytes();
    if bytes.is_empty() {
        return 0.0;
    }

    // Occurrence count for every possible byte value.
    let mut histogram = [0u32; 256];
    for &byte in bytes {
        histogram[usize::from(byte)] += 1;
    }

    let total = bytes.len() as f64;
    histogram
        .iter()
        .filter(|&&occurrences| occurrences != 0)
        .fold(0.0, |entropy, &occurrences| {
            let probability = f64::from(occurrences) / total;
            entropy - probability * probability.log2()
        })
}

/// Extract `(name, value)` pairs from a shell command. Handles:
///
/// - Leading inline assignments: `FOO=bar BAZ=qux node app.js`
/// - `export FOO=bar`, `export FOO="bar baz"`, `export FOO='bar'`
/// - `env FOO=bar BAZ=qux command`
/// - `declare` / `typeset` (and `export` / `env`) followed by option
///   flags: `declare -x FOO=bar`, `env -i FOO=bar command`
///
/// Stops at the first non-assignment word (the command name itself).
/// Values are returned with one layer of surrounding quotes removed;
/// no expansion is performed. Pipelines, subshells and arguments
/// after the command word are not inspected.
fn extract_assignments(cmd: &str) -> Vec<(String, String)> {
    let trimmed = cmd.trim();
    if trimmed.is_empty() {
        return Vec::new();
    }

    // Tokenize on shell whitespace, respecting single/double quotes.
    let tokens = tokenize_shell(trimmed);

    let mut out = Vec::new();
    for (idx, tok) in tokens.iter().enumerate() {
        // Assignment-introducing builtins/wrappers. Other wrappers
        // (e.g. `sudo`) are treated as ordinary command words.
        if matches!(tok.as_str(), "export" | "env" | "declare" | "typeset") {
            // Skip option flags directly after the wrapper so that
            // `declare -x FOO=…` and `env -i FOO=…` are still
            // inspected. A flag's separate argument (`env -u NAME`)
            // is not an assignment and simply ends the scan.
            let assignments = tokens[idx + 1..]
                .iter()
                .skip_while(|t| t.starts_with('-'))
                // After the wrapper only assignments are accepted;
                // the first non-assignment ends collection.
                .map_while(|t| parse_assignment(t));
            out.extend(assignments);
            return out;
        }

        match parse_assignment(tok) {
            Some(pair) => out.push(pair),
            // First non-assignment token is the program name —
            // stop scanning. We intentionally do NOT recurse into
            // pipelines or subshells; the next preexec firing will
            // see them.
            None => break,
        }
    }
    out
}

/// Split a single shell token of the form `NAME=value` into its
/// parts.
///
/// The split happens at the first `=`. Returns `None` when the token
/// has no `=`, when nothing precedes it, or when the left side is
/// not a valid environment-variable name. One layer of matching
/// surrounding quotes is removed from the value.
fn parse_assignment(tok: &str) -> Option<(String, String)> {
    let (name, raw_value) = tok.split_once('=')?;
    if name.is_empty() || !is_valid_env_name(name) {
        return None;
    }
    let value = strip_quotes(raw_value);
    Some((name.to_owned(), value.to_owned()))
}

/// Check that `s` is a shell-style identifier: non-empty, first byte
/// an ASCII letter or `_`, every following byte an ASCII letter,
/// ASCII digit or `_`.
fn is_valid_env_name(s: &str) -> bool {
    match s.as_bytes().split_first() {
        None => false,
        Some((&head, tail)) => {
            (head == b'_' || head.is_ascii_alphabetic())
                && tail
                    .iter()
                    .all(|&byte| byte == b'_' || byte.is_ascii_alphanumeric())
        }
    }
}

/// Remove one layer of matching surrounding quotes from `value`.
///
/// If `value` both starts and ends with `"` (or both with `'`) and
/// is at least two bytes long, the inner slice is returned; otherwise
/// `value` is returned unchanged. Quotes inside the value are left
/// alone.
fn strip_quotes(value: &str) -> &str {
    let double_quoted = value
        .strip_prefix('"')
        .and_then(|rest| rest.strip_suffix('"'));
    let single_quoted = value
        .strip_prefix('\'')
        .and_then(|rest| rest.strip_suffix('\''));
    double_quoted.or(single_quoted).unwrap_or(value)
}

/// Minimal shell-style tokenizer: splits `s` on whitespace, except
/// inside `"..."` and `'...'` regions or directly after a backslash.
///
/// Nothing is expanded or removed — quote characters and backslashes
/// stay in the returned tokens. A backslash escapes the next
/// character everywhere except inside single quotes. Unterminated
/// quotes simply run to the end of the input.
fn tokenize_shell(s: &str) -> Vec<String> {
    /// Which kind of quoted region the scanner is currently inside.
    #[derive(Clone, Copy)]
    enum Quote {
        None,
        Single,
        Double,
    }

    let mut tokens = Vec::new();
    let mut current = String::new();
    let mut quote = Quote::None;
    let mut after_backslash = false;

    for ch in s.chars() {
        if after_backslash {
            // The escaped character is taken literally.
            after_backslash = false;
            current.push(ch);
            continue;
        }

        match (ch, quote) {
            // Unquoted whitespace ends the current token (if any).
            (c, Quote::None) if c.is_whitespace() => {
                if !current.is_empty() {
                    tokens.push(std::mem::take(&mut current));
                }
                continue;
            }
            // Backslash is an escape everywhere but in single quotes.
            ('\\', Quote::None | Quote::Double) => after_backslash = true,
            ('\'', Quote::None) => quote = Quote::Single,
            ('\'', Quote::Single) => quote = Quote::None,
            ('"', Quote::None) => quote = Quote::Double,
            ('"', Quote::Double) => quote = Quote::None,
            _ => {}
        }
        current.push(ch);
    }

    if !current.is_empty() {
        tokens.push(current);
    }
    tokens
}

/// Unit tests for command-line secret detection: provider prefixes,
/// JWT shape, name + entropy heuristics, quoting and tokenization
/// edge cases.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for [`detect_in_command`].
    fn detect(cmd: &str) -> Vec<DetectedSecret> {
        detect_in_command(cmd)
    }

    #[test]
    fn detects_openai_prefix() {
        let d = detect("OPENAI_API_KEY=sk-abc123def456ghi789jklmnopqrs node app.js");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].env_var, "OPENAI_API_KEY");
        assert_eq!(d[0].provider, Some("OpenAI"));
        assert_eq!(d[0].confidence, Confidence::High);
    }

    #[test]
    fn detects_anthropic_more_specific_than_openai() {
        let d = detect("KEY=sk-ant-api03-abcdefghijklmnopqrstuvwxyz1234567890 cmd");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].provider, Some("Anthropic"));
    }

    #[test]
    fn detects_github_token() {
        let d = detect("export GITHUB_TOKEN=ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].provider, Some("GitHub"));
        assert_eq!(d[0].confidence, Confidence::High);
    }

    #[test]
    fn detects_aws_access_key() {
        let d = detect("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE python deploy.py");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].provider, Some("AWS"));
    }

    #[test]
    fn detects_jwt() {
        let jwt = "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIn0.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c";
        let d = detect(&format!("AUTH=Bearer {jwt}"));
        // The Bearer prefix means tokenization yields AUTH=Bearer not AUTH=jwt;
        // Bearer string is short so won't match. JWT in plain form:
        let d2 = detect(&format!("TOKEN={jwt}"));
        assert_eq!(d2.len(), 1);
        assert_eq!(d2[0].provider, Some("JWT"));
        // The first one shouldn't trigger high — Bearer alone is not a JWT.
        assert!(d.is_empty() || d[0].confidence != Confidence::High);
    }

    #[test]
    fn ignores_short_values() {
        let d = detect("FOO=bar make build");
        assert!(d.is_empty());
    }

    #[test]
    fn ignores_pure_filenames() {
        let d = detect("cargo run --bin envseal");
        assert!(d.is_empty());
    }

    #[test]
    fn handles_double_quotes() {
        let d = detect(r#"export OPENAI_API_KEY="sk-abc123def456ghi789jklmnopqrs""#);
        assert_eq!(d.len(), 1);
        assert!(!d[0].value.starts_with('"'));
    }

    #[test]
    fn handles_single_quotes() {
        let d = detect("export GITHUB_TOKEN='ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789'");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].provider, Some("GitHub"));
    }

    #[test]
    fn multiple_assignments_in_one_command() {
        let d = detect(
            "OPENAI_API_KEY=sk-aaaaaaaaaaaaaaaaaaaaaa GITHUB_TOKEN=ghp_AbCdEfGhIjKlMnOpQrStUvWxYz0123456789 node app.js",
        );
        assert_eq!(d.len(), 2);
        assert_eq!(d[0].provider, Some("OpenAI"));
        assert_eq!(d[1].provider, Some("GitHub"));
    }

    #[test]
    fn name_match_with_high_entropy() {
        // No prefix, but name ends in _SECRET and value has high entropy.
        let d = detect("MY_SECRET=A8b7C9d1E2f3G4h5I6j7K8l9M0n1O2p3 node");
        assert_eq!(d.len(), 1);
        assert_eq!(d[0].confidence, Confidence::Medium);
    }

    #[test]
    fn name_match_low_entropy_skipped() {
        // Name matches, but the value is repetitive: "passwordpassword"
        // is 16 bytes at 2.75 bits/char — long enough to be considered,
        // yet below both entropy floors and without a provider prefix,
        // so nothing may be reported. (A value like "password1234567890"
        // would NOT do here: its 17 distinct bytes give ~4.06 bits/char,
        // which clears the medium floor.)
        let value = "passwordpassword";
        assert!(value.len() >= MIN_VALUE_LEN);
        assert!(shannon_bits_per_char(value) < MEDIUM_ENTROPY_FLOOR);
        let d = detect(&format!("DB_PASSWORD={value} node"));
        assert!(d.is_empty());
    }

    #[test]
    fn aws_access_key_id_is_denylisted_for_name_match() {
        // The id alone is published in many examples; we rely on
        // the AKIA prefix match for high confidence, not the name.
        let d = detect("AWS_ACCESS_KEY_ID=not-a-real-id-just-public-info node");
        assert!(d.is_empty() || d[0].confidence != Confidence::Medium);
    }

    #[test]
    fn ignores_malformed_assignment() {
        let d = detect("=sk-abc123def456ghi789jklmnopqrs cmd");
        assert!(d.is_empty());
    }

    #[test]
    fn stops_scanning_at_command_word() {
        // After `node`, even `KEY=sk-...` is an argument to node, not env.
        // We DO detect leading FOO=... but not args after command.
        let d = detect("FOO=value1 node KEY=sk-abc123def456ghi789jklmnopqrs");
        // FOO is too short to qualify, and KEY=... appears after node so it's skipped.
        assert!(d.iter().all(|x| x.env_var != "KEY"));
    }

    #[test]
    fn empty_command_returns_empty() {
        assert!(detect("").is_empty());
        assert!(detect("   ").is_empty());
    }

    #[test]
    fn entropy_calculation_basic() {
        assert!(shannon_bits_per_char("aaaaaaaa") < 0.5);
        // Random-ish 32 chars should have > 4 bits/char
        assert!(shannon_bits_per_char("A8b7C9d1E2f3G4h5I6j7K8l9M0n1O2p3") > 4.0);
    }

    #[test]
    fn jwt_must_have_three_segments() {
        assert!(looks_like_jwt(
            "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0.signaturepartheresignaturehere"
        ));
        assert!(!looks_like_jwt(
            "eyJhbGciOiJIUzI1NiJ9.eyJzdWIiOiIxMjM0NTY3ODkwIn0"
        ));
        assert!(!looks_like_jwt("notajwt.evenclose.tothis"));
    }
}