sanitize_engine/
secrets.rs

1//! Encrypted secrets management.
2//!
3//! This module provides **in-memory-only** decryption of user-supplied
4//! secrets files. Secrets are never written to disk in plaintext form;
5//! they are loaded from an AES-256-GCM encrypted `.enc` file, decrypted
6//! into memory, parsed, and converted directly into [`ScanPattern`]s
7//! for the streaming scanner.
8//!
9//! # Encryption Format
10//!
11//! ```text
12//! ┌────────────────────────────────┬──────────────┬─────────────────────────────┐
13//! │  Salt (32 B)                   │  Nonce (12 B)│  AES-256-GCM Ciphertext     │
14//! └────────────────────────────────┴──────────────┴─────────────────────────────┘
15//! ```
16//!
17//! - **Salt** (32 bytes): random, used for PBKDF2-derived key.
18//! - **Nonce** (12 bytes): random, for AES-256-GCM.
19//! - **Ciphertext**: authenticated encryption of the plaintext secrets
20//!   file (JSON / YAML / TOML).
21//!
22//! The 256-bit AES key is derived from the user password using
23//! PBKDF2-HMAC-SHA256 with 600 000 iterations, which meets current
24//! OWASP recommendations.
25//!
26//! # Key Derivation
27//!
28//! ```text
29//! key = PBKDF2-HMAC-SHA256(password, salt, iterations=600_000, dkLen=32)
30//! ```
31//!
32//! # Secrets File Schema
33//!
34//! The plaintext secrets file (before encryption) must deserialize to
35//! `Vec<SecretEntry>`:
36//!
37//! ```json
38//! [
39//!   {
40//!     "pattern": "alice@corp\\.com",
41//!     "kind": "regex",
42//!     "category": "email",
43//!     "label": "alice_email"
44//!   },
45//!   {
46//!     "pattern": "sk-proj-abc123secret",
47//!     "kind": "literal",
48//!     "category": "custom:api_key",
49//!     "label": "openai_key"
50//!   }
51//! ]
52//! ```
53//!
54//! # Thread Safety
55//!
56//! All public types are `Send + Sync`. Decrypted secrets use
57//! [`zeroize::Zeroizing`] to scrub plaintext from memory on drop.
58//!
59//! # Security Considerations
60//!
61//! - AES-256-GCM provides both confidentiality and integrity (AEAD).
62//! - PBKDF2 with 600 000 iterations resists offline brute-force attacks.
63//! - Decrypted plaintext is held in [`Zeroizing<Vec<u8>>`] and zeroed
64//!   on drop.
65//! - The plaintext secrets file is never written to disk by this crate.
66//! - Nonce and salt are generated with OS CSPRNG (`rand`).
67
68use crate::category::Category;
69use crate::error::{Result, SanitizeError};
70use crate::scanner::ScanPattern;
71
/// Result of compiling secret entries into patterns.
///
/// The first element holds the successfully compiled [`ScanPattern`]s. The
/// second holds one `(index, error)` pair per entry that failed to compile,
/// where `index` is the entry's position in the input slice.
pub type PatternCompileResult = (Vec<ScanPattern>, Vec<(usize, SanitizeError)>);
75
76use aes_gcm::aead::{Aead, KeyInit};
77use aes_gcm::{Aes256Gcm, Nonce};
78use hmac::Hmac;
79use rand::RngCore;
80use serde::{Deserialize, Serialize};
81use sha2::Sha256;
82use zeroize::{Zeroize, Zeroizing};
83
84// ---------------------------------------------------------------------------
85// Constants
86// ---------------------------------------------------------------------------
87
/// Salt length for PBKDF2 key derivation (bytes).
const SALT_LEN: usize = 32;

/// AES-GCM nonce length (bytes). Must be 12 for AES-256-GCM.
const NONCE_LEN: usize = 12;

/// PBKDF2 iteration count — OWASP 2023+ recommendation.
const PBKDF2_ITERATIONS: u32 = 600_000;

/// Minimum size of an encrypted blob: salt + nonce + the 16-byte AES-GCM
/// authentication tag. Anything shorter is rejected before key derivation.
const MIN_ENCRYPTED_LEN: usize = SALT_LEN + NONCE_LEN + 16;

/// Maximum size of a plaintext secrets file accepted by [`parse_secrets`].
/// Prevents OOM from accidentally passing a large binary or log file as secrets.
const MAX_SECRETS_PLAINTEXT_BYTES: usize = 10 * 1024 * 1024; // 10 MiB
103
104// ---------------------------------------------------------------------------
105// Secrets file schema
106// ---------------------------------------------------------------------------
107
/// A single secret entry as stored in the (plaintext) secrets file.
///
/// After decryption the entries are parsed from JSON, YAML, or TOML and
/// converted into [`ScanPattern`]s.
///
/// Every field has a serde default, so any of them may be omitted in the
/// secrets file.
///
/// The type has a manual [`Drop`] implementation that calls
/// [`Zeroize::zeroize`] on its string fields to scrub sensitive pattern data
/// from memory when an entry is no longer needed (S-1 fix). Clones are
/// independent values and are scrubbed separately when they are dropped.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretEntry {
    /// The pattern string (regex or literal text).
    ///
    /// For `kind: allow` entries this is the single allowlist pattern.
    /// Omit when using [`values`](Self::values) instead; an omitted pattern
    /// deserializes to the empty string.
    #[serde(default)]
    pub pattern: String,

    /// `"regex"`, `"literal"`, `"allow"`, `"entropy"`, or `"field-name"`.
    /// Defaults to `"literal"` when omitted.
    ///
    /// `"field-name"` entries are not compiled into scanner patterns — they
    /// are extracted separately and injected into structured-processor profiles
    /// as field-name signals.  The `pattern` field is a case-insensitive
    /// regex matched against bare field/key names; `threshold` controls the
    /// entropy gate (defaults to `3.5` bits/char when omitted).
    #[serde(default = "default_kind")]
    pub kind: String,

    /// Category string. Supported values:
    /// `email`, `name`, `phone`, `ipv4`, `ipv6`, `credit_card`, `ssn`,
    /// `hostname`, `mac_address`, `container_id`, `uuid`, `jwt`,
    /// `auth_token`, `file_path`, `windows_sid`, `url`, `aws_arn`,
    /// `azure_resource_id`, or `custom:<tag>`.
    /// Defaults to `custom:secret` when omitted.
    #[serde(default = "default_category")]
    pub category: String,

    /// Human-readable label for stats reporting. Defaults to a truncated
    /// version of `pattern` if omitted.
    #[serde(default)]
    pub label: Option<String>,

    /// Multiple allowlist patterns for `kind: allow` entries.
    ///
    /// When non-empty, used instead of `pattern`. Allows a single entry to
    /// allowlist many values compactly:
    ///
    /// ```toml
    /// [[secrets]]
    /// kind = "allow"
    /// values = ["localhost", "true", "false", "null", "0.0.0.0"]
    /// ```
    ///
    /// Left out of serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub values: Vec<String>,

    // ── Entropy-detection fields (only used when kind = "entropy") ──────────
    // Each of these is left out of serialized output when it is `None`.
    /// Minimum token length to consider (default: 20).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub min_length: Option<usize>,

    /// Maximum token length to consider (default: 200).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub max_length: Option<usize>,

    /// Shannon entropy threshold in bits per character (default: 4.5).
    /// Tokens whose entropy is at or above this value are flagged.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub threshold: Option<f64>,

    /// Character set the token must consist of exclusively.
    /// `"alphanumeric"` (default), `"base64"`, `"hex"`, or `"any"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub charset: Option<String>,
}
179
180impl Drop for SecretEntry {
181    fn drop(&mut self) {
182        self.pattern.zeroize();
183        self.kind.zeroize();
184        self.category.zeroize();
185        if let Some(ref mut l) = self.label {
186            l.zeroize();
187        }
188        for v in &mut self.values {
189            v.zeroize();
190        }
191        if let Some(ref mut s) = self.charset {
192            s.zeroize();
193        }
194    }
195}
196
/// Serde default for an entry's `kind`: entries without one are literals.
fn default_kind() -> String {
    String::from("literal")
}
200
/// Serde default for an entry's `category`: the generic custom secret tag.
fn default_category() -> String {
    String::from("custom:secret")
}
204
/// Supported plaintext file formats for secrets.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretsFormat {
    /// JSON document.
    Json,
    /// YAML document.
    Yaml,
    /// TOML document with the entries under a top-level `secrets` key.
    Toml,
}

impl SecretsFormat {
    /// Detect format from file extension.
    ///
    /// A trailing `.enc` suffix is removed first, so `secrets.yaml.enc` is
    /// recognised as YAML. Both the suffix and the extension are matched
    /// ASCII case-insensitively. Returns `None` when the extension is missing
    /// or not one of `json`, `yaml`, `yml`, `toml`.
    pub fn from_extension(path: &str) -> Option<Self> {
        const ENC_SUFFIX: &str = ".enc";

        // Strip the `.enc` suffix if present. `get` returns `None` when the
        // split point is not a char boundary, in which case the tail cannot
        // be the ASCII suffix anyway.
        let split = path.len().saturating_sub(ENC_SUFFIX.len());
        let base = match path.get(split..) {
            Some(suffix) if suffix.eq_ignore_ascii_case(ENC_SUFFIX) => &path[..split],
            _ => path,
        };

        let ext = std::path::Path::new(base).extension()?;
        if ext.eq_ignore_ascii_case("json") {
            Some(Self::Json)
        } else if ext.eq_ignore_ascii_case("yaml") || ext.eq_ignore_ascii_case("yml") {
            Some(Self::Yaml)
        } else if ext.eq_ignore_ascii_case("toml") {
            Some(Self::Toml)
        } else {
            None
        }
    }

    /// Try to auto-detect format from content.
    ///
    /// Only the first non-blank, non-comment line is inspected:
    ///
    /// - a TOML table header (`[name]` or `[[name]]`) → TOML,
    /// - any other line starting with `[` or `{` → JSON,
    /// - a line starting with `-` (list item or `---` marker) → YAML,
    /// - anything else, including empty input → TOML.
    ///
    /// This is a best-effort guess; a wrong guess surfaces as a parse error
    /// in the guessed format.
    pub fn detect(content: &[u8]) -> Self {
        let text = String::from_utf8_lossy(content);
        // Skip leading comment lines — both YAML and TOML use `#`, so a file
        // that opens with comments must be scanned further to find the first
        // meaningful token.
        let first_meaningful = text
            .lines()
            .map(str::trim)
            .find(|line| !line.is_empty() && !line.starts_with('#'))
            .unwrap_or("");

        if Self::is_toml_table_header(first_meaningful) {
            // `[` is ambiguous between JSON arrays and TOML headers. A
            // bracketed bare key such as `[[secrets]]` is not valid JSON, so
            // it is safe to claim it for TOML before the JSON check.
            Self::Toml
        } else if first_meaningful.starts_with('[') || first_meaningful.starts_with('{') {
            Self::Json
        } else if first_meaningful.starts_with('-') {
            // Covers both `- item` and the `---` document marker.
            Self::Yaml
        } else {
            // Fallback: assume TOML
            Self::Toml
        }
    }

    /// Returns `true` when `line` is a TOML `[table]` or `[[array-of-tables]]`
    /// header with a bare (optionally dotted) key, optionally followed by a
    /// `#` comment.
    ///
    /// The key must start with an ASCII letter or `_` and must not be one of
    /// the JSON literals `true` / `false` / `null`, so JSON arrays of scalars
    /// such as `[1]` or `[[true]]` are never mistaken for headers.
    fn is_toml_table_header(line: &str) -> bool {
        let (rest, closer) = if let Some(rest) = line.strip_prefix("[[") {
            (rest, "]]")
        } else if let Some(rest) = line.strip_prefix('[') {
            (rest, "]")
        } else {
            return false;
        };
        let (key, tail) = match rest.split_once(closer) {
            Some(parts) => parts,
            None => return false,
        };
        let key = key.trim();
        let tail = tail.trim_start();

        let starts_like_key = key.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_');
        let is_bare_key = key
            .chars()
            .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'));
        let is_json_literal = matches!(key, "true" | "false" | "null");

        starts_like_key
            && is_bare_key
            && !is_json_literal
            && (tail.is_empty() || tail.starts_with('#'))
    }
}
256
257// ---------------------------------------------------------------------------
// TOML wrappers — a TOML document is always a top-level table, so the entries live under a `secrets` key
259// ---------------------------------------------------------------------------
260
/// Wrapper for TOML deserialization: `secrets = [...]`
///
/// [`parse_secrets`] deserializes a TOML document into this type and returns
/// its `secrets` list. The field has no serde default, so the key must be
/// present in the document.
#[derive(Deserialize)]
struct TomlSecrets {
    /// Entries stored under the top-level `secrets` key.
    secrets: Vec<SecretEntry>,
}
266
/// Wrapper for TOML serialization.
///
/// Borrowing counterpart of [`TomlSecrets`]: [`serialize_secrets`] uses it to
/// emit the entries under a top-level `secrets` key without cloning them.
#[derive(Serialize)]
struct TomlSecretsRef<'a> {
    /// Entries to write under the top-level `secrets` key.
    secrets: &'a [SecretEntry],
}
272
273// ---------------------------------------------------------------------------
274// Key derivation
275// ---------------------------------------------------------------------------
276
277/// Derive a 256-bit AES key from a password and salt using PBKDF2.
278fn derive_key(password: &[u8], salt: &[u8]) -> Zeroizing<[u8; 32]> {
279    let mut key = Zeroizing::new([0u8; 32]);
280    pbkdf2::pbkdf2::<Hmac<Sha256>>(password, salt, PBKDF2_ITERATIONS, key.as_mut())
281        .expect("PBKDF2 output length is valid");
282    key
283}
284
285// ---------------------------------------------------------------------------
286// Encryption
287// ---------------------------------------------------------------------------
288
289/// Encrypt a plaintext secrets file.
290///
291/// Returns the encrypted blob: `salt (32) || nonce (12) || ciphertext`.
292///
293/// # Arguments
294///
295/// - `plaintext` — raw bytes of the secrets file (JSON / YAML / TOML).
296/// - `password` — user-supplied password.
297///
298/// # Errors
299///
300/// Returns [`SanitizeError::SecretsEmptyPassword`] if the password is empty, or
301/// [`SanitizeError::SecretsCipherError`] if encryption fails.
302///
303/// # Security
304///
305/// - Salt and nonce are generated with CSPRNG.
306/// - Key is derived with PBKDF2 (600 000 iterations).
307/// - AES-256-GCM provides authenticated encryption.
308pub fn encrypt_secrets(plaintext: &[u8], password: &str) -> Result<Vec<u8>> {
309    if password.is_empty() {
310        return Err(SanitizeError::SecretsEmptyPassword);
311    }
312
313    let mut rng = rand::rng();
314
315    // Generate random salt and nonce.
316    let mut salt = [0u8; SALT_LEN];
317    rng.fill_bytes(&mut salt);
318
319    let mut nonce_bytes = [0u8; NONCE_LEN];
320    rng.fill_bytes(&mut nonce_bytes);
321    let nonce = Nonce::from_slice(&nonce_bytes);
322
323    // Derive key.
324    let key = derive_key(password.as_bytes(), &salt);
325    let cipher = Aes256Gcm::new_from_slice(key.as_ref())
326        .map_err(|e| SanitizeError::SecretsCipherError(format!("cipher init: {}", e)))?;
327
328    // Encrypt.
329    let ciphertext = cipher
330        .encrypt(nonce, plaintext)
331        .map_err(|e| SanitizeError::SecretsCipherError(format!("encryption: {}", e)))?;
332
333    // Assemble: salt || nonce || ciphertext
334    let mut output = Vec::with_capacity(SALT_LEN + NONCE_LEN + ciphertext.len());
335    output.extend_from_slice(&salt);
336    output.extend_from_slice(&nonce_bytes);
337    output.extend_from_slice(&ciphertext);
338
339    Ok(output)
340}
341
342// ---------------------------------------------------------------------------
343// Decryption
344// ---------------------------------------------------------------------------
345
346/// Decrypt an encrypted secrets blob in memory.
347///
348/// Returns the plaintext wrapped in [`Zeroizing`] so it is scrubbed on drop.
349///
350/// # Arguments
351///
352/// - `encrypted` — `salt (32) || nonce (12) || ciphertext`.
353/// - `password` — user-supplied password.
354///
355/// # Errors
356///
357/// - [`SanitizeError::SecretsTooShort`] if the blob is too short,
358///   [`SanitizeError::SecretsDecryptFailed`] if the password is wrong or the ciphertext has been tampered with.
359pub fn decrypt_secrets(encrypted: &[u8], password: &str) -> Result<Zeroizing<Vec<u8>>> {
360    if encrypted.len() < MIN_ENCRYPTED_LEN {
361        return Err(SanitizeError::SecretsTooShort);
362    }
363
364    let salt = &encrypted[..SALT_LEN];
365    let nonce_bytes = &encrypted[SALT_LEN..SALT_LEN + NONCE_LEN];
366    let ciphertext = &encrypted[SALT_LEN + NONCE_LEN..];
367
368    let nonce = Nonce::from_slice(nonce_bytes);
369
370    let key = derive_key(password.as_bytes(), salt);
371    let cipher = Aes256Gcm::new_from_slice(key.as_ref())
372        .map_err(|e| SanitizeError::SecretsCipherError(format!("cipher init: {}", e)))?;
373
374    let plaintext = cipher
375        .decrypt(nonce, ciphertext)
376        .map_err(|_| SanitizeError::SecretsDecryptFailed)?;
377
378    Ok(Zeroizing::new(plaintext))
379}
380
381// ---------------------------------------------------------------------------
382// Parsing
383// ---------------------------------------------------------------------------
384
385/// Parse a decrypted plaintext into secret entries.
386///
387/// Supports JSON, YAML, and TOML. Format is auto-detected if `format`
388/// is `None`.
389///
390/// # Errors
391///
392/// Returns [`SanitizeError::SecretsInvalidUtf8`] if the plaintext is not
393/// valid UTF-8, [`SanitizeError::SecretsFormatError`] if it cannot be parsed
394/// in the specified format or if the file exceeds the size limit.
395pub fn parse_secrets(plaintext: &[u8], format: Option<SecretsFormat>) -> Result<Vec<SecretEntry>> {
396    if plaintext.len() > MAX_SECRETS_PLAINTEXT_BYTES {
397        return Err(SanitizeError::SecretsFormatError {
398            format: "secrets file".into(),
399            message: format!(
400                "file is {} bytes, exceeding the {} byte limit — \
401                 secrets files should be small YAML/JSON/TOML pattern lists",
402                plaintext.len(),
403                MAX_SECRETS_PLAINTEXT_BYTES,
404            ),
405        });
406    }
407    let fmt = format.unwrap_or_else(|| SecretsFormat::detect(plaintext));
408    let text = std::str::from_utf8(plaintext)
409        .map_err(|e| SanitizeError::SecretsInvalidUtf8(e.to_string()))?;
410
411    match fmt {
412        SecretsFormat::Json => {
413            serde_json::from_str(text).map_err(|e| SanitizeError::SecretsFormatError {
414                format: "JSON".into(),
415                message: e.to_string(),
416            })
417        }
418        SecretsFormat::Yaml => {
419            serde_yaml_ng::from_str(text).map_err(|e| SanitizeError::SecretsFormatError {
420                format: "YAML".into(),
421                message: e.to_string(),
422            })
423        }
424        SecretsFormat::Toml => {
425            let wrapper: TomlSecrets =
426                toml::from_str(text).map_err(|e| SanitizeError::SecretsFormatError {
427                    format: "TOML".into(),
428                    message: e.to_string(),
429                })?;
430            Ok(wrapper.secrets)
431        }
432    }
433}
434
435/// Serialize secret entries back into a plaintext format.
436///
437/// Used by the encryption helper CLI.
438///
439/// # Errors
440///
441/// Returns [`SanitizeError::SecretsFormatError`] if serialization fails.
442pub fn serialize_secrets(entries: &[SecretEntry], format: SecretsFormat) -> Result<Vec<u8>> {
443    match format {
444        SecretsFormat::Json => {
445            serde_json::to_vec_pretty(entries).map_err(|e| SanitizeError::SecretsFormatError {
446                format: "JSON-serialize".into(),
447                message: e.to_string(),
448            })
449        }
450        SecretsFormat::Yaml => serde_yaml_ng::to_string(entries)
451            .map(|s| s.into_bytes())
452            .map_err(|e| SanitizeError::SecretsFormatError {
453                format: "YAML-serialize".into(),
454                message: e.to_string(),
455            }),
456        SecretsFormat::Toml => {
457            let wrapper = TomlSecretsRef { secrets: entries };
458            toml::to_string_pretty(&wrapper)
459                .map(|s| s.into_bytes())
460                .map_err(|e| SanitizeError::SecretsFormatError {
461                    format: "TOML-serialize".into(),
462                    message: e.to_string(),
463                })
464        }
465    }
466}
467
468// ---------------------------------------------------------------------------
469// Category parsing
470// ---------------------------------------------------------------------------
471
472/// Parse a category string into a [`Category`].
473///
474/// Accepted values: `email`, `name`, `phone`, `ipv4`, `ipv6`,
475/// `credit_card`, `ssn`, `hostname`, `mac_address`, `container_id`,
476/// `uuid`, `jwt`, `auth_token`, `file_path`, `windows_sid`, `url`,
477/// `aws_arn`, `azure_resource_id`, or `custom:<tag>`.
478pub fn parse_category(s: &str) -> Category {
479    match s {
480        "email" => Category::Email,
481        "name" => Category::Name,
482        "phone" => Category::Phone,
483        "ipv4" => Category::IpV4,
484        "ipv6" => Category::IpV6,
485        "credit_card" => Category::CreditCard,
486        "ssn" => Category::Ssn,
487        "hostname" => Category::Hostname,
488        "mac_address" => Category::MacAddress,
489        "container_id" => Category::ContainerId,
490        "uuid" => Category::Uuid,
491        "jwt" => Category::Jwt,
492        "auth_token" => Category::AuthToken,
493        "file_path" => Category::FilePath,
494        "windows_sid" => Category::WindowsSid,
495        "url" => Category::Url,
496        "aws_arn" => Category::AwsArn,
497        "azure_resource_id" => Category::AzureResourceId,
498        other => {
499            let tag = other.strip_prefix("custom:").unwrap_or(other);
500            Category::Custom(tag.into())
501        }
502    }
503}
504
505// ---------------------------------------------------------------------------
506// Conversion to ScanPatterns
507// ---------------------------------------------------------------------------
508
509/// Zeroize all sensitive string fields in a `Vec<SecretEntry>` and drop it.
510///
511/// Extract allowlist patterns from a set of entries.
512///
513/// Entries with `kind: allow` are returned as raw pattern strings to be
514/// compiled into an [`AllowlistMatcher`](crate::allowlist::AllowlistMatcher). They are skipped by
515/// [`entries_to_patterns`].
516///
517/// Each entry contributes either its `values` list (when non-empty) or its
518/// `pattern` field (when `values` is absent), so both forms are supported:
519///
520/// ```toml
521/// # single pattern
522/// [[secrets]]
523/// kind = "allow"
524/// pattern = "localhost"
525///
526/// # compact multi-value form
527/// [[secrets]]
528/// kind = "allow"
529/// values = ["true", "false", "null", "0.0.0.0"]
530/// ```
531pub fn extract_allow_patterns(entries: &[SecretEntry]) -> Vec<String> {
532    let mut patterns = Vec::new();
533    for entry in entries.iter().filter(|e| e.kind == "allow") {
534        if !entry.values.is_empty() {
535            patterns.extend(entry.values.iter().cloned());
536        } else if !entry.pattern.is_empty() {
537            patterns.push(entry.pattern.clone());
538        }
539    }
540    patterns
541}
542
543/// Convert parsed [`SecretEntry`]s into compiled [`ScanPattern`]s.
544///
545/// Entries with `kind: allow` are silently skipped — they are handled by
546/// [`extract_allow_patterns`] instead.
547///
548/// Invalid entries (e.g. bad regex) are collected as errors and
549/// returned alongside the successfully compiled patterns.
550pub fn entries_to_patterns(entries: &[SecretEntry]) -> PatternCompileResult {
551    let mut patterns = Vec::with_capacity(entries.len());
552    let mut errors = Vec::new();
553
554    for (i, entry) in entries.iter().enumerate() {
555        if entry.kind == "allow"
556            || entry.kind == "entropy"
557            || entry.kind == "field-name"
558            || entry.pattern.is_empty()
559        {
560            continue;
561        }
562        let category = parse_category(&entry.category);
563        let label = entry
564            .label
565            .clone()
566            .unwrap_or_else(|| truncate_label(&entry.pattern));
567
568        let result = match entry.kind.as_str() {
569            "regex" => ScanPattern::from_regex(&entry.pattern, category, label),
570            "literal" => ScanPattern::from_literal(&entry.pattern, category, label),
571            other => {
572                errors.push((
573                    i,
574                    SanitizeError::InvalidConfig(format!(
575                        "unknown kind {:?} — expected \"literal\", \"regex\", \"allow\", \"entropy\", or \"field-name\"",
576                        other
577                    )),
578                ));
579                continue;
580            }
581        };
582
583        match result {
584            Ok(pat) => patterns.push(pat),
585            Err(e) => errors.push((i, e)),
586        }
587    }
588
589    (patterns, errors)
590}
591
/// Maximum number of characters (not bytes) in an auto-generated label.
const MAX_LABEL_CHARS: usize = 32;

/// Shorten `s` to at most [`MAX_LABEL_CHARS`] characters.
///
/// Strings that already fit are returned unchanged. Longer strings are cut
/// to the first `MAX_LABEL_CHARS - 1` characters followed by `…`, so the
/// result is exactly `MAX_LABEL_CHARS` characters long.
///
/// Length is measured in `char`s throughout, so multi-byte UTF-8 text is
/// neither cut mid-character nor given an ellipsis when nothing was removed.
fn truncate_label(s: &str) -> String {
    let mut tail = s.char_indices().skip(MAX_LABEL_CHARS - 1);

    // Byte offset of the last character that could still fit.
    let cut = match tail.next() {
        Some((offset, _)) => offset,
        // Fewer than MAX_LABEL_CHARS characters: nothing to trim.
        None => return s.to_string(),
    };

    if tail.next().is_none() {
        // Exactly MAX_LABEL_CHARS characters: fits as-is.
        return s.to_string();
    }

    format!("{}…", &s[..cut])
}
608
609// ---------------------------------------------------------------------------
610// High-level: load encrypted secrets → ScanPatterns
611// ---------------------------------------------------------------------------
612
613/// Load, decrypt, parse, and compile an encrypted secrets file into
614/// [`ScanPattern`]s ready for the streaming scanner.
615///
616/// This is the primary entry point for CLI integration.
617///
618/// # Arguments
619///
620/// - `encrypted_bytes` — raw bytes of the `.enc` file.
621/// - `password` — user-supplied password.
622/// - `format` — optional explicit format override.
623///
624/// # Returns
625///
626/// `(patterns, warnings)` where `warnings` contains indices and errors
627/// for entries that failed to compile.
628///
629/// # Security
630///
631/// The decrypted plaintext is held in zeroizing memory and dropped
632/// immediately after parsing.
633///
634/// # Errors
635///
636/// Returns a secrets-related [`SanitizeError`] if decryption or parsing fails.
637pub fn load_encrypted_secrets(
638    encrypted_bytes: &[u8],
639    password: &str,
640    format: Option<SecretsFormat>,
641) -> Result<(PatternCompileResult, Vec<String>)> {
642    let plaintext = decrypt_secrets(encrypted_bytes, password)?;
643    let entries = parse_secrets(&plaintext, format)?;
644    let allow = extract_allow_patterns(&entries);
645    let result = entries_to_patterns(&entries);
646    // SecretEntry implements Drop with explicit zeroize() calls, so dropping
647    // the Vec is sufficient to scrub sensitive pattern data from heap memory.
648    drop(entries);
649    Ok((result, allow))
650}
651
652/// Load and parse a plaintext secrets file into [`ScanPattern`]s.
653///
654/// This function mirrors [`load_encrypted_secrets`] but skips
655/// AES decryption and password prompts entirely. It preserves
656/// memory hygiene by zeroizing parsed entries after compilation.
657///
658/// # Arguments
659///
660/// - `plaintext` — raw bytes of the secrets file (JSON / YAML / TOML).
661/// - `format` — optional explicit format override.
662///
663/// # Security
664///
665/// Even for unencrypted secrets, entries are zeroized after pattern
666/// compilation to minimise the window during which sensitive values
667/// reside in memory.
668///
669/// # Errors
670///
671/// Returns a secrets-related [`SanitizeError`] if parsing or pattern
672/// compilation fails.
673pub fn load_plaintext_secrets(
674    plaintext: &[u8],
675    format: Option<SecretsFormat>,
676) -> Result<(PatternCompileResult, Vec<String>)> {
677    let entries = parse_secrets(plaintext, format)?;
678    let allow = extract_allow_patterns(&entries);
679    let result = entries_to_patterns(&entries);
680    // SecretEntry implements Drop with explicit zeroize() calls, so dropping
681    // the Vec is sufficient to scrub sensitive pattern data from heap memory.
682    drop(entries);
683    Ok((result, allow))
684}
685
686/// Detect whether raw file bytes look like an AES-256-GCM encrypted
687/// secrets blob (binary with salt+nonce header) or a plaintext secrets
688/// file (UTF-8 JSON / YAML / TOML).
689///
690/// Returns `true` if the content appears to be encrypted.
691///
692/// Heuristic:
693/// 1. Files shorter than the minimum encrypted length cannot be valid
694///    ciphertext — return `false`.
695/// 2. The **entire** content is checked for UTF-8 validity (not just the
696///    first few bytes). Only if the whole file is valid UTF-8 and begins
697///    with a recognisable plaintext marker (`[`, `{`, `-`, `#`) is it
698///    treated as plaintext — return `false`.
699/// 3. Binary content (not valid UTF-8) or UTF-8 without a plaintext
700///    marker is assumed to be encrypted — return `true`.
701///
702/// Note: a pathological plaintext file that is valid UTF-8 but lacks a
703/// leading plaintext marker (e.g. a TOML file whose first non-whitespace
704/// character is a letter) will be misclassified as encrypted and produce
705/// a `SecretsDecryptFailed` error. Use `force_plaintext: true` in
706/// [`load_secrets_auto`] to bypass the heuristic in that case.
707pub fn looks_encrypted(data: &[u8]) -> bool {
708    if data.len() < MIN_ENCRYPTED_LEN {
709        // Too short for a valid encrypted blob — might be a tiny
710        // plaintext file, but definitely not encrypted.
711        return false;
712    }
713    // If the file is valid UTF-8 and starts with a recognisable
714    // plaintext marker, treat it as plaintext.
715    if let Ok(text) = std::str::from_utf8(data) {
716        let trimmed = text.trim_start();
717        // Recognisable plaintext markers for JSON ('[', '{'), YAML ('-'), TOML ('#').
718        // starts_with('[') already covers "[["; starts_with('-') covers "---".
719        let has_marker = trimmed.starts_with('[')
720            || trimmed.starts_with('{')
721            || trimmed.starts_with('-')
722            || trimmed.starts_with('#');
723        if has_marker {
724            return false;
725        }
726    }
727    // Binary / non-UTF-8 → assume encrypted.
728    true
729}
730
731/// Unified loader: auto-detect encrypted vs plaintext and load
732/// secret patterns accordingly.
733///
734/// When `force_plaintext` is `true`, decryption is skipped regardless
735/// of file content. When `false`, the function uses [`looks_encrypted`]
736/// to choose the path automatically.
737///
738/// # Arguments
739///
740/// - `data` — raw bytes read from the secrets file.
741/// - `password` — password for decryption (ignored when plaintext).
742/// - `format` — optional format override.
743/// - `force_plaintext` — if `true`, always treat as plaintext.
744///
745/// # Returns
746///
747/// `(patterns, warnings, was_encrypted)` — the compiled patterns,
748/// any compile warnings, and a flag indicating which path was taken.
749///
750/// # Errors
751///
752/// Returns a secrets-related [`SanitizeError`] if decryption or parsing
753/// fails, or if a password is required but not provided.
754/// Returns `((patterns, warnings, allow_patterns), was_encrypted)`.
755///
756/// `allow_patterns` are the raw strings from `kind: allow` entries in the
757/// secrets file — the caller should combine these with any `--allow` CLI
758/// values and pass the merged list to [`AllowlistMatcher::new`](crate::allowlist::AllowlistMatcher::new).
759pub fn load_secrets_auto(
760    data: &[u8],
761    password: Option<&str>,
762    format: Option<SecretsFormat>,
763    force_plaintext: bool,
764) -> Result<((PatternCompileResult, Vec<String>), bool)> {
765    if force_plaintext || !looks_encrypted(data) {
766        let (result, allow) = load_plaintext_secrets(data, format)?;
767        Ok(((result, allow), false))
768    } else {
769        let pw = password.ok_or(SanitizeError::SecretsPasswordRequired)?;
770        let (result, allow) = load_encrypted_secrets(data, pw, format)?;
771        Ok(((result, allow), true))
772    }
773}
774
775// ---------------------------------------------------------------------------
776// Unit tests
777// ---------------------------------------------------------------------------
778
779#[cfg(test)]
780mod tests {
781    use super::*;
782
    /// JSON fixture: one `regex` entry (email) and one `literal` entry
    /// (custom API key), both with explicit labels.
    fn sample_json() -> &'static str {
        r#"[
            {
                "pattern": "alice@corp\\.com",
                "kind": "regex",
                "category": "email",
                "label": "alice_email"
            },
            {
                "pattern": "sk-proj-abc123secret",
                "kind": "literal",
                "category": "custom:api_key",
                "label": "openai_key"
            }
        ]"#
    }
799
    /// YAML fixture with the same two entries as [`sample_json`]. The body
    /// starts at column 0 because YAML indentation is significant.
    fn sample_yaml() -> &'static str {
        r#"- pattern: "alice@corp\\.com"
  kind: regex
  category: email
  label: alice_email
- pattern: sk-proj-abc123secret
  kind: literal
  category: "custom:api_key"
  label: openai_key
"#
    }
811
    /// TOML fixture with the same two entries as [`sample_json`], written as
    /// a `[[secrets]]` array of tables.
    fn sample_toml() -> &'static str {
        r#"[[secrets]]
pattern = "alice@corp\\.com"
kind = "regex"
category = "email"
label = "alice_email"

[[secrets]]
pattern = "sk-proj-abc123secret"
kind = "literal"
category = "custom:api_key"
label = "openai_key"
"#
    }
826
827    // ---- Parsing ----
828
829    #[test]
830    fn parse_json_entries() {
831        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
832        assert_eq!(entries.len(), 2);
833        assert_eq!(entries[0].kind, "regex");
834        assert_eq!(entries[0].category, "email");
835        assert_eq!(entries[1].kind, "literal");
836    }
837
838    #[test]
839    fn parse_yaml_entries() {
840        let entries = parse_secrets(sample_yaml().as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
841        assert_eq!(entries.len(), 2);
842        assert_eq!(entries[0].label, Some("alice_email".into()));
843    }
844
845    #[test]
846    fn parse_toml_entries() {
847        let entries = parse_secrets(sample_toml().as_bytes(), Some(SecretsFormat::Toml)).unwrap();
848        assert_eq!(entries.len(), 2);
849        assert_eq!(entries[1].pattern, "sk-proj-abc123secret");
850    }
851
852    #[test]
853    fn parse_auto_detect_json() {
854        let entries = parse_secrets(sample_json().as_bytes(), None).unwrap();
855        assert_eq!(entries.len(), 2);
856    }
857
858    #[test]
859    fn parse_auto_detect_yaml() {
860        let entries = parse_secrets(sample_yaml().as_bytes(), None).unwrap();
861        assert_eq!(entries.len(), 2);
862    }
863
864    // ---- Category parsing ----
865
866    #[test]
867    fn parse_builtin_categories() {
868        assert_eq!(parse_category("email"), Category::Email);
869        assert_eq!(parse_category("ipv4"), Category::IpV4);
870        assert_eq!(parse_category("ssn"), Category::Ssn);
871    }
872
873    #[test]
874    fn parse_custom_category() {
875        match parse_category("custom:api_key") {
876            Category::Custom(tag) => assert_eq!(tag.as_str(), "api_key"),
877            other => panic!("expected Custom, got {:?}", other),
878        }
879    }
880
881    #[test]
882    fn parse_unknown_category_becomes_custom() {
883        match parse_category("foobar") {
884            Category::Custom(tag) => assert_eq!(tag.as_str(), "foobar"),
885            other => panic!("expected Custom, got {:?}", other),
886        }
887    }
888
889    // ---- Entries to patterns ----
890
891    #[test]
892    fn entries_to_patterns_success() {
893        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
894        let (patterns, errors) = entries_to_patterns(&entries);
895        assert_eq!(patterns.len(), 2);
896        assert!(errors.is_empty());
897    }
898
899    #[test]
900    fn entries_to_patterns_bad_regex() {
901        let json = r#"[{"pattern": "[invalid(", "kind": "regex", "category": "email"}]"#;
902        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
903        let (patterns, errors) = entries_to_patterns(&entries);
904        assert!(patterns.is_empty());
905        assert_eq!(errors.len(), 1);
906        assert_eq!(errors[0].0, 0);
907    }
908
909    // ---- Encrypt / Decrypt round-trip ----
910
911    #[test]
912    fn encrypt_decrypt_roundtrip() {
913        let plaintext = sample_json().as_bytes();
914        let password = "test-password-42";
915
916        let encrypted = encrypt_secrets(plaintext, password).unwrap();
917
918        // Encrypted blob must be larger than plaintext (salt + nonce + tag).
919        assert!(encrypted.len() > plaintext.len());
920
921        let decrypted = decrypt_secrets(&encrypted, password).unwrap();
922        assert_eq!(decrypted.as_slice(), plaintext);
923    }
924
925    #[test]
926    fn decrypt_wrong_password_fails() {
927        let plaintext = b"hello";
928        let encrypted = encrypt_secrets(plaintext, "correct").unwrap();
929        let result = decrypt_secrets(&encrypted, "wrong");
930        assert!(result.is_err());
931    }
932
933    #[test]
934    fn decrypt_truncated_blob_fails() {
935        let result = decrypt_secrets(&[0u8; 10], "any");
936        assert!(result.is_err());
937    }
938
939    #[test]
940    fn decrypt_tampered_blob_fails() {
941        let plaintext = b"hello world";
942        let mut encrypted = encrypt_secrets(plaintext, "pw").unwrap();
943        // Flip a byte in the ciphertext portion.
944        let last = encrypted.len() - 1;
945        encrypted[last] ^= 0xFF;
946        let result = decrypt_secrets(&encrypted, "pw");
947        assert!(result.is_err());
948    }
949
950    #[test]
951    fn encrypt_empty_password_rejected() {
952        let result = encrypt_secrets(b"hello", "");
953        assert!(result.is_err());
954    }
955
956    // ---- Full pipeline: encrypt → decrypt → parse → patterns ----
957
958    #[test]
959    fn full_pipeline_json() {
960        let plaintext = sample_json().as_bytes();
961        let password = "pipeline-test";
962
963        let encrypted = encrypt_secrets(plaintext, password).unwrap();
964        let ((patterns, errors), _allow) =
965            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Json)).unwrap();
966
967        assert_eq!(patterns.len(), 2);
968        assert!(errors.is_empty());
969        assert_eq!(patterns[0].label(), "alice_email");
970        assert_eq!(patterns[1].label(), "openai_key");
971    }
972
973    #[test]
974    fn full_pipeline_yaml() {
975        let plaintext = sample_yaml().as_bytes();
976        let password = "yaml-test";
977
978        let encrypted = encrypt_secrets(plaintext, password).unwrap();
979        let ((patterns, errors), _allow) =
980            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Yaml)).unwrap();
981
982        assert_eq!(patterns.len(), 2);
983        assert!(errors.is_empty());
984    }
985
986    #[test]
987    fn full_pipeline_toml() {
988        let plaintext = sample_toml().as_bytes();
989        let password = "toml-test";
990
991        let encrypted = encrypt_secrets(plaintext, password).unwrap();
992        let ((patterns, errors), _allow) =
993            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Toml)).unwrap();
994
995        assert_eq!(patterns.len(), 2);
996        assert!(errors.is_empty());
997    }
998
999    // ---- Plaintext loader ----
1000
1001    #[test]
1002    fn load_plaintext_secrets_works() {
1003        let ((patterns, errors), _allow) =
1004            load_plaintext_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
1005        assert_eq!(patterns.len(), 2);
1006        assert!(errors.is_empty());
1007    }
1008
1009    // ---- Serialization round-trip ----
1010
1011    #[test]
1012    fn serialize_roundtrip_json() {
1013        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
1014        let serialized = serialize_secrets(&entries, SecretsFormat::Json).unwrap();
1015        let reparsed = parse_secrets(&serialized, Some(SecretsFormat::Json)).unwrap();
1016        assert_eq!(entries.len(), reparsed.len());
1017        assert_eq!(entries[0].pattern, reparsed[0].pattern);
1018    }
1019
1020    // ---- Format detection ----
1021
1022    #[test]
1023    fn format_from_extension() {
1024        assert_eq!(
1025            SecretsFormat::from_extension("secrets.json"),
1026            Some(SecretsFormat::Json)
1027        );
1028        assert_eq!(
1029            SecretsFormat::from_extension("secrets.json.enc"),
1030            Some(SecretsFormat::Json)
1031        );
1032        assert_eq!(
1033            SecretsFormat::from_extension("secrets.yaml"),
1034            Some(SecretsFormat::Yaml)
1035        );
1036        assert_eq!(
1037            SecretsFormat::from_extension("secrets.yml.enc"),
1038            Some(SecretsFormat::Yaml)
1039        );
1040        assert_eq!(
1041            SecretsFormat::from_extension("secrets.toml"),
1042            Some(SecretsFormat::Toml)
1043        );
1044        assert_eq!(SecretsFormat::from_extension("secrets.txt"), None);
1045    }
1046
1047    #[test]
1048    fn detect_yaml_with_leading_comment_header() {
1049        // Regression: the auto-provisioned global secrets file opens with '#'
1050        // comment lines. Before the fix, detect() saw '#' first, fell through
1051        // to the TOML fallback, and failed to parse valid YAML.
1052        let content = "# Global sanitize allowlist — add patterns here.\n# Auto-loaded on every plain run.\n\n- pattern: foo\n  kind: allow\n";
1053        assert_eq!(
1054            SecretsFormat::detect(content.as_bytes()),
1055            SecretsFormat::Yaml
1056        );
1057    }
1058
1059    #[test]
1060    fn detect_yaml_comment_header_parses_correctly() {
1061        // Round-trip: same shape as the auto-provisioned file must load without error.
1062        let content = "# Global sanitize allowlist — add patterns or kind:regex entries here.\n# Auto-loaded on every plain run. Edit freely; deleted values take effect immediately.\n\n- pattern: ''\n  kind: allow\n  category: ''\n  values:\n  - localhost\n  - 127.0.0.1\n";
1063        let entries = parse_secrets(content.as_bytes(), None)
1064            .expect("auto-provisioned secrets file with comment header must parse");
1065        assert_eq!(entries.len(), 1);
1066        assert_eq!(entries[0].kind, "allow");
1067        assert!(entries[0].values.contains(&"localhost".to_string()));
1068    }
1069
1070    #[test]
1071    fn detect_json_array() {
1072        assert_eq!(
1073            SecretsFormat::detect(b"[{\"pattern\": \"foo\"}]"),
1074            SecretsFormat::Json
1075        );
1076    }
1077
1078    #[test]
1079    fn detect_toml_fallback() {
1080        // TOML that doesn't open with '[' or '{' — must not be mistaken for YAML.
1081        assert_eq!(
1082            SecretsFormat::detect(b"# toml comment\nkey = \"value\""),
1083            SecretsFormat::Toml
1084        );
1085    }
1086
1087    // ---- Defaults ----
1088
1089    #[test]
1090    fn default_kind_is_literal() {
1091        let json = r#"[{"pattern": "foo"}]"#;
1092        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1093        assert_eq!(entries[0].kind, "literal");
1094    }
1095
1096    #[test]
1097    fn default_category_is_custom_secret() {
1098        let json = r#"[{"pattern": "foo"}]"#;
1099        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1100        assert_eq!(entries[0].category, "custom:secret");
1101    }
1102
1103    #[test]
1104    fn default_label_from_pattern() {
1105        let json = r#"[{"pattern": "short"}]"#;
1106        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1107        let (patterns, _) = entries_to_patterns(&entries);
1108        assert_eq!(patterns[0].label(), "short");
1109    }
1110
1111    // ---- looks_encrypted ----
1112
1113    #[test]
1114    fn looks_encrypted_json_plaintext() {
1115        assert!(!looks_encrypted(sample_json().as_bytes()));
1116    }
1117
1118    #[test]
1119    fn looks_encrypted_yaml_plaintext() {
1120        assert!(!looks_encrypted(sample_yaml().as_bytes()));
1121    }
1122
1123    #[test]
1124    fn looks_encrypted_toml_plaintext() {
1125        assert!(!looks_encrypted(sample_toml().as_bytes()));
1126    }
1127
1128    #[test]
1129    fn looks_encrypted_actual_encrypted() {
1130        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
1131        assert!(looks_encrypted(&encrypted));
1132    }
1133
1134    #[test]
1135    fn looks_encrypted_too_short() {
1136        assert!(!looks_encrypted(&[0u8; 10]));
1137    }
1138
1139    // ---- load_secrets_auto ----
1140
1141    #[test]
1142    fn auto_load_plaintext_json() {
1143        let data = sample_json().as_bytes();
1144        let (((pats, errs), _allow), was_enc) =
1145            load_secrets_auto(data, None, Some(SecretsFormat::Json), false).unwrap();
1146        assert!(!was_enc);
1147        assert_eq!(pats.len(), 2);
1148        assert!(errs.is_empty());
1149    }
1150
1151    #[test]
1152    fn auto_load_encrypted_json() {
1153        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
1154        let (((pats, errs), _allow), was_enc) =
1155            load_secrets_auto(&encrypted, Some("pw"), Some(SecretsFormat::Json), false).unwrap();
1156        assert!(was_enc);
1157        assert_eq!(pats.len(), 2);
1158        assert!(errs.is_empty());
1159    }
1160
1161    #[test]
1162    fn auto_load_force_plaintext() {
1163        let data = sample_json().as_bytes();
1164        let (((pats, _), _allow), was_enc) =
1165            load_secrets_auto(data, None, Some(SecretsFormat::Json), true).unwrap();
1166        assert!(!was_enc);
1167        assert_eq!(pats.len(), 2);
1168    }
1169
1170    #[test]
1171    fn auto_load_encrypted_no_password_fails() {
1172        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
1173        let result = load_secrets_auto(&encrypted, None, None, false);
1174        assert!(result.is_err());
1175    }
1176
1177    #[test]
1178    fn parse_secrets_rejects_oversized_input() {
1179        // Construct input just over the 10 MiB cap.
1180        let oversized = vec![b' '; MAX_SECRETS_PLAINTEXT_BYTES + 1];
1181        let result = parse_secrets(&oversized, None);
1182        assert!(result.is_err());
1183        let msg = result.unwrap_err().to_string();
1184        assert!(
1185            msg.contains("exceeding") || msg.contains("limit"),
1186            "unexpected error message: {msg}"
1187        );
1188    }
1189
1190    #[test]
1191    fn parse_secrets_accepts_input_at_limit() {
1192        // Valid JSON just at the cap boundary — should succeed or fail on
1193        // parse, not on the size check. We use a tiny valid payload here
1194        // to confirm the size gate does not block small files.
1195        let tiny = b"[]";
1196        let result = parse_secrets(tiny, Some(SecretsFormat::Json));
1197        assert!(
1198            result.is_ok(),
1199            "unexpected error: {:?}",
1200            result.unwrap_err()
1201        );
1202    }
1203
1204    #[test]
1205    fn truncate_label_at_boundary() {
1206        let short = "a".repeat(32);
1207        assert_eq!(truncate_label(&short), short);
1208
1209        let long = "a".repeat(33);
1210        let truncated = truncate_label(&long);
1211        assert!(truncated.ends_with('…'), "expected ellipsis: {truncated}");
1212        // Character count (not byte count) must be within the limit.
1213        // The trailing '…' is 1 char; the rest must be < MAX_LABEL_CHARS.
1214        assert!(
1215            truncated.chars().count() <= MAX_LABEL_CHARS,
1216            "char count {} exceeds limit: {truncated}",
1217            truncated.chars().count()
1218        );
1219    }
1220
1221    // ---- Multi-value allow entries ----
1222
1223    #[test]
1224    fn allow_single_pattern_field() {
1225        let json = r#"[{"kind":"allow","pattern":"localhost"}]"#;
1226        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1227        let patterns = extract_allow_patterns(&entries);
1228        assert_eq!(patterns, vec!["localhost"]);
1229    }
1230
1231    #[test]
1232    fn allow_values_list_used_instead_of_pattern() {
1233        let json = r#"[{"kind":"allow","values":["localhost","true","false","null"]}]"#;
1234        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1235        let patterns = extract_allow_patterns(&entries);
1236        assert_eq!(patterns, vec!["localhost", "true", "false", "null"]);
1237    }
1238
1239    #[test]
1240    fn allow_values_list_yaml() {
1241        let yaml =
1242            "- kind: allow\n  values:\n    - localhost\n    - \"127.0.0.1\"\n    - \"0.0.0.0\"\n";
1243        let entries = parse_secrets(yaml.as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
1244        let patterns = extract_allow_patterns(&entries);
1245        assert_eq!(patterns, vec!["localhost", "127.0.0.1", "0.0.0.0"]);
1246    }
1247
1248    #[test]
1249    fn allow_values_list_toml() {
1250        let toml = "[[secrets]]\nkind = \"allow\"\nvalues = [\"localhost\", \"true\", \"false\"]\n";
1251        let entries = parse_secrets(toml.as_bytes(), Some(SecretsFormat::Toml)).unwrap();
1252        let patterns = extract_allow_patterns(&entries);
1253        assert_eq!(patterns, vec!["localhost", "true", "false"]);
1254    }
1255
1256    #[test]
1257    fn allow_mixed_single_and_multi_value_entries() {
1258        let json = r#"[
1259            {"kind":"allow","pattern":"localhost"},
1260            {"kind":"allow","values":["true","false","null"]},
1261            {"kind":"allow","pattern":"*.internal"}
1262        ]"#;
1263        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1264        let patterns = extract_allow_patterns(&entries);
1265        assert_eq!(
1266            patterns,
1267            vec!["localhost", "true", "false", "null", "*.internal"]
1268        );
1269    }
1270
1271    #[test]
1272    fn allow_entries_skipped_by_entries_to_patterns() {
1273        let json = r#"[
1274            {"pattern":"secret","kind":"literal"},
1275            {"kind":"allow","values":["localhost","true"]}
1276        ]"#;
1277        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1278        let (patterns, errors) = entries_to_patterns(&entries);
1279        assert_eq!(patterns.len(), 1);
1280        assert!(errors.is_empty());
1281        assert_eq!(patterns[0].label(), "secret");
1282    }
1283
1284    #[test]
1285    fn allow_empty_values_falls_back_to_pattern() {
1286        // An entry with an empty `values` list should still use `pattern`.
1287        let json = r#"[{"kind":"allow","pattern":"localhost","values":[]}]"#;
1288        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1289        let patterns = extract_allow_patterns(&entries);
1290        assert_eq!(patterns, vec!["localhost"]);
1291    }
1292
1293    // ── kind: field-name ─────────────────────────────────────────────────────
1294
1295    #[test]
1296    fn field_name_entries_skipped_by_entries_to_patterns() {
1297        // kind:field-name entries must not produce ScanPatterns — they are
1298        // handled separately as FieldNameSignals injected into profiles.
1299        let json = r#"[
1300            {"pattern":"secret","kind":"literal"},
1301            {"pattern":"^password$","kind":"field-name","threshold":3.0}
1302        ]"#;
1303        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1304        let (patterns, errors) = entries_to_patterns(&entries);
1305        assert_eq!(
1306            patterns.len(),
1307            1,
1308            "only the literal entry should produce a pattern"
1309        );
1310        assert!(errors.is_empty());
1311        assert_eq!(patterns[0].label(), "secret");
1312    }
1313
1314    #[test]
1315    fn field_name_entry_parses_correctly() {
1316        let yaml = "- kind: field-name\n  pattern: \"^(password|secret)$\"\n  threshold: 3.0\n  label: my-signal\n";
1317        let entries = parse_secrets(yaml.as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
1318        assert_eq!(entries.len(), 1);
1319        assert_eq!(entries[0].kind, "field-name");
1320        assert_eq!(entries[0].pattern, "^(password|secret)$");
1321        assert_eq!(entries[0].threshold, Some(3.0));
1322        assert_eq!(entries[0].label, Some("my-signal".into()));
1323    }
1324
1325    #[test]
1326    fn field_name_entry_not_extracted_as_allow_pattern() {
1327        // kind:field-name entries must not bleed into the allowlist.
1328        let json = r#"[{"pattern":"^password$","kind":"field-name"}]"#;
1329        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
1330        let allow = extract_allow_patterns(&entries);
1331        assert!(allow.is_empty());
1332    }
1333}
sanitize_engine/secrets.rs

sanitize_engine/
secrets.rs