1use crate::category::Category;
69use crate::error::{Result, SanitizeError};
70use crate::scanner::ScanPattern;
71
/// Outcome of compiling a batch of secret entries: the patterns that compiled
/// successfully, plus `(entry_index, error)` pairs for the entries that did not.
pub type PatternCompileResult = (Vec<ScanPattern>, Vec<(usize, SanitizeError)>);
75
76use aes_gcm::aead::{Aead, KeyInit};
77use aes_gcm::{Aes256Gcm, Nonce};
78use hmac::Hmac;
79use rand::RngCore;
80use serde::{Deserialize, Serialize};
81use sha2::Sha256;
82use zeroize::{Zeroize, Zeroizing};
83
/// Length in bytes of the random salt that prefixes every encrypted blob.
const SALT_LEN: usize = 32;

/// Length in bytes of the AES-GCM nonce stored directly after the salt.
const NONCE_LEN: usize = 12;

/// Iteration count handed to PBKDF2-HMAC-SHA256 when deriving the cipher key.
const PBKDF2_ITERATIONS: u32 = 600_000;

/// Smallest blob `decrypt_secrets` accepts: salt + nonce + 16 further bytes
/// (presumably the AES-GCM authentication tag of an empty plaintext — confirm).
const MIN_ENCRYPTED_LEN: usize = SALT_LEN + NONCE_LEN + 16;
99
/// One secret definition as it appears in a JSON, YAML, or TOML secrets file.
///
/// All string fields are zeroized by this type's `Drop` implementation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SecretEntry {
    /// The text to match; interpreted as a regex or a literal depending on `kind`.
    pub pattern: String,

    /// How `pattern` is compiled: `"regex"` selects a regex, any other value is
    /// treated as a literal. Defaults to `"literal"` when omitted.
    #[serde(default = "default_kind")]
    pub kind: String,

    /// Category name, resolved with `parse_category`. Defaults to
    /// `"custom:secret"` when omitted.
    #[serde(default = "default_category")]
    pub category: String,

    /// Optional human-readable label; when absent, a label is derived from a
    /// truncated copy of `pattern` at compile time.
    #[serde(default)]
    pub label: Option<String>,
}
133
134impl Drop for SecretEntry {
135 fn drop(&mut self) {
136 self.pattern.zeroize();
137 self.kind.zeroize();
138 self.category.zeroize();
139 if let Some(ref mut l) = self.label {
140 l.zeroize();
141 }
142 }
143}
144
/// Serde default for `SecretEntry::kind`: entries are literals unless stated otherwise.
fn default_kind() -> String {
    String::from("literal")
}
148
/// Serde default for `SecretEntry::category`.
fn default_category() -> String {
    String::from("custom:secret")
}
152
/// Serialization format of a secrets file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SecretsFormat {
    /// A JSON array of entries.
    Json,
    /// A YAML sequence of entries.
    Yaml,
    /// A TOML document with a `secrets` array of tables.
    Toml,
}

impl SecretsFormat {
    /// Infers the format from a file name's extension.
    ///
    /// A trailing `.enc` wrapper suffix is ignored, so `secrets.json.enc` is
    /// treated like `secrets.json`. Both the wrapper suffix and the extension
    /// are matched ASCII case-insensitively. Returns `None` for any extension
    /// other than `json`, `yaml`, `yml`, or `toml`.
    pub fn from_extension(path: &str) -> Option<Self> {
        const ENC_SUFFIX: &str = ".enc";

        // Strip the wrapper suffix with the same case-insensitivity that the
        // extension comparison below uses. The boundary check keeps the slice
        // safe when the path ends in multi-byte characters.
        let base = match path.len().checked_sub(ENC_SUFFIX.len()) {
            Some(cut)
                if path.is_char_boundary(cut)
                    && path[cut..].eq_ignore_ascii_case(ENC_SUFFIX) =>
            {
                &path[..cut]
            }
            _ => path,
        };

        let ext = std::path::Path::new(base).extension()?;
        if ext.eq_ignore_ascii_case("json") {
            Some(Self::Json)
        } else if ext.eq_ignore_ascii_case("yaml") || ext.eq_ignore_ascii_case("yml") {
            Some(Self::Yaml)
        } else if ext.eq_ignore_ascii_case("toml") {
            Some(Self::Toml)
        } else {
            None
        }
    }

    /// Guesses the format from the first non-whitespace characters of `content`.
    ///
    /// Heuristics, in order:
    /// 1. a TOML table header such as `[[secrets]]` or `[meta]` → TOML;
    /// 2. a leading `[` or `{` → JSON;
    /// 3. a leading `-` (which also covers the `---` document marker) → YAML;
    /// 4. anything else → TOML.
    ///
    /// Invalid UTF-8 is decoded lossily; this function never fails.
    pub fn detect(content: &[u8]) -> Self {
        let text = String::from_utf8_lossy(content);
        let trimmed = text.trim_start();

        // Must run before the JSON check: `[[secrets]]` also starts with `[`
        // and would otherwise be misclassified as a JSON array.
        if Self::starts_with_toml_table_header(trimmed) {
            Self::Toml
        } else if trimmed.starts_with('[') || trimmed.starts_with('{') {
            Self::Json
        } else if trimmed.starts_with('-') {
            Self::Yaml
        } else {
            Self::Toml
        }
    }

    /// Returns `true` when `trimmed` opens with `[name]` or `[[name]]`, where
    /// `name` is a bare (optionally dotted) TOML key starting with an ASCII
    /// letter or underscore.
    ///
    /// JSON arrays of objects, nested arrays, strings, or numbers never match
    /// because their first element starts with `{`, `[`, `"`, a digit, or `-`.
    fn starts_with_toml_table_header(trimmed: &str) -> bool {
        let Some(after_open) = trimmed.strip_prefix('[') else {
            return false;
        };
        let after_open = after_open.strip_prefix('[').unwrap_or(after_open);

        // A table header must be closed on the line it starts on.
        let first_line = after_open.lines().next().unwrap_or("");
        let Some(close) = first_line.find(']') else {
            return false;
        };

        let name = first_line[..close].trim();
        let mut chars = name.chars();
        matches!(chars.next(), Some(c) if c.is_ascii_alphabetic() || c == '_')
            && chars.all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | '.'))
    }
}
196
/// Owned wrapper used when parsing TOML: entries are read from a top-level
/// `secrets` key rather than from a bare top-level array.
#[derive(Deserialize)]
struct TomlSecrets {
    /// The parsed entries found under the `secrets` key.
    secrets: Vec<SecretEntry>,
}
206
/// Borrowing counterpart of `TomlSecrets` used when serializing to TOML, so
/// the entries do not need to be cloned into an owned wrapper.
#[derive(Serialize)]
struct TomlSecretsRef<'a> {
    /// The entries to emit under the `secrets` key.
    secrets: &'a [SecretEntry],
}
212
213fn derive_key(password: &[u8], salt: &[u8]) -> Zeroizing<[u8; 32]> {
219 let mut key = Zeroizing::new([0u8; 32]);
220 pbkdf2::pbkdf2::<Hmac<Sha256>>(password, salt, PBKDF2_ITERATIONS, key.as_mut())
221 .expect("PBKDF2 output length is valid");
222 key
223}
224
225pub fn encrypt_secrets(plaintext: &[u8], password: &str) -> Result<Vec<u8>> {
249 if password.is_empty() {
250 return Err(SanitizeError::SecretsEmptyPassword);
251 }
252
253 let mut rng = rand::thread_rng();
254
255 let mut salt = [0u8; SALT_LEN];
257 rng.fill_bytes(&mut salt);
258
259 let mut nonce_bytes = [0u8; NONCE_LEN];
260 rng.fill_bytes(&mut nonce_bytes);
261 let nonce = Nonce::from_slice(&nonce_bytes);
262
263 let key = derive_key(password.as_bytes(), &salt);
265 let cipher = Aes256Gcm::new_from_slice(key.as_ref())
266 .map_err(|e| SanitizeError::SecretsCipherError(format!("cipher init: {}", e)))?;
267
268 let ciphertext = cipher
270 .encrypt(nonce, plaintext)
271 .map_err(|e| SanitizeError::SecretsCipherError(format!("encryption: {}", e)))?;
272
273 let mut output = Vec::with_capacity(SALT_LEN + NONCE_LEN + ciphertext.len());
275 output.extend_from_slice(&salt);
276 output.extend_from_slice(&nonce_bytes);
277 output.extend_from_slice(&ciphertext);
278
279 Ok(output)
280}
281
282pub fn decrypt_secrets(encrypted: &[u8], password: &str) -> Result<Zeroizing<Vec<u8>>> {
300 if encrypted.len() < MIN_ENCRYPTED_LEN {
301 return Err(SanitizeError::SecretsTooShort);
302 }
303
304 let salt = &encrypted[..SALT_LEN];
305 let nonce_bytes = &encrypted[SALT_LEN..SALT_LEN + NONCE_LEN];
306 let ciphertext = &encrypted[SALT_LEN + NONCE_LEN..];
307
308 let nonce = Nonce::from_slice(nonce_bytes);
309
310 let key = derive_key(password.as_bytes(), salt);
311 let cipher = Aes256Gcm::new_from_slice(key.as_ref())
312 .map_err(|e| SanitizeError::SecretsCipherError(format!("cipher init: {}", e)))?;
313
314 let plaintext = cipher.decrypt(nonce, ciphertext).map_err(|_| {
315 SanitizeError::SecretsDecryptFailed
316 })?;
317
318 Ok(Zeroizing::new(plaintext))
319}
320
321pub fn parse_secrets(plaintext: &[u8], format: Option<SecretsFormat>) -> Result<Vec<SecretEntry>> {
335 let fmt = format.unwrap_or_else(|| SecretsFormat::detect(plaintext));
336 let text = std::str::from_utf8(plaintext)
337 .map_err(|e| SanitizeError::SecretsInvalidUtf8(e.to_string()))?;
338
339 match fmt {
340 SecretsFormat::Json => serde_json::from_str(text).map_err(|e| {
341 SanitizeError::SecretsFormatError {
342 format: "JSON".into(),
343 message: e.to_string(),
344 }
345 }),
346 SecretsFormat::Yaml => serde_yaml_ng::from_str(text).map_err(|e| {
347 SanitizeError::SecretsFormatError {
348 format: "YAML".into(),
349 message: e.to_string(),
350 }
351 }),
352 SecretsFormat::Toml => {
353 let wrapper: TomlSecrets = toml::from_str(text).map_err(|e| {
354 SanitizeError::SecretsFormatError {
355 format: "TOML".into(),
356 message: e.to_string(),
357 }
358 })?;
359 Ok(wrapper.secrets)
360 }
361 }
362}
363
364pub fn serialize_secrets(entries: &[SecretEntry], format: SecretsFormat) -> Result<Vec<u8>> {
372 match format {
373 SecretsFormat::Json => serde_json::to_vec_pretty(entries).map_err(|e| {
374 SanitizeError::SecretsFormatError {
375 format: "JSON-serialize".into(),
376 message: e.to_string(),
377 }
378 }),
379 SecretsFormat::Yaml => serde_yaml_ng::to_string(entries)
380 .map(|s| s.into_bytes())
381 .map_err(|e| SanitizeError::SecretsFormatError {
382 format: "YAML-serialize".into(),
383 message: e.to_string(),
384 }),
385 SecretsFormat::Toml => {
386 let wrapper = TomlSecretsRef { secrets: entries };
387 toml::to_string_pretty(&wrapper)
388 .map(|s| s.into_bytes())
389 .map_err(|e| SanitizeError::SecretsFormatError {
390 format: "TOML-serialize".into(),
391 message: e.to_string(),
392 })
393 }
394 }
395}
396
397pub fn parse_category(s: &str) -> Category {
408 match s {
409 "email" => Category::Email,
410 "name" => Category::Name,
411 "phone" => Category::Phone,
412 "ipv4" => Category::IpV4,
413 "ipv6" => Category::IpV6,
414 "credit_card" => Category::CreditCard,
415 "ssn" => Category::Ssn,
416 "hostname" => Category::Hostname,
417 "mac_address" => Category::MacAddress,
418 "container_id" => Category::ContainerId,
419 "uuid" => Category::Uuid,
420 "jwt" => Category::Jwt,
421 "auth_token" => Category::AuthToken,
422 "file_path" => Category::FilePath,
423 "windows_sid" => Category::WindowsSid,
424 "url" => Category::Url,
425 "aws_arn" => Category::AwsArn,
426 "azure_resource_id" => Category::AzureResourceId,
427 other => {
428 let tag = other.strip_prefix("custom:").unwrap_or(other);
429 Category::Custom(tag.into())
430 }
431 }
432}
433
434fn zeroize_and_drop_entries(mut entries: Vec<SecretEntry>) {
443 for entry in &mut entries {
444 entry.pattern.zeroize();
445 entry.kind.zeroize();
446 entry.category.zeroize();
447 if let Some(ref mut l) = entry.label {
448 l.zeroize();
449 }
450 }
451 }
454
455pub fn entries_to_patterns(entries: &[SecretEntry]) -> PatternCompileResult {
460 let mut patterns = Vec::with_capacity(entries.len());
461 let mut errors = Vec::new();
462
463 for (i, entry) in entries.iter().enumerate() {
464 let category = parse_category(&entry.category);
465 let label = entry
466 .label
467 .clone()
468 .unwrap_or_else(|| truncate_label(&entry.pattern));
469
470 let result = match entry.kind.as_str() {
471 "regex" => ScanPattern::from_regex(&entry.pattern, category, label),
472 _ => ScanPattern::from_literal(&entry.pattern, category, label),
473 };
474
475 match result {
476 Ok(pat) => patterns.push(pat),
477 Err(e) => errors.push((i, e)),
478 }
479 }
480
481 (patterns, errors)
482}
483
/// Produces a display label from `s`, shortening anything over 32 bytes.
///
/// Strings of at most 32 bytes are returned unchanged. Longer strings are cut
/// to at most 31 bytes and suffixed with `…`. The cut is moved back to the
/// nearest UTF-8 character boundary, so multi-byte input never causes a panic
/// (a raw `&s[..31]` would panic if byte 31 fell inside a character).
fn truncate_label(s: &str) -> String {
    const MAX_LABEL_BYTES: usize = 32;

    if s.len() <= MAX_LABEL_BYTES {
        return s.to_string();
    }

    // Index 0 is always a boundary, so this loop terminates.
    let mut cut = MAX_LABEL_BYTES - 1;
    while !s.is_char_boundary(cut) {
        cut -= 1;
    }
    format!("{}…", &s[..cut])
}
492
493pub fn load_encrypted_secrets(
522 encrypted_bytes: &[u8],
523 password: &str,
524 format: Option<SecretsFormat>,
525) -> Result<PatternCompileResult> {
526 let plaintext = decrypt_secrets(encrypted_bytes, password)?;
527 let entries = parse_secrets(&plaintext, format)?;
528 let result = entries_to_patterns(&entries);
529 zeroize_and_drop_entries(entries);
530 Ok(result)
531}
532
533pub fn load_plaintext_secrets(
555 plaintext: &[u8],
556 format: Option<SecretsFormat>,
557) -> Result<PatternCompileResult> {
558 let entries = parse_secrets(plaintext, format)?;
559 let result = entries_to_patterns(&entries);
560 zeroize_and_drop_entries(entries);
561 Ok(result)
562}
563
564pub fn looks_encrypted(data: &[u8]) -> bool {
578 if data.len() < MIN_ENCRYPTED_LEN {
579 return false;
582 }
583 if let Ok(text) = std::str::from_utf8(data) {
586 let trimmed = text.trim_start();
587 let has_marker = trimmed.starts_with('[')
590 || trimmed.starts_with('{')
591 || trimmed.starts_with('-')
592 || trimmed.starts_with('#');
593 if has_marker {
594 return false;
595 }
596 }
597 true
599}
600
601pub fn load_secrets_auto(
625 data: &[u8],
626 password: Option<&str>,
627 format: Option<SecretsFormat>,
628 force_plaintext: bool,
629) -> Result<(PatternCompileResult, bool)> {
630 if force_plaintext || !looks_encrypted(data) {
631 let result = load_plaintext_secrets(data, format)?;
632 Ok((result, false))
633 } else {
634 let pw = password.ok_or(SanitizeError::SecretsPasswordRequired)?;
635 let result = load_encrypted_secrets(data, pw, format)?;
636 Ok((result, true))
637 }
638}
639
// Unit tests covering parsing, category mapping, pattern compilation,
// encryption round-trips, and the auto-detecting loader.
#[cfg(test)]
mod tests {
    use super::*;

    /// Two-entry fixture (one regex, one literal) in JSON form.
    fn sample_json() -> &'static str {
        r#"[
            {
                "pattern": "alice@corp\\.com",
                "kind": "regex",
                "category": "email",
                "label": "alice_email"
            },
            {
                "pattern": "sk-proj-abc123secret",
                "kind": "literal",
                "category": "custom:api_key",
                "label": "openai_key"
            }
        ]"#
    }

    /// The same two entries as `sample_json`, in YAML form.
    fn sample_yaml() -> &'static str {
        r#"- pattern: "alice@corp\\.com"
  kind: regex
  category: email
  label: alice_email
- pattern: sk-proj-abc123secret
  kind: literal
  category: "custom:api_key"
  label: openai_key
"#
    }

    /// The same two entries as `sample_json`, as a TOML `secrets` array of tables.
    fn sample_toml() -> &'static str {
        r#"[[secrets]]
pattern = "alice@corp\\.com"
kind = "regex"
category = "email"
label = "alice_email"

[[secrets]]
pattern = "sk-proj-abc123secret"
kind = "literal"
category = "custom:api_key"
label = "openai_key"
"#
    }

    // ---- Parsing with an explicit or detected format ----

    #[test]
    fn parse_json_entries() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].kind, "regex");
        assert_eq!(entries[0].category, "email");
        assert_eq!(entries[1].kind, "literal");
    }

    #[test]
    fn parse_yaml_entries() {
        let entries = parse_secrets(sample_yaml().as_bytes(), Some(SecretsFormat::Yaml)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[0].label, Some("alice_email".into()));
    }

    #[test]
    fn parse_toml_entries() {
        let entries = parse_secrets(sample_toml().as_bytes(), Some(SecretsFormat::Toml)).unwrap();
        assert_eq!(entries.len(), 2);
        assert_eq!(entries[1].pattern, "sk-proj-abc123secret");
    }

    #[test]
    fn parse_auto_detect_json() {
        // `None` makes parse_secrets fall back to SecretsFormat::detect.
        let entries = parse_secrets(sample_json().as_bytes(), None).unwrap();
        assert_eq!(entries.len(), 2);
    }

    #[test]
    fn parse_auto_detect_yaml() {
        let entries = parse_secrets(sample_yaml().as_bytes(), None).unwrap();
        assert_eq!(entries.len(), 2);
    }

    // ---- Category name mapping ----

    #[test]
    fn parse_builtin_categories() {
        assert_eq!(parse_category("email"), Category::Email);
        assert_eq!(parse_category("ipv4"), Category::IpV4);
        assert_eq!(parse_category("ssn"), Category::Ssn);
    }

    #[test]
    fn parse_custom_category() {
        // The `custom:` prefix is stripped from the stored tag.
        match parse_category("custom:api_key") {
            Category::Custom(tag) => assert_eq!(tag.as_str(), "api_key"),
            other => panic!("expected Custom, got {:?}", other),
        }
    }

    #[test]
    fn parse_unknown_category_becomes_custom() {
        match parse_category("foobar") {
            Category::Custom(tag) => assert_eq!(tag.as_str(), "foobar"),
            other => panic!("expected Custom, got {:?}", other),
        }
    }

    // ---- Compiling entries into patterns ----

    #[test]
    fn entries_to_patterns_success() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, errors) = entries_to_patterns(&entries);
        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    #[test]
    fn entries_to_patterns_bad_regex() {
        // An invalid regex is reported with its entry index instead of aborting.
        let json = r#"[{"pattern": "[invalid(", "kind": "regex", "category": "email"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, errors) = entries_to_patterns(&entries);
        assert!(patterns.is_empty());
        assert_eq!(errors.len(), 1);
        assert_eq!(errors[0].0, 0);
    }

    // ---- Encryption and decryption ----

    #[test]
    fn encrypt_decrypt_roundtrip() {
        let plaintext = sample_json().as_bytes();
        let password = "test-password-42";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();

        // The blob carries the salt and nonce in addition to the ciphertext.
        assert!(encrypted.len() > plaintext.len());

        let decrypted = decrypt_secrets(&encrypted, password).unwrap();
        assert_eq!(decrypted.as_slice(), plaintext);
    }

    #[test]
    fn decrypt_wrong_password_fails() {
        let plaintext = b"hello";
        let encrypted = encrypt_secrets(plaintext, "correct").unwrap();
        let result = decrypt_secrets(&encrypted, "wrong");
        assert!(result.is_err());
    }

    #[test]
    fn decrypt_truncated_blob_fails() {
        // 10 bytes is below MIN_ENCRYPTED_LEN.
        let result = decrypt_secrets(&[0u8; 10], "any");
        assert!(result.is_err());
    }

    #[test]
    fn decrypt_tampered_blob_fails() {
        let plaintext = b"hello world";
        let mut encrypted = encrypt_secrets(plaintext, "pw").unwrap();
        let last = encrypted.len() - 1;
        // Flip every bit of the final byte of the blob.
        encrypted[last] ^= 0xFF;
        let result = decrypt_secrets(&encrypted, "pw");
        assert!(result.is_err());
    }

    #[test]
    fn encrypt_empty_password_rejected() {
        let result = encrypt_secrets(b"hello", "");
        assert!(result.is_err());
    }

    // ---- Full encrypt -> load pipeline, one test per format ----

    #[test]
    fn full_pipeline_json() {
        let plaintext = sample_json().as_bytes();
        let password = "pipeline-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Json)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
        assert_eq!(patterns[0].label(), "alice_email");
        assert_eq!(patterns[1].label(), "openai_key");
    }

    #[test]
    fn full_pipeline_yaml() {
        let plaintext = sample_yaml().as_bytes();
        let password = "yaml-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Yaml)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    #[test]
    fn full_pipeline_toml() {
        let plaintext = sample_toml().as_bytes();
        let password = "toml-test";

        let encrypted = encrypt_secrets(plaintext, password).unwrap();
        let (patterns, errors) =
            load_encrypted_secrets(&encrypted, password, Some(SecretsFormat::Toml)).unwrap();

        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    // ---- Plaintext loading ----

    #[test]
    fn load_plaintext_secrets_works() {
        let (patterns, errors) =
            load_plaintext_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(patterns.len(), 2);
        assert!(errors.is_empty());
    }

    // ---- Serialization ----

    #[test]
    fn serialize_roundtrip_json() {
        let entries = parse_secrets(sample_json().as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let serialized = serialize_secrets(&entries, SecretsFormat::Json).unwrap();
        let reparsed = parse_secrets(&serialized, Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries.len(), reparsed.len());
        assert_eq!(entries[0].pattern, reparsed[0].pattern);
    }

    // ---- Format inference from file names ----

    #[test]
    fn format_from_extension() {
        assert_eq!(
            SecretsFormat::from_extension("secrets.json"),
            Some(SecretsFormat::Json)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.json.enc"),
            Some(SecretsFormat::Json)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.yaml"),
            Some(SecretsFormat::Yaml)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.yml.enc"),
            Some(SecretsFormat::Yaml)
        );
        assert_eq!(
            SecretsFormat::from_extension("secrets.toml"),
            Some(SecretsFormat::Toml)
        );
        assert_eq!(SecretsFormat::from_extension("secrets.txt"), None);
    }

    // ---- Serde defaults for omitted fields ----

    #[test]
    fn default_kind_is_literal() {
        let json = r#"[{"pattern": "foo"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries[0].kind, "literal");
    }

    #[test]
    fn default_category_is_custom_secret() {
        let json = r#"[{"pattern": "foo"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        assert_eq!(entries[0].category, "custom:secret");
    }

    #[test]
    fn default_label_from_pattern() {
        // Without an explicit label, the (short) pattern text becomes the label.
        let json = r#"[{"pattern": "short"}]"#;
        let entries = parse_secrets(json.as_bytes(), Some(SecretsFormat::Json)).unwrap();
        let (patterns, _) = entries_to_patterns(&entries);
        assert_eq!(patterns[0].label(), "short");
    }

    // ---- Encrypted-vs-plaintext heuristic ----

    #[test]
    fn looks_encrypted_json_plaintext() {
        assert!(!looks_encrypted(sample_json().as_bytes()));
    }

    #[test]
    fn looks_encrypted_yaml_plaintext() {
        assert!(!looks_encrypted(sample_yaml().as_bytes()));
    }

    #[test]
    fn looks_encrypted_toml_plaintext() {
        assert!(!looks_encrypted(sample_toml().as_bytes()));
    }

    #[test]
    fn looks_encrypted_actual_encrypted() {
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        assert!(looks_encrypted(&encrypted));
    }

    #[test]
    fn looks_encrypted_too_short() {
        assert!(!looks_encrypted(&[0u8; 10]));
    }

    // ---- Auto-detecting loader ----

    #[test]
    fn auto_load_plaintext_json() {
        let data = sample_json().as_bytes();
        let ((pats, errs), was_enc) =
            load_secrets_auto(data, None, Some(SecretsFormat::Json), false).unwrap();
        assert!(!was_enc);
        assert_eq!(pats.len(), 2);
        assert!(errs.is_empty());
    }

    #[test]
    fn auto_load_encrypted_json() {
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        let ((pats, errs), was_enc) =
            load_secrets_auto(&encrypted, Some("pw"), Some(SecretsFormat::Json), false).unwrap();
        assert!(was_enc);
        assert_eq!(pats.len(), 2);
        assert!(errs.is_empty());
    }

    #[test]
    fn auto_load_force_plaintext() {
        let data = sample_json().as_bytes();
        let ((pats, _), was_enc) =
            load_secrets_auto(data, None, Some(SecretsFormat::Json), true).unwrap();
        assert!(!was_enc);
        assert_eq!(pats.len(), 2);
    }

    #[test]
    fn auto_load_encrypted_no_password_fails() {
        // Encrypted data without a password must be rejected.
        let encrypted = encrypt_secrets(sample_json().as_bytes(), "pw").unwrap();
        let result = load_secrets_auto(&encrypted, None, None, false);
        assert!(result.is_err());
    }
}