1use std::borrow::Cow;
37use std::collections::HashSet;
38use std::fs;
39use std::path::{Path, PathBuf};
40
41use regex::{Regex, RegexBuilder};
42use serde::{Deserialize, Serialize};
43use thiserror::Error;
44use tracing::warn;
45
46use crate::builtin;
47use crate::{PatternMetadata, SecretPattern, Severity};
48
/// Directory name (`patterns.d`) associated with user-supplied pattern files.
///
/// NOTE(review): nothing in this file reads the constant; presumably callers
/// join it onto a configuration directory and hand the result to
/// [`Catalogue::load`] — confirm against the call sites.
pub const USER_PATTERNS_SUBDIR: &str = "patterns.d";
52
/// Deserialized form of one user pattern file.
///
/// Every `[[pattern]]` table in the TOML document becomes one
/// [`UserPatternEntry`]. A document without a `pattern` key deserializes to
/// an empty list because the field is marked `#[serde(default)]`.
///
/// NOTE(review): unlike [`UserPatternEntry`], this struct does not set
/// `deny_unknown_fields`, so a misspelled top-level key (for example
/// `[[patterns]]`) is accepted and yields zero patterns — confirm that this
/// leniency is intentional.
#[derive(Debug, Default, Clone, Deserialize, Serialize)]
pub struct UserPatternFile {
    /// Pattern entries, stored under the TOML key `pattern`.
    #[serde(default, rename = "pattern")]
    pub patterns: Vec<UserPatternEntry>,
}
64
/// One `[[pattern]]` table exactly as written by the user, before validation.
///
/// Unknown keys are rejected at parse time (`deny_unknown_fields`).
/// [`Catalogue::load`] turns each entry into a [`UserPattern`] after checking
/// the id and compiling the regex.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(deny_unknown_fields)]
pub struct UserPatternEntry {
    /// Pattern identifier; the loader requires the form `[a-z][a-z0-9-]*`.
    pub id: String,
    /// Human-readable name of the pattern.
    pub display_name: String,
    /// Regular-expression source; compiled by the loader.
    pub format_regex: String,
    /// Severity assigned to the pattern.
    pub severity: Severity,

    /// Optional provider identifier, copied into [`PatternMetadata`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub provider_id: Option<String>,
    /// Optional retrieval URL template, copied into [`PatternMetadata`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub retrieval_url_template: Option<String>,
    /// Optional default expiry in days, copied into [`PatternMetadata`].
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub default_expiry_days: Option<u32>,
    /// Optional scope hints, copied into [`PatternMetadata`]; omitted from
    /// serialized output when empty.
    #[serde(default, skip_serializing_if = "Vec::is_empty")]
    pub scopes_hint: Vec<String>,
}
102
/// A validated user pattern: the id has passed the kebab-case check and the
/// regex has been compiled.
///
/// Instances are created by [`Catalogue::load`]; the fields are private and
/// exposed through the [`SecretPattern`] implementation.
#[derive(Debug)]
pub struct UserPattern {
    /// Pattern identifier.
    id: String,
    /// Human-readable name.
    display_name: String,
    /// Severity taken from the user's entry.
    severity: Severity,
    /// Compiled form of the entry's `format_regex`.
    regex: Regex,
    /// Present only when the entry set at least one metadata field.
    metadata: Option<PatternMetadata>,
}
117
118impl UserPattern {
119 pub fn id(&self) -> &str {
123 &self.id
124 }
125}
126
127impl SecretPattern for UserPattern {
128 fn id(&self) -> &str {
129 &self.id
130 }
131 fn display_name(&self) -> &str {
132 &self.display_name
133 }
134 fn format_regex(&self) -> &Regex {
135 &self.regex
136 }
137 fn severity(&self) -> Severity {
138 self.severity
139 }
140 fn metadata(&self) -> Option<&PatternMetadata> {
141 self.metadata.as_ref()
142 }
143 }
145
/// Fatal problems that make [`Catalogue::load`] return `Err`.
#[derive(Debug, Error)]
pub enum LoadError {
    /// The patterns directory could not be listed, or a `.toml` file inside
    /// it could not be read.
    #[error("failed to read user patterns at {path}: {source}")]
    Read {
        /// Directory or file whose read failed.
        path: PathBuf,
        /// Underlying I/O error.
        #[source]
        source: std::io::Error,
    },

    /// A `.toml` file did not deserialize into a [`UserPatternFile`].
    #[error("failed to parse user patterns file at {path}: {source}")]
    Parse {
        /// File that failed to parse.
        path: PathBuf,
        /// Underlying TOML deserialization error.
        #[source]
        source: toml::de::Error,
    },

    /// An entry's `format_regex` was rejected by the regex builder.
    #[error("invalid regex for user pattern '{id}' in {path}: {source}")]
    BadRegex {
        /// File that declares the pattern.
        path: PathBuf,
        /// Id of the offending pattern.
        id: String,
        /// Underlying regex build error.
        #[source]
        source: regex::Error,
    },

    /// An entry's id is not of the form `[a-z][a-z0-9-]*`.
    #[error("user pattern id '{id}' in {path} must be lowercase kebab-case (`[a-z][a-z0-9-]*`)")]
    BadId {
        /// File that declares the pattern.
        path: PathBuf,
        /// The rejected id.
        id: String,
    },

    /// The same id was declared by two user entries, in one file or in two.
    #[error("user pattern id '{id}' is declared twice (first in {first}, then in {second})")]
    DuplicateUserId {
        /// The repeated id.
        id: String,
        /// File holding the first declaration.
        first: PathBuf,
        /// File holding the second declaration; equals `first` when both
        /// declarations are in the same file.
        second: PathBuf,
    },
}
208
/// A non-fatal observation recorded while loading user patterns.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LoadWarning {
    /// What kind of situation was observed.
    pub kind: LoadWarningKind,
    /// What the warning is about: the pattern id for
    /// [`LoadWarningKind::ShadowsBuiltin`], the displayed path for
    /// [`LoadWarningKind::SkippedNonToml`].
    pub subject: String,
}
218
/// Categories of [`LoadWarning`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LoadWarningKind {
    /// A user pattern reuses the id of a built-in pattern and replaces it.
    ShadowsBuiltin,
    /// A directory entry whose extension is not `toml` was skipped.
    SkippedNonToml,
}
231
/// The combined set of user patterns and built-in patterns.
///
/// The default value holds no user patterns, so only built-ins are visible.
#[derive(Debug, Default)]
pub struct Catalogue {
    /// User patterns in load order (files in sorted path order, entries in
    /// file order).
    user_patterns: Vec<UserPattern>,
    /// Ids of built-in patterns that a user pattern replaces; these built-ins
    /// are left out of [`Catalogue::iter`].
    shadowed: HashSet<String>,
    /// Non-fatal observations collected during loading.
    warnings: Vec<LoadWarning>,
}
245
246impl Catalogue {
247 pub fn builtins_only() -> Self {
250 Self::default()
251 }
252
253 pub fn load(dir: &Path) -> Result<Self, LoadError> {
258 let mut cat = Self::default();
259 if !dir.exists() {
260 return Ok(cat);
261 }
262
263 let read = fs::read_dir(dir).map_err(|e| LoadError::Read {
264 path: dir.to_path_buf(),
265 source: e,
266 })?;
267
268 let mut paths: Vec<PathBuf> = read.filter_map(|res| res.ok().map(|e| e.path())).collect();
272 paths.sort();
273
274 let mut id_origin: std::collections::HashMap<String, PathBuf> =
276 std::collections::HashMap::new();
277
278 for path in paths {
279 if path.extension().and_then(|s| s.to_str()) != Some("toml") {
280 cat.warnings.push(LoadWarning {
281 kind: LoadWarningKind::SkippedNonToml,
282 subject: path.display().to_string(),
283 });
284 continue;
285 }
286
287 let body = fs::read_to_string(&path).map_err(|e| LoadError::Read {
288 path: path.clone(),
289 source: e,
290 })?;
291 let parsed: UserPatternFile = toml::from_str(&body).map_err(|e| LoadError::Parse {
292 path: path.clone(),
293 source: e,
294 })?;
295
296 for entry in parsed.patterns {
297 if !is_kebab_id(&entry.id) {
298 return Err(LoadError::BadId {
299 path: path.clone(),
300 id: entry.id,
301 });
302 }
303
304 if let Some(prev) = id_origin.get(&entry.id) {
305 return Err(LoadError::DuplicateUserId {
306 id: entry.id,
307 first: prev.clone(),
308 second: path.clone(),
309 });
310 }
311
312 let regex = RegexBuilder::new(&entry.format_regex)
322 .size_limit(64 * 1024)
323 .build()
324 .map_err(|e| LoadError::BadRegex {
325 path: path.clone(),
326 id: entry.id.clone(),
327 source: e,
328 })?;
329
330 let metadata = if entry.provider_id.is_some()
331 || entry.retrieval_url_template.is_some()
332 || entry.default_expiry_days.is_some()
333 || !entry.scopes_hint.is_empty()
334 {
335 Some(PatternMetadata {
336 provider_id: Cow::Owned(entry.provider_id.unwrap_or_default()),
337 retrieval_url_template: Cow::Owned(
338 entry.retrieval_url_template.unwrap_or_default(),
339 ),
340 default_expiry_days: entry.default_expiry_days,
341 scopes_hint: entry.scopes_hint.into_iter().map(Cow::Owned).collect(),
342 })
343 } else {
344 None
345 };
346
347 if builtin::find(&entry.id).is_some() {
348 cat.shadowed.insert(entry.id.clone());
349 cat.warnings.push(LoadWarning {
350 kind: LoadWarningKind::ShadowsBuiltin,
351 subject: entry.id.clone(),
352 });
353 warn!(
354 id = %entry.id,
355 path = %path.display(),
356 "user pattern shadows built-in entry"
357 );
358 }
359
360 id_origin.insert(entry.id.clone(), path.clone());
361 cat.user_patterns.push(UserPattern {
362 id: entry.id,
363 display_name: entry.display_name,
364 severity: entry.severity,
365 regex,
366 metadata,
367 });
368 }
369 }
370
371 Ok(cat)
372 }
373
374 pub fn iter(&self) -> Vec<&dyn SecretPattern> {
378 let mut out: Vec<&dyn SecretPattern> = self
379 .user_patterns
380 .iter()
381 .map(|p| p as &dyn SecretPattern)
382 .collect();
383 for b in builtin::builtins() {
384 if !self.shadowed.contains(b.id()) {
385 out.push(b);
386 }
387 }
388 out
389 }
390
391 pub fn find(&self, id: &str) -> Option<&dyn SecretPattern> {
394 if let Some(p) = self.user_patterns.iter().find(|p| p.id() == id) {
395 return Some(p as &dyn SecretPattern);
396 }
397 builtin::find(id)
398 }
399
400 pub fn warnings(&self) -> &[LoadWarning] {
403 &self.warnings
404 }
405
406 pub fn has_user_patterns(&self) -> bool {
408 !self.user_patterns.is_empty()
409 }
410}
411
/// Reports whether `id` has the form `[a-z][a-z0-9-]*`.
///
/// The first byte must be an ASCII lowercase letter; every later byte must be
/// an ASCII lowercase letter, an ASCII digit or `-`. The empty string is
/// rejected, and so is any non-ASCII text.
fn is_kebab_id(id: &str) -> bool {
    let mut rest = id.bytes();
    match rest.next() {
        Some(first) if first.is_ascii_lowercase() => {
            rest.all(|b| matches!(b, b'a'..=b'z' | b'0'..=b'9' | b'-'))
        }
        _ => false,
    }
}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Writes `body` to `dir/name` and returns the path of the new file.
    fn write_toml(dir: &Path, name: &str, body: &str) -> PathBuf {
        let target = dir.join(name);
        std::fs::write(&target, body).expect("write fixture");
        target
    }

    /// A directory that does not exist loads as an empty catalogue.
    #[test]
    fn missing_directory_yields_empty_catalogue() {
        let tmp = tempfile::tempdir().unwrap();
        let absent = tmp.path().join("nonexistent");
        let catalogue = Catalogue::load(&absent).unwrap();
        assert!(!catalogue.has_user_patterns());
        assert!(catalogue.warnings().is_empty());
    }

    /// An existing directory without files loads as an empty catalogue.
    #[test]
    fn empty_directory_yields_empty_catalogue() {
        let tmp = tempfile::tempdir().unwrap();
        let catalogue = Catalogue::load(tmp.path()).unwrap();
        assert!(!catalogue.has_user_patterns());
        assert!(catalogue.warnings().is_empty());
    }

    /// A fully specified entry is exposed with all of its fields intact.
    #[test]
    fn loads_single_user_pattern() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "internal.toml",
            r#"
[[pattern]]
id = "internal-mfa-token"
display_name = "Internal MFA Token"
format_regex = "^mfa_[A-Z0-9]{40}$"
severity = "high"
provider_id = "internal"
retrieval_url_template = "https://mfa.example.internal/tokens"
default_expiry_days = 180
scopes_hint = ["read", "write"]
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        assert!(catalogue.has_user_patterns());

        let pattern = catalogue.find("internal-mfa-token").expect("found");
        assert_eq!(pattern.id(), "internal-mfa-token");
        assert_eq!(pattern.display_name(), "Internal MFA Token");
        assert_eq!(pattern.severity(), Severity::High);
        let sample = "mfa_ABCDEFGHIJ0123456789ABCDEFGHIJ0123456789";
        assert!(pattern.format_regex().is_match(sample));

        let meta = pattern.metadata().expect("metadata");
        assert_eq!(meta.provider_id.as_ref(), "internal");
        assert_eq!(
            meta.retrieval_url_template.as_ref(),
            "https://mfa.example.internal/tokens"
        );
        assert_eq!(meta.default_expiry_days, Some(180));
        assert_eq!(meta.scopes_hint.len(), 2);
    }

    /// An entry that sets none of the optional fields carries no metadata.
    #[test]
    fn user_pattern_without_metadata_fields_has_no_metadata() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "minimal.toml",
            r#"
[[pattern]]
id = "minimal-x"
display_name = "Minimal X"
format_regex = "^x_[a-z]{8}$"
severity = "low"
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        let pattern = catalogue.find("minimal-x").unwrap();
        assert!(pattern.metadata().is_none());
    }

    /// Files are loaded in sorted path order, not in creation order.
    #[test]
    fn loads_multiple_files_in_sorted_order() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "b.toml",
            r#"
[[pattern]]
id = "second"
display_name = "Second"
format_regex = "^.+$"
severity = "low"
"#,
        );
        write_toml(
            tmp.path(),
            "a.toml",
            r#"
[[pattern]]
id = "first"
display_name = "First"
format_regex = "^.+$"
severity = "low"
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        let loaded = &catalogue.user_patterns;
        assert_eq!(loaded[0].id(), "first");
        assert_eq!(loaded[1].id(), "second");
    }

    /// Files without a `toml` extension are skipped and reported.
    #[test]
    fn skips_non_toml_files_with_warning() {
        let tmp = tempfile::tempdir().unwrap();
        std::fs::write(tmp.path().join("readme.md"), "not toml").unwrap();
        write_toml(
            tmp.path(),
            "real.toml",
            r#"
[[pattern]]
id = "real-x"
display_name = "Real"
format_regex = "^.+$"
severity = "low"
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        assert_eq!(catalogue.user_patterns.len(), 1);
        let skipped = catalogue
            .warnings()
            .iter()
            .any(|w| w.kind == LoadWarningKind::SkippedNonToml);
        assert!(skipped);
    }

    /// A user pattern that reuses a built-in id replaces the built-in and
    /// produces a shadowing warning.
    #[test]
    fn user_pattern_shadows_builtin_with_warning() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "shadow.toml",
            r#"
[[pattern]]
id = "github-pat"
display_name = "Custom GitHub PAT"
format_regex = "^my-custom-gh-.+$"
severity = "high"
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        let pattern = catalogue.find("github-pat").expect("found via user");
        assert_eq!(pattern.display_name(), "Custom GitHub PAT");
        assert!(pattern.format_regex().is_match("my-custom-gh-anything"));
        assert!(!pattern
            .format_regex()
            .is_match("ghp_someValidLookingTokenString"));

        // The id must be listed once: the user pattern, not the built-in too.
        let occurrences = catalogue
            .iter()
            .iter()
            .filter(|p| p.id() == "github-pat")
            .count();
        assert_eq!(occurrences, 1);

        let warned = catalogue
            .warnings()
            .iter()
            .any(|w| w.kind == LoadWarningKind::ShadowsBuiltin && w.subject == "github-pat");
        assert!(warned);
    }

    /// A user pattern with a fresh id is listed alongside all built-ins.
    #[test]
    fn non_shadowing_user_patterns_coexist_with_builtins() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "x.toml",
            r#"
[[pattern]]
id = "internal-x"
display_name = "Internal X"
format_regex = "^x_.+$"
severity = "low"
"#,
        );

        let catalogue = Catalogue::load(tmp.path()).unwrap();
        assert_eq!(catalogue.iter().len(), 32);
        assert!(catalogue
            .warnings()
            .iter()
            .all(|w| w.kind != LoadWarningKind::ShadowsBuiltin));
    }

    /// A regex that does not compile aborts the load with `BadRegex`.
    #[test]
    fn rejects_bad_regex() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "bad.toml",
            r#"
[[pattern]]
id = "bad-regex"
display_name = "Bad Regex"
format_regex = "^[unclosed$"
severity = "low"
"#,
        );

        match Catalogue::load(tmp.path()).unwrap_err() {
            LoadError::BadRegex { id, .. } => assert_eq!(id, "bad-regex"),
            other => panic!("expected BadRegex, got {other:?}"),
        }
    }

    /// Ids containing uppercase letters are rejected.
    #[test]
    fn rejects_bad_id_uppercase() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "bad.toml",
            r#"
[[pattern]]
id = "BadId"
display_name = "x"
format_regex = "^.+$"
severity = "low"
"#,
        );

        let outcome = Catalogue::load(tmp.path());
        assert!(matches!(outcome.unwrap_err(), LoadError::BadId { .. }));
    }

    /// Ids that start with a digit are rejected.
    #[test]
    fn rejects_bad_id_starts_with_digit() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "bad.toml",
            r#"
[[pattern]]
id = "9bad"
display_name = "x"
format_regex = "^.+$"
severity = "low"
"#,
        );

        let err = Catalogue::load(tmp.path()).unwrap_err();
        assert!(matches!(err, LoadError::BadId { .. }));
    }

    /// The same id declared in two different files is an error.
    #[test]
    fn rejects_duplicate_user_id_across_files() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "a.toml",
            r#"
[[pattern]]
id = "dup"
display_name = "First"
format_regex = "^a$"
severity = "low"
"#,
        );
        write_toml(
            tmp.path(),
            "b.toml",
            r#"
[[pattern]]
id = "dup"
display_name = "Second"
format_regex = "^b$"
severity = "low"
"#,
        );

        match Catalogue::load(tmp.path()).unwrap_err() {
            LoadError::DuplicateUserId { id, .. } => assert_eq!(id, "dup"),
            other => panic!("expected DuplicateUserId, got {other:?}"),
        }
    }

    /// The same id declared twice inside one file is an error.
    #[test]
    fn rejects_duplicate_user_id_within_file() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "a.toml",
            r#"
[[pattern]]
id = "dup"
display_name = "First"
format_regex = "^a$"
severity = "low"

[[pattern]]
id = "dup"
display_name = "Second"
format_regex = "^b$"
severity = "low"
"#,
        );

        let outcome = Catalogue::load(tmp.path());
        assert!(matches!(
            outcome.unwrap_err(),
            LoadError::DuplicateUserId { .. }
        ));
    }

    /// Keys that the entry schema does not know fail at parse time.
    #[test]
    fn rejects_unknown_field() {
        let tmp = tempfile::tempdir().unwrap();
        write_toml(
            tmp.path(),
            "bad.toml",
            r#"
[[pattern]]
id = "x"
display_name = "x"
format_regex = "^.+$"
severity = "low"
unknown_field = "wrong"
"#,
        );

        let err = Catalogue::load(tmp.path()).unwrap_err();
        assert!(matches!(err, LoadError::Parse { .. }));
    }

    /// Without user patterns the catalogue lists the 31 built-ins.
    #[test]
    fn builtins_only_returns_thirty_one_patterns() {
        let catalogue = Catalogue::builtins_only();
        assert_eq!(catalogue.iter().len(), 31);
        assert!(catalogue.warnings().is_empty());
    }

    /// Lookup falls through to the built-ins and misses on unknown ids.
    #[test]
    fn builtins_only_find_works() {
        let catalogue = Catalogue::builtins_only();
        let pattern = catalogue.find("github-pat").unwrap();
        assert_eq!(pattern.id(), "github-pat");
        assert!(catalogue.find("no-such-id").is_none());
    }

    /// Well-formed ids pass the kebab-case check.
    #[test]
    fn is_kebab_id_accepts_valid_ids() {
        for id in ["github-pat", "a", "a1", "a-b-c"] {
            assert!(is_kebab_id(id));
        }
    }

    /// Malformed ids fail the kebab-case check.
    #[test]
    fn is_kebab_id_rejects_invalid_ids() {
        for id in [
            "",
            "Github",
            "9start",
            "with_underscore",
            "with.dot",
            "-leading-dash",
        ] {
            assert!(!is_kebab_id(id));
        }
    }
}