devboy_storage/validation.rs
1//! Format validation per [ADR-021] §6 (the "validation framework"
2//! umbrella) and [ADR-020] §3 (`format_regex` / `pattern_id`
3//! metadata fields).
4//!
5//! The check is *format-only* and *lazy on demand*: it compares a
6//! candidate value against the regex declared by the global-index
7//! entry (`format_regex`) or, failing that, by the pattern referenced
8//! through `pattern_id`. A `Liveness` probe — actually asking the
9//! upstream whether the value still works — is a separate phase
10//! (P9.2) and lives in its own module.
11//!
12//! ## Resolution order
13//!
14//! 1. If the entry has an inline `format_regex`, compile and use it.
15//! 2. Otherwise, if the entry has a `pattern_id`, look it up in the
16//! [`devboy_secret_patterns::Catalogue`] and use its
17//! `SecretPattern::format_regex`.
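//! 3. Otherwise — or if the `pattern_id` is not present in the
//!    catalogue — return [`FormatCheck::NoRule`]: no usable format
//!    was declared, so the validator stays silent. Unresolved
//!    `pattern_id`s are surfaced by `doctor`, not by this check.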
20//!
21//! Inline `format_regex` wins over `pattern_id` because a project may
22//! have a tighter shape in mind than the generic pattern (e.g. a
23//! regex that pins the prefix to a specific tenant id).
24//!
25//! ## What the validator does **not** do
26//!
27//! - **Compile patterns ahead of time.** Inline `format_regex`
28//! compiles on every call. The catalogue's `format_regex()` is
29//! already cached behind `OnceLock`. A full ahead-of-time compile
30//! of all index entries can land later if profiling shows it's
31//! needed; for now `secrets validate <path>` is on demand and the
32//! cost is acceptable.
33//! - **Probe upstream liveness.** That's P9.2. A pattern that is
34//! well-formed but revoked still passes this check.
35//!
36//! [ADR-020]: https://github.com/meteora-pro/devboy-tools/blob/main/docs/architecture/adr/ADR-020-secret-manifest-and-alias-resolution.md
37//! [ADR-021]: https://github.com/meteora-pro/devboy-tools/blob/main/docs/architecture/adr/ADR-021-external-secret-sources.md
38
39use devboy_secret_patterns::Catalogue;
40
41use crate::index::IndexEntry;
42
43// =============================================================================
44// Public types
45// =============================================================================
46
47/// Outcome of [`validate_format`].
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub enum FormatCheck {
50 /// No `format_regex` and no `pattern_id` (or the `pattern_id`
51 /// names a pattern that isn't in the catalogue). The caller
52 /// chose not to declare a shape; format validation is a no-op.
53 NoRule,
54 /// The value matched the resolved regex.
55 Ok {
56 /// Where the regex came from — `format_regex` inline on the
57 /// entry, or the `pattern_id` it resolved through.
58 source: FormatRuleSource,
59 },
60 /// The value did not match.
61 Mismatch {
62 /// Where the regex came from.
63 source: FormatRuleSource,
64 /// The regex pattern that was checked. Useful in error
65 /// messages — `{expected}` lets the user see exactly which
66 /// shape the system was expecting.
67 expected: String,
68 },
69 /// Something went wrong during validation — usually a regex
70 /// compile failure for an inline `format_regex`.
71 Error {
72 /// Human-readable detail.
73 message: String,
74 },
75}
76
/// Where the regex applied by a format check came from.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatRuleSource {
    /// The `format_regex` declared inline on the index entry.
    Inline,
    /// A catalogue pattern reached through the entry's `pattern_id`.
    /// The payload is the `id` that was looked up.
    PatternId(String),
}
86
87// =============================================================================
88// Validator
89// =============================================================================
90
91/// Validate `value` against the format rule attached to `entry`.
92///
93/// Resolution order:
94///
95/// 1. `entry.format_regex` (inline) wins.
96/// 2. `entry.pattern_id` then resolves through `catalogue`.
97/// 3. Neither set → [`FormatCheck::NoRule`].
98///
99/// Inline regex compile errors surface as
100/// [`FormatCheck::Error`] — the caller decides whether that's a
101/// hard fail or a `doctor`-style warning.
102pub fn validate_format(entry: &IndexEntry, value: &str, catalogue: &Catalogue) -> FormatCheck {
103 if let Some(pattern) = entry.format_regex.as_deref() {
104 let re = match regex::Regex::new(pattern) {
105 Ok(r) => r,
106 Err(e) => {
107 return FormatCheck::Error {
108 message: format!("invalid format_regex `{pattern}`: {e}"),
109 };
110 }
111 };
112 return if re.is_match(value) {
113 FormatCheck::Ok {
114 source: FormatRuleSource::Inline,
115 }
116 } else {
117 FormatCheck::Mismatch {
118 source: FormatRuleSource::Inline,
119 expected: pattern.to_owned(),
120 }
121 };
122 }
123
124 if let Some(id) = entry.pattern_id.as_deref() {
125 let pattern = match catalogue.find(id) {
126 Some(p) => p,
127 None => {
128 // Pattern referenced but not loaded — treat as
129 // "no rule" rather than Error. The recursion check
130 // (P5.5) and `doctor` already surface unresolved
131 // pattern_ids; the format validator should not
132 // double-fail on that.
133 return FormatCheck::NoRule;
134 }
135 };
136 let re = pattern.format_regex();
137 return if re.is_match(value) {
138 FormatCheck::Ok {
139 source: FormatRuleSource::PatternId(id.to_owned()),
140 }
141 } else {
142 FormatCheck::Mismatch {
143 source: FormatRuleSource::PatternId(id.to_owned()),
144 expected: re.as_str().to_owned(),
145 }
146 };
147 }
148
149 FormatCheck::NoRule
150}
151
152// =============================================================================
153// Tests
154// =============================================================================
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntry;
    use devboy_secret_patterns::Catalogue;

    /// Inline regex used by the inline-rule tests: `ghp_` followed by
    /// 36 ASCII alphanumerics.
    const GITHUB_PAT_REGEX: &str = r"^ghp_[A-Za-z0-9]{36}$";

    /// Synthetic, well-formed token matching [`GITHUB_PAT_REGEX`].
    const GITHUB_PAT_SAMPLE: &str = "ghp_abcdefghijklmnopqrstuvwxyz0123456789";

    /// Run the validator against a catalogue holding only the
    /// built-in patterns (no user-supplied `patterns.d/` files).
    fn check(entry: &IndexEntry, value: &str) -> FormatCheck {
        validate_format(entry, value, &Catalogue::builtins_only())
    }

    /// Entry that declares only an inline `format_regex`.
    fn inline_entry(regex: &str) -> IndexEntry {
        IndexEntry {
            format_regex: Some(regex.to_owned()),
            ..IndexEntry::default()
        }
    }

    /// Entry that declares only a `pattern_id`.
    fn pattern_entry(id: &str) -> IndexEntry {
        IndexEntry {
            pattern_id: Some(id.to_owned()),
            ..IndexEntry::default()
        }
    }

    // -- No rule ---------------------------------------------------

    #[test]
    fn no_rule_when_neither_field_is_set() {
        let outcome = check(&IndexEntry::default(), "anything");
        assert_eq!(outcome, FormatCheck::NoRule);
    }

    // -- Inline format_regex --------------------------------------

    #[test]
    fn inline_regex_matching_value_returns_ok_with_inline_source() {
        let outcome = check(&inline_entry(GITHUB_PAT_REGEX), GITHUB_PAT_SAMPLE);
        assert_eq!(
            outcome,
            FormatCheck::Ok {
                source: FormatRuleSource::Inline,
            }
        );
    }

    #[test]
    fn inline_regex_mismatching_value_returns_mismatch_with_pattern_echoed() {
        let outcome = check(&inline_entry(GITHUB_PAT_REGEX), "not-a-token");
        assert_eq!(
            outcome,
            FormatCheck::Mismatch {
                source: FormatRuleSource::Inline,
                expected: GITHUB_PAT_REGEX.to_owned(),
            }
        );
    }

    #[test]
    fn invalid_inline_regex_surfaces_compile_error() {
        // An unterminated character class fails to compile across
        // regex versions.
        let outcome = check(&inline_entry("[unterminated"), "anything");
        if let FormatCheck::Error { message } = &outcome {
            assert!(message.contains("invalid format_regex"));
            assert!(message.contains("[unterminated"));
        } else {
            panic!("expected Error, got {outcome:?}");
        }
    }

    // -- pattern_id via catalogue ---------------------------------

    #[test]
    fn known_pattern_id_matches_a_real_token() {
        // `github-pat` is a built-in (P2.2): `ghp_` followed by 36
        // ASCII alphanumerics. The sample is synthetic but
        // well-formed.
        let outcome = check(&pattern_entry("github-pat"), GITHUB_PAT_SAMPLE);
        assert_eq!(
            outcome,
            FormatCheck::Ok {
                source: FormatRuleSource::PatternId("github-pat".into()),
            }
        );
    }

    #[test]
    fn known_pattern_id_rejects_gibberish() {
        let outcome = check(&pattern_entry("github-pat"), "definitely-not-a-token");
        // The echoed regex text belongs to the catalogue, so only the
        // variant and its provenance are pinned here.
        assert!(
            matches!(
                &outcome,
                FormatCheck::Mismatch {
                    source: FormatRuleSource::PatternId(id),
                    ..
                } if id == "github-pat"
            ),
            "expected Mismatch, got {outcome:?}"
        );
    }

    #[test]
    fn unknown_pattern_id_treated_as_no_rule() {
        // The catalogue has no such id. The validator answers
        // `NoRule` instead of failing — flagging missing pattern ids
        // is `doctor`'s job.
        let outcome = check(&pattern_entry("not-a-real-pattern-id"), "anything");
        assert_eq!(outcome, FormatCheck::NoRule);
    }

    // -- Inline beats pattern_id when both are set ---------------

    #[test]
    fn inline_format_regex_wins_over_pattern_id() {
        // Both fields are set: a restrictive inline regex
        // (`tighter-prefix-`) next to the looser `github-pat`
        // pattern. Only the inline regex may be consulted.
        let entry = IndexEntry {
            format_regex: Some(r"^tighter-prefix-[a-z]+$".to_owned()),
            pattern_id: Some("github-pat".to_owned()),
            ..IndexEntry::default()
        };

        // Inline matches → Ok attributed to Inline, not PatternId.
        assert_eq!(
            check(&entry, "tighter-prefix-abc"),
            FormatCheck::Ok {
                source: FormatRuleSource::Inline,
            }
        );

        // A github-pat-shaped value would satisfy the catalogue
        // pattern, but the inline rule still decides and rejects it.
        let rejected = check(&entry, GITHUB_PAT_SAMPLE);
        assert!(matches!(
            rejected,
            FormatCheck::Mismatch {
                source: FormatRuleSource::Inline,
                ..
            }
        ));
    }

    // -- User patterns via Catalogue ------------------------------

    /// Placeholder for the user-pattern path (patterns added through
    /// a `patterns.d/` TOML file and reached via `Catalogue::find`).
    #[test]
    fn user_pattern_via_catalogue_is_used() {
        // Building a catalogue from an inline TOML pattern would need
        // the `parse_str` route the loader uses internally, which is
        // not part of the public API yet; `Catalogue::load` reads
        // from a directory instead. Until that changes, this test
        // only ensures `validate_format` typechecks against the
        // public `Catalogue` and pins the not-loaded contract.
        //
        // The full user-pattern integration test will land in P9.4
        // (`devboy secrets validate`), where the CLI builds a real
        // catalogue from `patterns.d/`.
        let outcome = check(&pattern_entry("user-defined-not-loaded"), "x");
        // Without the user file the id cannot be found, so the
        // contract is `NoRule`.
        assert_eq!(outcome, FormatCheck::NoRule);
    }
}
328}