Skip to main content

devboy_storage/
validation.rs

//! Format validation per [ADR-021] §6 (the "validation framework"
//! umbrella) and [ADR-020] §3 (`format_regex` / `pattern_id`
//! metadata fields).
//!
//! The check is *format-only* and *lazy on demand*: it compares a
//! candidate value against the regex declared by the global-index
//! entry (`format_regex`) or, failing that, by the pattern referenced
//! through `pattern_id`. A `Liveness` probe — actually asking the
//! upstream whether the value still works — is a separate phase
//! (P9.2) and lives in its own module.
//!
//! ## Resolution order
//!
//! 1. If the entry has an inline `format_regex`, compile and use it.
//! 2. Otherwise, if the entry has a `pattern_id`, look it up in the
//!    [`devboy_secret_patterns::Catalogue`] and use its
//!    `SecretPattern::format_regex`. An id the catalogue does not
//!    know also yields [`FormatCheck::NoRule`].
//! 3. Otherwise, return [`FormatCheck::NoRule`] — the caller chose
//!    not to declare a format, so the validator stays silent.
//!
//! Inline `format_regex` wins over `pattern_id` because a project may
//! have a tighter shape in mind than the generic pattern (e.g. a
//! regex that pins the prefix to a specific tenant id).
//!
//! ## What the validator does **not** do
//!
//! - **Compile patterns ahead of time.** Inline `format_regex`
//!   compiles on every call. The catalogue's `format_regex()` is
//!   already cached behind `OnceLock`. A full ahead-of-time compile
//!   of all index entries can land later if profiling shows it's
//!   needed; for now `secrets validate <path>` is on demand and the
//!   cost is acceptable.
//! - **Probe upstream liveness.** That's P9.2. A value that is
//!   well-formed but revoked still passes this check.
//!
//! [ADR-020]: https://github.com/meteora-pro/devboy-tools/blob/main/docs/architecture/adr/ADR-020-secret-manifest-and-alias-resolution.md
//! [ADR-021]: https://github.com/meteora-pro/devboy-tools/blob/main/docs/architecture/adr/ADR-021-external-secret-sources.md
38
39use devboy_secret_patterns::Catalogue;
40
41use crate::index::IndexEntry;
42
43// =============================================================================
44// Public types
45// =============================================================================
46
47/// Outcome of [`validate_format`].
48#[derive(Debug, Clone, PartialEq, Eq)]
49pub enum FormatCheck {
50    /// No `format_regex` and no `pattern_id` (or the `pattern_id`
51    /// names a pattern that isn't in the catalogue). The caller
52    /// chose not to declare a shape; format validation is a no-op.
53    NoRule,
54    /// The value matched the resolved regex.
55    Ok {
56        /// Where the regex came from — `format_regex` inline on the
57        /// entry, or the `pattern_id` it resolved through.
58        source: FormatRuleSource,
59    },
60    /// The value did not match.
61    Mismatch {
62        /// Where the regex came from.
63        source: FormatRuleSource,
64        /// The regex pattern that was checked. Useful in error
65        /// messages — `{expected}` lets the user see exactly which
66        /// shape the system was expecting.
67        expected: String,
68    },
69    /// Something went wrong during validation — usually a regex
70    /// compile failure for an inline `format_regex`.
71    Error {
72        /// Human-readable detail.
73        message: String,
74    },
75}
76
/// Provenance of the regex used by a format check.
///
/// Carried by the `Ok` and `Mismatch` outcomes so a caller can tell
/// an entry-specific rule apart from a shared catalogue pattern.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FormatRuleSource {
    /// The regex was the inline `format_regex` on the index entry.
    Inline,
    /// The regex was resolved via `pattern_id` through the catalogue.
    /// The string is the `id` that was looked up.
    PatternId(String),
}
86
87// =============================================================================
88// Validator
89// =============================================================================
90
91/// Validate `value` against the format rule attached to `entry`.
92///
93/// Resolution order:
94///
95/// 1. `entry.format_regex` (inline) wins.
96/// 2. `entry.pattern_id` then resolves through `catalogue`.
97/// 3. Neither set → [`FormatCheck::NoRule`].
98///
99/// Inline regex compile errors surface as
100/// [`FormatCheck::Error`] — the caller decides whether that's a
101/// hard fail or a `doctor`-style warning.
102pub fn validate_format(entry: &IndexEntry, value: &str, catalogue: &Catalogue) -> FormatCheck {
103    if let Some(pattern) = entry.format_regex.as_deref() {
104        let re = match regex::Regex::new(pattern) {
105            Ok(r) => r,
106            Err(e) => {
107                return FormatCheck::Error {
108                    message: format!("invalid format_regex `{pattern}`: {e}"),
109                };
110            }
111        };
112        return if re.is_match(value) {
113            FormatCheck::Ok {
114                source: FormatRuleSource::Inline,
115            }
116        } else {
117            FormatCheck::Mismatch {
118                source: FormatRuleSource::Inline,
119                expected: pattern.to_owned(),
120            }
121        };
122    }
123
124    if let Some(id) = entry.pattern_id.as_deref() {
125        let pattern = match catalogue.find(id) {
126            Some(p) => p,
127            None => {
128                // Pattern referenced but not loaded — treat as
129                // "no rule" rather than Error. The recursion check
130                // (P5.5) and `doctor` already surface unresolved
131                // pattern_ids; the format validator should not
132                // double-fail on that.
133                return FormatCheck::NoRule;
134            }
135        };
136        let re = pattern.format_regex();
137        return if re.is_match(value) {
138            FormatCheck::Ok {
139                source: FormatRuleSource::PatternId(id.to_owned()),
140            }
141        } else {
142            FormatCheck::Mismatch {
143                source: FormatRuleSource::PatternId(id.to_owned()),
144                expected: re.as_str().to_owned(),
145            }
146        };
147    }
148
149    FormatCheck::NoRule
150}
151
152// =============================================================================
153// Tests
154// =============================================================================
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::index::IndexEntry;
    use devboy_secret_patterns::Catalogue;

    /// Synthetic, well-formed GitHub PAT: `ghp_` followed by 36 ASCII
    /// alphanumeric characters.
    const GITHUB_PAT_SAMPLE: &str = "ghp_abcdefghijklmnopqrstuvwxyz0123456789";

    /// Inline regex describing the same shape as [`GITHUB_PAT_SAMPLE`].
    const GITHUB_PAT_REGEX: &str = r"^ghp_[A-Za-z0-9]{36}$";

    /// Catalogue containing only the built-in patterns. It is not
    /// empty — the `github-pat` tests below rely on the built-ins —
    /// but no user `patterns.d/` files are loaded.
    fn builtin_catalogue() -> Catalogue {
        Catalogue::builtins_only()
    }

    /// Entry that declares only an inline `format_regex`.
    fn entry_with_inline(regex: &str) -> IndexEntry {
        IndexEntry {
            format_regex: Some(regex.to_owned()),
            ..IndexEntry::default()
        }
    }

    /// Entry that declares only a `pattern_id`.
    fn entry_with_pattern_id(id: &str) -> IndexEntry {
        IndexEntry {
            pattern_id: Some(id.to_owned()),
            ..IndexEntry::default()
        }
    }

    // -- No rule ---------------------------------------------------

    #[test]
    fn no_rule_when_neither_field_is_set() {
        let entry = IndexEntry::default();
        let r = validate_format(&entry, "anything", &builtin_catalogue());
        assert_eq!(r, FormatCheck::NoRule);
    }

    // -- Inline format_regex --------------------------------------

    #[test]
    fn inline_regex_matching_value_returns_ok_with_inline_source() {
        let entry = entry_with_inline(GITHUB_PAT_REGEX);
        let r = validate_format(&entry, GITHUB_PAT_SAMPLE, &builtin_catalogue());
        assert_eq!(
            r,
            FormatCheck::Ok {
                source: FormatRuleSource::Inline,
            }
        );
    }

    #[test]
    fn inline_regex_mismatching_value_returns_mismatch_with_pattern_echoed() {
        let entry = entry_with_inline(GITHUB_PAT_REGEX);
        let r = validate_format(&entry, "not-a-token", &builtin_catalogue());
        assert_eq!(
            r,
            FormatCheck::Mismatch {
                source: FormatRuleSource::Inline,
                expected: GITHUB_PAT_REGEX.to_owned(),
            }
        );
    }

    #[test]
    fn invalid_inline_regex_surfaces_compile_error() {
        // `[unterminated` — guaranteed compile failure across regex
        // versions.
        let entry = entry_with_inline("[unterminated");
        let r = validate_format(&entry, "anything", &builtin_catalogue());
        let FormatCheck::Error { message } = r else {
            panic!("expected Error, got {r:?}");
        };
        assert!(message.contains("invalid format_regex"));
        assert!(message.contains("[unterminated"));
    }

    // -- pattern_id via catalogue ---------------------------------

    #[test]
    fn known_pattern_id_matches_a_real_token() {
        // `github-pat` is a built-in (P2.2) — `ghp_` followed by
        // 36 ASCII alphanum chars. Use a synthetic well-formed
        // sample.
        let entry = entry_with_pattern_id("github-pat");
        let r = validate_format(&entry, GITHUB_PAT_SAMPLE, &builtin_catalogue());
        assert_eq!(
            r,
            FormatCheck::Ok {
                source: FormatRuleSource::PatternId("github-pat".into()),
            }
        );
    }

    #[test]
    fn known_pattern_id_rejects_gibberish() {
        let entry = entry_with_pattern_id("github-pat");
        let r = validate_format(&entry, "definitely-not-a-token", &builtin_catalogue());
        // The exact regex text belongs to the catalogue, so only the
        // provenance is pinned here.
        match r {
            FormatCheck::Mismatch { source, .. } => {
                assert_eq!(source, FormatRuleSource::PatternId("github-pat".into()));
            }
            other => panic!("expected Mismatch, got {other:?}"),
        }
    }

    #[test]
    fn unknown_pattern_id_treated_as_no_rule() {
        // `pattern_id` references something the catalogue doesn't
        // have. The format validator returns `NoRule` rather than
        // failing — `doctor` is the place that flags missing
        // pattern ids.
        let entry = entry_with_pattern_id("not-a-real-pattern-id");
        let r = validate_format(&entry, "anything", &builtin_catalogue());
        assert_eq!(r, FormatCheck::NoRule);
    }

    // -- Inline beats pattern_id when both are set ---------------

    #[test]
    fn inline_format_regex_wins_over_pattern_id() {
        // Both fields set. Inline pattern is restrictive
        // (`tighter-prefix-`); the pattern_id (`github-pat`) is
        // looser. The inline regex must be the one consulted.
        let mut entry = entry_with_pattern_id("github-pat");
        entry.format_regex = Some(r"^tighter-prefix-[a-z]+$".to_owned());

        // Inline matches → Ok with Inline source (NOT PatternId).
        let r = validate_format(&entry, "tighter-prefix-abc", &builtin_catalogue());
        assert_eq!(
            r,
            FormatCheck::Ok {
                source: FormatRuleSource::Inline,
            }
        );

        // Inline does NOT match — even if a github-pat-shaped
        // value is supplied, the inline rule wins and rejects.
        let r = validate_format(&entry, GITHUB_PAT_SAMPLE, &builtin_catalogue());
        assert!(matches!(
            r,
            FormatCheck::Mismatch {
                source: FormatRuleSource::Inline,
                ..
            }
        ));
    }

    // -- User patterns via Catalogue ------------------------------

    /// Placeholder for the user-pattern path. It does NOT load a
    /// user pattern: it only checks that an id which would come from
    /// a `patterns.d/` file resolves to `NoRule` while that file is
    /// absent, and that `validate_format` typechecks against the
    /// public `Catalogue`.
    #[test]
    fn user_pattern_via_catalogue_is_used() {
        // `Catalogue::load` reads from a directory, and the
        // `parse_str` route the loader uses internally isn't part of
        // the public API yet, so no user pattern can be injected
        // here.
        //
        // The full user-pattern integration test will land in P9.4
        // (`devboy secrets validate`), where the CLI builds a real
        // catalogue from `patterns.d/`.
        let entry = entry_with_pattern_id("user-defined-not-loaded");
        let r = validate_format(&entry, "x", &builtin_catalogue());
        // Without the user file the catalogue can't find the id —
        // contract is `NoRule`.
        assert_eq!(r, FormatCheck::NoRule);
    }
}