detritus_server/schemas.rs
1//! Per-tenant schema registry for crash and log payload validation.
2//!
//! [`SchemaRegistry`] holds a compiled `jsonschema` validator for every
//! `(project, SchemaKind)` pair that was declared in the tokens
//! configuration file. It is populated once at startup via
//! [`SchemaRegistry::load`] and then held inside [`crate::server::AppState`]
//! for the lifetime of the process.
8//!
9//! # Validation contract
10//!
11//! [`SchemaRegistry::validate`] returns `Ok(())` for any `(project, kind)`
12//! pair that has no registered schema (accept-by-default — tenants without
13//! a schema are not gated). For registered pairs it runs the compiled
14//! `jsonschema` validator and returns [`SchemaError::Validation`] with all
15//! collected errors on failure.
16
17use std::{collections::HashMap, path::PathBuf, sync::Arc};
18
19pub(crate) use detritus_protocol::schema::SchemaError;
20pub use detritus_protocol::schema::SchemaKind;
21use jsonschema::Validator;
22use tokio::fs;
23
24/// One `[[schema]]` entry as parsed from `tokens.toml`.
25///
26/// `path` is always resolved relative to the tokens config file's parent
27/// directory by the caller before being passed to [`SchemaRegistry::load`].
28#[derive(Debug, Clone)]
29pub struct ProjectSchemaEntry {
30 /// Project identifier; must match an existing `[[token]].project` value.
31 pub project: String,
32 /// The payload kind this schema governs.
33 pub kind: SchemaKind,
34 /// Absolute (already-resolved) path to the JSON Schema document on disk.
35 pub path: PathBuf,
36}
37
38/// A compiled JSON Schema validator behind an `Arc` so registry clones
39/// stay cheap.
40#[derive(Clone)]
41struct CompiledSchema(Arc<Validator>);
42
43impl std::fmt::Debug for CompiledSchema {
44 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45 f.debug_struct("CompiledSchema").finish_non_exhaustive()
46 }
47}
48
49/// Registry of per-tenant JSON Schema validators, keyed by `(project, kind)`.
50///
51/// Obtain an instance with [`SchemaRegistry::empty`] (for tests / configs
52/// without schema entries) or [`SchemaRegistry::load`] (production path).
53#[derive(Debug, Clone)]
54pub struct SchemaRegistry {
55 schemas: HashMap<(String, SchemaKind), CompiledSchema>,
56}
57
58impl SchemaRegistry {
59 /// Returns an empty registry that accepts any payload for any project.
60 ///
61 /// Used by tests and by tokens configs that omit `[[schema]]` tables.
62 pub fn empty() -> Self {
63 Self {
64 schemas: HashMap::new(),
65 }
66 }
67
68 /// Loads schema files from disk and compiles them with `jsonschema`.
69 ///
70 /// `entries` must already have their `path` fields resolved to absolute
71 /// paths (i.e. relative to the tokens config's parent directory, not to
72 /// the current working directory).
73 ///
74 /// # Errors
75 ///
76 /// Returns [`SchemaError::Io`] if a file cannot be read, or
77 /// [`SchemaError::Parse`] if the file content is not valid JSON or fails
78 /// to compile as a JSON Schema.
79 pub async fn load(entries: &[ProjectSchemaEntry]) -> Result<Self, SchemaError> {
80 let mut schemas = HashMap::with_capacity(entries.len());
81 for entry in entries {
82 let raw = fs::read_to_string(&entry.path)
83 .await
84 .map_err(|source| SchemaError::Io {
85 path: entry.path.clone(),
86 source,
87 })?;
88 let value: serde_json::Value =
89 serde_json::from_str(&raw).map_err(|source| SchemaError::Parse {
90 path: entry.path.clone(),
91 source,
92 })?;
93 let validator = Validator::new(&value).map_err(|err| SchemaError::Parse {
94 path: entry.path.clone(),
95 source: serde::de::Error::custom(err.to_string()),
96 })?;
97 schemas.insert(
98 (entry.project.clone(), entry.kind),
99 CompiledSchema(Arc::new(validator)),
100 );
101 }
102 Ok(Self { schemas })
103 }
104
105 /// Validates `payload` against the schema registered for `(project, kind)`.
106 ///
107 /// Returns `Ok(())` when no schema is registered for the pair
108 /// (accept-by-default — tenants without a schema are not gated). Returns
109 /// [`SchemaError::Validation`] when a registered schema rejects the
110 /// payload, with all collected errors.
111 pub fn validate(
112 &self,
113 project: &str,
114 kind: SchemaKind,
115 payload: &serde_json::Value,
116 ) -> Result<(), SchemaError> {
117 let Some(compiled) = self.schemas.get(&(project.to_owned(), kind)) else {
118 return Ok(());
119 };
120 let errors: Vec<String> = compiled
121 .0
122 .iter_errors(payload)
123 .map(|e| e.to_string())
124 .collect();
125 if errors.is_empty() {
126 Ok(())
127 } else {
128 Err(SchemaError::Validation { kind, errors })
129 }
130 }
131}
132
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    use detritus_protocol::schema::SchemaKind;
    use serde_json::json;
    use tempfile::TempDir;

    use super::{ProjectSchemaEntry, SchemaRegistry};

    /// Absolute path of a schema fixture under this crate's
    /// `tests/fixtures/schemas` directory.
    fn fixture_schema(file_name: &str) -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/schemas")
            .join(file_name)
    }

    /// Writes `body` followed by a trailing newline to `tokens.toml` inside
    /// `dir` and returns the path of the written file.
    fn write_tokens_toml(dir: &Path, body: &str) -> PathBuf {
        let tokens_path = dir.join("tokens.toml");
        std::fs::write(&tokens_path, format!("{body}\n")).expect("write tokens.toml");
        tokens_path
    }

    // ------------------------------------------------------------------
    // empty_registry_validates_anything
    // ------------------------------------------------------------------

    /// An empty registry accepts any payload for any project/kind. This is
    /// the accept-by-default contract: tenants without a registered schema
    /// are not gated.
    #[test]
    fn empty_registry_validates_anything() {
        let registry = SchemaRegistry::empty();
        let payload = json!({"key": "value"});
        assert!(
            registry
                .validate("acme", SchemaKind::CrashMetadata, &payload)
                .is_ok(),
            "empty registry should accept any payload",
        );
        assert!(registry.schemas.is_empty());
    }

    // ------------------------------------------------------------------
    // load_two_schemas_resolves_relative_paths
    // ------------------------------------------------------------------

    /// The loader reads files from disk and registers them under their keys.
    /// Absolute paths are passed in (as `load_security_config` does after
    /// joining against the tokens config parent), so a successful load shows
    /// the loader uses the paths as given. Both fixture schemas are then run
    /// against an empty object, which this test expects them to accept.
    #[tokio::test]
    async fn load_two_schemas_resolves_relative_paths() {
        let entries = [
            ProjectSchemaEntry {
                project: "acme".to_owned(),
                kind: SchemaKind::CrashMetadata,
                path: fixture_schema("crash.schema.json"),
            },
            ProjectSchemaEntry {
                project: "acme".to_owned(),
                kind: SchemaKind::LogAttributes,
                path: fixture_schema("log.schema.json"),
            },
        ];

        let registry = SchemaRegistry::load(&entries)
            .await
            .expect("load should succeed");

        assert_eq!(registry.schemas.len(), 2, "both schemas should be loaded");

        let payload = json!({});
        for kind in [SchemaKind::CrashMetadata, SchemaKind::LogAttributes] {
            assert!(
                registry.validate("acme", kind, &payload).is_ok(),
                "registered {kind:?} fixture schema should accept an empty object",
            );
        }
    }

    // ------------------------------------------------------------------
    // unknown_project_accepts_by_default
    // ------------------------------------------------------------------

    /// Calling validate for a project that was never registered returns
    /// `Ok(())` (accept-by-default). Only projects with a registered schema
    /// are subject to validation.
    #[tokio::test]
    async fn unknown_project_accepts_by_default() {
        let entries = [ProjectSchemaEntry {
            project: "known-project".to_owned(),
            kind: SchemaKind::CrashMetadata,
            path: fixture_schema("crash.schema.json"),
        }];
        let registry = SchemaRegistry::load(&entries)
            .await
            .expect("load should succeed");

        let payload = json!({"x": 1});
        assert!(
            registry
                .validate("nope", SchemaKind::CrashMetadata, &payload)
                .is_ok(),
            "unknown project should be accepted by default",
        );
    }

    // ------------------------------------------------------------------
    // tokens_config_without_schemas_loads_empty_registry
    // ------------------------------------------------------------------

    /// Tokens config files that have no `[[schema]]` table must still parse
    /// successfully and produce an empty registry. This is the backward-
    /// compat guarantee that prevents existing deployments from breaking.
    #[tokio::test]
    async fn tokens_config_without_schemas_loads_empty_registry() {
        let dir = TempDir::new().expect("tempdir");
        // Minimal tokens.toml with no [[schema]] table.
        let tokens_path = write_tokens_toml(
            dir.path(),
            r#"
[[token]]
id = "t1"
secret = "$argon2id$v=19$m=19456,t=2,p=1$AAAAAAAAAAAAAAAAAAAAAA$bm90YXJlYWxoYXNoYnV0cGFzc2VzZm9ybWF0Y2hlY2s"
project = "proj"
source_prefix = "src/"
"#,
        );

        let config = crate::auth::load_security_config(&tokens_path)
            .await
            .expect("load_security_config should succeed");
        assert!(
            config.schema_registry.schemas.is_empty(),
            "no [[schema]] entries → empty registry",
        );
    }

    // ------------------------------------------------------------------
    // tokens_config_schema_project_mismatch_errors
    // ------------------------------------------------------------------

    /// A `[[schema]]` entry whose `project` does not match any `[[token]]`
    /// must cause `load_security_config` to fail with
    /// `AuthConfigError::SchemaProjectMismatch`.
    #[tokio::test]
    async fn tokens_config_schema_project_mismatch_errors() {
        let dir = TempDir::new().expect("tempdir");
        let schema_path = dir.path().join("crash.schema.json");
        std::fs::write(&schema_path, r#"{"type":"object"}"#).expect("write schema");

        let tokens_path = write_tokens_toml(
            dir.path(),
            r#"
[[token]]
id = "t1"
secret = "$argon2id$v=19$m=19456,t=2,p=1$AAAAAAAAAAAAAAAAAAAAAA$bm90YXJlYWxoYXNoYnV0cGFzc2VzZm9ybWF0Y2hlY2s"
project = "real-project"
source_prefix = "src/"

[[schema]]
project = "ghost-project"
kind = "crash_metadata"
path = "crash.schema.json"
"#,
        );

        let err = crate::auth::load_security_config(&tokens_path)
            .await
            .expect_err("mismatched project should fail");
        assert!(
            matches!(
                err,
                crate::auth::AuthConfigError::SchemaProjectMismatch { ref project, .. }
                    if project == "ghost-project"
            ),
            "unexpected error: {err:?}",
        );
    }
}
322}