Skip to main content

detritus_server/
schemas.rs

//! Per-tenant schema registry for crash and log payload validation.
//!
//! [`SchemaRegistry`] holds a compiled `jsonschema` validator for every
//! `(project, SchemaKind)` pair that was declared in the tokens
//! configuration file.  It is populated once at startup via
//! [`SchemaRegistry::load`] and then held inside [`crate::server::AppState`]
//! for the lifetime of the process.
//!
//! # Validation contract
//!
//! [`SchemaRegistry::validate`] returns `Ok(())` for any `(project, kind)`
//! pair that has no registered schema (accept-by-default — tenants without
//! a schema are not gated). For registered pairs it runs the compiled
//! `jsonschema` validator and returns [`SchemaError::Validation`] with all
//! collected errors on failure.
16
17use std::{collections::HashMap, path::PathBuf, sync::Arc};
18
19pub(crate) use detritus_protocol::schema::SchemaError;
20pub use detritus_protocol::schema::SchemaKind;
21use jsonschema::Validator;
22use tokio::fs;
23
24/// One `[[schema]]` entry as parsed from `tokens.toml`.
25///
26/// `path` is always resolved relative to the tokens config file's parent
27/// directory by the caller before being passed to [`SchemaRegistry::load`].
28#[derive(Debug, Clone)]
29pub struct ProjectSchemaEntry {
30    /// Project identifier; must match an existing `[[token]].project` value.
31    pub project: String,
32    /// The payload kind this schema governs.
33    pub kind: SchemaKind,
34    /// Absolute (already-resolved) path to the JSON Schema document on disk.
35    pub path: PathBuf,
36}
37
38/// A compiled JSON Schema validator behind an `Arc` so registry clones
39/// stay cheap.
40#[derive(Clone)]
41struct CompiledSchema(Arc<Validator>);
42
43impl std::fmt::Debug for CompiledSchema {
44    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
45        f.debug_struct("CompiledSchema").finish_non_exhaustive()
46    }
47}
48
49/// Registry of per-tenant JSON Schema validators, keyed by `(project, kind)`.
50///
51/// Obtain an instance with [`SchemaRegistry::empty`] (for tests / configs
52/// without schema entries) or [`SchemaRegistry::load`] (production path).
53#[derive(Debug, Clone)]
54pub struct SchemaRegistry {
55    schemas: HashMap<(String, SchemaKind), CompiledSchema>,
56}
57
58impl SchemaRegistry {
59    /// Returns an empty registry that accepts any payload for any project.
60    ///
61    /// Used by tests and by tokens configs that omit `[[schema]]` tables.
62    pub fn empty() -> Self {
63        Self {
64            schemas: HashMap::new(),
65        }
66    }
67
68    /// Loads schema files from disk and compiles them with `jsonschema`.
69    ///
70    /// `entries` must already have their `path` fields resolved to absolute
71    /// paths (i.e. relative to the tokens config's parent directory, not to
72    /// the current working directory).
73    ///
74    /// # Errors
75    ///
76    /// Returns [`SchemaError::Io`] if a file cannot be read, or
77    /// [`SchemaError::Parse`] if the file content is not valid JSON or fails
78    /// to compile as a JSON Schema.
79    pub async fn load(entries: &[ProjectSchemaEntry]) -> Result<Self, SchemaError> {
80        let mut schemas = HashMap::with_capacity(entries.len());
81        for entry in entries {
82            let raw = fs::read_to_string(&entry.path)
83                .await
84                .map_err(|source| SchemaError::Io {
85                    path: entry.path.clone(),
86                    source,
87                })?;
88            let value: serde_json::Value =
89                serde_json::from_str(&raw).map_err(|source| SchemaError::Parse {
90                    path: entry.path.clone(),
91                    source,
92                })?;
93            let validator = Validator::new(&value).map_err(|err| SchemaError::Parse {
94                path: entry.path.clone(),
95                source: serde::de::Error::custom(err.to_string()),
96            })?;
97            schemas.insert(
98                (entry.project.clone(), entry.kind),
99                CompiledSchema(Arc::new(validator)),
100            );
101        }
102        Ok(Self { schemas })
103    }
104
105    /// Validates `payload` against the schema registered for `(project, kind)`.
106    ///
107    /// Returns `Ok(())` when no schema is registered for the pair
108    /// (accept-by-default — tenants without a schema are not gated). Returns
109    /// [`SchemaError::Validation`] when a registered schema rejects the
110    /// payload, with all collected errors.
111    pub fn validate(
112        &self,
113        project: &str,
114        kind: SchemaKind,
115        payload: &serde_json::Value,
116    ) -> Result<(), SchemaError> {
117        let Some(compiled) = self.schemas.get(&(project.to_owned(), kind)) else {
118            return Ok(());
119        };
120        let errors: Vec<String> = compiled
121            .0
122            .iter_errors(payload)
123            .map(|e| e.to_string())
124            .collect();
125        if errors.is_empty() {
126            Ok(())
127        } else {
128            Err(SchemaError::Validation { kind, errors })
129        }
130    }
131}
132
#[cfg(test)]
mod tests {
    use std::path::{Path, PathBuf};

    use detritus_protocol::schema::SchemaKind;
    use serde_json::json;
    use tempfile::TempDir;

    use super::{ProjectSchemaEntry, SchemaError, SchemaRegistry};

    /// Absolute path of a schema fixture shipped under
    /// `tests/fixtures/schemas` in this crate.
    fn fixture(name: &str) -> PathBuf {
        Path::new(env!("CARGO_MANIFEST_DIR"))
            .join("tests/fixtures/schemas")
            .join(name)
    }

    /// Shorthand for building a [`ProjectSchemaEntry`].
    fn schema_entry(project: &str, kind: SchemaKind, path: PathBuf) -> ProjectSchemaEntry {
        ProjectSchemaEntry {
            project: project.to_owned(),
            kind,
            path,
        }
    }

    /// Writes `body` to `tokens.toml` inside `dir` and returns the file path.
    fn write_tokens_toml(dir: &TempDir, body: &str) -> PathBuf {
        let tokens_path = dir.path().join("tokens.toml");
        std::fs::write(&tokens_path, body).expect("write tokens.toml");
        tokens_path
    }

    /// An empty registry accepts any payload for any project/kind. This is
    /// the accept-by-default contract: tenants without a registered schema
    /// are not gated.
    #[test]
    fn empty_registry_validates_anything() {
        let registry = SchemaRegistry::empty();
        let payload = json!({"key": "value"});
        assert!(
            registry
                .validate("acme", SchemaKind::CrashMetadata, &payload)
                .is_ok(),
            "empty registry should accept any payload",
        );
        assert!(registry.schemas.is_empty());
    }

    /// The loader reads files from disk and registers them under their keys.
    /// Absolute paths are passed in (as `load_security_config` does after
    /// joining against the tokens config parent), confirming the loader does
    /// not re-join against CWD.
    #[tokio::test]
    async fn load_two_schemas_resolves_relative_paths() {
        let entries = vec![
            schema_entry(
                "acme",
                SchemaKind::CrashMetadata,
                fixture("crash.schema.json"),
            ),
            schema_entry(
                "acme",
                SchemaKind::LogAttributes,
                fixture("log.schema.json"),
            ),
        ];

        let registry = SchemaRegistry::load(&entries)
            .await
            .expect("load should succeed");

        assert_eq!(registry.schemas.len(), 2, "both schemas should be loaded");
        // Validation is real (not a no-op): the empty object is expected to
        // satisfy both fixture schemas.
        let payload = json!({});
        assert!(
            registry
                .validate("acme", SchemaKind::CrashMetadata, &payload)
                .is_ok(),
            "empty object should satisfy the crash fixture schema",
        );
        assert!(
            registry
                .validate("acme", SchemaKind::LogAttributes, &payload)
                .is_ok(),
            "empty object should satisfy the log fixture schema",
        );
    }

    /// A registered schema accepts conforming payloads and rejects
    /// non-conforming ones with `SchemaError::Validation`, while the same
    /// project remains ungated for kinds that have no schema.
    #[tokio::test]
    async fn registered_schema_rejects_invalid_payload() {
        let dir = TempDir::new().expect("tempdir");
        let schema_path = dir.path().join("crash.schema.json");
        std::fs::write(
            &schema_path,
            r#"{"type":"object","required":["build_id"],"properties":{"build_id":{"type":"string"}}}"#,
        )
        .expect("write schema");

        let entries = [schema_entry(
            "acme",
            SchemaKind::CrashMetadata,
            schema_path,
        )];
        let registry = SchemaRegistry::load(&entries)
            .await
            .expect("load should succeed");

        assert!(
            registry
                .validate(
                    "acme",
                    SchemaKind::CrashMetadata,
                    &json!({"build_id": "abc123"}),
                )
                .is_ok(),
            "conforming payload should be accepted",
        );

        let bad_payload = json!({"build_id": 7});
        let err = registry
            .validate("acme", SchemaKind::CrashMetadata, &bad_payload)
            .expect_err("non-conforming payload should be rejected");
        match err {
            SchemaError::Validation { kind, errors } => {
                assert_eq!(kind, SchemaKind::CrashMetadata);
                assert!(
                    !errors.is_empty(),
                    "a rejection should carry at least one error message",
                );
            }
            other => panic!("unexpected error: {other:?}"),
        }

        // No schema is registered for this kind, so the same payload passes.
        assert!(
            registry
                .validate("acme", SchemaKind::LogAttributes, &bad_payload)
                .is_ok(),
            "kinds without a registered schema should be accepted by default",
        );
    }

    /// A schema path that cannot be read surfaces as `SchemaError::Io`
    /// carrying the offending path.
    #[tokio::test]
    async fn load_missing_file_reports_io_error() {
        let dir = TempDir::new().expect("tempdir");
        let missing = dir.path().join("does-not-exist.schema.json");

        let entries = [schema_entry(
            "acme",
            SchemaKind::CrashMetadata,
            missing.clone(),
        )];
        let err = SchemaRegistry::load(&entries)
            .await
            .expect_err("missing schema file should fail");
        assert!(
            matches!(err, SchemaError::Io { ref path, .. } if *path == missing),
            "unexpected error: {err:?}",
        );
    }

    /// Both malformed JSON and well-formed JSON that is not a valid schema
    /// surface as `SchemaError::Parse` carrying the offending path.
    #[tokio::test]
    async fn load_bad_schema_document_reports_parse_error() {
        let dir = TempDir::new().expect("tempdir");
        let cases = [
            ("malformed.schema.json", "{ not json"),
            ("invalid.schema.json", r#"{"type": 12}"#),
        ];

        for (name, contents) in cases {
            let schema_path = dir.path().join(name);
            std::fs::write(&schema_path, contents).expect("write schema");

            let entries = [schema_entry(
                "acme",
                SchemaKind::CrashMetadata,
                schema_path.clone(),
            )];
            let err = SchemaRegistry::load(&entries)
                .await
                .expect_err("bad schema document should fail");
            assert!(
                matches!(err, SchemaError::Parse { ref path, .. } if *path == schema_path),
                "unexpected error for {name}: {err:?}",
            );
        }
    }

    /// Calling validate for a project that was never registered returns
    /// `Ok(())` — accept-by-default. Only projects with a registered schema
    /// are subject to validation.
    #[tokio::test]
    async fn unknown_project_accepts_by_default() {
        let entries = [schema_entry(
            "known-project",
            SchemaKind::CrashMetadata,
            fixture("crash.schema.json"),
        )];
        let registry = SchemaRegistry::load(&entries)
            .await
            .expect("load should succeed");

        let payload = json!({"x": 1});
        assert!(
            registry
                .validate("nope", SchemaKind::CrashMetadata, &payload)
                .is_ok(),
            "unknown project should be accepted by default",
        );
    }

    /// Tokens config files that have no `[[schema]]` table must still parse
    /// successfully and produce an empty registry.  This is the backward-
    /// compat guarantee that prevents existing deployments from breaking.
    #[tokio::test]
    async fn tokens_config_without_schemas_loads_empty_registry() {
        let dir = TempDir::new().expect("tempdir");
        // Minimal tokens.toml with no [[schema]] table.
        let tokens_path = write_tokens_toml(
            &dir,
            r#"
[[token]]
id = "t1"
secret = "$argon2id$v=19$m=19456,t=2,p=1$AAAAAAAAAAAAAAAAAAAAAA$bm90YXJlYWxoYXNoYnV0cGFzc2VzZm9ybWF0Y2hlY2s"
project = "proj"
source_prefix = "src/"
"#,
        );

        let config = crate::auth::load_security_config(&tokens_path)
            .await
            .expect("load_security_config should succeed");
        assert!(
            config.schema_registry.schemas.is_empty(),
            "no [[schema]] entries → empty registry",
        );
    }

    /// A `[[schema]]` entry whose `project` does not match any `[[token]]`
    /// must cause `load_security_config` to fail with
    /// `AuthConfigError::SchemaProjectMismatch`.
    #[tokio::test]
    async fn tokens_config_schema_project_mismatch_errors() {
        let dir = TempDir::new().expect("tempdir");
        let schema_path = dir.path().join("crash.schema.json");
        std::fs::write(&schema_path, r#"{"type":"object"}"#).expect("write schema");

        let tokens_path = write_tokens_toml(
            &dir,
            r#"
[[token]]
id = "t1"
secret = "$argon2id$v=19$m=19456,t=2,p=1$AAAAAAAAAAAAAAAAAAAAAA$bm90YXJlYWxoYXNoYnV0cGFzc2VzZm9ybWF0Y2hlY2s"
project = "real-project"
source_prefix = "src/"

[[schema]]
project = "ghost-project"
kind = "crash_metadata"
path = "crash.schema.json"
"#,
        );

        let err = crate::auth::load_security_config(&tokens_path)
            .await
            .expect_err("mismatched project should fail");
        assert!(
            matches!(
                err,
                crate::auth::AuthConfigError::SchemaProjectMismatch { ref project, .. }
                if project == "ghost-project"
            ),
            "unexpected error: {err:?}",
        );
    }
}