1use jsonschema::{Draft, JSONSchema};
7use serde_json::Value;
8use std::collections::HashMap;
9use std::sync::Arc;
10
11use crate::error::{MplError, Result, SchemaError};
12use crate::qom::QomMetrics;
13
/// Default limit, in bytes, on the estimated payload size accepted by
/// `SchemaValidator::validate` (1 MiB).
pub const DEFAULT_MAX_PAYLOAD_SIZE: usize = 1 << 20;

/// Default limit on how many distinct schemas a `SchemaValidator` will hold.
pub const DEFAULT_MAX_SCHEMAS: usize = 1_000;

/// Default nesting-depth limit.
///
/// NOTE(review): not referenced elsewhere in this file; presumably meant to be
/// passed as `max_depth` to `check_nesting_depth` by callers — confirm.
pub const DEFAULT_MAX_NESTING_DEPTH: usize = 50;
22
23pub struct SchemaValidator {
25 schemas: HashMap<String, Arc<JSONSchema>>,
27 max_payload_size: usize,
29 max_schemas: usize,
31}
32
33impl SchemaValidator {
34 pub fn new() -> Self {
36 Self {
37 schemas: HashMap::new(),
38 max_payload_size: DEFAULT_MAX_PAYLOAD_SIZE,
39 max_schemas: DEFAULT_MAX_SCHEMAS,
40 }
41 }
42
43 pub fn with_limits(max_payload_size: usize, max_schemas: usize) -> Self {
45 Self {
46 schemas: HashMap::new(),
47 max_payload_size,
48 max_schemas,
49 }
50 }
51
52 pub fn register(&mut self, stype: &str, schema: Value) -> Result<()> {
54 if !self.schemas.contains_key(stype) && self.schemas.len() >= self.max_schemas {
56 return Err(MplError::Validation(format!(
57 "Schema cache limit reached ({}). Cannot register schema for {}",
58 self.max_schemas, stype
59 )));
60 }
61
62 let compiled = JSONSchema::options()
63 .with_draft(Draft::Draft7)
64 .compile(&schema)
65 .map_err(|e| MplError::Validation(format!("Invalid schema for {}: {}", stype, e)))?;
66
67 self.schemas.insert(stype.to_string(), Arc::new(compiled));
68 Ok(())
69 }
70
71 pub fn schema_count(&self) -> usize {
73 self.schemas.len()
74 }
75
76 pub fn max_payload_size(&self) -> usize {
78 self.max_payload_size
79 }
80
81 pub fn register_json(&mut self, stype: &str, schema_json: &str) -> Result<()> {
83 let schema: Value = serde_json::from_str(schema_json)?;
84 self.register(stype, schema)
85 }
86
87 pub fn has_schema(&self, stype: &str) -> bool {
89 self.schemas.contains_key(stype)
90 }
91
92 pub fn validate(&self, stype: &str, payload: &Value) -> Result<ValidationResult> {
94 let payload_size = estimate_json_size(payload);
96 if payload_size > self.max_payload_size {
97 return Err(MplError::Validation(format!(
98 "Payload size ({} bytes) exceeds maximum ({} bytes) for SType {}",
99 payload_size, self.max_payload_size, stype
100 )));
101 }
102
103 let schema = self.schemas.get(stype).ok_or_else(|| MplError::UnknownStype {
104 stype: stype.to_string(),
105 suggestions: self.suggest_similar(stype),
106 })?;
107
108 let result = schema.validate(payload);
109
110 match result {
111 Ok(_) => Ok(ValidationResult::valid()),
112 Err(errors) => {
113 let schema_errors: Vec<SchemaError> = errors
114 .map(|e| SchemaError {
115 path: e.instance_path.to_string(),
116 message: e.to_string(),
117 expected: None,
118 actual: None,
119 })
120 .collect();
121
122 Ok(ValidationResult::invalid(schema_errors))
123 }
124 }
125 }
126
127 pub fn validate_qom(&self, stype: &str, payload: &Value) -> Result<QomMetrics> {
129 let result = self.validate(stype, payload)?;
130 Ok(QomMetrics {
131 schema_fidelity: if result.valid { 1.0 } else { 0.0 },
132 ..Default::default()
133 })
134 }
135
136 pub fn validate_or_error(&self, stype: &str, payload: &Value) -> Result<()> {
138 let result = self.validate(stype, payload)?;
139
140 if result.valid {
141 Ok(())
142 } else {
143 Err(MplError::SchemaFidelity {
144 message: format!("Payload does not conform to {}", stype),
145 stype: stype.to_string(),
146 errors: result.errors,
147 hints: vec![
148 "Check required fields are present".to_string(),
149 "Verify field types match schema".to_string(),
150 ],
151 })
152 }
153 }
154
155 fn suggest_similar(&self, stype: &str) -> Vec<String> {
157 self.schemas
158 .keys()
159 .filter(|k| {
160 k.ends_with(stype.split('.').last().unwrap_or(""))
162 || k.starts_with(stype.split('.').next().unwrap_or(""))
163 })
164 .take(3)
165 .cloned()
166 .collect()
167 }
168
169 pub fn registered_stypes(&self) -> Vec<&str> {
171 self.schemas.keys().map(|s| s.as_str()).collect()
172 }
173}
174
175impl Default for SchemaValidator {
176 fn default() -> Self {
177 Self::new()
178 }
179}
180
181#[derive(Debug, Clone)]
183pub struct ValidationResult {
184 pub valid: bool,
186 pub errors: Vec<SchemaError>,
188}
189
190impl ValidationResult {
191 pub fn valid() -> Self {
193 Self {
194 valid: true,
195 errors: Vec::new(),
196 }
197 }
198
199 pub fn invalid(errors: Vec<SchemaError>) -> Self {
201 Self {
202 valid: false,
203 errors,
204 }
205 }
206
207 pub fn to_qom_metrics(&self) -> QomMetrics {
209 QomMetrics {
210 schema_fidelity: if self.valid { 1.0 } else { 0.0 },
211 ..Default::default()
212 }
213 }
214}
215
216pub struct ValidatorBuilder {
218 validator: SchemaValidator,
219}
220
221impl ValidatorBuilder {
222 pub fn new() -> Self {
223 Self {
224 validator: SchemaValidator::new(),
225 }
226 }
227
228 pub fn with_schema(mut self, stype: &str, schema: Value) -> Result<Self> {
230 self.validator.register(stype, schema)?;
231 Ok(self)
232 }
233
234 pub fn with_schema_json(mut self, stype: &str, schema_json: &str) -> Result<Self> {
236 self.validator.register_json(stype, schema_json)?;
237 Ok(self)
238 }
239
240 pub fn build(self) -> SchemaValidator {
242 self.validator
243 }
244}
245
246impl Default for ValidatorBuilder {
247 fn default() -> Self {
248 Self::new()
249 }
250}
251
252fn estimate_json_size(value: &Value) -> usize {
254 match value {
255 Value::Null => 4, Value::Bool(b) => if *b { 4 } else { 5 }, Value::Number(n) => n.to_string().len(),
258 Value::String(s) => s.len() + 2, Value::Array(arr) => {
260 arr.iter().map(estimate_json_size).sum::<usize>() + arr.len() + 2 }
262 Value::Object(obj) => {
263 obj.iter()
264 .map(|(k, v)| k.len() + 3 + estimate_json_size(v)) .sum::<usize>()
266 + obj.len()
267 + 2 }
269 }
270}
271
272pub fn check_nesting_depth(value: &Value, max_depth: usize) -> Result<()> {
274 fn check_depth(value: &Value, current: usize, max: usize) -> bool {
275 if current > max {
276 return false;
277 }
278 match value {
279 Value::Array(arr) => arr.iter().all(|v| check_depth(v, current + 1, max)),
280 Value::Object(obj) => obj.values().all(|v| check_depth(v, current + 1, max)),
281 _ => true,
282 }
283 }
284
285 if check_depth(value, 0, max_depth) {
286 Ok(())
287 } else {
288 Err(MplError::Validation(format!(
289 "JSON nesting depth exceeds maximum of {}",
290 max_depth
291 )))
292 }
293}
294
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// SType used by the calendar-event tests.
    const EVENT_STYPE: &str = "org.calendar.Event.v1";
    /// SType used by the tests that only need a permissive object schema.
    const TEST_STYPE: &str = "org.test.Test.v1";

    /// Draft 7 schema for a calendar event: three required string fields and
    /// no additional properties.
    fn sample_schema() -> Value {
        json!({
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "start": {"type": "string", "format": "date-time"},
                "end": {"type": "string", "format": "date-time"}
            },
            "required": ["title", "start", "end"],
            "additionalProperties": false
        })
    }

    /// Validator with the calendar-event schema already registered.
    fn event_validator() -> SchemaValidator {
        let mut validator = SchemaValidator::new();
        validator.register(EVENT_STYPE, sample_schema()).unwrap();
        validator
    }

    #[test]
    fn test_register_and_validate() {
        let validator = event_validator();
        let valid_payload = json!({
            "title": "Meeting",
            "start": "2025-01-01T10:00:00Z",
            "end": "2025-01-01T11:00:00Z"
        });

        let outcome = validator.validate(EVENT_STYPE, &valid_payload).unwrap();

        assert!(outcome.valid);
    }

    #[test]
    fn test_invalid_payload() {
        let validator = event_validator();
        // "start" and "end" are required by the schema but missing here.
        let invalid_payload = json!({
            "title": "Meeting"
        });

        let outcome = validator.validate(EVENT_STYPE, &invalid_payload).unwrap();

        assert!(!outcome.valid);
        assert!(!outcome.errors.is_empty());
    }

    #[test]
    fn test_unknown_stype() {
        let validator = SchemaValidator::new();

        let outcome = validator.validate("unknown.Type.v1", &json!({}));

        assert!(matches!(outcome, Err(MplError::UnknownStype { .. })));
    }

    #[test]
    fn test_qom_metrics() {
        let mut validator = SchemaValidator::new();
        validator
            .register(TEST_STYPE, json!({"type": "object"}))
            .unwrap();

        let metrics = validator.validate_qom(TEST_STYPE, &json!({})).unwrap();

        assert_eq!(metrics.schema_fidelity, 1.0);
    }

    #[test]
    fn test_builder() {
        let built = ValidatorBuilder::new()
            .with_schema(TEST_STYPE, json!({"type": "object"}))
            .unwrap()
            .build();

        assert!(built.has_schema(TEST_STYPE));
    }
}