mpl_core/
validation.rs

1//! Schema Validation
2//!
3//! JSON Schema validation for SType payloads.
4//! Schema Fidelity is the mandatory QoM metric.
5
6use jsonschema::{Draft, JSONSchema};
7use serde_json::Value;
8use std::collections::HashMap;
9use std::sync::Arc;
10
11use crate::error::{MplError, Result, SchemaError};
12use crate::qom::QomMetrics;
13
/// Payload size ceiling, in bytes, used when no custom limit is given (1 MB).
pub const DEFAULT_MAX_PAYLOAD_SIZE: usize = 1 << 20;

/// Number of compiled schemas a validator caches when no custom limit is given.
pub const DEFAULT_MAX_SCHEMAS: usize = 1_000;

/// Default limit on how deeply JSON payloads may be nested.
pub const DEFAULT_MAX_NESTING_DEPTH: usize = 50;
22
23/// Schema validator with caching for performance
24pub struct SchemaValidator {
25    /// Cached compiled schemas
26    schemas: HashMap<String, Arc<JSONSchema>>,
27    /// Maximum payload size in bytes
28    max_payload_size: usize,
29    /// Maximum number of cached schemas
30    max_schemas: usize,
31}
32
33impl SchemaValidator {
34    /// Create a new validator with default limits
35    pub fn new() -> Self {
36        Self {
37            schemas: HashMap::new(),
38            max_payload_size: DEFAULT_MAX_PAYLOAD_SIZE,
39            max_schemas: DEFAULT_MAX_SCHEMAS,
40        }
41    }
42
43    /// Create a validator with custom limits
44    pub fn with_limits(max_payload_size: usize, max_schemas: usize) -> Self {
45        Self {
46            schemas: HashMap::new(),
47            max_payload_size,
48            max_schemas,
49        }
50    }
51
52    /// Register a schema for an SType
53    pub fn register(&mut self, stype: &str, schema: Value) -> Result<()> {
54        // Enforce schema cache limit (skip if already registered)
55        if !self.schemas.contains_key(stype) && self.schemas.len() >= self.max_schemas {
56            return Err(MplError::Validation(format!(
57                "Schema cache limit reached ({}). Cannot register schema for {}",
58                self.max_schemas, stype
59            )));
60        }
61
62        let compiled = JSONSchema::options()
63            .with_draft(Draft::Draft7)
64            .compile(&schema)
65            .map_err(|e| MplError::Validation(format!("Invalid schema for {}: {}", stype, e)))?;
66
67        self.schemas.insert(stype.to_string(), Arc::new(compiled));
68        Ok(())
69    }
70
71    /// Get current number of registered schemas
72    pub fn schema_count(&self) -> usize {
73        self.schemas.len()
74    }
75
76    /// Get maximum payload size limit
77    pub fn max_payload_size(&self) -> usize {
78        self.max_payload_size
79    }
80
81    /// Register a schema from a JSON string
82    pub fn register_json(&mut self, stype: &str, schema_json: &str) -> Result<()> {
83        let schema: Value = serde_json::from_str(schema_json)?;
84        self.register(stype, schema)
85    }
86
87    /// Check if a schema is registered
88    pub fn has_schema(&self, stype: &str) -> bool {
89        self.schemas.contains_key(stype)
90    }
91
92    /// Validate a payload against its declared SType
93    pub fn validate(&self, stype: &str, payload: &Value) -> Result<ValidationResult> {
94        // Check payload size (estimate based on JSON serialization)
95        let payload_size = estimate_json_size(payload);
96        if payload_size > self.max_payload_size {
97            return Err(MplError::Validation(format!(
98                "Payload size ({} bytes) exceeds maximum ({} bytes) for SType {}",
99                payload_size, self.max_payload_size, stype
100            )));
101        }
102
103        let schema = self.schemas.get(stype).ok_or_else(|| MplError::UnknownStype {
104            stype: stype.to_string(),
105            suggestions: self.suggest_similar(stype),
106        })?;
107
108        let result = schema.validate(payload);
109
110        match result {
111            Ok(_) => Ok(ValidationResult::valid()),
112            Err(errors) => {
113                let schema_errors: Vec<SchemaError> = errors
114                    .map(|e| SchemaError {
115                        path: e.instance_path.to_string(),
116                        message: e.to_string(),
117                        expected: None,
118                        actual: None,
119                    })
120                    .collect();
121
122                Ok(ValidationResult::invalid(schema_errors))
123            }
124        }
125    }
126
127    /// Validate and return QoM metrics
128    pub fn validate_qom(&self, stype: &str, payload: &Value) -> Result<QomMetrics> {
129        let result = self.validate(stype, payload)?;
130        Ok(QomMetrics {
131            schema_fidelity: if result.valid { 1.0 } else { 0.0 },
132            ..Default::default()
133        })
134    }
135
136    /// Validate and return an MplError if invalid
137    pub fn validate_or_error(&self, stype: &str, payload: &Value) -> Result<()> {
138        let result = self.validate(stype, payload)?;
139
140        if result.valid {
141            Ok(())
142        } else {
143            Err(MplError::SchemaFidelity {
144                message: format!("Payload does not conform to {}", stype),
145                stype: stype.to_string(),
146                errors: result.errors,
147                hints: vec![
148                    "Check required fields are present".to_string(),
149                    "Verify field types match schema".to_string(),
150                ],
151            })
152        }
153    }
154
155    /// Suggest similar STypes for typo correction
156    fn suggest_similar(&self, stype: &str) -> Vec<String> {
157        self.schemas
158            .keys()
159            .filter(|k| {
160                // Simple similarity: same suffix or prefix
161                k.ends_with(stype.split('.').last().unwrap_or(""))
162                    || k.starts_with(stype.split('.').next().unwrap_or(""))
163            })
164            .take(3)
165            .cloned()
166            .collect()
167    }
168
169    /// Get all registered STypes
170    pub fn registered_stypes(&self) -> Vec<&str> {
171        self.schemas.keys().map(|s| s.as_str()).collect()
172    }
173}
174
175impl Default for SchemaValidator {
176    fn default() -> Self {
177        Self::new()
178    }
179}
180
181/// Result of schema validation
182#[derive(Debug, Clone)]
183pub struct ValidationResult {
184    /// Whether validation passed
185    pub valid: bool,
186    /// Validation errors (empty if valid)
187    pub errors: Vec<SchemaError>,
188}
189
190impl ValidationResult {
191    /// Create a valid result
192    pub fn valid() -> Self {
193        Self {
194            valid: true,
195            errors: Vec::new(),
196        }
197    }
198
199    /// Create an invalid result
200    pub fn invalid(errors: Vec<SchemaError>) -> Self {
201        Self {
202            valid: false,
203            errors,
204        }
205    }
206
207    /// Convert to QoM metrics
208    pub fn to_qom_metrics(&self) -> QomMetrics {
209        QomMetrics {
210            schema_fidelity: if self.valid { 1.0 } else { 0.0 },
211            ..Default::default()
212        }
213    }
214}
215
216/// Builder for creating validators with common schemas
217pub struct ValidatorBuilder {
218    validator: SchemaValidator,
219}
220
221impl ValidatorBuilder {
222    pub fn new() -> Self {
223        Self {
224            validator: SchemaValidator::new(),
225        }
226    }
227
228    /// Add a schema
229    pub fn with_schema(mut self, stype: &str, schema: Value) -> Result<Self> {
230        self.validator.register(stype, schema)?;
231        Ok(self)
232    }
233
234    /// Add a schema from JSON string
235    pub fn with_schema_json(mut self, stype: &str, schema_json: &str) -> Result<Self> {
236        self.validator.register_json(stype, schema_json)?;
237        Ok(self)
238    }
239
240    /// Build the validator
241    pub fn build(self) -> SchemaValidator {
242        self.validator
243    }
244}
245
246impl Default for ValidatorBuilder {
247    fn default() -> Self {
248        Self::new()
249    }
250}
251
252/// Estimate the size of a JSON value in bytes (rough approximation)
253fn estimate_json_size(value: &Value) -> usize {
254    match value {
255        Value::Null => 4, // "null"
256        Value::Bool(b) => if *b { 4 } else { 5 }, // "true" or "false"
257        Value::Number(n) => n.to_string().len(),
258        Value::String(s) => s.len() + 2, // quotes
259        Value::Array(arr) => {
260            arr.iter().map(estimate_json_size).sum::<usize>() + arr.len() + 2 // commas + brackets
261        }
262        Value::Object(obj) => {
263            obj.iter()
264                .map(|(k, v)| k.len() + 3 + estimate_json_size(v)) // key + quotes + colon
265                .sum::<usize>()
266                + obj.len()
267                + 2 // commas + braces
268        }
269    }
270}
271
272/// Check JSON nesting depth (to prevent stack overflow attacks)
273pub fn check_nesting_depth(value: &Value, max_depth: usize) -> Result<()> {
274    fn check_depth(value: &Value, current: usize, max: usize) -> bool {
275        if current > max {
276            return false;
277        }
278        match value {
279            Value::Array(arr) => arr.iter().all(|v| check_depth(v, current + 1, max)),
280            Value::Object(obj) => obj.values().all(|v| check_depth(v, current + 1, max)),
281            _ => true,
282        }
283    }
284
285    if check_depth(value, 0, max_depth) {
286        Ok(())
287    } else {
288        Err(MplError::Validation(format!(
289            "JSON nesting depth exceeds maximum of {}",
290            max_depth
291        )))
292    }
293}
294
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Draft 7 schema for a calendar event with three required string fields
    /// and no additional properties allowed.
    fn sample_schema() -> Value {
        json!({
            "$schema": "http://json-schema.org/draft-07/schema#",
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "start": {"type": "string", "format": "date-time"},
                "end": {"type": "string", "format": "date-time"}
            },
            "required": ["title", "start", "end"],
            "additionalProperties": false
        })
    }

    #[test]
    fn test_register_and_validate() {
        let mut validator = SchemaValidator::new();
        validator
            .register("org.calendar.Event.v1", sample_schema())
            .unwrap();

        let valid_payload = json!({
            "title": "Meeting",
            "start": "2025-01-01T10:00:00Z",
            "end": "2025-01-01T11:00:00Z"
        });

        let result = validator
            .validate("org.calendar.Event.v1", &valid_payload)
            .unwrap();
        assert!(result.valid);
    }

    #[test]
    fn test_invalid_payload() {
        let mut validator = SchemaValidator::new();
        validator
            .register("org.calendar.Event.v1", sample_schema())
            .unwrap();

        let invalid_payload = json!({
            "title": "Meeting"
            // missing start and end
        });

        let result = validator
            .validate("org.calendar.Event.v1", &invalid_payload)
            .unwrap();
        assert!(!result.valid);
        assert!(!result.errors.is_empty());
    }

    #[test]
    fn test_unknown_stype() {
        let validator = SchemaValidator::new();
        let result = validator.validate("unknown.Type.v1", &json!({}));
        assert!(result.is_err());
        assert!(matches!(result.unwrap_err(), MplError::UnknownStype { .. }));
    }

    #[test]
    fn test_unknown_stype_suggests_similar() {
        let mut validator = SchemaValidator::new();
        validator
            .register("org.calendar.Event.v1", sample_schema())
            .unwrap();

        // Typo in the middle segment; first and last segments still match.
        match validator.validate("org.calendar.Evnt.v1", &json!({})) {
            Err(MplError::UnknownStype { suggestions, .. }) => {
                assert_eq!(suggestions, vec!["org.calendar.Event.v1".to_string()]);
            }
            _ => panic!("expected UnknownStype error"),
        }
    }

    #[test]
    fn test_qom_metrics() {
        let mut validator = SchemaValidator::new();
        validator
            .register("org.test.Test.v1", json!({"type": "object"}))
            .unwrap();

        let metrics = validator
            .validate_qom("org.test.Test.v1", &json!({}))
            .unwrap();
        assert_eq!(metrics.schema_fidelity, 1.0);
    }

    #[test]
    fn test_validate_or_error() {
        let mut validator = SchemaValidator::new();
        validator
            .register("org.calendar.Event.v1", sample_schema())
            .unwrap();

        let valid_payload = json!({
            "title": "Meeting",
            "start": "2025-01-01T10:00:00Z",
            "end": "2025-01-01T11:00:00Z"
        });
        assert!(validator
            .validate_or_error("org.calendar.Event.v1", &valid_payload)
            .is_ok());

        let result = validator.validate_or_error("org.calendar.Event.v1", &json!({"title": "Meeting"}));
        assert!(matches!(result, Err(MplError::SchemaFidelity { .. })));
    }

    #[test]
    fn test_schema_cache_limit() {
        let mut validator = SchemaValidator::with_limits(DEFAULT_MAX_PAYLOAD_SIZE, 1);
        validator
            .register("org.test.A.v1", json!({"type": "object"}))
            .unwrap();

        let overflow = validator.register("org.test.B.v1", json!({"type": "object"}));
        assert!(matches!(overflow, Err(MplError::Validation(_))));
        assert_eq!(validator.schema_count(), 1);

        // Replacing an already-registered SType is allowed even when the cache is full.
        validator
            .register("org.test.A.v1", json!({"type": "string"}))
            .unwrap();
        assert_eq!(validator.schema_count(), 1);
    }

    #[test]
    fn test_payload_size_limit() {
        let mut validator = SchemaValidator::with_limits(8, DEFAULT_MAX_SCHEMAS);
        validator
            .register("org.test.Test.v1", json!({"type": "object"}))
            .unwrap();

        let oversized = json!({"key": "a value well past eight bytes"});
        let result = validator.validate("org.test.Test.v1", &oversized);
        assert!(matches!(result, Err(MplError::Validation(_))));

        // An empty object is well under the limit.
        assert!(validator
            .validate("org.test.Test.v1", &json!({}))
            .unwrap()
            .valid);
    }

    #[test]
    fn test_nesting_depth_limit() {
        // Innermost scalar sits at depth 2.
        assert!(check_nesting_depth(&json!([[1]]), 2).is_ok());
        // Innermost scalar sits at depth 3.
        assert!(check_nesting_depth(&json!([[[1]]]), 2).is_err());
        // A bare scalar is at depth 0.
        assert!(check_nesting_depth(&json!("scalar"), 0).is_ok());
    }

    #[test]
    fn test_builder() {
        let validator = ValidatorBuilder::new()
            .with_schema("org.test.Test.v1", json!({"type": "object"}))
            .unwrap()
            .build();

        assert!(validator.has_schema("org.test.Test.v1"));
    }
}