spikard_core/validation/
mod.rs

1//! Request/response validation using JSON Schema
2
3pub mod error_mapper;
4
5use crate::debug_log_module;
6use jsonschema::Validator;
7use serde_json::Value;
8use std::sync::Arc;
9
10use self::error_mapper::{ErrorCondition, ErrorMapper};
11
12/// Schema validator that compiles and validates JSON Schema
13#[derive(Clone)]
14pub struct SchemaValidator {
15    compiled: Arc<Validator>,
16    schema: Value,
17}
18
19impl SchemaValidator {
20    /// Create a new validator from a JSON Schema
21    pub fn new(schema: Value) -> Result<Self, String> {
22        let compiled = jsonschema::options()
23            .with_draft(jsonschema::Draft::Draft202012)
24            .should_validate_formats(true)
25            .with_pattern_options(jsonschema::PatternOptions::regex())
26            .build(&schema)
27            .map_err(|e| {
28                anyhow::anyhow!("Invalid JSON Schema")
29                    .context(format!("Schema compilation failed: {}", e))
30                    .to_string()
31            })?;
32
33        Ok(Self {
34            compiled: Arc::new(compiled),
35            schema,
36        })
37    }
38
39    /// Get the underlying JSON Schema
40    pub fn schema(&self) -> &Value {
41        &self.schema
42    }
43
44    /// Pre-process data to convert file objects to strings for format: "binary" validation
45    ///
46    /// Files uploaded via multipart are converted to objects like:
47    /// {"filename": "...", "size": N, "content": "...", "content_type": "..."}
48    ///
49    /// But schemas define them as: {"type": "string", "format": "binary"}
50    ///
51    /// This method recursively processes the data and converts file objects to their content strings
52    /// so that validation passes, while preserving the original structure for handlers to use.
53    fn preprocess_binary_fields(&self, data: &Value) -> Value {
54        self.preprocess_value_with_schema(data, &self.schema)
55    }
56
57    #[allow(clippy::only_used_in_recursion)]
58    fn preprocess_value_with_schema(&self, data: &Value, schema: &Value) -> Value {
59        if let Some(schema_obj) = schema.as_object() {
60            let is_string_type = schema_obj.get("type").and_then(|t| t.as_str()) == Some("string");
61            let is_binary_format = schema_obj.get("format").and_then(|f| f.as_str()) == Some("binary");
62
63            #[allow(clippy::collapsible_if)]
64            if is_string_type && is_binary_format {
65                if let Some(data_obj) = data.as_object() {
66                    if data_obj.contains_key("filename")
67                        && data_obj.contains_key("content")
68                        && data_obj.contains_key("size")
69                        && data_obj.contains_key("content_type")
70                    {
71                        return data_obj.get("content").unwrap_or(&Value::Null).clone();
72                    }
73                }
74                return data.clone();
75            }
76
77            #[allow(clippy::collapsible_if)]
78            if schema_obj.get("type").and_then(|t| t.as_str()) == Some("array") {
79                if let Some(items_schema) = schema_obj.get("items") {
80                    if let Some(data_array) = data.as_array() {
81                        let processed_array: Vec<Value> = data_array
82                            .iter()
83                            .map(|item| self.preprocess_value_with_schema(item, items_schema))
84                            .collect();
85                        return Value::Array(processed_array);
86                    }
87                }
88            }
89
90            #[allow(clippy::collapsible_if)]
91            if schema_obj.get("type").and_then(|t| t.as_str()) == Some("object") {
92                if let Some(properties) = schema_obj.get("properties").and_then(|p| p.as_object()) {
93                    if let Some(data_obj) = data.as_object() {
94                        let mut processed_obj = serde_json::Map::new();
95                        for (key, value) in data_obj {
96                            if let Some(prop_schema) = properties.get(key) {
97                                processed_obj
98                                    .insert(key.clone(), self.preprocess_value_with_schema(value, prop_schema));
99                            } else {
100                                processed_obj.insert(key.clone(), value.clone());
101                            }
102                        }
103                        return Value::Object(processed_obj);
104                    }
105                }
106            }
107        }
108
109        data.clone()
110    }
111
112    /// Validate JSON data against the schema
113    pub fn validate(&self, data: &Value) -> Result<(), ValidationError> {
114        let processed_data = self.preprocess_binary_fields(data);
115
116        let validation_errors: Vec<_> = self.compiled.iter_errors(&processed_data).collect();
117
118        if validation_errors.is_empty() {
119            return Ok(());
120        }
121
122        let errors: Vec<ValidationErrorDetail> = validation_errors
123            .into_iter()
124            .map(|err| {
125                let instance_path = err.instance_path().to_string();
126                let schema_path_str = err.schema_path().as_str();
127                let error_msg = err.to_string();
128
129                let param_name = if schema_path_str.ends_with("/required") {
130                    let field_name = if let Some(start) = error_msg.find('"') {
131                        if let Some(end) = error_msg[start + 1..].find('"') {
132                            error_msg[start + 1..start + 1 + end].to_string()
133                        } else {
134                            "".to_string()
135                        }
136                    } else {
137                        "".to_string()
138                    };
139
140                    if !instance_path.is_empty() && instance_path.starts_with('/') && instance_path.len() > 1 {
141                        let base_path = &instance_path[1..];
142                        if !field_name.is_empty() {
143                            format!("{}/{}", base_path, field_name)
144                        } else {
145                            base_path.to_string()
146                        }
147                    } else if !field_name.is_empty() {
148                        field_name
149                    } else {
150                        "body".to_string()
151                    }
152                } else if schema_path_str.contains("/additionalProperties") {
153                    if let Some(start) = error_msg.find('(') {
154                        if let Some(quote_start) = error_msg[start..].find('\'') {
155                            let abs_start = start + quote_start + 1;
156                            if let Some(quote_end) = error_msg[abs_start..].find('\'') {
157                                let property_name = error_msg[abs_start..abs_start + quote_end].to_string();
158                                if !instance_path.is_empty()
159                                    && instance_path.starts_with('/')
160                                    && instance_path.len() > 1
161                                {
162                                    format!("{}/{}", &instance_path[1..], property_name)
163                                } else {
164                                    property_name
165                                }
166                            } else {
167                                instance_path[1..].to_string()
168                            }
169                        } else {
170                            instance_path[1..].to_string()
171                        }
172                    } else if instance_path.starts_with('/') && instance_path.len() > 1 {
173                        instance_path[1..].to_string()
174                    } else {
175                        "body".to_string()
176                    }
177                } else if instance_path.starts_with('/') && instance_path.len() > 1 {
178                    instance_path[1..].to_string()
179                } else if instance_path.is_empty() {
180                    "body".to_string()
181                } else {
182                    instance_path
183                };
184
185                let loc_parts: Vec<String> = if param_name.contains('/') {
186                    let mut parts = vec!["body".to_string()];
187                    parts.extend(param_name.split('/').map(|s| s.to_string()));
188                    parts
189                } else if param_name == "body" {
190                    vec!["body".to_string()]
191                } else {
192                    vec!["body".to_string(), param_name.clone()]
193                };
194
195                let input_value = if schema_path_str == "/required" {
196                    data.clone()
197                } else {
198                    err.instance().clone().into_owned()
199                };
200
201                let schema_prop_path = if param_name.contains('/') {
202                    format!("/properties/{}", param_name.replace('/', "/properties/"))
203                } else {
204                    format!("/properties/{}", param_name)
205                };
206
207                // Use table-driven error mapping
208                let mut error_condition = ErrorCondition::from_schema_error(schema_path_str, &error_msg);
209
210                // Enrich condition with extracted values from schema
211                error_condition = match error_condition {
212                    ErrorCondition::TypeMismatch { .. } => {
213                        let expected_type = self
214                            .schema
215                            .pointer(&format!("{}/type", schema_prop_path))
216                            .and_then(|v| v.as_str())
217                            .unwrap_or("unknown")
218                            .to_string();
219                        ErrorCondition::TypeMismatch { expected_type }
220                    }
221                    ErrorCondition::AdditionalProperties { .. } => {
222                        let unexpected_field = if param_name.contains('/') {
223                            param_name.split('/').next_back().unwrap_or(&param_name).to_string()
224                        } else {
225                            param_name.clone()
226                        };
227                        ErrorCondition::AdditionalProperties {
228                            field: unexpected_field,
229                        }
230                    }
231                    other => other,
232                };
233
234                let (error_type, msg, ctx) =
235                    ErrorMapper::map_error(&error_condition, &self.schema, &schema_prop_path, &error_msg);
236
237                ValidationErrorDetail {
238                    error_type,
239                    loc: loc_parts,
240                    msg,
241                    input: input_value,
242                    ctx,
243                }
244            })
245            .collect();
246
247        debug_log_module!("validation", "Returning {} validation errors", errors.len());
248        for (i, error) in errors.iter().enumerate() {
249            debug_log_module!(
250                "validation",
251                "  Error {}: type={}, loc={:?}, msg={}, input={}, ctx={:?}",
252                i,
253                error.error_type,
254                error.loc,
255                error.msg,
256                error.input,
257                error.ctx
258            );
259        }
260        #[allow(clippy::collapsible_if)]
261        if crate::debug::is_enabled() {
262            if let Ok(json_errors) = serde_json::to_value(&errors) {
263                if let Ok(json_str) = serde_json::to_string_pretty(&json_errors) {
264                    debug_log_module!("validation", "Serialized errors:\n{}", json_str);
265                }
266            }
267        }
268
269        Err(ValidationError { errors })
270    }
271
272    /// Validate and parse JSON bytes
273    pub fn validate_json(&self, json_bytes: &[u8]) -> Result<Value, ValidationError> {
274        let value: Value = serde_json::from_slice(json_bytes).map_err(|e| ValidationError {
275            errors: vec![ValidationErrorDetail {
276                error_type: "json_parse_error".to_string(),
277                loc: vec!["body".to_string()],
278                msg: format!("Invalid JSON: {}", e),
279                input: Value::Null,
280                ctx: None,
281            }],
282        })?;
283
284        self.validate(&value)?;
285
286        Ok(value)
287    }
288}
289
290/// Validation error containing one or more validation failures
291#[derive(Debug, Clone)]
292pub struct ValidationError {
293    pub errors: Vec<ValidationErrorDetail>,
294}
295
296/// Individual validation error detail (FastAPI-compatible format)
297#[derive(Debug, Clone, serde::Serialize)]
298pub struct ValidationErrorDetail {
299    #[serde(rename = "type")]
300    pub error_type: String,
301    pub loc: Vec<String>,
302    pub msg: String,
303    pub input: Value,
304    #[serde(skip_serializing_if = "Option::is_none")]
305    pub ctx: Option<Value>,
306}
307
308impl std::fmt::Display for ValidationError {
309    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
310        write!(f, "Validation failed: {} errors", self.errors.len())
311    }
312}
313
314impl std::error::Error for ValidationError {}
315
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Compile `schema`, panicking if it is not a valid JSON Schema.
    fn compile(schema: Value) -> SchemaValidator {
        SchemaValidator::new(schema).unwrap()
    }

    /// A schema with a required field compiles and accepts a conforming document.
    #[test]
    fn test_validator_creation() {
        let validator = compile(json!({
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"}
            },
            "required": ["name"]
        }));

        let document = json!({"name": "Alice", "age": 30});
        assert!(validator.compiled.is_valid(&document));
    }

    /// A well-formed e-mail address passes `format: "email"`.
    #[test]
    fn test_validation_success() {
        let validator = compile(json!({
            "type": "object",
            "properties": {
                "email": {"type": "string", "format": "email"}
            }
        }));

        let outcome = validator.validate(&json!({"email": "test@example.com"}));
        assert!(outcome.is_ok());
    }

    /// A value below `minimum` is rejected.
    #[test]
    fn test_validation_failure() {
        let validator = compile(json!({
            "type": "object",
            "properties": {
                "age": {"type": "integer", "minimum": 0}
            },
            "required": ["age"]
        }));

        let outcome = validator.validate(&json!({"age": -5}));
        assert!(outcome.is_err());
    }

    /// A `maxLength` violation produces one fully populated, serializable detail.
    #[test]
    fn test_validation_error_serialization() {
        let validator = compile(json!({
            "type": "object",
            "properties": {
                "name": {
                    "type": "string",
                    "maxLength": 10
                }
            },
            "required": ["name"]
        }));
        let too_long = "this_is_way_too_long";

        let result = validator.validate(&json!({"name": too_long}));
        assert!(result.is_err());

        let err = result.unwrap_err();
        assert_eq!(err.errors.len(), 1);

        // In-memory representation.
        let detail = &err.errors[0];
        assert_eq!(detail.error_type, "string_too_long");
        assert_eq!(detail.loc, vec!["body", "name"]);
        assert_eq!(detail.msg, "String should have at most 10 characters");
        assert_eq!(detail.input, Value::String(too_long.to_string()));
        assert_eq!(detail.ctx, Some(json!({"max_length": 10})));

        // Serialized representation.
        let json_output = serde_json::to_value(&err.errors).unwrap();
        println!(
            "Serialized JSON: {}",
            serde_json::to_string_pretty(&json_output).unwrap()
        );

        let serialized = &json_output[0];
        assert!(serialized.get("type").is_some());
        assert!(serialized.get("loc").is_some());
        assert!(serialized.get("msg").is_some());
        assert!(
            serialized.get("input").is_some(),
            "Missing 'input' field in serialized JSON!"
        );
        assert!(
            serialized.get("ctx").is_some(),
            "Missing 'ctx' field in serialized JSON!"
        );

        assert_eq!(serialized["input"], Value::String(too_long.to_string()));
        assert_eq!(serialized["ctx"], json!({"max_length": 10}));
    }

    /// Two independent violations (`minLength` and `exclusiveMinimum`) are both reported.
    #[test]
    fn test_exclusive_minimum() {
        let validator = compile(json!({
            "$schema": "https://json-schema.org/draft/2020-12/schema",
            "type": "object",
            "required": ["id", "name", "price"],
            "properties": {
                "id": {
                    "type": "integer"
                },
                "name": {
                    "type": "string",
                    "minLength": 3
                },
                "price": {
                    "type": "number",
                    "exclusiveMinimum": 0
                }
            }
        }));

        let document = json!({
            "id": 1,
            "name": "X",
            "price": -10
        });

        let result = validator.validate(&document);
        eprintln!("Validation result: {:?}", result);

        assert!(result.is_err(), "Should have validation errors");
        let err = result.unwrap_err();
        eprintln!("Errors: {:?}", err.errors);
        assert_eq!(err.errors.len(), 2, "Should have 2 errors");
    }
}
459}