1use super::*;
4use serde_json::Value as JsonValue;
5use regex::Regex;
6use once_cell::sync::Lazy;
7
/// Validates `serde_json` values against a [`JsonSchema`].
///
/// Holds the root schema (which is also where `#/$defs/...` references are
/// looked up) together with the [`ValidationConfig`] applied to every run.
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// Root schema that every validation run starts from.
    schema: JsonSchema,
    /// Options controlling strictness, format checks and recursion depth.
    config: ValidationConfig,
}
16
/// Tunable options for a [`SchemaValidator`].
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// When `true`, a result is only valid if it has no warnings as well as
    /// no errors.
    pub strict_mode: bool,
    /// When `false`, `format` keywords in the schema are not checked.
    pub validate_formats: bool,
    /// Recursion depth beyond which a `MAX_DEPTH_EXCEEDED` error is reported
    /// instead of descending further.
    pub max_depth: usize,
    /// When `true`, properties not described by the schema are never
    /// reported, even if the schema sets `additionalProperties` to `false`.
    pub allow_additional_properties: bool,
}
29
/// Outcome of validating one JSON document.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// Overall verdict: no errors, and in strict mode no warnings either.
    pub valid: bool,
    /// Every schema violation that was found.
    pub errors: Vec<ValidationError>,
    /// Non-fatal findings (they only affect `valid` in strict mode).
    pub warnings: Vec<ValidationWarning>,
    /// Counters and timings collected during the run.
    pub metadata: ValidationMetadata,
}
42
/// A single schema violation.
#[derive(Debug, Clone)]
pub struct ValidationError {
    /// Machine-readable identifier such as `TYPE_MISMATCH`.
    pub code: String,
    /// Human-readable description of the problem.
    pub message: String,
    /// Slash-separated location of the offending value inside the data.
    pub instance_path: String,
    /// Slash-separated location of the schema keyword that was violated.
    pub schema_path: String,
    /// Copy of the offending value, when one exists (e.g. `None` for a
    /// missing required property).
    pub invalid_value: Option<JsonValue>,
}
57
/// A non-fatal finding produced during validation.
#[derive(Debug, Clone)]
pub struct ValidationWarning {
    /// Machine-readable identifier such as `UNKNOWN_FORMAT`.
    pub code: String,
    /// Human-readable description of the finding.
    pub message: String,
    /// Slash-separated location of the value the warning refers to.
    pub instance_path: String,
    /// Optional hint on how to resolve the warning.
    pub suggestion: Option<String>,
}
70
/// Statistics gathered while validating a document.
#[derive(Debug, Clone)]
pub struct ValidationMetadata {
    /// Number of schema/value pairs visited by the main traversal
    /// (trial runs made for `anyOf` / `oneOf` branches are not included).
    pub properties_validated: usize,
    /// Wall-clock time spent in the validation traversal.
    pub validation_time: std::time::Duration,
    /// Heuristic score derived from the root schema's keywords.
    pub schema_complexity: f64,
    /// Heuristic score derived from the size and nesting of the data.
    pub data_complexity: f64,
}
83
84static ISRC_PATTERN: Lazy<Regex> = Lazy::new(|| {
86 Regex::new(r"^[A-Z]{2}[A-Z0-9]{3}\d{7}$").unwrap()
87});
88
89static UPC_PATTERN: Lazy<Regex> = Lazy::new(|| {
90 Regex::new(r"^\d{12}$").unwrap()
91});
92
93static LANGUAGE_CODE_PATTERN: Lazy<Regex> = Lazy::new(|| {
94 Regex::new(r"^[a-z]{2}(-[A-Z]{2})?$").unwrap()
95});
96
97static TERRITORY_CODE_PATTERN: Lazy<Regex> = Lazy::new(|| {
98 Regex::new(r"^[A-Z]{2}|Worldwide$").unwrap()
99});
100
101static DURATION_PATTERN: Lazy<Regex> = Lazy::new(|| {
102 Regex::new(r"^PT(?:\d+H)?(?:\d+M)?(?:\d+(?:\.\d+)?S)?$").unwrap()
103});
104
105impl Default for ValidationConfig {
106 fn default() -> Self {
107 Self {
108 strict_mode: false,
109 validate_formats: true,
110 max_depth: 100,
111 allow_additional_properties: false,
112 }
113 }
114}
115
116impl SchemaValidator {
117 pub fn new(schema: JsonSchema) -> Self {
119 Self {
120 schema,
121 config: ValidationConfig::default(),
122 }
123 }
124
125 pub fn with_config(schema: JsonSchema, config: ValidationConfig) -> Self {
127 Self { schema, config }
128 }
129
130 pub fn validate(&self, data: &JsonValue) -> ValidationResult {
132 let start_time = std::time::Instant::now();
133 let mut errors = Vec::new();
134 let mut warnings = Vec::new();
135 let mut properties_validated = 0;
136
137 self.validate_recursive(
138 data,
139 &self.schema,
140 "",
141 "",
142 0,
143 &mut errors,
144 &mut warnings,
145 &mut properties_validated
146 );
147
148 let validation_time = start_time.elapsed();
149 let valid = errors.is_empty() && (!self.config.strict_mode || warnings.is_empty());
150
151 ValidationResult {
152 valid,
153 errors,
154 warnings,
155 metadata: ValidationMetadata {
156 properties_validated,
157 validation_time,
158 schema_complexity: self.calculate_schema_complexity(&self.schema),
159 data_complexity: self.calculate_data_complexity(data),
160 },
161 }
162 }
163
164 pub fn validate_with_paths(&self, data: &JsonValue) -> ValidationResult {
166 self.validate(data)
168 }
169
170 fn validate_recursive(
173 &self,
174 data: &JsonValue,
175 schema: &JsonSchema,
176 instance_path: &str,
177 schema_path: &str,
178 depth: usize,
179 errors: &mut Vec<ValidationError>,
180 warnings: &mut Vec<ValidationWarning>,
181 properties_validated: &mut usize,
182 ) {
183 if depth > self.config.max_depth {
184 errors.push(ValidationError {
185 code: "MAX_DEPTH_EXCEEDED".to_string(),
186 message: format!("Maximum validation depth {} exceeded", self.config.max_depth),
187 instance_path: instance_path.to_string(),
188 schema_path: schema_path.to_string(),
189 invalid_value: Some(data.clone()),
190 });
191 return;
192 }
193
194 *properties_validated += 1;
195
196 if let Some(ref reference) = schema.reference {
198 if let Some(resolved_schema) = self.resolve_reference(reference) {
199 return self.validate_recursive(
200 data, &resolved_schema, instance_path,
201 &format!("{}/{}", schema_path, reference),
202 depth + 1, errors, warnings, properties_validated
203 );
204 } else {
205 errors.push(ValidationError {
206 code: "UNRESOLVED_REFERENCE".to_string(),
207 message: format!("Cannot resolve schema reference: {}", reference),
208 instance_path: instance_path.to_string(),
209 schema_path: schema_path.to_string(),
210 invalid_value: None,
211 });
212 return;
213 }
214 }
215
216 if let Some(ref expected_type) = schema.schema_type {
218 self.validate_type(data, expected_type, instance_path, schema_path, errors);
219 }
220
221 match data {
222 JsonValue::Object(obj) => {
223 self.validate_object(obj, schema, instance_path, schema_path, depth, errors, warnings, properties_validated);
224 },
225 JsonValue::Array(arr) => {
226 self.validate_array(arr, schema, instance_path, schema_path, depth, errors, warnings, properties_validated);
227 },
228 JsonValue::String(s) => {
229 self.validate_string(s, schema, instance_path, schema_path, errors, warnings);
230 },
231 JsonValue::Number(_) => {
232 self.validate_number(data, schema, instance_path, schema_path, errors, warnings);
233 },
234 _ => {}
235 }
236
237 if let Some(ref enum_values) = schema.enum_values {
239 if !enum_values.contains(data) {
240 errors.push(ValidationError {
241 code: "ENUM_VIOLATION".to_string(),
242 message: format!("Value must be one of: {}",
243 enum_values.iter().map(|v| v.to_string()).collect::<Vec<_>>().join(", ")),
244 instance_path: instance_path.to_string(),
245 schema_path: format!("{}/enum", schema_path),
246 invalid_value: Some(data.clone()),
247 });
248 }
249 }
250
251 self.validate_conditionals(data, schema, instance_path, schema_path, depth, errors, warnings, properties_validated);
253 }
254
255 fn validate_object(
256 &self,
257 obj: &serde_json::Map<String, JsonValue>,
258 schema: &JsonSchema,
259 instance_path: &str,
260 schema_path: &str,
261 depth: usize,
262 errors: &mut Vec<ValidationError>,
263 warnings: &mut Vec<ValidationWarning>,
264 properties_validated: &mut usize,
265 ) {
266 if let Some(ref required) = schema.required {
268 for required_prop in required {
269 if !obj.contains_key(required_prop) {
270 errors.push(ValidationError {
271 code: "REQUIRED_PROPERTY_MISSING".to_string(),
272 message: format!("Required property '{}' is missing", required_prop),
273 instance_path: instance_path.to_string(),
274 schema_path: format!("{}/required", schema_path),
275 invalid_value: None,
276 });
277 }
278 }
279 }
280
281 if let Some(ref properties) = schema.properties {
283 for (prop_name, prop_value) in obj {
284 let new_instance_path = if instance_path.is_empty() {
285 prop_name.clone()
286 } else {
287 format!("{}/{}", instance_path, prop_name)
288 };
289
290 if let Some(prop_schema) = properties.get(prop_name) {
291 self.validate_recursive(
292 prop_value, prop_schema, &new_instance_path,
293 &format!("{}/properties/{}", schema_path, prop_name),
294 depth + 1, errors, warnings, properties_validated
295 );
296 } else if !self.config.allow_additional_properties &&
297 schema.additional_properties.unwrap_or(true) == false {
298 errors.push(ValidationError {
299 code: "ADDITIONAL_PROPERTY_NOT_ALLOWED".to_string(),
300 message: format!("Additional property '{}' is not allowed", prop_name),
301 instance_path: new_instance_path,
302 schema_path: format!("{}/additionalProperties", schema_path),
303 invalid_value: Some(prop_value.clone()),
304 });
305 }
306 }
307 }
308 }
309
310 fn validate_array(
311 &self,
312 arr: &Vec<JsonValue>,
313 schema: &JsonSchema,
314 instance_path: &str,
315 schema_path: &str,
316 depth: usize,
317 errors: &mut Vec<ValidationError>,
318 warnings: &mut Vec<ValidationWarning>,
319 properties_validated: &mut usize,
320 ) {
321 if let Some(min_length) = schema.min_length {
323 if arr.len() < min_length {
324 errors.push(ValidationError {
325 code: "ARRAY_TOO_SHORT".to_string(),
326 message: format!("Array must have at least {} items, has {}", min_length, arr.len()),
327 instance_path: instance_path.to_string(),
328 schema_path: format!("{}/minLength", schema_path),
329 invalid_value: Some(JsonValue::Array(arr.clone())),
330 });
331 }
332 }
333
334 if let Some(max_length) = schema.max_length {
335 if arr.len() > max_length {
336 errors.push(ValidationError {
337 code: "ARRAY_TOO_LONG".to_string(),
338 message: format!("Array must have at most {} items, has {}", max_length, arr.len()),
339 instance_path: instance_path.to_string(),
340 schema_path: format!("{}/maxLength", schema_path),
341 invalid_value: Some(JsonValue::Array(arr.clone())),
342 });
343 }
344 }
345
346 if let Some(ref items_schema) = schema.items {
348 for (index, item) in arr.iter().enumerate() {
349 let new_instance_path = format!("{}/{}", instance_path, index);
350 self.validate_recursive(
351 item, items_schema, &new_instance_path,
352 &format!("{}/items", schema_path),
353 depth + 1, errors, warnings, properties_validated
354 );
355 }
356 }
357 }
358
359 fn validate_string(
360 &self,
361 s: &str,
362 schema: &JsonSchema,
363 instance_path: &str,
364 schema_path: &str,
365 errors: &mut Vec<ValidationError>,
366 warnings: &mut Vec<ValidationWarning>,
367 ) {
368 if let Some(min_length) = schema.min_length {
370 if s.len() < min_length {
371 errors.push(ValidationError {
372 code: "STRING_TOO_SHORT".to_string(),
373 message: format!("String must be at least {} characters, is {}", min_length, s.len()),
374 instance_path: instance_path.to_string(),
375 schema_path: format!("{}/minLength", schema_path),
376 invalid_value: Some(JsonValue::String(s.to_string())),
377 });
378 }
379 }
380
381 if let Some(max_length) = schema.max_length {
382 if s.len() > max_length {
383 errors.push(ValidationError {
384 code: "STRING_TOO_LONG".to_string(),
385 message: format!("String must be at most {} characters, is {}", max_length, s.len()),
386 instance_path: instance_path.to_string(),
387 schema_path: format!("{}/maxLength", schema_path),
388 invalid_value: Some(JsonValue::String(s.to_string())),
389 });
390 }
391 }
392
393 if let Some(ref pattern) = schema.pattern {
395 if let Ok(regex) = Regex::new(pattern) {
396 if !regex.is_match(s) {
397 errors.push(ValidationError {
398 code: "PATTERN_MISMATCH".to_string(),
399 message: format!("String does not match required pattern: {}", pattern),
400 instance_path: instance_path.to_string(),
401 schema_path: format!("{}/pattern", schema_path),
402 invalid_value: Some(JsonValue::String(s.to_string())),
403 });
404 }
405 }
406 }
407
408 if let Some(ref format) = schema.format {
410 self.validate_format(s, format, instance_path, schema_path, errors, warnings);
411 }
412
413 self.validate_ddex_codes(s, instance_path, errors, warnings);
415 }
416
    /// Hook for numeric keyword validation.
    ///
    /// The body is empty: every parameter is unused and nothing is appended to
    /// `_errors` or `_warnings`, so no numeric constraints from the schema are
    /// checked here.
    fn validate_number(
        &self,
        _num: &JsonValue,
        _schema: &JsonSchema,
        _instance_path: &str,
        _schema_path: &str,
        _errors: &mut Vec<ValidationError>,
        _warnings: &mut Vec<ValidationWarning>,
    ) {
    }
429
430 fn validate_type(
431 &self,
432 data: &JsonValue,
433 expected_type: &str,
434 instance_path: &str,
435 schema_path: &str,
436 errors: &mut Vec<ValidationError>,
437 ) {
438 let actual_type = match data {
439 JsonValue::Null => "null",
440 JsonValue::Bool(_) => "boolean",
441 JsonValue::Number(_) => "number",
442 JsonValue::String(_) => "string",
443 JsonValue::Array(_) => "array",
444 JsonValue::Object(_) => "object",
445 };
446
447 if actual_type != expected_type {
448 errors.push(ValidationError {
449 code: "TYPE_MISMATCH".to_string(),
450 message: format!("Expected type '{}', got '{}'", expected_type, actual_type),
451 instance_path: instance_path.to_string(),
452 schema_path: format!("{}/type", schema_path),
453 invalid_value: Some(data.clone()),
454 });
455 }
456 }
457
458 fn validate_format(
459 &self,
460 s: &str,
461 format: &str,
462 instance_path: &str,
463 schema_path: &str,
464 errors: &mut Vec<ValidationError>,
465 warnings: &mut Vec<ValidationWarning>,
466 ) {
467 if !self.config.validate_formats {
468 return;
469 }
470
471 match format {
472 "date" => {
473 if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
474 errors.push(ValidationError {
475 code: "INVALID_DATE_FORMAT".to_string(),
476 message: "Invalid date format, expected YYYY-MM-DD".to_string(),
477 instance_path: instance_path.to_string(),
478 schema_path: format!("{}/format", schema_path),
479 invalid_value: Some(JsonValue::String(s.to_string())),
480 });
481 }
482 },
483 "date-time" => {
484 if chrono::DateTime::parse_from_rfc3339(s).is_err() {
485 errors.push(ValidationError {
486 code: "INVALID_DATETIME_FORMAT".to_string(),
487 message: "Invalid date-time format, expected ISO 8601/RFC 3339".to_string(),
488 instance_path: instance_path.to_string(),
489 schema_path: format!("{}/format", schema_path),
490 invalid_value: Some(JsonValue::String(s.to_string())),
491 });
492 }
493 },
494 "uri" => {
495 if url::Url::parse(s).is_err() {
496 errors.push(ValidationError {
497 code: "INVALID_URI_FORMAT".to_string(),
498 message: "Invalid URI format".to_string(),
499 instance_path: instance_path.to_string(),
500 schema_path: format!("{}/format", schema_path),
501 invalid_value: Some(JsonValue::String(s.to_string())),
502 });
503 }
504 },
505 _ => {
506 warnings.push(ValidationWarning {
507 code: "UNKNOWN_FORMAT".to_string(),
508 message: format!("Unknown format specifier: {}", format),
509 instance_path: instance_path.to_string(),
510 suggestion: Some("Check schema for supported format types".to_string()),
511 });
512 }
513 }
514 }
515
516 fn validate_ddex_codes(
517 &self,
518 s: &str,
519 instance_path: &str,
520 errors: &mut Vec<ValidationError>,
521 warnings: &mut Vec<ValidationWarning>,
522 ) {
523 if instance_path.contains("isrc") {
525 if !ISRC_PATTERN.is_match(s) {
526 errors.push(ValidationError {
527 code: "INVALID_ISRC".to_string(),
528 message: "Invalid ISRC format, expected format: CC-XXX-YY-NNNNN".to_string(),
529 instance_path: instance_path.to_string(),
530 schema_path: "pattern".to_string(),
531 invalid_value: Some(JsonValue::String(s.to_string())),
532 });
533 }
534 }
535
536 if instance_path.contains("upc") {
538 if !UPC_PATTERN.is_match(s) {
539 errors.push(ValidationError {
540 code: "INVALID_UPC".to_string(),
541 message: "Invalid UPC format, expected 12 digits".to_string(),
542 instance_path: instance_path.to_string(),
543 schema_path: "pattern".to_string(),
544 invalid_value: Some(JsonValue::String(s.to_string())),
545 });
546 }
547 }
548
549 if instance_path.contains("duration") {
551 if !DURATION_PATTERN.is_match(s) {
552 errors.push(ValidationError {
553 code: "INVALID_DURATION".to_string(),
554 message: "Invalid duration format, expected ISO 8601 duration (PT#M#S)".to_string(),
555 instance_path: instance_path.to_string(),
556 schema_path: "pattern".to_string(),
557 invalid_value: Some(JsonValue::String(s.to_string())),
558 });
559 }
560 }
561
562 if instance_path.contains("language_code") {
564 if !LANGUAGE_CODE_PATTERN.is_match(s) {
565 warnings.push(ValidationWarning {
566 code: "SUSPICIOUS_LANGUAGE_CODE".to_string(),
567 message: "Language code does not match ISO 639 format".to_string(),
568 instance_path: instance_path.to_string(),
569 suggestion: Some("Use ISO 639-1 language codes (e.g., 'en', 'fr', 'en-US')".to_string()),
570 });
571 }
572 }
573
574 if instance_path.contains("territory_code") {
576 if !TERRITORY_CODE_PATTERN.is_match(s) {
577 warnings.push(ValidationWarning {
578 code: "SUSPICIOUS_TERRITORY_CODE".to_string(),
579 message: "Territory code should be ISO 3166 country code or 'Worldwide'".to_string(),
580 instance_path: instance_path.to_string(),
581 suggestion: Some("Use ISO 3166-1 alpha-2 country codes (e.g., 'US', 'GB') or 'Worldwide'".to_string()),
582 });
583 }
584 }
585 }
586
587 fn validate_conditionals(
588 &self,
589 data: &JsonValue,
590 schema: &JsonSchema,
591 instance_path: &str,
592 schema_path: &str,
593 depth: usize,
594 errors: &mut Vec<ValidationError>,
595 warnings: &mut Vec<ValidationWarning>,
596 properties_validated: &mut usize,
597 ) {
598 if let Some(ref if_schema) = schema.if_schema {
600 let condition_result = self.test_condition(data, if_schema);
601
602 if condition_result {
603 if let Some(ref then_schema) = schema.then_schema {
604 self.validate_recursive(
605 data, then_schema, instance_path,
606 &format!("{}/then", schema_path),
607 depth + 1, errors, warnings, properties_validated
608 );
609 }
610 } else if let Some(ref else_schema) = schema.else_schema {
611 self.validate_recursive(
612 data, else_schema, instance_path,
613 &format!("{}/else", schema_path),
614 depth + 1, errors, warnings, properties_validated
615 );
616 }
617 }
618
619 if let Some(ref all_of) = schema.all_of {
621 for (index, sub_schema) in all_of.iter().enumerate() {
622 self.validate_recursive(
623 data, sub_schema, instance_path,
624 &format!("{}/allOf/{}", schema_path, index),
625 depth + 1, errors, warnings, properties_validated
626 );
627 }
628 }
629
630 if let Some(ref any_of) = schema.any_of {
631 let mut any_valid = false;
632 for sub_schema in any_of {
633 let mut temp_errors = Vec::new();
634 let mut temp_warnings = Vec::new();
635 let mut temp_count = 0;
636
637 self.validate_recursive(
638 data, sub_schema, instance_path, schema_path,
639 depth + 1, &mut temp_errors, &mut temp_warnings, &mut temp_count
640 );
641
642 if temp_errors.is_empty() {
643 any_valid = true;
644 break;
645 }
646 }
647
648 if !any_valid {
649 errors.push(ValidationError {
650 code: "ANY_OF_FAILED".to_string(),
651 message: "Data does not match any of the specified schemas".to_string(),
652 instance_path: instance_path.to_string(),
653 schema_path: format!("{}/anyOf", schema_path),
654 invalid_value: Some(data.clone()),
655 });
656 }
657 }
658
659 if let Some(ref one_of) = schema.one_of {
660 let mut valid_count = 0;
661 for sub_schema in one_of {
662 let mut temp_errors = Vec::new();
663 let mut temp_warnings = Vec::new();
664 let mut temp_count = 0;
665
666 self.validate_recursive(
667 data, sub_schema, instance_path, schema_path,
668 depth + 1, &mut temp_errors, &mut temp_warnings, &mut temp_count
669 );
670
671 if temp_errors.is_empty() {
672 valid_count += 1;
673 }
674 }
675
676 if valid_count != 1 {
677 errors.push(ValidationError {
678 code: "ONE_OF_FAILED".to_string(),
679 message: format!("Data matches {} schemas, expected exactly 1", valid_count),
680 instance_path: instance_path.to_string(),
681 schema_path: format!("{}/oneOf", schema_path),
682 invalid_value: Some(data.clone()),
683 });
684 }
685 }
686 }
687
688 fn test_condition(&self, _data: &JsonValue, _if_schema: &JsonSchema) -> bool {
689 true
691 }
692
693 fn resolve_reference(&self, reference: &str) -> Option<JsonSchema> {
694 if reference.starts_with("#/$defs/") {
696 let def_name = &reference[8..];
697 if let Some(ref definitions) = self.schema.definitions {
698 return definitions.get(def_name).cloned();
699 }
700 }
701 None
702 }
703
704 fn calculate_schema_complexity(&self, schema: &JsonSchema) -> f64 {
705 let mut complexity = 0.0;
706
707 if let Some(ref properties) = schema.properties {
708 complexity += properties.len() as f64;
709 }
710 if let Some(ref definitions) = schema.definitions {
711 complexity += definitions.len() as f64 * 2.0;
712 }
713 if schema.all_of.is_some() { complexity += 3.0; }
714 if schema.any_of.is_some() { complexity += 4.0; }
715 if schema.one_of.is_some() { complexity += 5.0; }
716 if schema.if_schema.is_some() { complexity += 6.0; }
717
718 complexity
719 }
720
721 fn calculate_data_complexity(&self, data: &JsonValue) -> f64 {
722 match data {
723 JsonValue::Object(obj) => {
724 obj.len() as f64 + obj.values().map(|v| self.calculate_data_complexity(v) * 0.5).sum::<f64>()
725 },
726 JsonValue::Array(arr) => {
727 arr.len() as f64 + arr.iter().map(|v| self.calculate_data_complexity(v) * 0.3).sum::<f64>()
728 },
729 _ => 1.0,
730 }
731 }
732}
733
734impl ValidationResult {
736 pub fn is_valid(&self) -> bool {
738 self.valid
739 }
740
741 pub fn error_messages(&self) -> Vec<String> {
743 self.errors.iter()
744 .map(|e| format!("{}: {} (at {})", e.code, e.message, e.instance_path))
745 .collect()
746 }
747
748 pub fn warning_messages(&self) -> Vec<String> {
750 self.warnings.iter()
751 .map(|w| format!("{}: {} (at {})", w.code, w.message, w.instance_path))
752 .collect()
753 }
754
755 pub fn summary(&self) -> String {
757 format!(
758 "Validation {}: {} errors, {} warnings, {} properties validated in {:?}",
759 if self.valid { "PASSED" } else { "FAILED" },
760 self.errors.len(),
761 self.warnings.len(),
762 self.metadata.properties_validated,
763 self.metadata.validation_time
764 )
765 }
766}