1use super::*;
4use once_cell::sync::Lazy;
5use regex::Regex;
6use serde_json::Value as JsonValue;
7
/// Validates JSON values against a `JsonSchema`.
///
/// Bundles the root schema with the [`ValidationConfig`] that tunes how the
/// validation walk behaves.
#[derive(Debug, Clone)]
pub struct SchemaValidator {
    /// Root schema. Its `definitions` are also the lookup table used when a
    /// `$ref` is resolved.
    schema: JsonSchema,
    /// Options controlling strictness, format checks, depth limit and
    /// additional-property handling.
    config: ValidationConfig,
}
16
/// Tunable options for a [`SchemaValidator`].
#[derive(Debug, Clone)]
pub struct ValidationConfig {
    /// When `true`, any warning makes the overall result invalid.
    pub strict_mode: bool,
    /// When `false`, the schema `format` keyword is not checked at all.
    pub validate_formats: bool,
    /// Maximum recursion depth; going past it records `MAX_DEPTH_EXCEEDED`
    /// and stops descending along that branch.
    pub max_depth: usize,
    /// When `true`, undeclared properties are never reported, even if the
    /// schema itself forbids additional properties.
    pub allow_additional_properties: bool,
}
29
/// Outcome of validating one JSON value.
#[derive(Debug, Clone)]
pub struct ValidationResult {
    /// `true` when there are no errors and, in strict mode, no warnings
    /// either.
    pub valid: bool,
    /// Every violation found during the walk.
    pub errors: Vec<ValidationError>,
    /// Non-fatal findings (they only affect `valid` in strict mode).
    pub warnings: Vec<ValidationWarning>,
    /// Counters and timings collected while validating.
    pub metadata: ValidationMetadata,
}
42
/// A single schema violation.
#[derive(Debug, Clone)]
pub struct ValidationError {
    /// Machine-readable identifier, e.g. `TYPE_MISMATCH`.
    pub code: String,
    /// Human-readable description of the violation.
    pub message: String,
    /// Location of the offending value inside the validated data, with
    /// segments joined by `/`; empty for the root value.
    pub instance_path: String,
    /// Location of the schema keyword that produced this error.
    pub schema_path: String,
    /// Copy of the offending value; `None` when no single value applies
    /// (for example a missing required property).
    pub invalid_value: Option<JsonValue>,
}
57
/// A non-fatal finding produced during validation.
#[derive(Debug, Clone)]
pub struct ValidationWarning {
    /// Machine-readable identifier, e.g. `UNKNOWN_FORMAT`.
    pub code: String,
    /// Human-readable description of the finding.
    pub message: String,
    /// Location of the value that triggered the warning, with segments
    /// joined by `/`.
    pub instance_path: String,
    /// Optional hint on how to resolve the warning.
    pub suggestion: Option<String>,
}
70
/// Statistics gathered during one validation run.
#[derive(Debug, Clone)]
pub struct ValidationMetadata {
    /// Number of value/schema visits made by the main walk. Visits made
    /// while trialling `anyOf` / `oneOf` branches use a throwaway counter
    /// and are not included.
    pub properties_validated: usize,
    /// Wall-clock time of the validation walk; the complexity scores are
    /// computed after the clock is stopped.
    pub validation_time: std::time::Duration,
    /// Heuristic score computed from the root schema (property and
    /// definition counts plus fixed weights for combinators).
    pub schema_complexity: f64,
    /// Heuristic score computed from the size and nesting of the data.
    pub data_complexity: f64,
}
83
/// Matches a compact ISRC: two uppercase letters, three uppercase letters or
/// digits, then seven digits, with no separators. Compiled on first use.
static ISRC_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Z]{2}[A-Z0-9]{3}\d{7}$").unwrap());
86
/// Matches a UPC written as exactly twelve digits. Compiled on first use.
static UPC_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{12}$").unwrap());
88
/// Matches a language code: two lowercase letters, optionally followed by a
/// hyphen and two uppercase letters (e.g. `en`, `en-US`). Compiled on first
/// use.
static LANGUAGE_CODE_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^[a-z]{2}(-[A-Z]{2})?$").unwrap());
91
92static TERRITORY_CODE_PATTERN: Lazy<Regex> =
93 Lazy::new(|| Regex::new(r"^[A-Z]{2}|Worldwide$").unwrap());
94
/// Matches a time-only ISO 8601 style duration: `PT` followed by optional
/// hours (`#H`), minutes (`#M`) and seconds (`#S`, fraction allowed), in that
/// order. Compiled on first use.
///
/// NOTE(review): every component is optional, so a bare `PT` also matches.
static DURATION_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^PT(?:\d+H)?(?:\d+M)?(?:\d+(?:\.\d+)?S)?$").unwrap());
97
98impl Default for ValidationConfig {
99 fn default() -> Self {
100 Self {
101 strict_mode: false,
102 validate_formats: true,
103 max_depth: 100,
104 allow_additional_properties: false,
105 }
106 }
107}
108
109impl SchemaValidator {
110 pub fn new(schema: JsonSchema) -> Self {
112 Self {
113 schema,
114 config: ValidationConfig::default(),
115 }
116 }
117
118 pub fn with_config(schema: JsonSchema, config: ValidationConfig) -> Self {
120 Self { schema, config }
121 }
122
123 pub fn validate(&self, data: &JsonValue) -> ValidationResult {
125 let start_time = std::time::Instant::now();
126 let mut errors = Vec::new();
127 let mut warnings = Vec::new();
128 let mut properties_validated = 0;
129
130 self.validate_recursive(
131 data,
132 &self.schema,
133 "",
134 "",
135 0,
136 &mut errors,
137 &mut warnings,
138 &mut properties_validated,
139 );
140
141 let validation_time = start_time.elapsed();
142 let valid = errors.is_empty() && (!self.config.strict_mode || warnings.is_empty());
143
144 ValidationResult {
145 valid,
146 errors,
147 warnings,
148 metadata: ValidationMetadata {
149 properties_validated,
150 validation_time,
151 schema_complexity: self.calculate_schema_complexity(&self.schema),
152 data_complexity: self.calculate_data_complexity(data),
153 },
154 }
155 }
156
157 pub fn validate_with_paths(&self, data: &JsonValue) -> ValidationResult {
159 self.validate(data)
161 }
162
163 fn validate_recursive(
166 &self,
167 data: &JsonValue,
168 schema: &JsonSchema,
169 instance_path: &str,
170 schema_path: &str,
171 depth: usize,
172 errors: &mut Vec<ValidationError>,
173 warnings: &mut Vec<ValidationWarning>,
174 properties_validated: &mut usize,
175 ) {
176 if depth > self.config.max_depth {
177 errors.push(ValidationError {
178 code: "MAX_DEPTH_EXCEEDED".to_string(),
179 message: format!(
180 "Maximum validation depth {} exceeded",
181 self.config.max_depth
182 ),
183 instance_path: instance_path.to_string(),
184 schema_path: schema_path.to_string(),
185 invalid_value: Some(data.clone()),
186 });
187 return;
188 }
189
190 *properties_validated += 1;
191
192 if let Some(ref reference) = schema.reference {
194 if let Some(resolved_schema) = self.resolve_reference(reference) {
195 return self.validate_recursive(
196 data,
197 &resolved_schema,
198 instance_path,
199 &format!("{}/{}", schema_path, reference),
200 depth + 1,
201 errors,
202 warnings,
203 properties_validated,
204 );
205 } else {
206 errors.push(ValidationError {
207 code: "UNRESOLVED_REFERENCE".to_string(),
208 message: format!("Cannot resolve schema reference: {}", reference),
209 instance_path: instance_path.to_string(),
210 schema_path: schema_path.to_string(),
211 invalid_value: None,
212 });
213 return;
214 }
215 }
216
217 if let Some(ref expected_type) = schema.schema_type {
219 self.validate_type(data, expected_type, instance_path, schema_path, errors);
220 }
221
222 match data {
223 JsonValue::Object(obj) => {
224 self.validate_object(
225 obj,
226 schema,
227 instance_path,
228 schema_path,
229 depth,
230 errors,
231 warnings,
232 properties_validated,
233 );
234 }
235 JsonValue::Array(arr) => {
236 self.validate_array(
237 arr,
238 schema,
239 instance_path,
240 schema_path,
241 depth,
242 errors,
243 warnings,
244 properties_validated,
245 );
246 }
247 JsonValue::String(s) => {
248 self.validate_string(s, schema, instance_path, schema_path, errors, warnings);
249 }
250 JsonValue::Number(_) => {
251 self.validate_number(data, schema, instance_path, schema_path, errors, warnings);
252 }
253 _ => {}
254 }
255
256 if let Some(ref enum_values) = schema.enum_values {
258 if !enum_values.contains(data) {
259 errors.push(ValidationError {
260 code: "ENUM_VIOLATION".to_string(),
261 message: format!(
262 "Value must be one of: {}",
263 enum_values
264 .iter()
265 .map(|v| v.to_string())
266 .collect::<Vec<_>>()
267 .join(", ")
268 ),
269 instance_path: instance_path.to_string(),
270 schema_path: format!("{}/enum", schema_path),
271 invalid_value: Some(data.clone()),
272 });
273 }
274 }
275
276 self.validate_conditionals(
278 data,
279 schema,
280 instance_path,
281 schema_path,
282 depth,
283 errors,
284 warnings,
285 properties_validated,
286 );
287 }
288
289 fn validate_object(
290 &self,
291 obj: &serde_json::Map<String, JsonValue>,
292 schema: &JsonSchema,
293 instance_path: &str,
294 schema_path: &str,
295 depth: usize,
296 errors: &mut Vec<ValidationError>,
297 warnings: &mut Vec<ValidationWarning>,
298 properties_validated: &mut usize,
299 ) {
300 if let Some(ref required) = schema.required {
302 for required_prop in required {
303 if !obj.contains_key(required_prop) {
304 errors.push(ValidationError {
305 code: "REQUIRED_PROPERTY_MISSING".to_string(),
306 message: format!("Required property '{}' is missing", required_prop),
307 instance_path: instance_path.to_string(),
308 schema_path: format!("{}/required", schema_path),
309 invalid_value: None,
310 });
311 }
312 }
313 }
314
315 if let Some(ref properties) = schema.properties {
317 for (prop_name, prop_value) in obj {
318 let new_instance_path = if instance_path.is_empty() {
319 prop_name.clone()
320 } else {
321 format!("{}/{}", instance_path, prop_name)
322 };
323
324 if let Some(prop_schema) = properties.get(prop_name) {
325 self.validate_recursive(
326 prop_value,
327 prop_schema,
328 &new_instance_path,
329 &format!("{}/properties/{}", schema_path, prop_name),
330 depth + 1,
331 errors,
332 warnings,
333 properties_validated,
334 );
335 } else if !self.config.allow_additional_properties
336 && schema.additional_properties.unwrap_or(true) == false
337 {
338 errors.push(ValidationError {
339 code: "ADDITIONAL_PROPERTY_NOT_ALLOWED".to_string(),
340 message: format!("Additional property '{}' is not allowed", prop_name),
341 instance_path: new_instance_path,
342 schema_path: format!("{}/additionalProperties", schema_path),
343 invalid_value: Some(prop_value.clone()),
344 });
345 }
346 }
347 }
348 }
349
350 fn validate_array(
351 &self,
352 arr: &Vec<JsonValue>,
353 schema: &JsonSchema,
354 instance_path: &str,
355 schema_path: &str,
356 depth: usize,
357 errors: &mut Vec<ValidationError>,
358 warnings: &mut Vec<ValidationWarning>,
359 properties_validated: &mut usize,
360 ) {
361 if let Some(min_length) = schema.min_length {
363 if arr.len() < min_length {
364 errors.push(ValidationError {
365 code: "ARRAY_TOO_SHORT".to_string(),
366 message: format!(
367 "Array must have at least {} items, has {}",
368 min_length,
369 arr.len()
370 ),
371 instance_path: instance_path.to_string(),
372 schema_path: format!("{}/minLength", schema_path),
373 invalid_value: Some(JsonValue::Array(arr.clone())),
374 });
375 }
376 }
377
378 if let Some(max_length) = schema.max_length {
379 if arr.len() > max_length {
380 errors.push(ValidationError {
381 code: "ARRAY_TOO_LONG".to_string(),
382 message: format!(
383 "Array must have at most {} items, has {}",
384 max_length,
385 arr.len()
386 ),
387 instance_path: instance_path.to_string(),
388 schema_path: format!("{}/maxLength", schema_path),
389 invalid_value: Some(JsonValue::Array(arr.clone())),
390 });
391 }
392 }
393
394 if let Some(ref items_schema) = schema.items {
396 for (index, item) in arr.iter().enumerate() {
397 let new_instance_path = format!("{}/{}", instance_path, index);
398 self.validate_recursive(
399 item,
400 items_schema,
401 &new_instance_path,
402 &format!("{}/items", schema_path),
403 depth + 1,
404 errors,
405 warnings,
406 properties_validated,
407 );
408 }
409 }
410 }
411
412 fn validate_string(
413 &self,
414 s: &str,
415 schema: &JsonSchema,
416 instance_path: &str,
417 schema_path: &str,
418 errors: &mut Vec<ValidationError>,
419 warnings: &mut Vec<ValidationWarning>,
420 ) {
421 if let Some(min_length) = schema.min_length {
423 if s.len() < min_length {
424 errors.push(ValidationError {
425 code: "STRING_TOO_SHORT".to_string(),
426 message: format!(
427 "String must be at least {} characters, is {}",
428 min_length,
429 s.len()
430 ),
431 instance_path: instance_path.to_string(),
432 schema_path: format!("{}/minLength", schema_path),
433 invalid_value: Some(JsonValue::String(s.to_string())),
434 });
435 }
436 }
437
438 if let Some(max_length) = schema.max_length {
439 if s.len() > max_length {
440 errors.push(ValidationError {
441 code: "STRING_TOO_LONG".to_string(),
442 message: format!(
443 "String must be at most {} characters, is {}",
444 max_length,
445 s.len()
446 ),
447 instance_path: instance_path.to_string(),
448 schema_path: format!("{}/maxLength", schema_path),
449 invalid_value: Some(JsonValue::String(s.to_string())),
450 });
451 }
452 }
453
454 if let Some(ref pattern) = schema.pattern {
456 if let Ok(regex) = Regex::new(pattern) {
457 if !regex.is_match(s) {
458 errors.push(ValidationError {
459 code: "PATTERN_MISMATCH".to_string(),
460 message: format!("String does not match required pattern: {}", pattern),
461 instance_path: instance_path.to_string(),
462 schema_path: format!("{}/pattern", schema_path),
463 invalid_value: Some(JsonValue::String(s.to_string())),
464 });
465 }
466 }
467 }
468
469 if let Some(ref format) = schema.format {
471 self.validate_format(s, format, instance_path, schema_path, errors, warnings);
472 }
473
474 self.validate_ddex_codes(s, instance_path, errors, warnings);
476 }
477
/// Hook for numeric keyword validation.
///
/// The body is empty and every parameter is unused (hence the leading
/// underscores), so any JSON number passes this step without producing
/// errors or warnings.
fn validate_number(
    &self,
    _num: &JsonValue,
    _schema: &JsonSchema,
    _instance_path: &str,
    _schema_path: &str,
    _errors: &mut Vec<ValidationError>,
    _warnings: &mut Vec<ValidationWarning>,
) {
}
490
491 fn validate_type(
492 &self,
493 data: &JsonValue,
494 expected_type: &str,
495 instance_path: &str,
496 schema_path: &str,
497 errors: &mut Vec<ValidationError>,
498 ) {
499 let actual_type = match data {
500 JsonValue::Null => "null",
501 JsonValue::Bool(_) => "boolean",
502 JsonValue::Number(_) => "number",
503 JsonValue::String(_) => "string",
504 JsonValue::Array(_) => "array",
505 JsonValue::Object(_) => "object",
506 };
507
508 if actual_type != expected_type {
509 errors.push(ValidationError {
510 code: "TYPE_MISMATCH".to_string(),
511 message: format!("Expected type '{}', got '{}'", expected_type, actual_type),
512 instance_path: instance_path.to_string(),
513 schema_path: format!("{}/type", schema_path),
514 invalid_value: Some(data.clone()),
515 });
516 }
517 }
518
519 fn validate_format(
520 &self,
521 s: &str,
522 format: &str,
523 instance_path: &str,
524 schema_path: &str,
525 errors: &mut Vec<ValidationError>,
526 warnings: &mut Vec<ValidationWarning>,
527 ) {
528 if !self.config.validate_formats {
529 return;
530 }
531
532 match format {
533 "date" => {
534 if chrono::NaiveDate::parse_from_str(s, "%Y-%m-%d").is_err() {
535 errors.push(ValidationError {
536 code: "INVALID_DATE_FORMAT".to_string(),
537 message: "Invalid date format, expected YYYY-MM-DD".to_string(),
538 instance_path: instance_path.to_string(),
539 schema_path: format!("{}/format", schema_path),
540 invalid_value: Some(JsonValue::String(s.to_string())),
541 });
542 }
543 }
544 "date-time" => {
545 if chrono::DateTime::parse_from_rfc3339(s).is_err() {
546 errors.push(ValidationError {
547 code: "INVALID_DATETIME_FORMAT".to_string(),
548 message: "Invalid date-time format, expected ISO 8601/RFC 3339".to_string(),
549 instance_path: instance_path.to_string(),
550 schema_path: format!("{}/format", schema_path),
551 invalid_value: Some(JsonValue::String(s.to_string())),
552 });
553 }
554 }
555 "uri" => {
556 if url::Url::parse(s).is_err() {
557 errors.push(ValidationError {
558 code: "INVALID_URI_FORMAT".to_string(),
559 message: "Invalid URI format".to_string(),
560 instance_path: instance_path.to_string(),
561 schema_path: format!("{}/format", schema_path),
562 invalid_value: Some(JsonValue::String(s.to_string())),
563 });
564 }
565 }
566 _ => {
567 warnings.push(ValidationWarning {
568 code: "UNKNOWN_FORMAT".to_string(),
569 message: format!("Unknown format specifier: {}", format),
570 instance_path: instance_path.to_string(),
571 suggestion: Some("Check schema for supported format types".to_string()),
572 });
573 }
574 }
575 }
576
577 fn validate_ddex_codes(
578 &self,
579 s: &str,
580 instance_path: &str,
581 errors: &mut Vec<ValidationError>,
582 warnings: &mut Vec<ValidationWarning>,
583 ) {
584 if instance_path.contains("isrc") {
586 if !ISRC_PATTERN.is_match(s) {
587 errors.push(ValidationError {
588 code: "INVALID_ISRC".to_string(),
589 message: "Invalid ISRC format, expected format: CC-XXX-YY-NNNNN".to_string(),
590 instance_path: instance_path.to_string(),
591 schema_path: "pattern".to_string(),
592 invalid_value: Some(JsonValue::String(s.to_string())),
593 });
594 }
595 }
596
597 if instance_path.contains("upc") {
599 if !UPC_PATTERN.is_match(s) {
600 errors.push(ValidationError {
601 code: "INVALID_UPC".to_string(),
602 message: "Invalid UPC format, expected 12 digits".to_string(),
603 instance_path: instance_path.to_string(),
604 schema_path: "pattern".to_string(),
605 invalid_value: Some(JsonValue::String(s.to_string())),
606 });
607 }
608 }
609
610 if instance_path.contains("duration") {
612 if !DURATION_PATTERN.is_match(s) {
613 errors.push(ValidationError {
614 code: "INVALID_DURATION".to_string(),
615 message: "Invalid duration format, expected ISO 8601 duration (PT#M#S)"
616 .to_string(),
617 instance_path: instance_path.to_string(),
618 schema_path: "pattern".to_string(),
619 invalid_value: Some(JsonValue::String(s.to_string())),
620 });
621 }
622 }
623
624 if instance_path.contains("language_code") {
626 if !LANGUAGE_CODE_PATTERN.is_match(s) {
627 warnings.push(ValidationWarning {
628 code: "SUSPICIOUS_LANGUAGE_CODE".to_string(),
629 message: "Language code does not match ISO 639 format".to_string(),
630 instance_path: instance_path.to_string(),
631 suggestion: Some(
632 "Use ISO 639-1 language codes (e.g., 'en', 'fr', 'en-US')".to_string(),
633 ),
634 });
635 }
636 }
637
638 if instance_path.contains("territory_code") {
640 if !TERRITORY_CODE_PATTERN.is_match(s) {
641 warnings.push(ValidationWarning {
642 code: "SUSPICIOUS_TERRITORY_CODE".to_string(),
643 message: "Territory code should be ISO 3166 country code or 'Worldwide'"
644 .to_string(),
645 instance_path: instance_path.to_string(),
646 suggestion: Some(
647 "Use ISO 3166-1 alpha-2 country codes (e.g., 'US', 'GB') or 'Worldwide'"
648 .to_string(),
649 ),
650 });
651 }
652 }
653 }
654
655 fn validate_conditionals(
656 &self,
657 data: &JsonValue,
658 schema: &JsonSchema,
659 instance_path: &str,
660 schema_path: &str,
661 depth: usize,
662 errors: &mut Vec<ValidationError>,
663 warnings: &mut Vec<ValidationWarning>,
664 properties_validated: &mut usize,
665 ) {
666 if let Some(ref if_schema) = schema.if_schema {
668 let condition_result = self.test_condition(data, if_schema);
669
670 if condition_result {
671 if let Some(ref then_schema) = schema.then_schema {
672 self.validate_recursive(
673 data,
674 then_schema,
675 instance_path,
676 &format!("{}/then", schema_path),
677 depth + 1,
678 errors,
679 warnings,
680 properties_validated,
681 );
682 }
683 } else if let Some(ref else_schema) = schema.else_schema {
684 self.validate_recursive(
685 data,
686 else_schema,
687 instance_path,
688 &format!("{}/else", schema_path),
689 depth + 1,
690 errors,
691 warnings,
692 properties_validated,
693 );
694 }
695 }
696
697 if let Some(ref all_of) = schema.all_of {
699 for (index, sub_schema) in all_of.iter().enumerate() {
700 self.validate_recursive(
701 data,
702 sub_schema,
703 instance_path,
704 &format!("{}/allOf/{}", schema_path, index),
705 depth + 1,
706 errors,
707 warnings,
708 properties_validated,
709 );
710 }
711 }
712
713 if let Some(ref any_of) = schema.any_of {
714 let mut any_valid = false;
715 for sub_schema in any_of {
716 let mut temp_errors = Vec::new();
717 let mut temp_warnings = Vec::new();
718 let mut temp_count = 0;
719
720 self.validate_recursive(
721 data,
722 sub_schema,
723 instance_path,
724 schema_path,
725 depth + 1,
726 &mut temp_errors,
727 &mut temp_warnings,
728 &mut temp_count,
729 );
730
731 if temp_errors.is_empty() {
732 any_valid = true;
733 break;
734 }
735 }
736
737 if !any_valid {
738 errors.push(ValidationError {
739 code: "ANY_OF_FAILED".to_string(),
740 message: "Data does not match any of the specified schemas".to_string(),
741 instance_path: instance_path.to_string(),
742 schema_path: format!("{}/anyOf", schema_path),
743 invalid_value: Some(data.clone()),
744 });
745 }
746 }
747
748 if let Some(ref one_of) = schema.one_of {
749 let mut valid_count = 0;
750 for sub_schema in one_of {
751 let mut temp_errors = Vec::new();
752 let mut temp_warnings = Vec::new();
753 let mut temp_count = 0;
754
755 self.validate_recursive(
756 data,
757 sub_schema,
758 instance_path,
759 schema_path,
760 depth + 1,
761 &mut temp_errors,
762 &mut temp_warnings,
763 &mut temp_count,
764 );
765
766 if temp_errors.is_empty() {
767 valid_count += 1;
768 }
769 }
770
771 if valid_count != 1 {
772 errors.push(ValidationError {
773 code: "ONE_OF_FAILED".to_string(),
774 message: format!("Data matches {} schemas, expected exactly 1", valid_count),
775 instance_path: instance_path.to_string(),
776 schema_path: format!("{}/oneOf", schema_path),
777 invalid_value: Some(data.clone()),
778 });
779 }
780 }
781 }
782
783 fn test_condition(&self, _data: &JsonValue, _if_schema: &JsonSchema) -> bool {
784 true
786 }
787
788 fn resolve_reference(&self, reference: &str) -> Option<JsonSchema> {
789 if reference.starts_with("#/$defs/") {
791 let def_name = &reference[8..];
792 if let Some(ref definitions) = self.schema.definitions {
793 return definitions.get(def_name).cloned();
794 }
795 }
796 None
797 }
798
799 fn calculate_schema_complexity(&self, schema: &JsonSchema) -> f64 {
800 let mut complexity = 0.0;
801
802 if let Some(ref properties) = schema.properties {
803 complexity += properties.len() as f64;
804 }
805 if let Some(ref definitions) = schema.definitions {
806 complexity += definitions.len() as f64 * 2.0;
807 }
808 if schema.all_of.is_some() {
809 complexity += 3.0;
810 }
811 if schema.any_of.is_some() {
812 complexity += 4.0;
813 }
814 if schema.one_of.is_some() {
815 complexity += 5.0;
816 }
817 if schema.if_schema.is_some() {
818 complexity += 6.0;
819 }
820
821 complexity
822 }
823
824 fn calculate_data_complexity(&self, data: &JsonValue) -> f64 {
825 match data {
826 JsonValue::Object(obj) => {
827 obj.len() as f64
828 + obj
829 .values()
830 .map(|v| self.calculate_data_complexity(v) * 0.5)
831 .sum::<f64>()
832 }
833 JsonValue::Array(arr) => {
834 arr.len() as f64
835 + arr
836 .iter()
837 .map(|v| self.calculate_data_complexity(v) * 0.3)
838 .sum::<f64>()
839 }
840 _ => 1.0,
841 }
842 }
843}
844
845impl ValidationResult {
847 pub fn is_valid(&self) -> bool {
849 self.valid
850 }
851
852 pub fn error_messages(&self) -> Vec<String> {
854 self.errors
855 .iter()
856 .map(|e| format!("{}: {} (at {})", e.code, e.message, e.instance_path))
857 .collect()
858 }
859
860 pub fn warning_messages(&self) -> Vec<String> {
862 self.warnings
863 .iter()
864 .map(|w| format!("{}: {} (at {})", w.code, w.message, w.instance_path))
865 .collect()
866 }
867
868 pub fn summary(&self) -> String {
870 format!(
871 "Validation {}: {} errors, {} warnings, {} properties validated in {:?}",
872 if self.valid { "PASSED" } else { "FAILED" },
873 self.errors.len(),
874 self.warnings.len(),
875 self.metadata.properties_validated,
876 self.metadata.validation_time
877 )
878 }
879}