Skip to main content

data_protocol_validator/
validator.rs

1use std::collections::{HashMap, HashSet};
2
3use serde_json::Value;
4
5use crate::errors::create_error;
6use crate::format::validate_format;
7use crate::suggestions::{
8    suggest_array_fix, suggest_missing_required, suggest_number_fix, suggest_remove_additional,
9    suggest_string_fix, suggest_type_fix,
10};
11use crate::types::{
12    Suggestion, ValidationError, ValidationOptions, ValidationResult, ValidationStats,
13};
14
15// ---------------------------------------------------------------------------
16// Walker context
17// ---------------------------------------------------------------------------
18
19struct WalkerContext<'a> {
20    errors: Vec<ValidationError>,
21    root_schema: &'a Value,
22    fields_checked: u64,
23    fields_valid: u64,
24    fields_invalid: u64,
25    options: ValidationOptions,
26}
27
28// ---------------------------------------------------------------------------
29// Helpers
30// ---------------------------------------------------------------------------
31
32/// Return the JSON type name for a value, compatible with the TypeScript
33/// implementation's `getJsonType`.
34fn get_json_type(value: &Value) -> &'static str {
35    match value {
36        Value::Null => "null",
37        Value::Bool(_) => "boolean",
38        Value::Number(_) => "number",
39        Value::String(_) => "string",
40        Value::Array(_) => "array",
41        Value::Object(_) => "object",
42    }
43}
44
45/// Check whether a value matches a single schema type keyword.
46fn matches_type(type_name: &str, data: &Value) -> bool {
47    match type_name {
48        "string" => data.is_string(),
49        "number" => data.is_number(),
50        "integer" => data.is_number() && data.as_f64().map(|n| n.fract() == 0.0).unwrap_or(false),
51        "boolean" => data.is_boolean(),
52        "null" => data.is_null(),
53        "object" => data.is_object(),
54        "array" => data.is_array(),
55        _ => false,
56    }
57}
58
/// Decide whether `current_path` must be visited in partial validation mode.
///
/// A path is relevant when it equals one of `paths`, lies below one of them,
/// or is an ancestor of one of them (so the walk can reach the target).
/// Prefix relations only count on `/` segment boundaries.
fn is_in_partial_paths(current_path: &str, paths: &[String]) -> bool {
    // True when `longer` is `shorter` followed by a `/`-separated remainder.
    fn descends_from(longer: &str, shorter: &str) -> bool {
        longer
            .strip_prefix(shorter)
            .map_or(false, |rest| rest.starts_with('/'))
    }

    for candidate in paths {
        if current_path == candidate.as_str()
            || descends_from(current_path, candidate)
            || descends_from(candidate, current_path)
        {
            return true;
        }
    }
    false
}
67
68/// Add an error to the context, optionally attaching a suggestion.
69fn add_error(ctx: &mut WalkerContext, mut error: ValidationError, suggestion: Option<Suggestion>) {
70    if let Some(s) = suggestion {
71        error.suggestion = Some(s);
72    }
73    ctx.errors.push(error);
74    ctx.fields_invalid += 1;
75}
76
77/// Resolve a `$ref` string against the root schema's `$defs`.
78fn resolve_ref<'a>(ref_str: &str, root_schema: &'a Value) -> Option<&'a Value> {
79    let prefix = "#/$defs/";
80    if !ref_str.starts_with(prefix) {
81        return None;
82    }
83    let def_name = &ref_str[prefix.len()..];
84    root_schema.get("$defs").and_then(|defs| defs.get(def_name))
85}
86
87// ---------------------------------------------------------------------------
88// Core recursive walker
89// ---------------------------------------------------------------------------
90
91fn walk_schema(schema: &Value, data: &Value, path: &str, ctx: &mut WalkerContext) {
92    // Partial mode: skip paths not in the paths list
93    if ctx.options.mode == "partial" && !ctx.options.paths.is_empty() && !is_in_partial_paths(path, &ctx.options.paths) {
94        return;
95    }
96
97    // Resolve $ref
98    if let Some(ref_str) = schema.get("$ref").and_then(|v| v.as_str()) {
99        match resolve_ref(ref_str, ctx.root_schema) {
100            Some(resolved) => {
101                walk_schema(resolved, data, path, ctx);
102                return;
103            }
104            None => {
105                let mut context = HashMap::new();
106                context.insert("ref", ref_str.to_string());
107                add_error(ctx, create_error("E011", path, context), None);
108                return;
109            }
110        }
111    }
112
113    // Check x-deprecated (warning only)
114    if let Some(dep) = schema.get("x-deprecated") {
115        if !dep.is_null() && dep.as_bool() != Some(false) {
116            let reason = if let Some(s) = dep.as_str() {
117                s.to_string()
118            } else {
119                "deprecated".to_string()
120            };
121            let mut context = HashMap::new();
122            context.insert("field", path.to_string());
123            context.insert("reason", reason);
124            let warning = create_error("W001", path, context);
125            ctx.errors.push(warning);
126        }
127    }
128
129    // Composition: allOf, anyOf, oneOf
130    if let Some(all_of) = schema.get("allOf").and_then(|v| v.as_array()) {
131        validate_all_of(all_of, data, path, ctx);
132    }
133    if let Some(any_of) = schema.get("anyOf").and_then(|v| v.as_array()) {
134        validate_any_of(any_of, data, path, ctx);
135    }
136    if let Some(one_of) = schema.get("oneOf").and_then(|v| v.as_array()) {
137        validate_one_of(one_of, data, path, ctx);
138    }
139
140    // Enum
141    if let Some(enum_values) = schema.get("enum").and_then(|v| v.as_array()) {
142        ctx.fields_checked += 1;
143        if !enum_values.iter().any(|e| e == data) {
144            let data_str = serde_json::to_string(data).unwrap_or_default();
145            let enum_str = enum_values
146                .iter()
147                .map(|e| serde_json::to_string(e).unwrap_or_default())
148                .collect::<Vec<_>>()
149                .join(", ");
150            let mut context = HashMap::new();
151            context.insert("value", data_str);
152            context.insert("constraint", format!("enum [{}]", enum_str));
153            add_error(ctx, create_error("E009", path, context), None);
154        } else {
155            ctx.fields_valid += 1;
156        }
157        return;
158    }
159
160    // Const
161    if schema.get("const").is_some() {
162        let const_val = &schema["const"];
163        ctx.fields_checked += 1;
164        if const_val != data {
165            let data_str = serde_json::to_string(data).unwrap_or_default();
166            let const_str = serde_json::to_string(const_val).unwrap_or_default();
167            let mut context = HashMap::new();
168            context.insert("value", data_str);
169            context.insert("constraint", format!("const {}", const_str));
170            add_error(ctx, create_error("E009", path, context), None);
171        } else {
172            ctx.fields_valid += 1;
173        }
174        return;
175    }
176
177    // Type check
178    if let Some(type_val) = schema.get("type") {
179        let types: Vec<String> = if let Some(arr) = type_val.as_array() {
180            arr.iter()
181                .filter_map(|v| v.as_str().map(|s| s.to_string()))
182                .collect()
183        } else if let Some(s) = type_val.as_str() {
184            vec![s.to_string()]
185        } else {
186            vec![]
187        };
188
189        let type_matches = types.iter().any(|t| matches_type(t, data));
190
191        if !type_matches {
192            ctx.fields_checked += 1;
193            let actual_type = get_json_type(data);
194            let suggestion = suggest_type_fix(data, &types[0]);
195            let mut context = HashMap::new();
196            context.insert("expected", types.join(" | "));
197            context.insert("actual", actual_type.to_string());
198            add_error(ctx, create_error("E001", path, context), suggestion);
199            return; // Don't check constraints if type doesn't match
200        }
201    }
202
203    // Type-specific constraint validation
204    let actual_type = get_json_type(data);
205
206    match actual_type {
207        "string" => {
208            if let Some(s) = data.as_str() {
209                validate_string(schema, s, path, ctx);
210            }
211        }
212        "number" => {
213            if let Some(n) = data.as_f64() {
214                validate_number(schema, n, path, ctx);
215            }
216        }
217        "array" => {
218            if let Some(arr) = data.as_array() {
219                validate_array(schema, arr, path, ctx);
220            }
221        }
222        "object" => {
223            if let Some(obj) = data.as_object() {
224                validate_object(schema, obj, path, ctx);
225            }
226        }
227        _ => {
228            // Primitive types with no constraints (boolean, null)
229            ctx.fields_checked += 1;
230            ctx.fields_valid += 1;
231        }
232    }
233}
234
235// ---------------------------------------------------------------------------
236// String validation
237// ---------------------------------------------------------------------------
238
239fn validate_string(schema: &Value, data: &str, path: &str, ctx: &mut WalkerContext) {
240    ctx.fields_checked += 1;
241    let mut valid = true;
242
243    if let Some(min_len) = schema.get("minLength").and_then(|v| v.as_u64()) {
244        if (data.len() as u64) < min_len {
245            let mut context = HashMap::new();
246            context.insert(
247                "constraint",
248                format!("minLength {}, got length {}", min_len, data.len()),
249            );
250            add_error(ctx, create_error("E004", path, context), None);
251            valid = false;
252        }
253    }
254
255    if let Some(max_len) = schema.get("maxLength").and_then(|v| v.as_u64()) {
256        if (data.len() as u64) > max_len {
257            let suggestion = suggest_string_fix(data, schema);
258            let mut context = HashMap::new();
259            context.insert(
260                "constraint",
261                format!("maxLength {}, got length {}", max_len, data.len()),
262            );
263            add_error(ctx, create_error("E004", path, context), suggestion);
264            valid = false;
265        }
266    }
267
268    if let Some(pattern) = schema.get("pattern").and_then(|v| v.as_str()) {
269        if let Ok(re) = regex::Regex::new(pattern) {
270            if !re.is_match(data) {
271                let mut context = HashMap::new();
272                context.insert(
273                    "constraint",
274                    format!("pattern \"{}\" does not match", pattern),
275                );
276                add_error(ctx, create_error("E004", path, context), None);
277                valid = false;
278            }
279        }
280    }
281
282    if let Some(format) = schema.get("format").and_then(|v| v.as_str()) {
283        if !validate_format(data, format) {
284            let mut context = HashMap::new();
285            context.insert("format", format.to_string());
286            context.insert("value", data.to_string());
287            add_error(ctx, create_error("E008", path, context), None);
288            valid = false;
289        }
290    }
291
292    if valid {
293        ctx.fields_valid += 1;
294    }
295}
296
297// ---------------------------------------------------------------------------
298// Number validation
299// ---------------------------------------------------------------------------
300
301fn validate_number(schema: &Value, data: f64, path: &str, ctx: &mut WalkerContext) {
302    ctx.fields_checked += 1;
303    let mut valid = true;
304
305    if let Some(min) = schema.get("minimum").and_then(|v| v.as_f64()) {
306        if data < min {
307            let suggestion = suggest_number_fix(data, schema);
308            let mut context = HashMap::new();
309            context.insert(
310                "constraint",
311                format!("minimum {}, got {}", format_num(min), format_num(data)),
312            );
313            add_error(ctx, create_error("E005", path, context), suggestion);
314            valid = false;
315        }
316    }
317
318    if let Some(max) = schema.get("maximum").and_then(|v| v.as_f64()) {
319        if data > max {
320            let suggestion = suggest_number_fix(data, schema);
321            let mut context = HashMap::new();
322            context.insert(
323                "constraint",
324                format!("maximum {}, got {}", format_num(max), format_num(data)),
325            );
326            add_error(ctx, create_error("E005", path, context), suggestion);
327            valid = false;
328        }
329    }
330
331    if let Some(exc_min) = schema.get("exclusiveMinimum").and_then(|v| v.as_f64()) {
332        if data <= exc_min {
333            let suggestion = suggest_number_fix(data, schema);
334            let mut context = HashMap::new();
335            context.insert(
336                "constraint",
337                format!(
338                    "exclusiveMinimum {}, got {}",
339                    format_num(exc_min),
340                    format_num(data)
341                ),
342            );
343            add_error(ctx, create_error("E005", path, context), suggestion);
344            valid = false;
345        }
346    }
347
348    if let Some(exc_max) = schema.get("exclusiveMaximum").and_then(|v| v.as_f64()) {
349        if data >= exc_max {
350            let suggestion = suggest_number_fix(data, schema);
351            let mut context = HashMap::new();
352            context.insert(
353                "constraint",
354                format!(
355                    "exclusiveMaximum {}, got {}",
356                    format_num(exc_max),
357                    format_num(data)
358                ),
359            );
360            add_error(ctx, create_error("E005", path, context), suggestion);
361            valid = false;
362        }
363    }
364
365    if let Some(multiple_of) = schema.get("multipleOf").and_then(|v| v.as_f64()) {
366        let remainder = (data % multiple_of).abs();
367        let tolerance = 1e-10;
368        if remainder > tolerance && (remainder - multiple_of).abs() > tolerance {
369            let mut context = HashMap::new();
370            context.insert(
371                "constraint",
372                format!(
373                    "multipleOf {}, got {}",
374                    format_num(multiple_of),
375                    format_num(data)
376                ),
377            );
378            add_error(ctx, create_error("E005", path, context), None);
379            valid = false;
380        }
381    }
382
383    if valid {
384        ctx.fields_valid += 1;
385    }
386}
387
/// Render a number for error messages the way JavaScript would print it.
///
/// Whole numbers with magnitude below `1e15` are printed through an integer
/// conversion; everything else uses the default `f64` formatting.
fn format_num(n: f64) -> String {
    let needs_float_format = n.fract() != 0.0 || n.abs() >= 1e15;
    if needs_float_format {
        n.to_string()
    } else {
        (n as i64).to_string()
    }
}
396
397// ---------------------------------------------------------------------------
398// Array validation
399// ---------------------------------------------------------------------------
400
401fn validate_array(schema: &Value, data: &[Value], path: &str, ctx: &mut WalkerContext) {
402    ctx.fields_checked += 1;
403    let mut valid = true;
404
405    if let Some(min_items) = schema.get("minItems").and_then(|v| v.as_u64()) {
406        if (data.len() as u64) < min_items {
407            let mut context = HashMap::new();
408            context.insert(
409                "constraint",
410                format!("minItems {}, got {}", min_items, data.len()),
411            );
412            add_error(ctx, create_error("E006", path, context), None);
413            valid = false;
414        }
415    }
416
417    if let Some(max_items) = schema.get("maxItems").and_then(|v| v.as_u64()) {
418        if (data.len() as u64) > max_items {
419            let suggestion = suggest_array_fix(schema);
420            let mut context = HashMap::new();
421            context.insert(
422                "constraint",
423                format!("maxItems {}, got {}", max_items, data.len()),
424            );
425            add_error(ctx, create_error("E006", path, context), suggestion);
426            valid = false;
427        }
428    }
429
430    if schema
431        .get("uniqueItems")
432        .and_then(|v| v.as_bool())
433        .unwrap_or(false)
434    {
435        let mut seen: Vec<&Value> = Vec::new();
436        for item in data {
437            if seen.contains(&item) {
438                let mut context = HashMap::new();
439                context.insert(
440                    "constraint",
441                    "uniqueItems: array contains duplicates".to_string(),
442                );
443                add_error(ctx, create_error("E006", path, context), None);
444                valid = false;
445                break;
446            }
447            seen.push(item);
448        }
449    }
450
451    if valid {
452        ctx.fields_valid += 1;
453    }
454
455    // Validate items
456    if let Some(items_schema) = schema.get("items") {
457        for (i, item) in data.iter().enumerate() {
458            let item_path = format!("{}/{}", path, i);
459            walk_schema(items_schema, item, &item_path, ctx);
460        }
461    }
462}
463
464// ---------------------------------------------------------------------------
465// Object validation
466// ---------------------------------------------------------------------------
467
468fn validate_object(
469    schema: &Value,
470    data: &serde_json::Map<String, Value>,
471    path: &str,
472    ctx: &mut WalkerContext,
473) {
474    ctx.fields_checked += 1;
475    let mut valid = true;
476    let keys: Vec<&String> = data.keys().collect();
477
478    // Required
479    if let Some(required) = schema.get("required").and_then(|v| v.as_array()) {
480        for prop in required {
481            if let Some(prop_name) = prop.as_str() {
482                if !data.contains_key(prop_name) {
483                    let suggestion = suggest_missing_required(prop_name);
484                    let mut context = HashMap::new();
485                    context.insert("property", prop_name.to_string());
486                    add_error(ctx, create_error("E002", path, context), Some(suggestion));
487                    valid = false;
488                }
489            }
490        }
491    }
492
493    // minProperties / maxProperties
494    if let Some(min_props) = schema.get("minProperties").and_then(|v| v.as_u64()) {
495        if (keys.len() as u64) < min_props {
496            let mut context = HashMap::new();
497            context.insert(
498                "constraint",
499                format!("minProperties {}, got {}", min_props, keys.len()),
500            );
501            add_error(ctx, create_error("E007", path, context), None);
502            valid = false;
503        }
504    }
505
506    if let Some(max_props) = schema.get("maxProperties").and_then(|v| v.as_u64()) {
507        if (keys.len() as u64) > max_props {
508            let mut context = HashMap::new();
509            context.insert(
510                "constraint",
511                format!("maxProperties {}, got {}", max_props, keys.len()),
512            );
513            add_error(ctx, create_error("E007", path, context), None);
514            valid = false;
515        }
516    }
517
518    // additionalProperties
519    if let Some(additional) = schema.get("additionalProperties") {
520        if additional != &Value::Bool(true) {
521            let defined: HashSet<&str> = schema
522                .get("properties")
523                .and_then(|v| v.as_object())
524                .map(|obj| obj.keys().map(|k| k.as_str()).collect())
525                .unwrap_or_default();
526
527            for key in &keys {
528                if !defined.contains(key.as_str()) {
529                    if additional == &Value::Bool(false) {
530                        let suggestion = suggest_remove_additional(key);
531                        let mut context = HashMap::new();
532                        context.insert("property", key.to_string());
533                        add_error(ctx, create_error("E003", path, context), Some(suggestion));
534                        valid = false;
535                    } else if additional.is_object() {
536                        // Validate against additionalProperties schema
537                        let prop_path = format!("{}/{}", path, key);
538                        walk_schema(additional, &data[key.as_str()], &prop_path, ctx);
539                    }
540                }
541            }
542        }
543    }
544
545    if valid {
546        ctx.fields_valid += 1;
547    }
548
549    // Validate individual properties
550    if let Some(properties) = schema.get("properties").and_then(|v| v.as_object()) {
551        for (prop_name, prop_schema) in properties {
552            if let Some(prop_data) = data.get(prop_name) {
553                let prop_path = format!("{}/{}", path, prop_name);
554                walk_schema(prop_schema, prop_data, &prop_path, ctx);
555            }
556        }
557    }
558}
559
560// ---------------------------------------------------------------------------
561// Composition
562// ---------------------------------------------------------------------------
563
564fn create_sub_context<'a>(parent_ctx: &WalkerContext<'a>) -> WalkerContext<'a> {
565    WalkerContext {
566        errors: Vec::new(),
567        root_schema: parent_ctx.root_schema,
568        fields_checked: 0,
569        fields_valid: 0,
570        fields_invalid: 0,
571        options: parent_ctx.options.clone(),
572    }
573}
574
575fn validate_all_of(schemas: &[Value], data: &Value, path: &str, ctx: &mut WalkerContext) {
576    for sub_schema in schemas {
577        let mut sub_ctx = create_sub_context(ctx);
578        walk_schema(sub_schema, data, path, &mut sub_ctx);
579        if !sub_ctx.errors.is_empty() {
580            let mut context = HashMap::new();
581            context.insert("keyword", "allOf".to_string());
582            add_error(ctx, create_error("E010", path, context), None);
583            return;
584        }
585    }
586}
587
588fn validate_any_of(schemas: &[Value], data: &Value, path: &str, ctx: &mut WalkerContext) {
589    for sub_schema in schemas {
590        let mut sub_ctx = create_sub_context(ctx);
591        walk_schema(sub_schema, data, path, &mut sub_ctx);
592        if sub_ctx.errors.is_empty() {
593            return; // at least one matched
594        }
595    }
596    let mut context = HashMap::new();
597    context.insert("keyword", "anyOf".to_string());
598    add_error(ctx, create_error("E010", path, context), None);
599}
600
601fn validate_one_of(schemas: &[Value], data: &Value, path: &str, ctx: &mut WalkerContext) {
602    let mut match_count = 0;
603    for sub_schema in schemas {
604        let mut sub_ctx = create_sub_context(ctx);
605        walk_schema(sub_schema, data, path, &mut sub_ctx);
606        if sub_ctx.errors.is_empty() {
607            match_count += 1;
608        }
609    }
610    if match_count != 1 {
611        let mut context = HashMap::new();
612        context.insert("keyword", "oneOf".to_string());
613        add_error(ctx, create_error("E010", path, context), None);
614    }
615}
616
617// ---------------------------------------------------------------------------
618// Public API
619// ---------------------------------------------------------------------------
620
621/// Validate data against a full protocol envelope (must have a `schema` key).
622pub fn validate(
623    data: &Value,
624    protocol: &Value,
625    options: Option<ValidationOptions>,
626) -> ValidationResult {
627    let schema = protocol
628        .get("schema")
629        .cloned()
630        .unwrap_or(Value::Object(serde_json::Map::new()));
631
632    let opts = options.unwrap_or(ValidationOptions {
633        mode: "full".to_string(),
634        paths: vec![],
635    });
636
637    let mut ctx = WalkerContext {
638        errors: Vec::new(),
639        root_schema: &schema,
640        fields_checked: 0,
641        fields_valid: 0,
642        fields_invalid: 0,
643        options: opts.clone(),
644    };
645
646    walk_schema(&schema, data, "", &mut ctx);
647
648    ValidationResult {
649        valid: ctx.errors.iter().filter(|e| e.severity == "error").count() == 0,
650        mode: opts.mode,
651        errors: ctx.errors,
652        stats: ValidationStats {
653            fields_checked: ctx.fields_checked,
654            fields_valid: ctx.fields_valid,
655            fields_invalid: ctx.fields_invalid,
656        },
657    }
658}
659
660/// Validate data against a bare schema (no protocol envelope).
661pub fn validate_schema(
662    data: &Value,
663    schema: &Value,
664    options: Option<ValidationOptions>,
665) -> ValidationResult {
666    let protocol = serde_json::json!({
667        "$protocol": "https://dataprotocol.dev/v1",
668        "name": "__inline__",
669        "version": "0.0.0",
670        "schema": schema
671    });
672    validate(data, &protocol, options)
673}