Skip to main content

json_schema_ast/
ast.rs

1use anyhow::{Result, anyhow};
2use percent_encoding::percent_decode_str;
3use serde_json::Value;
4use std::cell::{Ref, RefCell, RefMut};
5use std::collections::{HashMap, HashSet};
6use std::fmt;
7use std::rc::Rc;
8
9/// Shared, interior-mutable representation of a JSON Schema node.  Using
10/// reference counting allows multiple parents to point to the same node which
11/// is required to faithfully model schemas containing recursive `$ref`s.
12#[derive(Clone)]
13pub struct SchemaNode(Rc<RefCell<SchemaNodeKind>>);
14
15impl SchemaNode {
16    pub fn new(kind: SchemaNodeKind) -> Self {
17        Self(Rc::new(RefCell::new(kind)))
18    }
19
20    pub fn bool_schema(value: bool) -> Self {
21        Self::new(SchemaNodeKind::BoolSchema(value))
22    }
23
24    pub fn any() -> Self {
25        Self::new(SchemaNodeKind::Any)
26    }
27
28    pub fn borrow(&self) -> Ref<'_, SchemaNodeKind> {
29        self.0.borrow()
30    }
31
32    pub fn borrow_mut(&self) -> RefMut<'_, SchemaNodeKind> {
33        self.0.borrow_mut()
34    }
35
36    fn ptr_id(&self) -> usize {
37        Rc::as_ptr(&self.0) as usize
38    }
39
40    pub fn ptr_eq(&self, other: &SchemaNode) -> bool {
41        Rc::ptr_eq(&self.0, &other.0)
42    }
43
44    /// Convert the AST node back into a *minimal* JSON representation.  This
45    /// is **lossy** for complex scenarios but is sufficient for the validator
46    /// tests and fuzz harness (which only relies on the subset of keywords we
47    /// explicitly generate).
48    pub fn to_json(&self) -> Value {
49        use SchemaNodeKind::*;
50
51        match &*self.borrow() {
52            BoolSchema(b) => Value::Bool(*b),
53            Any => Value::Object(serde_json::Map::new()),
54
55            Enum(values) => {
56                let mut obj = serde_json::Map::new();
57                obj.insert("enum".into(), Value::Array(values.clone()));
58                Value::Object(obj)
59            }
60
61            String {
62                min_length,
63                max_length,
64                pattern,
65                enumeration,
66            } => {
67                let mut obj = serde_json::Map::new();
68                obj.insert("type".into(), Value::String("string".into()));
69                if let Some(m) = min_length {
70                    obj.insert("minLength".into(), Value::Number((*m).into()));
71                }
72                if let Some(m) = max_length {
73                    obj.insert("maxLength".into(), Value::Number((*m).into()));
74                }
75                if let Some(p) = pattern {
76                    obj.insert("pattern".into(), Value::String(p.clone()));
77                }
78                if let Some(e) = enumeration {
79                    obj.insert("enum".into(), Value::Array(e.clone()));
80                }
81                Value::Object(obj)
82            }
83
84            Number {
85                minimum,
86                maximum,
87                exclusive_minimum,
88                exclusive_maximum,
89                multiple_of,
90                enumeration,
91            } => {
92                let mut obj = serde_json::Map::new();
93                obj.insert("type".into(), Value::String("number".into()));
94                if let Some(m) = minimum {
95                    obj.insert(
96                        "minimum".into(),
97                        Value::Number(serde_json::Number::from_f64(*m).unwrap()),
98                    );
99                }
100                if let Some(m) = maximum {
101                    obj.insert(
102                        "maximum".into(),
103                        Value::Number(serde_json::Number::from_f64(*m).unwrap()),
104                    );
105                }
106                if *exclusive_minimum && let Some(m) = minimum {
107                    obj.insert(
108                        "exclusiveMinimum".into(),
109                        Value::Number(serde_json::Number::from_f64(*m).unwrap()),
110                    );
111                }
112                if *exclusive_maximum && let Some(m) = maximum {
113                    obj.insert(
114                        "exclusiveMaximum".into(),
115                        Value::Number(serde_json::Number::from_f64(*m).unwrap()),
116                    );
117                }
118                if let Some(mo) = multiple_of {
119                    obj.insert(
120                        "multipleOf".into(),
121                        Value::Number(serde_json::Number::from_f64(*mo).unwrap()),
122                    );
123                }
124                if let Some(e) = enumeration {
125                    obj.insert("enum".into(), Value::Array(e.clone()));
126                }
127                Value::Object(obj)
128            }
129
130            Integer {
131                minimum,
132                maximum,
133                exclusive_minimum,
134                exclusive_maximum,
135                multiple_of,
136                enumeration,
137            } => {
138                let mut obj = serde_json::Map::new();
139                obj.insert("type".into(), Value::String("integer".into()));
140                if let Some(m) = minimum {
141                    obj.insert("minimum".into(), Value::Number((*m).into()));
142                }
143                if let Some(m) = maximum {
144                    obj.insert("maximum".into(), Value::Number((*m).into()));
145                }
146                if *exclusive_minimum && let Some(m) = minimum {
147                    obj.insert("exclusiveMinimum".into(), Value::Number((*m).into()));
148                }
149                if *exclusive_maximum && let Some(m) = maximum {
150                    obj.insert("exclusiveMaximum".into(), Value::Number((*m).into()));
151                }
152                if let Some(e) = enumeration {
153                    obj.insert("enum".into(), Value::Array(e.clone()));
154                }
155                if let Some(mo) = multiple_of {
156                    obj.insert(
157                        "multipleOf".into(),
158                        Value::Number(serde_json::Number::from_f64(*mo).unwrap()),
159                    );
160                }
161                Value::Object(obj)
162            }
163
164            Boolean { enumeration } => {
165                let mut obj = serde_json::Map::new();
166                obj.insert("type".into(), Value::String("boolean".into()));
167                if let Some(e) = enumeration {
168                    obj.insert("enum".into(), Value::Array(e.clone()));
169                }
170                Value::Object(obj)
171            }
172
173            Null { enumeration } => {
174                let mut obj = serde_json::Map::new();
175                obj.insert("type".into(), Value::String("null".into()));
176                if let Some(e) = enumeration {
177                    obj.insert("enum".into(), Value::Array(e.clone()));
178                }
179                Value::Object(obj)
180            }
181
182            AllOf(subs) => {
183                let arr = subs.iter().map(|s| s.to_json()).collect();
184                let mut obj = serde_json::Map::new();
185                obj.insert("allOf".into(), Value::Array(arr));
186                Value::Object(obj)
187            }
188            AnyOf(subs) => {
189                let arr = subs.iter().map(|s| s.to_json()).collect();
190                let mut obj = serde_json::Map::new();
191                obj.insert("anyOf".into(), Value::Array(arr));
192                Value::Object(obj)
193            }
194            OneOf(subs) => {
195                let arr = subs.iter().map(|s| s.to_json()).collect();
196                let mut obj = serde_json::Map::new();
197                obj.insert("oneOf".into(), Value::Array(arr));
198                Value::Object(obj)
199            }
200            Not(sub) => {
201                let mut obj = serde_json::Map::new();
202                obj.insert("not".into(), sub.to_json());
203                Value::Object(obj)
204            }
205            IfThenElse {
206                if_schema,
207                then_schema,
208                else_schema,
209            } => {
210                let mut obj = serde_json::Map::new();
211                obj.insert("if".into(), if_schema.to_json());
212                if let Some(t) = then_schema {
213                    obj.insert("then".into(), t.to_json());
214                }
215                if let Some(e) = else_schema {
216                    obj.insert("else".into(), e.to_json());
217                }
218                Value::Object(obj)
219            }
220
221            Array {
222                items,
223                min_items,
224                max_items,
225                contains,
226                enumeration,
227            } => {
228                let mut obj = serde_json::Map::new();
229                obj.insert("type".into(), Value::String("array".into()));
230                if !matches!(&*items.borrow(), SchemaNodeKind::Any) {
231                    obj.insert("items".into(), items.to_json());
232                }
233                if let Some(mi) = min_items {
234                    obj.insert("minItems".into(), Value::Number((*mi).into()));
235                }
236                if let Some(ma) = max_items {
237                    obj.insert("maxItems".into(), Value::Number((*ma).into()));
238                }
239                if let Some(c) = contains {
240                    obj.insert("contains".into(), c.to_json());
241                }
242                if let Some(e) = enumeration {
243                    obj.insert("enum".into(), Value::Array(e.clone()));
244                }
245                Value::Object(obj)
246            }
247
248            Object {
249                properties,
250                required,
251                additional,
252                property_names,
253                min_properties,
254                max_properties,
255                dependent_required,
256                enumeration,
257            } => {
258                let mut obj = serde_json::Map::new();
259                obj.insert("type".into(), Value::String("object".into()));
260
261                if !properties.is_empty() {
262                    let mut props_map = serde_json::Map::new();
263                    for (k, v) in properties {
264                        props_map.insert(k.clone(), v.to_json());
265                    }
266                    obj.insert("properties".into(), Value::Object(props_map));
267                }
268
269                if !required.is_empty() {
270                    let mut sorted: Vec<_> = required.iter().cloned().collect();
271                    sorted.sort();
272                    obj.insert(
273                        "required".into(),
274                        Value::Array(sorted.into_iter().map(Value::String).collect()),
275                    );
276                }
277
278                match &*additional.borrow() {
279                    SchemaNodeKind::Any => {}
280                    SchemaNodeKind::BoolSchema(b) => {
281                        obj.insert("additionalProperties".into(), Value::Bool(*b));
282                    }
283                    _ => {
284                        obj.insert("additionalProperties".into(), additional.to_json());
285                    }
286                }
287
288                match &*property_names.borrow() {
289                    SchemaNodeKind::Any | SchemaNodeKind::BoolSchema(true) => {}
290                    SchemaNodeKind::BoolSchema(b) => {
291                        obj.insert("propertyNames".into(), Value::Bool(*b));
292                    }
293                    _ => {
294                        obj.insert("propertyNames".into(), property_names.to_json());
295                    }
296                }
297
298                if let Some(mp) = min_properties {
299                    obj.insert("minProperties".into(), Value::Number((*mp).into()));
300                }
301                if let Some(mp) = max_properties {
302                    obj.insert("maxProperties".into(), Value::Number((*mp).into()));
303                }
304
305                if !dependent_required.is_empty() {
306                    let mut dr_map = serde_json::Map::new();
307                    for (k, v) in dependent_required {
308                        dr_map.insert(
309                            k.clone(),
310                            Value::Array(v.iter().cloned().map(Value::String).collect()),
311                        );
312                    }
313                    obj.insert("dependentRequired".into(), Value::Object(dr_map));
314                }
315
316                if let Some(e) = enumeration {
317                    obj.insert("enum".into(), Value::Array(e.clone()));
318                }
319
320                Value::Object(obj)
321            }
322
323            Defs(map) => {
324                let mut defs_obj = serde_json::Map::new();
325                for (k, v) in map {
326                    defs_obj.insert(k.clone(), v.to_json());
327                }
328                let mut obj = serde_json::Map::new();
329                obj.insert("$defs".into(), Value::Object(defs_obj));
330                Value::Object(obj)
331            }
332
333            Const(v) => {
334                let mut obj = serde_json::Map::new();
335                obj.insert("const".into(), v.clone());
336                Value::Object(obj)
337            }
338            Type(t) => {
339                let mut obj = serde_json::Map::new();
340                obj.insert("type".into(), Value::String(t.clone()));
341                Value::Object(obj)
342            }
343            Minimum(m) => {
344                let mut obj = serde_json::Map::new();
345                obj.insert(
346                    "minimum".into(),
347                    Value::Number(serde_json::Number::from_f64(*m).unwrap()),
348                );
349                Value::Object(obj)
350            }
351            Maximum(m) => {
352                let mut obj = serde_json::Map::new();
353                obj.insert(
354                    "maximum".into(),
355                    Value::Number(serde_json::Number::from_f64(*m).unwrap()),
356                );
357                Value::Object(obj)
358            }
359            Required(reqs) => {
360                let mut sorted = reqs.clone();
361                sorted.sort();
362                let mut obj = serde_json::Map::new();
363                obj.insert(
364                    "required".into(),
365                    Value::Array(sorted.into_iter().map(Value::String).collect()),
366                );
367                Value::Object(obj)
368            }
369            AdditionalProperties(schema) => {
370                let mut obj = serde_json::Map::new();
371                obj.insert("additionalProperties".into(), schema.to_json());
372                Value::Object(obj)
373            }
374
375            Format(f) => {
376                let mut obj = serde_json::Map::new();
377                obj.insert("format".into(), Value::String(f.clone()));
378                Value::Object(obj)
379            }
380            ContentEncoding(c) => {
381                let mut obj = serde_json::Map::new();
382                obj.insert("contentEncoding".into(), Value::String(c.clone()));
383                Value::Object(obj)
384            }
385            ContentMediaType(c) => {
386                let mut obj = serde_json::Map::new();
387                obj.insert("contentMediaType".into(), Value::String(c.clone()));
388                Value::Object(obj)
389            }
390
391            Title(t) => {
392                let mut obj = serde_json::Map::new();
393                obj.insert("title".into(), Value::String(t.clone()));
394                Value::Object(obj)
395            }
396            Description(d) => {
397                let mut obj = serde_json::Map::new();
398                obj.insert("description".into(), Value::String(d.clone()));
399                Value::Object(obj)
400            }
401            Default(def) => {
402                let mut obj = serde_json::Map::new();
403                obj.insert("default".into(), def.clone());
404                Value::Object(obj)
405            }
406            Examples(ex) => {
407                let mut obj = serde_json::Map::new();
408                obj.insert("examples".into(), Value::Array(ex.clone()));
409                Value::Object(obj)
410            }
411            ReadOnly(b) => {
412                let mut obj = serde_json::Map::new();
413                obj.insert("readOnly".into(), Value::Bool(*b));
414                Value::Object(obj)
415            }
416            WriteOnly(b) => {
417                let mut obj = serde_json::Map::new();
418                obj.insert("writeOnly".into(), Value::Bool(*b));
419                Value::Object(obj)
420            }
421
422            Ref(r) => {
423                let mut obj = serde_json::Map::new();
424                obj.insert("$ref".into(), Value::String(r.clone()));
425                Value::Object(obj)
426            }
427        }
428    }
429}
430
431impl fmt::Debug for SchemaNode {
432    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
433        f.debug_struct("SchemaNode")
434            .field("id", &self.ptr_id())
435            .finish()
436    }
437}
438
439impl PartialEq for SchemaNode {
440    fn eq(&self, other: &Self) -> bool {
441        fn eq_inner(a: &SchemaNode, b: &SchemaNode, seen: &mut HashSet<(usize, usize)>) -> bool {
442            use SchemaNodeKind::*;
443
444            let key = (a.ptr_id(), b.ptr_id());
445            if !seen.insert(key) {
446                return true;
447            }
448
449            let a_kind = a.borrow();
450            let b_kind = b.borrow();
451
452            match (&*a_kind, &*b_kind) {
453                (BoolSchema(ax), BoolSchema(bx)) => ax == bx,
454                (Any, Any) => true,
455                (
456                    String {
457                        min_length: ax,
458                        max_length: ay,
459                        pattern: ap,
460                        enumeration: ae,
461                    },
462                    String {
463                        min_length: bx,
464                        max_length: by,
465                        pattern: bp,
466                        enumeration: be,
467                    },
468                ) => ax == bx && ay == by && ap == bp && ae == be,
469                (
470                    Number {
471                        minimum: amin,
472                        maximum: amax,
473                        exclusive_minimum: aexmin,
474                        exclusive_maximum: aexmax,
475                        multiple_of: amul,
476                        enumeration: aenum,
477                    },
478                    Number {
479                        minimum: bmin,
480                        maximum: bmax,
481                        exclusive_minimum: bexmin,
482                        exclusive_maximum: bexmax,
483                        multiple_of: bmul,
484                        enumeration: benum,
485                    },
486                ) => {
487                    amin == bmin
488                        && amax == bmax
489                        && aexmin == bexmin
490                        && aexmax == bexmax
491                        && amul == bmul
492                        && aenum == benum
493                }
494                (
495                    Integer {
496                        minimum: amin,
497                        maximum: amax,
498                        exclusive_minimum: aexmin,
499                        exclusive_maximum: aexmax,
500                        multiple_of: amul,
501                        enumeration: aenum,
502                    },
503                    Integer {
504                        minimum: bmin,
505                        maximum: bmax,
506                        exclusive_minimum: bexmin,
507                        exclusive_maximum: bexmax,
508                        multiple_of: bmul,
509                        enumeration: benum,
510                    },
511                ) => {
512                    amin == bmin
513                        && amax == bmax
514                        && aexmin == bexmin
515                        && aexmax == bexmax
516                        && amul == bmul
517                        && aenum == benum
518                }
519                (Boolean { enumeration: ae }, Boolean { enumeration: be }) => ae == be,
520                (Null { enumeration: ae }, Null { enumeration: be }) => ae == be,
521                (
522                    Object {
523                        properties: aprops,
524                        required: areq,
525                        additional: aaddl,
526                        property_names: apropnames,
527                        min_properties: amin,
528                        max_properties: amax,
529                        dependent_required: adep,
530                        enumeration: aenum,
531                    },
532                    Object {
533                        properties: bprops,
534                        required: breq,
535                        additional: baddl,
536                        property_names: bpropnames,
537                        min_properties: bmin,
538                        max_properties: bmax,
539                        dependent_required: bdep,
540                        enumeration: benum,
541                    },
542                ) => {
543                    if areq != breq
544                        || amin != bmin
545                        || amax != bmax
546                        || adep != bdep
547                        || aenum != benum
548                        || !eq_inner(apropnames, bpropnames, seen)
549                        || aprops.len() != bprops.len()
550                    {
551                        return false;
552                    }
553                    for (k, aval) in aprops {
554                        let Some(bval) = bprops.get(k) else {
555                            return false;
556                        };
557                        if !eq_inner(aval, bval, seen) {
558                            return false;
559                        }
560                    }
561                    eq_inner(aaddl, baddl, seen)
562                }
563                (
564                    Array {
565                        items: aitems,
566                        min_items: amin,
567                        max_items: amax,
568                        contains: acontains,
569                        enumeration: aenum,
570                    },
571                    Array {
572                        items: bitems,
573                        min_items: bmin,
574                        max_items: bmax,
575                        contains: bcontains,
576                        enumeration: benum,
577                    },
578                ) => {
579                    if amin != bmin || amax != bmax || aenum != benum {
580                        return false;
581                    }
582                    if !eq_inner(aitems, bitems, seen) {
583                        return false;
584                    }
585                    match (acontains, bcontains) {
586                        (None, None) => true,
587                        (Some(av), Some(bv)) => eq_inner(av, bv, seen),
588                        _ => false,
589                    }
590                }
591                (Defs(a), Defs(b)) => {
592                    if a.len() != b.len() {
593                        return false;
594                    }
595                    for (k, aval) in a {
596                        let Some(bval) = b.get(k) else {
597                            return false;
598                        };
599                        if !eq_inner(aval, bval, seen) {
600                            return false;
601                        }
602                    }
603                    true
604                }
605                (AllOf(a), AllOf(b)) | (AnyOf(a), AnyOf(b)) | (OneOf(a), OneOf(b)) => {
606                    if a.len() != b.len() {
607                        return false;
608                    }
609                    for (av, bv) in a.iter().zip(b.iter()) {
610                        if !eq_inner(av, bv, seen) {
611                            return false;
612                        }
613                    }
614                    true
615                }
616                (Not(a), Not(b)) => eq_inner(a, b, seen),
617                (
618                    IfThenElse {
619                        if_schema: a_if,
620                        then_schema: a_then,
621                        else_schema: a_else,
622                    },
623                    IfThenElse {
624                        if_schema: b_if,
625                        then_schema: b_then,
626                        else_schema: b_else,
627                    },
628                ) => {
629                    if !eq_inner(a_if, b_if, seen) {
630                        return false;
631                    }
632                    match (a_then, b_then) {
633                        (None, None) => {}
634                        (Some(av), Some(bv)) => {
635                            if !eq_inner(av, bv, seen) {
636                                return false;
637                            }
638                        }
639                        _ => return false,
640                    }
641                    match (a_else, b_else) {
642                        (None, None) => true,
643                        (Some(av), Some(bv)) => eq_inner(av, bv, seen),
644                        _ => false,
645                    }
646                }
647                (Const(a), Const(b)) => a == b,
648                (Enum(a), Enum(b)) => a == b,
649                (Type(a), Type(b)) => a == b,
650                (Minimum(a), Minimum(b)) => a == b,
651                (Maximum(a), Maximum(b)) => a == b,
652                (Required(a), Required(b)) => a == b,
653                (AdditionalProperties(a), AdditionalProperties(b)) => eq_inner(a, b, seen),
654                (Format(a), Format(b)) => a == b,
655                (ContentEncoding(a), ContentEncoding(b)) => a == b,
656                (ContentMediaType(a), ContentMediaType(b)) => a == b,
657                (Title(a), Title(b)) => a == b,
658                (Description(a), Description(b)) => a == b,
659                (Default(a), Default(b)) => a == b,
660                (Examples(a), Examples(b)) => a == b,
661                (ReadOnly(a), ReadOnly(b)) => a == b,
662                (WriteOnly(a), WriteOnly(b)) => a == b,
663                (Ref(a), Ref(b)) => a == b,
664                _ => false,
665            }
666        }
667
668        eq_inner(self, other, &mut HashSet::new())
669    }
670}
671
672impl Eq for SchemaNode {}
673
674/// An internal Abstract Syntax Tree (AST) representing a fully-resolved JSON
675/// Schema draft-2020-12 document.  The node types are deliberately *very*
676/// close to the JSON Schema specification so that higher-level crates (e.g.
677/// the back-compat checker or fuzz generator) can reason about schemas
678/// without constantly reparsing raw JSON values.
679#[derive(Debug, Clone)]
680pub enum SchemaNodeKind {
681    BoolSchema(bool),
682    Any,
683
684    String {
685        min_length: Option<u64>,
686        max_length: Option<u64>,
687        pattern: Option<String>,
688        enumeration: Option<Vec<Value>>,
689    },
690    Number {
691        minimum: Option<f64>,
692        maximum: Option<f64>,
693        exclusive_minimum: bool,
694        exclusive_maximum: bool,
695        multiple_of: Option<f64>,
696        enumeration: Option<Vec<Value>>,
697    },
698    Integer {
699        minimum: Option<i64>,
700        maximum: Option<i64>,
701        exclusive_minimum: bool,
702        exclusive_maximum: bool,
703        multiple_of: Option<f64>,
704        enumeration: Option<Vec<Value>>,
705    },
706    Boolean {
707        enumeration: Option<Vec<Value>>,
708    },
709    Null {
710        enumeration: Option<Vec<Value>>,
711    },
712
713    Object {
714        properties: HashMap<String, SchemaNode>,
715        required: HashSet<String>,
716        additional: SchemaNode,
717        property_names: SchemaNode,
718        min_properties: Option<usize>,
719        max_properties: Option<usize>,
720        dependent_required: HashMap<String, Vec<String>>,
721        enumeration: Option<Vec<Value>>,
722    },
723    Array {
724        items: SchemaNode,
725        min_items: Option<u64>,
726        max_items: Option<u64>,
727        contains: Option<SchemaNode>,
728        enumeration: Option<Vec<Value>>,
729    },
730
731    Defs(HashMap<String, SchemaNode>),
732
733    AllOf(Vec<SchemaNode>),
734    AnyOf(Vec<SchemaNode>),
735    OneOf(Vec<SchemaNode>),
736    Not(SchemaNode),
737    IfThenElse {
738        if_schema: SchemaNode,
739        then_schema: Option<SchemaNode>,
740        else_schema: Option<SchemaNode>,
741    },
742
743    Const(Value),
744    Enum(Vec<Value>),
745    Type(String),
746    Minimum(f64),
747    Maximum(f64),
748    Required(Vec<String>),
749    AdditionalProperties(SchemaNode),
750
751    Format(String),
752    ContentEncoding(String),
753    ContentMediaType(String),
754
755    Title(String),
756    Description(String),
757    Default(Value),
758    Examples(Vec<Value>),
759    ReadOnly(bool),
760    WriteOnly(bool),
761
762    Ref(String),
763}
764
765/// Build and fully resolve a schema node from raw JSON + a base URL.
766pub fn build_and_resolve_schema(raw: &Value) -> Result<SchemaNode> {
767    let mut root = build_schema_ast(raw)?;
768    resolve_refs(&mut root, raw, &[])?;
769    Ok(root)
770}
771
772/// Build the high-level AST without immediately resolving references.
773pub fn build_schema_ast(raw: &Value) -> Result<SchemaNode> {
774    if let Some(b) = raw.as_bool() {
775        return Ok(SchemaNode::bool_schema(b));
776    }
777    if !raw.is_object() {
778        return Ok(SchemaNode::any());
779    }
780
781    let obj = raw.as_object().unwrap();
782
783    if let Some(Value::String(r)) = obj.get("$ref") {
784        return Ok(SchemaNode::new(SchemaNodeKind::Ref(r.to_owned())));
785    }
786
787    if let Some(Value::Array(e)) = obj.get("enum") {
788        return Ok(SchemaNode::new(SchemaNodeKind::Enum(e.clone())));
789    }
790
791    if let Some(c) = obj.get("const") {
792        return Ok(SchemaNode::new(SchemaNodeKind::Const(c.clone())));
793    }
794
795    if obj.contains_key("if") || obj.contains_key("then") || obj.contains_key("else") {
796        if let Some(cond) = obj.get("if") {
797            let if_schema = build_schema_ast(cond)?;
798            let then_schema = match obj.get("then") {
799                Some(v) => Some(build_schema_ast(v)?),
800                None => None,
801            };
802            let else_schema = match obj.get("else") {
803                Some(v) => Some(build_schema_ast(v)?),
804                None => None,
805            };
806            let mut base = obj.clone();
807            base.remove("if");
808            base.remove("then");
809            base.remove("else");
810            const META_KEYS: [&str; 4] = ["$schema", "$id", "$comment", "$defs"];
811            for key in META_KEYS {
812                base.remove(key);
813            }
814            let cond_node = SchemaNode::new(SchemaNodeKind::IfThenElse {
815                if_schema,
816                then_schema,
817                else_schema,
818            });
819            if !base.is_empty() {
820                let subs = vec![build_schema_ast(&Value::Object(base))?, cond_node.clone()];
821                return Ok(SchemaNode::new(SchemaNodeKind::AllOf(subs)));
822            } else {
823                return Ok(cond_node);
824            }
825        } else {
826            let mut base = obj.clone();
827            base.remove("then");
828            base.remove("else");
829            return build_schema_ast(&Value::Object(base));
830        }
831    }
832
833    if let Some(Value::Array(subs)) = obj.get("allOf") {
834        let mut list = Vec::new();
835        if obj.len() > 1 {
836            let mut base = obj.clone();
837            base.remove("allOf");
838            const META_KEYS: [&str; 4] = ["$schema", "$id", "$comment", "$defs"];
839            for key in META_KEYS {
840                base.remove(key);
841            }
842            if !base.is_empty() {
843                list.push(build_schema_ast(&Value::Object(base))?);
844            }
845        }
846        for s in subs {
847            list.push(build_schema_ast(s)?);
848        }
849        return Ok(SchemaNode::new(SchemaNodeKind::AllOf(list)));
850    }
851    if let Some(Value::Array(subs)) = obj.get("anyOf") {
852        let parsed = subs
853            .iter()
854            .map(build_schema_ast)
855            .collect::<Result<Vec<_>>>()?;
856        return Ok(SchemaNode::new(SchemaNodeKind::AnyOf(parsed)));
857    }
858    if let Some(Value::Array(subs)) = obj.get("oneOf") {
859        let parsed = subs
860            .iter()
861            .map(build_schema_ast)
862            .collect::<Result<Vec<_>>>()?;
863        return Ok(SchemaNode::new(SchemaNodeKind::OneOf(parsed)));
864    }
865    if let Some(n) = obj.get("not") {
866        return Ok(SchemaNode::new(SchemaNodeKind::Not(build_schema_ast(n)?)));
867    }
868
869    match obj.get("type") {
870        Some(Value::String(t)) => match t.as_str() {
871            "string" => parse_string_schema(obj),
872            "number" => parse_number_schema(obj, false),
873            "integer" => parse_number_schema(obj, true),
874            "boolean" => parse_boolean_schema(obj),
875            "null" => parse_null_schema(obj),
876            "object" => parse_object_schema(obj),
877            "array" => parse_array_schema(obj),
878            _ => Ok(SchemaNode::any()),
879        },
880        Some(Value::Array(arr)) => {
881            let mut variants = Vec::new();
882            for t_val in arr {
883                if let Some(t_str) = t_val.as_str() {
884                    let mut cloned = obj.clone();
885                    cloned.insert("type".into(), Value::String(t_str.into()));
886                    variants.push(build_schema_ast(&Value::Object(cloned))?);
887                }
888            }
889            if variants.len() == 1 {
890                Ok(variants.remove(0))
891            } else {
892                Ok(SchemaNode::new(SchemaNodeKind::AnyOf(variants)))
893            }
894        }
895        _ => {
896            if obj.contains_key("properties")
897                || obj.contains_key("minProperties")
898                || obj.contains_key("maxProperties")
899                || obj.contains_key("required")
900            {
901                parse_object_schema(obj)
902            } else if obj.contains_key("items") {
903                parse_array_schema(obj)
904            } else if obj.contains_key("minLength")
905                || obj.contains_key("maxLength")
906                || obj.contains_key("pattern")
907            {
908                parse_string_schema(obj)
909            } else {
910                Ok(SchemaNode::any())
911            }
912        }
913    }
914}
915
916fn parse_string_schema(obj: &serde_json::Map<String, Value>) -> Result<SchemaNode> {
917    let min_length = obj.get("minLength").and_then(|v| v.as_u64());
918    let max_length = obj.get("maxLength").and_then(|v| v.as_u64());
919    let pattern = obj
920        .get("pattern")
921        .and_then(|v| v.as_str())
922        .map(|s| s.to_owned());
923    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
924
925    Ok(SchemaNode::new(SchemaNodeKind::String {
926        min_length,
927        max_length,
928        pattern,
929        enumeration,
930    }))
931}
932
933fn parse_number_schema(obj: &serde_json::Map<String, Value>, integer: bool) -> Result<SchemaNode> {
934    let mut minimum = obj.get("minimum").and_then(|v| v.as_f64());
935    let mut maximum = obj.get("maximum").and_then(|v| v.as_f64());
936
937    let exclusive_minimum = if let Some(Value::Number(n)) = obj.get("exclusiveMinimum") {
938        minimum = n.as_f64();
939        true
940    } else {
941        false
942    };
943
944    let exclusive_maximum = if let Some(Value::Number(n)) = obj.get("exclusiveMaximum") {
945        maximum = n.as_f64();
946        true
947    } else {
948        false
949    };
950    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
951
952    let multiple_of = obj
953        .get("multipleOf")
954        .and_then(|v| v.as_f64())
955        .filter(|m| *m > 0.0);
956
957    if integer {
958        let min_i = minimum.map(|m| m as i64);
959        let max_i = maximum.map(|m| m as i64);
960        Ok(SchemaNode::new(SchemaNodeKind::Integer {
961            minimum: min_i,
962            maximum: max_i,
963            exclusive_minimum,
964            exclusive_maximum,
965            multiple_of,
966            enumeration,
967        }))
968    } else {
969        Ok(SchemaNode::new(SchemaNodeKind::Number {
970            minimum,
971            maximum,
972            exclusive_minimum,
973            exclusive_maximum,
974            multiple_of,
975            enumeration,
976        }))
977    }
978}
979
980fn parse_boolean_schema(obj: &serde_json::Map<String, Value>) -> Result<SchemaNode> {
981    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
982    Ok(SchemaNode::new(SchemaNodeKind::Boolean { enumeration }))
983}
984
985fn parse_null_schema(obj: &serde_json::Map<String, Value>) -> Result<SchemaNode> {
986    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
987    Ok(SchemaNode::new(SchemaNodeKind::Null { enumeration }))
988}
989
990fn parse_object_schema(obj: &serde_json::Map<String, Value>) -> Result<SchemaNode> {
991    let mut properties = HashMap::new();
992    if let Some(Value::Object(props)) = obj.get("properties") {
993        for (k, v) in props {
994            properties.insert(k.clone(), build_schema_ast(v)?);
995        }
996    }
997    let required: HashSet<String> = obj
998        .get("required")
999        .and_then(|v| v.as_array())
1000        .map(|arr| {
1001            arr.iter()
1002                .filter_map(|v| v.as_str().map(|s| s.to_owned()))
1003                .collect()
1004        })
1005        .unwrap_or_default();
1006
1007    for name in &required {
1008        if !properties.contains_key(name) {
1009            properties.insert(name.clone(), SchemaNode::any());
1010        }
1011    }
1012
1013    let additional = match obj.get("additionalProperties") {
1014        None => SchemaNode::any(),
1015        Some(Value::Bool(b)) => SchemaNode::bool_schema(*b),
1016        Some(other) => build_schema_ast(other)?,
1017    };
1018
1019    let property_names = match obj.get("propertyNames") {
1020        None => SchemaNode::any(),
1021        Some(Value::Bool(b)) => SchemaNode::bool_schema(*b),
1022        Some(other) => build_schema_ast(other)?,
1023    };
1024
1025    let min_properties = obj
1026        .get("minProperties")
1027        .and_then(|v| v.as_u64())
1028        .map(|v| v as usize);
1029    let max_properties = obj
1030        .get("maxProperties")
1031        .and_then(|v| v.as_u64())
1032        .map(|v| v as usize);
1033    let dependent_required = obj
1034        .get("dependentRequired")
1035        .and_then(|v| v.as_object())
1036        .map(|map| {
1037            map.iter()
1038                .map(|(k, v)| {
1039                    let deps = v
1040                        .as_array()
1041                        .map(|arr| {
1042                            arr.iter()
1043                                .filter_map(|v| v.as_str().map(|s| s.to_owned()))
1044                                .collect::<Vec<_>>()
1045                        })
1046                        .unwrap_or_default();
1047                    (k.clone(), deps)
1048                })
1049                .collect::<HashMap<_, _>>()
1050        })
1051        .unwrap_or_default();
1052    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
1053
1054    Ok(SchemaNode::new(SchemaNodeKind::Object {
1055        properties,
1056        required,
1057        additional,
1058        property_names,
1059        min_properties,
1060        max_properties,
1061        dependent_required,
1062        enumeration,
1063    }))
1064}
1065
1066fn parse_array_schema(obj: &serde_json::Map<String, Value>) -> Result<SchemaNode> {
1067    let items_node = match obj.get("items") {
1068        None => SchemaNode::any(),
1069        Some(Value::Array(arr)) => {
1070            if arr.is_empty() {
1071                SchemaNode::any()
1072            } else if arr.len() == 1 {
1073                build_schema_ast(&arr[0])?
1074            } else {
1075                let subnodes = arr
1076                    .iter()
1077                    .map(build_schema_ast)
1078                    .collect::<Result<Vec<SchemaNode>>>()?;
1079                SchemaNode::new(SchemaNodeKind::AllOf(subnodes))
1080            }
1081        }
1082        Some(other) => build_schema_ast(other)?,
1083    };
1084    let min_items = obj.get("minItems").and_then(|v| v.as_u64());
1085    let max_items = obj.get("maxItems").and_then(|v| v.as_u64());
1086    let enumeration = obj.get("enum").and_then(|v| v.as_array()).cloned();
1087
1088    let contains_node = match obj.get("contains") {
1089        None => None,
1090        Some(v) => Some(build_schema_ast(v)?),
1091    };
1092
1093    Ok(SchemaNode::new(SchemaNodeKind::Array {
1094        items: items_node,
1095        min_items,
1096        max_items,
1097        contains: contains_node,
1098        enumeration,
1099    }))
1100}
1101
1102/// Recursively resolves `SchemaNode::Ref` by looking up fragments in `root_json`.
1103pub fn resolve_refs(node: &mut SchemaNode, root_json: &Value, visited: &[String]) -> Result<()> {
1104    let mut stack = visited.to_vec();
1105    let mut cache: HashMap<String, SchemaNode> = HashMap::new();
1106    resolve_refs_internal(node, root_json, &mut stack, &mut cache)
1107}
1108
1109fn resolve_refs_internal(
1110    node: &mut SchemaNode,
1111    root_json: &Value,
1112    stack: &mut Vec<String>,
1113    cache: &mut HashMap<String, SchemaNode>,
1114) -> Result<()> {
1115    let ref_path = {
1116        let guard = node.borrow();
1117        if let SchemaNodeKind::Ref(p) = &*guard {
1118            Some(p.clone())
1119        } else {
1120            None
1121        }
1122    };
1123
1124    if let Some(path) = ref_path {
1125        if let Some(existing) = cache.get(&path) {
1126            *node = existing.clone();
1127            return Ok(());
1128        }
1129
1130        if let Some(stripped) = path.strip_prefix("#/") {
1131            let parts: Vec<String> = stripped
1132                .split('/')
1133                .map(|token| {
1134                    let mut decoded = percent_decode_str(token).decode_utf8_lossy().into_owned();
1135                    decoded = decoded.replace("~1", "/");
1136                    decoded.replace("~0", "~")
1137                })
1138                .collect();
1139            let mut current = root_json;
1140            for p in &parts {
1141                if let Some(next) = current.get(p.as_str()) {
1142                    current = next;
1143                } else {
1144                    return Err(anyhow!("Unresolved reference: {}", path));
1145                }
1146            }
1147            let mut resolved = build_schema_ast(current)?;
1148            cache.insert(path.clone(), resolved.clone());
1149            stack.push(path.clone());
1150            resolve_refs_internal(&mut resolved, root_json, stack, cache)?;
1151            stack.pop();
1152            cache.insert(path.clone(), resolved.clone());
1153            *node = resolved;
1154        } else {
1155            *node.borrow_mut() = SchemaNodeKind::BoolSchema(true);
1156        }
1157        return Ok(());
1158    }
1159
1160    if let SchemaNodeKind::AllOf(children) = &mut *node.borrow_mut() {
1161        for child in children.iter_mut() {
1162            resolve_refs_internal(child, root_json, stack, cache)?;
1163        }
1164        return Ok(());
1165    }
1166    if let SchemaNodeKind::AnyOf(children) = &mut *node.borrow_mut() {
1167        for child in children.iter_mut() {
1168            resolve_refs_internal(child, root_json, stack, cache)?;
1169        }
1170        return Ok(());
1171    }
1172    if let SchemaNodeKind::OneOf(children) = &mut *node.borrow_mut() {
1173        for child in children.iter_mut() {
1174            resolve_refs_internal(child, root_json, stack, cache)?;
1175        }
1176        return Ok(());
1177    }
1178    if let SchemaNodeKind::IfThenElse {
1179        if_schema,
1180        then_schema,
1181        else_schema,
1182    } = &mut *node.borrow_mut()
1183    {
1184        resolve_refs_internal(if_schema, root_json, stack, cache)?;
1185        if let Some(t) = then_schema {
1186            resolve_refs_internal(t, root_json, stack, cache)?;
1187        }
1188        if let Some(e) = else_schema {
1189            resolve_refs_internal(e, root_json, stack, cache)?;
1190        }
1191        return Ok(());
1192    }
1193    if let SchemaNodeKind::Not(sub) = &mut *node.borrow_mut() {
1194        resolve_refs_internal(sub, root_json, stack, cache)?;
1195        return Ok(());
1196    }
1197    if let SchemaNodeKind::Object {
1198        properties,
1199        additional,
1200        property_names,
1201        ..
1202    } = &mut *node.borrow_mut()
1203    {
1204        for child in properties.values_mut() {
1205            resolve_refs_internal(child, root_json, stack, cache)?;
1206        }
1207        resolve_refs_internal(additional, root_json, stack, cache)?;
1208        resolve_refs_internal(property_names, root_json, stack, cache)?;
1209        return Ok(());
1210    }
1211    if let SchemaNodeKind::Array {
1212        items, contains, ..
1213    } = &mut *node.borrow_mut()
1214    {
1215        resolve_refs_internal(items, root_json, stack, cache)?;
1216        if let Some(child) = contains {
1217            resolve_refs_internal(child, root_json, stack, cache)?;
1218        }
1219        return Ok(());
1220    }
1221    if let SchemaNodeKind::AdditionalProperties(schema) = &mut *node.borrow_mut() {
1222        resolve_refs_internal(schema, root_json, stack, cache)?;
1223        return Ok(());
1224    }
1225    if let SchemaNodeKind::Defs(map) = &mut *node.borrow_mut() {
1226        for child in map.values_mut() {
1227            resolve_refs_internal(child, root_json, stack, cache)?;
1228        }
1229        return Ok(());
1230    }
1231
1232    Ok(())
1233}
1234
1235/// Minimal check if an *instance* `val` is valid against `schema`.
1236///
1237/// This helper purposefully supports only the keyword subset that the fuzz
1238/// generator and back-compat checker rely on.  It is **not** a full JSON
1239/// Schema validator.
1240pub fn instance_is_valid_against(val: &Value, schema: &SchemaNode) -> bool {
1241    use SchemaNodeKind::*;
1242
1243    match &*schema.borrow() {
1244        BoolSchema(false) => false,
1245        BoolSchema(true) | Any => true,
1246
1247        Enum(e) => e.contains(val),
1248
1249        AllOf(subs) => subs.iter().all(|s| instance_is_valid_against(val, s)),
1250        AnyOf(subs) => subs.iter().any(|s| instance_is_valid_against(val, s)),
1251        OneOf(subs) => {
1252            let mut count = 0;
1253            for s in subs {
1254                if instance_is_valid_against(val, s) {
1255                    count += 1;
1256                }
1257            }
1258            count == 1
1259        }
1260        Not(sub) => !instance_is_valid_against(val, sub),
1261        IfThenElse {
1262            if_schema,
1263            then_schema,
1264            else_schema,
1265        } => {
1266            if instance_is_valid_against(val, if_schema) {
1267                if let Some(t) = then_schema {
1268                    instance_is_valid_against(val, t)
1269                } else {
1270                    true
1271                }
1272            } else if let Some(e) = else_schema {
1273                instance_is_valid_against(val, e)
1274            } else {
1275                true
1276            }
1277        }
1278
1279        String { enumeration, .. } => {
1280            if let Some(e) = enumeration
1281                && !e.contains(val)
1282            {
1283                return false;
1284            }
1285            val.is_string()
1286        }
1287        Number { enumeration, .. } => {
1288            if let Some(e) = enumeration
1289                && !e.contains(val)
1290            {
1291                return false;
1292            }
1293            val.is_number()
1294        }
1295        Integer { enumeration, .. } => {
1296            if let Some(e) = enumeration
1297                && !e.contains(val)
1298            {
1299                return false;
1300            }
1301            val.as_i64().is_some()
1302        }
1303        Boolean { enumeration } => {
1304            if let Some(e) = enumeration
1305                && !e.contains(val)
1306            {
1307                return false;
1308            }
1309            val.is_boolean()
1310        }
1311        Null { enumeration } => {
1312            if let Some(e) = enumeration
1313                && !e.contains(val)
1314            {
1315                return false;
1316            }
1317            val.is_null()
1318        }
1319        Object { .. } | Array { .. } => true,
1320
1321        _ => true,
1322    }
1323}