llguidance/json/
schema.rs

1use crate::{regex_to_lark, HashMap, JsonCompileOptions};
2use anyhow::{anyhow, bail, ensure, Result};
3use derivre::RegexAst;
4use indexmap::{IndexMap, IndexSet};
5use serde_json::Value;
6use std::mem;
7
8use super::context::{Context, Draft, PreContext, ResourceRef};
9use super::formats::lookup_format;
10use super::numeric::Decimal;
11use super::shared_context::BuiltSchema;
12
13const TYPES: [&str; 6] = ["null", "boolean", "number", "string", "array", "object"];
14
15// Keywords that are implemented in this module
16pub(crate) const IMPLEMENTED: [&str; 27] = [
17    // Core
18    "anyOf",
19    "oneOf",
20    "allOf",
21    "$ref",
22    "const",
23    "enum",
24    "type",
25    // Array
26    "items",
27    "additionalItems",
28    "prefixItems",
29    "minItems",
30    "maxItems",
31    // Object
32    "properties",
33    "additionalProperties",
34    "patternProperties",
35    "required",
36    "minProperties",
37    "maxProperties",
38    // String
39    "minLength",
40    "maxLength",
41    "pattern",
42    "format",
43    // Number
44    "minimum",
45    "maximum",
46    "exclusiveMinimum",
47    "exclusiveMaximum",
48    "multipleOf",
49];
50
51// Keywords that are used for metadata or annotations, not directly driving validation.
52// Note that some keywords like $id and $schema affect the behavior of other keywords, but
53// they can safely be ignored if other keywords aren't present
54pub(crate) const META_AND_ANNOTATIONS: [&str; 15] = [
55    "$anchor",
56    "$defs",
57    "definitions",
58    "$schema",
59    "$id",
60    "id",
61    "$comment",
62    "title",
63    "description",
64    "default",
65    "readOnly",
66    "writeOnly",
67    "examples",
68    "contentMediaType",
69    "contentEncoding",
70];
71
72fn limited_str(node: &Value) -> String {
73    let s = node.to_string();
74    if s.len() > 100 {
75        format!("{}...", &s[..100])
76    } else {
77        s
78    }
79}
80
81#[derive(Debug, Clone)]
82pub enum Schema {
83    Any,
84    Unsatisfiable(String),
85    Null,
86    Number(NumberSchema),
87    String(StringSchema),
88    Array(ArraySchema),
89    Object(ObjectSchema),
90    Boolean(Option<bool>),
91    AnyOf(Vec<Schema>),
92    OneOf(Vec<Schema>),
93    Ref(String),
94}
95
96#[derive(Debug, Clone, Default)]
97pub struct NumberSchema {
98    pub minimum: Option<f64>,
99    pub maximum: Option<f64>,
100    pub exclusive_minimum: Option<f64>,
101    pub exclusive_maximum: Option<f64>,
102    pub integer: bool,
103    pub multiple_of: Option<Decimal>,
104}
105
106impl NumberSchema {
107    pub fn get_minimum(&self) -> (Option<f64>, bool) {
108        match (self.minimum, self.exclusive_minimum) {
109            (Some(min), Some(xmin)) => {
110                if xmin >= min {
111                    (Some(xmin), true)
112                } else {
113                    (Some(min), false)
114                }
115            }
116            (Some(min), None) => (Some(min), false),
117            (None, Some(xmin)) => (Some(xmin), true),
118            (None, None) => (None, false),
119        }
120    }
121
122    pub fn get_maximum(&self) -> (Option<f64>, bool) {
123        match (self.maximum, self.exclusive_maximum) {
124            (Some(max), Some(xmax)) => {
125                if xmax <= max {
126                    (Some(xmax), true)
127                } else {
128                    (Some(max), false)
129                }
130            }
131            (Some(max), None) => (Some(max), false),
132            (None, Some(xmax)) => (Some(xmax), true),
133            (None, None) => (None, false),
134        }
135    }
136}
137
138#[derive(Debug, Clone)]
139pub struct StringSchema {
140    pub min_length: usize,
141    pub max_length: Option<usize>,
142    pub regex: Option<RegexAst>,
143}
144
145#[derive(Debug, Clone)]
146pub struct ArraySchema {
147    pub min_items: usize,
148    pub max_items: Option<usize>,
149    pub prefix_items: Vec<Schema>,
150    pub items: Option<Box<Schema>>,
151}
152
153#[derive(Debug, Clone)]
154pub struct ObjectSchema {
155    pub properties: IndexMap<String, Schema>,
156    pub pattern_properties: IndexMap<String, Schema>,
157    pub additional_properties: Option<Box<Schema>>,
158    pub required: IndexSet<String>,
159    pub min_properties: usize,
160    pub max_properties: Option<usize>,
161}
162
163pub trait OptSchemaExt {
164    fn schema(&self) -> Schema;
165    fn schema_ref(&self) -> &Schema;
166}
167
168impl OptSchemaExt for Option<Box<Schema>> {
169    fn schema(&self) -> Schema {
170        match self {
171            Some(schema) => schema.as_ref().clone(),
172            None => Schema::Any,
173        }
174    }
175
176    fn schema_ref(&self) -> &Schema {
177        match self {
178            Some(schema) => schema.as_ref(),
179            None => &Schema::Any,
180        }
181    }
182}
183
184impl Schema {
185    pub fn unsat(reason: &str) -> Schema {
186        Schema::Unsatisfiable(reason.to_string())
187    }
188
189    pub fn false_schema() -> Schema {
190        Self::unsat("schema is false")
191    }
192
193    pub fn any_box() -> Option<Box<Schema>> {
194        Some(Box::new(Schema::Any))
195    }
196
197    pub fn is_unsat(&self) -> bool {
198        matches!(self, Schema::Unsatisfiable(_))
199    }
200
201    /// Shallowly normalize the schema, removing any unnecessary nesting or empty options.
202    fn normalize(self, ctx: &Context) -> Schema {
203        match self {
204            Schema::AnyOf(options) => {
205                let mut unsats = Vec::new();
206                let mut valid = Vec::new();
207                for option in options.into_iter() {
208                    match option {
209                        Schema::Any => {
210                            return Schema::Any;
211                        }
212                        Schema::Unsatisfiable(reason) => unsats.push(Schema::Unsatisfiable(reason)),
213                        Schema::AnyOf(nested) => valid.extend(nested),
214                        other => valid.push(other),
215                    }
216                }
217                if valid.is_empty() {
218                    // Return the first unsatisfiable schema for debug-ability
219                    if let Some(unsat) = unsats.into_iter().next() {
220                        return unsat;
221                    }
222                    // We must not have had any schemas to begin with
223                    return Schema::unsat("anyOf is empty");
224                }
225                if valid.len() == 1 {
226                    // Unwrap singleton
227                    return valid.swap_remove(0);
228                }
229                Schema::AnyOf(valid)
230            }
231            Schema::OneOf(options) => {
232                let mut unsats = Vec::new();
233                let mut valid = Vec::new();
234                for option in options.into_iter() {
235                    match option {
236                        Schema::Unsatisfiable(reason) => unsats.push(Schema::Unsatisfiable(reason)),
237                        // Flatten nested oneOfs: (A⊕B)⊕(C⊕D) = A⊕B⊕C⊕D
238                        Schema::OneOf(nested) => valid.extend(nested),
239                        other => valid.push(other),
240                    }
241                }
242                if valid.is_empty() {
243                    // Return the first unsatisfiable schema for debug-ability
244                    if let Some(unsat) = unsats.into_iter().next() {
245                        return unsat;
246                    }
247                    // We must not have had any schemas to begin with
248                    return Schema::unsat("oneOf is empty");
249                }
250                if valid.len() == 1 {
251                    // Unwrap singleton
252                    return valid.swap_remove(0);
253                }
254                if valid.iter().enumerate().all(|(i, x)| {
255                    valid
256                        .iter()
257                        .skip(i + 1) // "upper diagonal"
258                        .all(|y| x.is_verifiably_disjoint_from(y, ctx))
259                }) {
260                    Schema::AnyOf(valid)
261                } else {
262                    Schema::OneOf(valid)
263                }
264            }
265            other_schema => other_schema,
266        }
267    }
268
269    /// Intersect two schemas, returning a new (normalized) schema that represents the intersection of the two.
270    fn intersect(self, other: Schema, ctx: &Context, stack_level: usize) -> Result<Schema> {
271        ctx.increment()?;
272        if stack_level > ctx.options.max_stack_level {
273            bail!("Schema intersection stack level exceeded");
274        }
275
276        let merged = match (self, other) {
277            (Schema::Any, schema1) => schema1,
278            (schema0, Schema::Any) => schema0,
279            (Schema::Unsatisfiable(reason), _) => Schema::Unsatisfiable(reason),
280            (_, Schema::Unsatisfiable(reason)) => Schema::Unsatisfiable(reason),
281            (Schema::Ref(uri), schema1) => {
282                intersect_ref(ctx, &uri, schema1, true, stack_level + 1)?
283            }
284            (schema0, Schema::Ref(uri)) => {
285                intersect_ref(ctx, &uri, schema0, false, stack_level + 1)?
286            }
287            (Schema::OneOf(options), schema1) => Schema::OneOf(
288                options
289                    .into_iter()
290                    .map(|opt| opt.intersect(schema1.clone(), ctx, stack_level + 1))
291                    .collect::<Result<Vec<_>>>()?,
292            ),
293
294            (schema0, Schema::OneOf(options)) => Schema::OneOf(
295                options
296                    .into_iter()
297                    .map(|opt| schema0.clone().intersect(opt, ctx, stack_level + 1))
298                    .collect::<Result<Vec<_>>>()?,
299            ),
300            (Schema::AnyOf(options), schema1) => Schema::AnyOf(
301                options
302                    .into_iter()
303                    .map(|opt| opt.intersect(schema1.clone(), ctx, stack_level + 1))
304                    .collect::<Result<Vec<_>>>()?,
305            ),
306            (schema0, Schema::AnyOf(options)) => Schema::AnyOf(
307                options
308                    .into_iter()
309                    .map(|opt| schema0.clone().intersect(opt, ctx, stack_level + 1))
310                    .collect::<Result<Vec<_>>>()?,
311            ),
312            (Schema::Null, Schema::Null) => Schema::Null,
313            (Schema::Boolean(value1), Schema::Boolean(value2)) => {
314                if value1 == value2 || value2.is_none() {
315                    Schema::Boolean(value1)
316                } else if value1.is_none() {
317                    Schema::Boolean(value2)
318                } else {
319                    Schema::unsat("incompatible boolean values")
320                }
321            }
322
323            (Schema::Number(n1), Schema::Number(n2)) => Schema::Number(NumberSchema {
324                minimum: opt_max(n1.minimum, n2.minimum),
325                maximum: opt_min(n1.maximum, n2.maximum),
326                exclusive_minimum: opt_max(n1.exclusive_minimum, n2.exclusive_minimum),
327                exclusive_maximum: opt_min(n1.exclusive_maximum, n2.exclusive_maximum),
328                integer: n1.integer || n2.integer,
329                multiple_of: match (n1.multiple_of, n2.multiple_of) {
330                    (None, None) => None,
331                    (None, Some(m)) | (Some(m), None) => Some(m),
332                    (Some(m1), Some(m2)) => Some(m1.lcm(&m2)),
333                },
334            }),
335
336            (Schema::String(s1), Schema::String(s2)) => Schema::String(StringSchema {
337                min_length: s1.min_length.max(s2.min_length),
338                max_length: opt_min(s1.max_length, s2.max_length),
339                regex: match (s1.regex, s2.regex) {
340                    (None, None) => None,
341                    (None, Some(r)) | (Some(r), None) => Some(r),
342                    (Some(r1), Some(r2)) => Some(RegexAst::And(vec![r1, r2])),
343                },
344            }),
345
346            (Schema::Array(mut a1), Schema::Array(mut a2)) => Schema::Array(ArraySchema {
347                min_items: a1.min_items.max(a2.min_items),
348                max_items: opt_min(a1.max_items, a2.max_items),
349                prefix_items: {
350                    let len = a1.prefix_items.len().max(a2.prefix_items.len());
351                    a1.prefix_items.resize_with(len, || a1.items.schema());
352                    a2.prefix_items.resize_with(len, || a2.items.schema());
353                    a1.prefix_items
354                        .into_iter()
355                        .zip(a2.prefix_items.into_iter())
356                        .map(|(item1, item2)| item1.intersect(item2, ctx, stack_level + 1))
357                        .collect::<Result<Vec<_>>>()?
358                },
359                items: match (a1.items, a2.items) {
360                    (None, None) => None,
361                    (None, Some(item)) | (Some(item), None) => Some(item),
362                    (Some(item1), Some(item2)) => {
363                        Some(Box::new(item1.intersect(*item2, ctx, stack_level + 1)?))
364                    }
365                },
366            }),
367
368            (Schema::Object(mut o1), Schema::Object(mut o2)) => {
369                let mut properties = IndexMap::new();
370                for (key, prop1) in std::mem::take(&mut o1.properties).into_iter() {
371                    let prop2 = ctx.property_schema(&o2, &key)?;
372                    properties.insert(key, prop1.intersect(prop2.clone(), ctx, stack_level + 1)?);
373                }
374                for (key, prop2) in o2.properties.into_iter() {
375                    if properties.contains_key(&key) {
376                        continue;
377                    }
378                    let prop1 = ctx.property_schema(&o1, &key)?;
379                    properties.insert(key, prop1.clone().intersect(prop2, ctx, stack_level + 1)?);
380                }
381                let mut required = o1.required;
382                required.extend(o2.required);
383
384                let mut pattern_properties = IndexMap::new();
385                for (key, prop1) in o1.pattern_properties.into_iter() {
386                    if let Some(prop2) = o2.pattern_properties.get_mut(&key) {
387                        let prop2 = std::mem::replace(prop2, Schema::Null);
388                        pattern_properties.insert(
389                            key.clone(),
390                            prop1.intersect(prop2.clone(), ctx, stack_level + 1)?,
391                        );
392                    } else {
393                        pattern_properties.insert(key.clone(), prop1);
394                    }
395                }
396                for (key, prop2) in o2.pattern_properties.into_iter() {
397                    if pattern_properties.contains_key(&key) {
398                        continue;
399                    }
400                    pattern_properties.insert(key.clone(), prop2);
401                }
402
403                let keys = pattern_properties.keys().collect::<Vec<_>>();
404                if !keys.is_empty() {
405                    ctx.check_disjoint_pattern_properties(&keys)?;
406                }
407
408                let additional_properties =
409                    match (o1.additional_properties, o2.additional_properties) {
410                        (None, None) => None,
411                        (None, Some(p)) | (Some(p), None) => Some(p),
412                        (Some(p1), Some(p2)) => {
413                            Some(Box::new((*p1).intersect(*p2, ctx, stack_level + 1)?))
414                        }
415                    };
416
417                let min_properties = o1.min_properties.max(o2.min_properties);
418                let max_properties = opt_min(o1.max_properties, o2.max_properties);
419
420                mk_object_schema(ObjectSchema {
421                    properties,
422                    pattern_properties,
423                    additional_properties,
424                    required,
425                    min_properties,
426                    max_properties,
427                })
428            }
429
430            //TODO: get types for error message
431            _ => Schema::unsat("incompatible types"),
432        };
433        Ok(merged.normalize(ctx))
434    }
435
436    fn is_verifiably_disjoint_from(&self, other: &Schema, ctx: &Context) -> bool {
437        match (self, other) {
438            (Schema::Unsatisfiable(_), _) => true,
439            (_, Schema::Unsatisfiable(_)) => true,
440            (Schema::Any, _) => false,
441            (_, Schema::Any) => false,
442            (Schema::Ref(_), _) => false, // TODO: could resolve
443            (_, Schema::Ref(_)) => false, // TODO: could resolve
444            (Schema::Boolean(value1), Schema::Boolean(value2)) => {
445                value1.is_some() && value2.is_some() && value1 != value2
446            }
447            (Schema::AnyOf(options), _) => options
448                .iter()
449                .all(|opt| opt.is_verifiably_disjoint_from(other, ctx)),
450            (_, Schema::AnyOf(options)) => options
451                .iter()
452                .all(|opt| self.is_verifiably_disjoint_from(opt, ctx)),
453            (Schema::OneOf(options), _) => options
454                .iter()
455                .all(|opt| opt.is_verifiably_disjoint_from(other, ctx)),
456            (_, Schema::OneOf(options)) => options
457                .iter()
458                .all(|opt| self.is_verifiably_disjoint_from(opt, ctx)),
459            // TODO: could actually compile the regexes and check for overlap
460            (
461                Schema::String(StringSchema {
462                    regex: Some(RegexAst::Literal(lit1)),
463                    ..
464                }),
465                Schema::String(StringSchema {
466                    regex: Some(RegexAst::Literal(lit2)),
467                    ..
468                }),
469            ) => lit1 != lit2,
470            (Schema::Object(o1), Schema::Object(o2)) => {
471                o1.required.union(&o2.required).any(|key| {
472                    let prop1 = ctx.property_schema(o1, key).unwrap_or(&Schema::Any);
473                    let prop2 = ctx.property_schema(o2, key).unwrap_or(&Schema::Any);
474                    prop1.is_verifiably_disjoint_from(prop2, ctx)
475                })
476            }
477            _ => {
478                // Except for in the cases above, it should suffice to check that the types are different
479                mem::discriminant(self) != mem::discriminant(other)
480            }
481        }
482    }
483
484    fn apply(self, applicator: (&str, &Value), ctx: &Context) -> Result<Schema> {
485        let mut result = self;
486        let (k, v) = applicator;
487        match k {
488            // TODO: Do const and enum really belong here? Maybe they should always take precedence as they are "literal" constraints?
489            "const" => {
490                let schema = compile_const(v)?;
491                result = result.intersect(schema, ctx, 0)?
492            }
493            "enum" => {
494                let instances = v
495                    .as_array()
496                    .ok_or_else(|| anyhow!("enum must be an array"))?;
497                let options = instances
498                    .iter()
499                    .map(compile_const)
500                    .collect::<Result<Vec<_>>>()?;
501                result = result.intersect(Schema::AnyOf(options), ctx, 0)?;
502            }
503            "allOf" => {
504                let all_of = v
505                    .as_array()
506                    .ok_or_else(|| anyhow!("allOf must be an array"))?;
507                for value in all_of {
508                    let schema = compile_resource(ctx, ctx.as_resource_ref(value))?;
509                    result = result.intersect(schema, ctx, 0)?;
510                }
511            }
512            "anyOf" => {
513                let any_of = v
514                    .as_array()
515                    .ok_or_else(|| anyhow!("anyOf must be an array"))?;
516                let options = any_of
517                    .iter()
518                    .map(|value| compile_resource(ctx, ctx.as_resource_ref(value)))
519                    .collect::<Result<Vec<_>>>()?;
520                result = result.intersect(Schema::AnyOf(options), ctx, 0)?;
521            }
522            "oneOf" => {
523                let one_of = v
524                    .as_array()
525                    .ok_or_else(|| anyhow!("oneOf must be an array"))?;
526                let options = one_of
527                    .iter()
528                    .map(|value| compile_resource(ctx, ctx.as_resource_ref(value)))
529                    .collect::<Result<Vec<_>>>()?;
530                result = result.intersect(Schema::OneOf(options), ctx, 0)?;
531            }
532            "$ref" => {
533                let reference = v
534                    .as_str()
535                    .ok_or_else(|| anyhow!("$ref must be a string, got {}", limited_str(v)))?
536                    .to_string();
537                let uri: String = ctx.normalize_ref(&reference)?;
538                if matches!(result, Schema::Any) {
539                    define_ref(ctx, &uri)?;
540                    result = Schema::Ref(uri);
541                } else {
542                    result = intersect_ref(ctx, &uri, result, false, 0)?;
543                }
544            }
545            _ => bail!("Unknown applicator: {}", applicator.0),
546        };
547        Ok(result)
548    }
549}
550
551#[derive(Clone)]
552pub struct SchemaBuilderOptions {
553    pub max_size: usize,
554    pub max_stack_level: usize,
555    pub lenient: bool,
556}
557
558impl Default for SchemaBuilderOptions {
559    fn default() -> Self {
560        SchemaBuilderOptions {
561            max_size: 50_000,
562            max_stack_level: 128, // consumes ~2.5k of stack per level
563            lenient: false,
564        }
565    }
566}
567
568pub fn build_schema(contents: Value, options: &JsonCompileOptions) -> Result<BuiltSchema> {
569    if let Some(b) = contents.as_bool() {
570        let s = if b {
571            Schema::Any
572        } else {
573            Schema::false_schema()
574        };
575        return Ok(BuiltSchema::simple(s));
576    }
577
578    let pre_ctx = PreContext::new(contents, options.retriever.clone())?;
579    let mut ctx = Context::new(&pre_ctx)?;
580
581    ctx.options.lenient = options.lenient;
582
583    let root_resource = ctx.lookup_resource(&pre_ctx.base_uri)?;
584    let schema = compile_resource(&ctx, root_resource)?;
585    Ok(ctx.into_result(schema))
586}
587
588fn compile_resource(ctx: &Context, resource: ResourceRef) -> Result<Schema> {
589    let ctx = ctx.in_subresource(resource)?;
590    compile_contents(&ctx, resource.contents())
591}
592
593fn compile_contents(ctx: &Context, contents: &Value) -> Result<Schema> {
594    compile_contents_inner(ctx, contents).map(|schema| schema.normalize(ctx))
595}
596
597fn compile_contents_inner(ctx: &Context, contents: &Value) -> Result<Schema> {
598    if let Some(b) = contents.as_bool() {
599        if b {
600            return Ok(Schema::Any);
601        } else {
602            return Ok(Schema::false_schema());
603        }
604    }
605
606    // Get the schema as an object
607    // TODO: validate against metaschema & check for unimplemented keys
608    let schemadict = contents
609        .as_object()
610        .ok_or_else(|| anyhow!("schema must be an object or boolean"))?;
611
612    // Make a mutable copy of the schema so we can modify it
613    let schemadict = schemadict
614        .iter()
615        .map(|(k, v)| (k.as_str(), v))
616        .collect::<IndexMap<_, _>>();
617
618    compile_contents_map(ctx, schemadict)
619}
620
621fn compile_contents_map(ctx: &Context, schemadict: IndexMap<&str, &Value>) -> Result<Schema> {
622    ctx.increment()?;
623
624    // We don't need to compile the schema if it's just meta and annotations
625    if schemadict
626        .keys()
627        .all(|k| META_AND_ANNOTATIONS.contains(k) || !ctx.draft.is_known_keyword(k))
628    {
629        return Ok(Schema::Any);
630    }
631
632    // Check for unimplemented keys and bail if any are found
633    let mut unimplemented_keys = schemadict
634        .keys()
635        .filter(|k| !ctx.is_valid_keyword(k))
636        .collect::<Vec<_>>();
637    if !unimplemented_keys.is_empty() {
638        // ensure consistent order for tests
639        unimplemented_keys.sort();
640        let msg = format!("Unimplemented keys: {:?}", unimplemented_keys);
641        if ctx.options.lenient {
642            ctx.record_warning(msg);
643        } else {
644            bail!(msg);
645        }
646    }
647
648    // Some dummy values to use for properties and prefixItems if we need to apply additionalProperties or items
649    // before we know what they are. Define them here so we can reference them in the loop below without borrow-checker
650    // issues.
651    let dummy_properties = match schemadict.get("properties") {
652        Some(properties) => {
653            let properties = properties
654                .as_object()
655                .ok_or_else(|| anyhow!("properties must be an object"))?;
656            Some(Value::from_iter(
657                properties
658                    .iter()
659                    .map(|(k, _)| (k.as_str(), Value::Bool(true))),
660            ))
661        }
662        None => None,
663    };
664    let dummy_prefix_items = match schemadict.get("prefixItems") {
665        Some(prefix_items) => {
666            let prefix_items = prefix_items
667                .as_array()
668                .ok_or_else(|| anyhow!("prefixItems must be an array"))?;
669            Some(Value::from_iter(
670                prefix_items.iter().map(|_| Value::Bool(true)),
671            ))
672        }
673        None => None,
674    };
675
676    let mut result = Schema::Any;
677    let mut current = HashMap::default();
678    let in_place_applicator_kwds = ["const", "enum", "allOf", "anyOf", "oneOf", "$ref"];
679    for (k, v) in schemadict.iter() {
680        if in_place_applicator_kwds.contains(k) {
681            if !current.is_empty() {
682                if let Some(&types) = schemadict.get("type") {
683                    // Make sure we always give type information to ensure we get the smallest union we can
684                    current.insert("type", types);
685                }
686                let current_schema = compile_contents_simple(ctx, std::mem::take(&mut current))?;
687                result = result.intersect(current_schema, ctx, 0)?;
688            }
689            // Finally apply the applicator
690            result = result.apply((k, v), ctx)?;
691        } else if !META_AND_ANNOTATIONS.contains(k) {
692            current.insert(k, v);
693            if *k == "additionalProperties" && !current.contains_key("properties") {
694                // additionalProperties needs to know about properties
695                // Insert a dummy version of properties into current
696                // (not the real deal, as we don't want to intersect it out of order)
697                if let Some(dummy_props) = &dummy_properties {
698                    current.insert("properties", dummy_props);
699                }
700            }
701            if *k == "items" && !current.contains_key("prefixItems") {
702                // items needs to know about prefixItems
703                // Insert a dummy version of prefixItems into current
704                // (not the real deal, as we don't want to intersect it out of order)
705                if let Some(dummy_itms) = &dummy_prefix_items {
706                    current.insert("prefixItems", dummy_itms);
707                }
708            }
709        }
710    }
711    if !current.is_empty() {
712        if let Some(&types) = schemadict.get("type") {
713            // Make sure we always give type information to ensure we get the smallest union we can
714            current.insert("type", types);
715        }
716        let current_schema = compile_contents_simple(ctx, std::mem::take(&mut current))?;
717        result = result.intersect(current_schema, ctx, 0)?;
718    }
719    Ok(result)
720}
721
722fn compile_contents_simple(ctx: &Context, schemadict: HashMap<&str, &Value>) -> Result<Schema> {
723    if schemadict.is_empty() {
724        Ok(Schema::Any)
725    } else {
726        let types = schemadict.get("type");
727        match types {
728            Some(Value::String(tp)) => compile_type(ctx, tp, &schemadict),
729            Some(Value::Array(types)) => {
730                let options = types
731                    .iter()
732                    .map(|type_value| {
733                        type_value
734                            .as_str()
735                            .ok_or_else(|| anyhow!("type must be a string"))
736                    })
737                    .collect::<Result<Vec<_>>>()?;
738                compile_types(ctx, options, &schemadict)
739            }
740            None => compile_types(ctx, TYPES.to_vec(), &schemadict),
741            Some(_) => {
742                bail!("type must be a string or array of strings");
743            }
744        }
745    }
746}
747
748fn define_ref(ctx: &Context, ref_uri: &str) -> Result<()> {
749    if !ctx.been_seen(ref_uri) {
750        ctx.mark_seen(ref_uri);
751        let resource = ctx.lookup_resource(ref_uri)?;
752        let resolved_schema = compile_resource(ctx, resource)?;
753        ctx.insert_ref(ref_uri, resolved_schema);
754    }
755    Ok(())
756}
757
758fn intersect_ref(
759    ctx: &Context,
760    ref_uri: &str,
761    schema: Schema,
762    ref_first: bool,
763    stack_level: usize,
764) -> Result<Schema> {
765    define_ref(ctx, ref_uri)?;
766    let resolved_schema = ctx
767        .get_ref_cloned(ref_uri)
768        // The ref might not have been defined if we're in a recursive loop and every ref in the loop
769        // has a sibling key.
770        // TODO: add an extra layer of indirection by defining a URI for the current location (e.g. by hashing the serialized sibling schema)
771        // and returning a ref to that URI here to break the loop.
772        .ok_or_else(|| {
773            anyhow!(
774                "circular references with sibling keys are not supported: {}",
775                ref_uri
776            )
777        })?;
778    if ref_first {
779        resolved_schema.intersect(schema, ctx, stack_level + 1)
780    } else {
781        schema.intersect(resolved_schema, ctx, stack_level + 1)
782    }
783}
784
785fn compile_const(instance: &Value) -> Result<Schema> {
786    match instance {
787        Value::Null => Ok(Schema::Null),
788        Value::Bool(b) => Ok(Schema::Boolean(Some(*b))),
789        Value::Number(n) => {
790            let value = n.as_f64().ok_or_else(|| {
791                anyhow!(
792                    "Expected f64 for numeric const, got {}",
793                    limited_str(instance)
794                )
795            })?;
796            Ok(Schema::Number(NumberSchema {
797                minimum: Some(value),
798                maximum: Some(value),
799                exclusive_minimum: None,
800                exclusive_maximum: None,
801                integer: n.is_i64(),
802                multiple_of: None,
803            }))
804        }
805        Value::String(s) => Ok(Schema::String(StringSchema {
806            min_length: 0,
807            max_length: None,
808            regex: Some(RegexAst::Literal(s.to_string())),
809        })),
810        Value::Array(items) => {
811            let prefix_items = items
812                .iter()
813                .map(compile_const)
814                .collect::<Result<Vec<Schema>>>()?;
815            Ok(Schema::Array(ArraySchema {
816                min_items: prefix_items.len(),
817                max_items: Some(prefix_items.len()),
818                prefix_items,
819                items: Some(Box::new(Schema::false_schema())),
820            }))
821        }
822        Value::Object(mapping) => {
823            let properties = mapping
824                .iter()
825                .map(|(k, v)| Ok((k.clone(), compile_const(v)?)))
826                .collect::<Result<IndexMap<String, Schema>>>()?;
827            let required = properties.keys().cloned().collect();
828            Ok(Schema::Object(ObjectSchema {
829                properties,
830                pattern_properties: IndexMap::default(),
831                additional_properties: Some(Box::new(Schema::false_schema())),
832                required,
833                min_properties: 0,
834                max_properties: None,
835            }))
836        }
837    }
838}
839
840fn compile_types(
841    ctx: &Context,
842    types: Vec<&str>,
843    schema: &HashMap<&str, &Value>,
844) -> Result<Schema> {
845    let mut options = Vec::new();
846    for tp in types {
847        let option = compile_type(ctx, tp, schema)?;
848        options.push(option);
849    }
850    if options.len() == 1 {
851        Ok(options.swap_remove(0))
852    } else {
853        Ok(Schema::AnyOf(options))
854    }
855}
856
857fn compile_type(ctx: &Context, tp: &str, schema: &HashMap<&str, &Value>) -> Result<Schema> {
858    ctx.increment()?;
859
860    match tp {
861        "null" => Ok(Schema::Null),
862        "boolean" => Ok(Schema::Boolean(None)),
863        "number" | "integer" => compile_numeric(schema, tp == "integer"),
864        "string" => compile_string(ctx, schema),
865        "array" => compile_array(ctx, schema),
866        "object" => compile_object(ctx, schema),
867        _ => bail!("Invalid type: {}", tp),
868    }
869}
870
871fn compile_numeric(schema: &HashMap<&str, &Value>, integer: bool) -> Result<Schema> {
872    let minimum = schema.get("minimum").copied();
873    let maximum = schema.get("maximum").copied();
874    let exclusive_minimum = schema.get("exclusiveMinimum").copied();
875    let exclusive_maximum = schema.get("exclusiveMaximum").copied();
876    let multiple_of = schema.get("multipleOf").copied();
877
878    let minimum = match minimum {
879        None => None,
880        Some(val) => Some(
881            val.as_f64()
882                .ok_or_else(|| anyhow!("Expected f64 for 'minimum', got {}", limited_str(val)))?,
883        ),
884    };
885    let maximum = match maximum {
886        None => None,
887        Some(val) => Some(
888            val.as_f64()
889                .ok_or_else(|| anyhow!("Expected f64 for 'maximum', got {}", limited_str(val)))?,
890        ),
891    };
892    // TODO: actually use ctx.draft to determine which style of exclusiveMinimum/Maximum to use
893    let exclusive_minimum = match exclusive_minimum {
894        // Draft4-style boolean values
895        None | Some(Value::Bool(false)) => None,
896        Some(Value::Bool(true)) => minimum,
897        // Draft2020-12-style numeric values
898        Some(value) => Some(value.as_f64().ok_or_else(|| {
899            anyhow!(
900                "Expected f64 for 'exclusiveMinimum', got {}",
901                limited_str(value)
902            )
903        })?),
904    };
905    let exclusive_maximum = match exclusive_maximum {
906        // Draft4-style boolean values
907        None | Some(Value::Bool(false)) => None,
908        Some(Value::Bool(true)) => maximum,
909        // Draft2020-12-style numeric values
910        Some(value) => Some(value.as_f64().ok_or_else(|| {
911            anyhow!(
912                "Expected f64 for 'exclusiveMaximum', got {}",
913                limited_str(value)
914            )
915        })?),
916    };
917    let multiple_of = match multiple_of {
918        None => None,
919        Some(val) => {
920            let f = val.as_f64().ok_or_else(|| {
921                anyhow!("Expected f64 for 'multipleOf', got {}", limited_str(val))
922            })?;
923            // Can discard the sign of f
924            Some(Decimal::try_from(f.abs())?)
925        }
926    };
927    Ok(Schema::Number(NumberSchema {
928        minimum,
929        maximum,
930        exclusive_minimum,
931        exclusive_maximum,
932        integer,
933        multiple_of,
934    }))
935}
936
937fn compile_string(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
938    let pattern = schema.get("pattern").copied();
939    let format = schema.get("format").copied();
940
941    let min_length = get_usize(schema, "minLength")?.unwrap_or(0);
942    let max_length = get_usize(schema, "maxLength")?;
943
944    let pattern_rx = match pattern {
945        None => None,
946        Some(val) => Some({
947            let s = val
948                .as_str()
949                .ok_or_else(|| anyhow!("Expected string for 'pattern', got {}", limited_str(val)))?
950                .to_string();
951            RegexAst::SearchRegex(regex_to_lark(&s, "dw"))
952        }),
953    };
954    let format_rx = match format {
955        None => None,
956        Some(val) => {
957            let key = val
958                .as_str()
959                .ok_or_else(|| anyhow!("Expected string for 'format', got {}", limited_str(val)))?
960                .to_string();
961
962            if let Some(fmt) = lookup_format(&key) {
963                Some(RegexAst::Regex(fmt.to_string()))
964            } else {
965                let msg = format!("Unknown format: {}", key);
966                if ctx.options.lenient {
967                    ctx.record_warning(msg);
968                    None
969                } else {
970                    bail!(msg);
971                }
972            }
973        }
974    };
975    let regex = match (pattern_rx, format_rx) {
976        (None, None) => None,
977        (None, Some(fmt)) => Some(fmt),
978        (Some(pat), None) => Some(pat),
979        (Some(pat), Some(fmt)) => Some(RegexAst::And(vec![pat, fmt])),
980    };
981    Ok(Schema::String(StringSchema {
982        min_length,
983        max_length,
984        regex,
985    }))
986}
987
988fn compile_array(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
989    let min_items = get_usize(schema, "minItems")?.unwrap_or(0);
990    let max_items = get_usize(schema, "maxItems")?;
991    let prefix_items = schema.get("prefixItems").copied();
992    let items = schema.get("items").copied();
993    let additional_items = schema.get("additionalItems").copied();
994
995    let (prefix_items, items) = {
996        // Note that draft detection falls back to Draft202012 if the draft is unknown, so let's relax the draft constraint a bit
997        // and assume we're in an old draft if additionalItems is present or items is an array
998        if ctx.draft <= Draft::Draft201909
999            || additional_items.is_some()
1000            || matches!(items, Some(Value::Array(..)))
1001        {
1002            match (items, additional_items) {
1003                // Treat array items as prefixItems and additionalItems as items in draft 2019-09 and earlier
1004                (Some(Value::Array(..)), _) => (items, additional_items),
1005                // items is treated as items, and additionalItems is ignored if items is not an array (or is missing)
1006                _ => (None, items),
1007            }
1008        } else {
1009            (prefix_items, items)
1010        }
1011    };
1012    let prefix_items = match prefix_items {
1013        None => vec![],
1014        Some(val) => val
1015            .as_array()
1016            .ok_or_else(|| anyhow!("Expected array for 'prefixItems', got {}", limited_str(val)))?
1017            .iter()
1018            .map(|item| compile_resource(ctx, ctx.as_resource_ref(item)))
1019            .collect::<Result<Vec<Schema>>>()?,
1020    };
1021    let items = match items {
1022        None => None,
1023        Some(val) => Some(Box::new(compile_resource(ctx, ctx.as_resource_ref(val))?)),
1024    };
1025    Ok(Schema::Array(ArraySchema {
1026        min_items,
1027        max_items,
1028        prefix_items,
1029        items,
1030    }))
1031}
1032
1033fn compile_prop_map(
1034    ctx: &Context,
1035    lbl: &str,
1036    prop_map: Option<&Value>,
1037) -> Result<IndexMap<String, Schema>> {
1038    match prop_map {
1039        None => Ok(IndexMap::new()),
1040        Some(val) => val
1041            .as_object()
1042            .ok_or_else(|| anyhow!("Expected object for '{lbl}', got {}", limited_str(val)))?
1043            .iter()
1044            .map(|(k, v)| compile_resource(ctx, ctx.as_resource_ref(v)).map(|v| (k.clone(), v)))
1045            .collect(),
1046    }
1047}
1048
1049fn get_usize(schema: &HashMap<&str, &Value>, name: &str) -> Result<Option<usize>> {
1050    if let Some(val) = schema.get(name) {
1051        if let Some(val) = val.as_u64() {
1052            ensure!(
1053                val <= usize::MAX as u64,
1054                "Value {val} for '{name}' is too large"
1055            );
1056            Ok(Some(val as usize))
1057        } else {
1058            bail!(
1059                "Expected positive integer for '{name}', got {}",
1060                limited_str(val)
1061            )
1062        }
1063    } else {
1064        Ok(None)
1065    }
1066}
1067
1068fn compile_object(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
1069    let properties = schema.get("properties").copied();
1070    let pattern_properties = schema.get("patternProperties").copied();
1071    let additional_properties = schema.get("additionalProperties").copied();
1072    let required = schema.get("required").copied();
1073    let min_properties = get_usize(schema, "minProperties")?.unwrap_or(0);
1074    let max_properties = get_usize(schema, "maxProperties")?;
1075
1076    let properties = compile_prop_map(ctx, "properties", properties)?;
1077    let pattern_properties = compile_prop_map(ctx, "patternProperties", pattern_properties)?;
1078    ctx.check_disjoint_pattern_properties(&pattern_properties.keys().collect::<Vec<_>>())?;
1079    let additional_properties = match additional_properties {
1080        None => None,
1081        Some(val) => Some(Box::new(compile_resource(ctx, ctx.as_resource_ref(val))?)),
1082    };
1083    let required = match required {
1084        None => IndexSet::new(),
1085        Some(val) => val
1086            .as_array()
1087            .ok_or_else(|| anyhow!("Expected array for 'required', got {}", limited_str(val)))?
1088            .iter()
1089            .map(|item| {
1090                item.as_str()
1091                    .ok_or_else(|| {
1092                        anyhow!(
1093                            "Expected string for 'required' item, got {}",
1094                            limited_str(item)
1095                        )
1096                    })
1097                    .map(|s| s.to_string())
1098            })
1099            .collect::<Result<IndexSet<String>>>()?,
1100    };
1101
1102    Ok(mk_object_schema(ObjectSchema {
1103        properties,
1104        pattern_properties,
1105        additional_properties,
1106        required,
1107        min_properties,
1108        max_properties,
1109    }))
1110}
1111
1112fn mk_object_schema(obj: ObjectSchema) -> Schema {
1113    if let Some(max) = obj.max_properties {
1114        if obj.min_properties > max {
1115            return Schema::unsat("minProperties > maxProperties");
1116        }
1117    }
1118    if obj.required.len() > obj.max_properties.unwrap_or(usize::MAX) {
1119        return Schema::unsat("required > maxProperties");
1120    }
1121
1122    Schema::Object(obj)
1123}
1124
1125fn opt_max<T: PartialOrd>(a: Option<T>, b: Option<T>) -> Option<T> {
1126    match (a, b) {
1127        (Some(a), Some(b)) => {
1128            if a >= b {
1129                Some(a)
1130            } else {
1131                Some(b)
1132            }
1133        }
1134        (Some(a), None) => Some(a),
1135        (None, Some(b)) => Some(b),
1136        (None, None) => None,
1137    }
1138}
1139
1140fn opt_min<T: PartialOrd>(a: Option<T>, b: Option<T>) -> Option<T> {
1141    match (a, b) {
1142        (Some(a), Some(b)) => {
1143            if a <= b {
1144                Some(a)
1145            } else {
1146                Some(b)
1147            }
1148        }
1149        (Some(a), None) => Some(a),
1150        (None, Some(b)) => Some(b),
1151        (None, None) => None,
1152    }
1153}
1154
1155#[cfg(all(test, feature = "referencing"))]
1156mod test_retriever {
1157    use crate::json::{Retrieve, RetrieveWrapper};
1158    use crate::JsonCompileOptions;
1159
1160    use super::{build_schema, Schema, StringSchema};
1161    use serde_json::{json, Value};
1162    use std::{fmt, sync::Arc};
1163
1164    #[derive(Debug, Clone)]
1165    struct TestRetrieverError(String);
1166    impl fmt::Display for TestRetrieverError {
1167        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1168            write!(f, "Could not retrieve URI: {}", self.0)
1169        }
1170    }
1171    impl std::error::Error for TestRetrieverError {}
1172
1173    struct TestRetriever {
1174        schemas: std::collections::HashMap<String, serde_json::Value>,
1175    }
1176    impl Retrieve for TestRetriever {
1177        fn retrieve(&self, uri: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
1178            let key = uri;
1179            match self.schemas.get(key) {
1180                Some(schema) => Ok(schema.clone()),
1181                None => Err(Box::new(TestRetrieverError(key.to_string()))),
1182            }
1183        }
1184    }
1185
1186    #[test]
1187    fn test_retriever() {
1188        let key: &str = "http://example.com/schema";
1189
1190        let schema = json!({
1191            "$ref": key
1192        });
1193        let retriever = TestRetriever {
1194            schemas: vec![(
1195                key.to_string(),
1196                json!({
1197                    "type": "string"
1198                }),
1199            )]
1200            .into_iter()
1201            .collect(),
1202        };
1203        let wrapper = RetrieveWrapper::new(Arc::new(retriever));
1204        let options = JsonCompileOptions {
1205            retriever: Some(wrapper.clone()),
1206            ..Default::default()
1207        };
1208        let r = build_schema(schema, &options).unwrap();
1209        let schema = r.schema;
1210        let defs = r.definitions;
1211        match schema {
1212            Schema::Ref(uri) => {
1213                assert_eq!(uri, key);
1214            }
1215            _ => panic!("Unexpected schema: {:?}", schema),
1216        }
1217        assert_eq!(defs.len(), 1);
1218        let val = defs.get(key).unwrap();
1219        // poor-man's partial_eq
1220        match val {
1221            Schema::String(StringSchema {
1222                min_length: 0,
1223                max_length: None,
1224                regex: None,
1225            }) => {}
1226            _ => panic!("Unexpected schema: {:?}", val),
1227        }
1228    }
1229}
1230
1231#[cfg(test)]
1232mod tests {
1233    use super::*;
1234    use serde_json::json;
1235
1236    #[test]
1237    fn test_problem_child() {
1238        let schema = json!({
1239            "allOf" : [
1240                {"$ref": "#/$defs/tree1"},
1241                {"$ref": "#/$defs/tree2"}
1242            ],
1243            "$defs" : {
1244                "tree1": {
1245                    "type": "object",
1246                    "properties": {
1247                        "child": {
1248                            "$ref": "#/$defs/tree1"
1249                        }
1250                    }
1251                },
1252                "tree2": {
1253                    "type": "object",
1254                    "properties": {
1255                        "child": {
1256                            "$ref": "#/$defs/tree2"
1257                        }
1258                    }
1259                }
1260            }
1261        });
1262        // Test failure amounts to this resulting in a stack overflow
1263        let options = JsonCompileOptions::default();
1264        let _ = build_schema(schema, &options);
1265    }
1266}