1use crate::{regex_to_lark, HashMap, JsonCompileOptions};
2use anyhow::{anyhow, bail, ensure, Result};
3use derivre::RegexAst;
4use indexmap::{IndexMap, IndexSet};
5use serde_json::Value;
6use std::mem;
7
8use super::context::{Context, Draft, PreContext, ResourceRef};
9use super::formats::lookup_format;
10use super::numeric::Decimal;
11use super::shared_context::BuiltSchema;
12
13const TYPES: [&str; 6] = ["null", "boolean", "number", "string", "array", "object"];
14
15pub(crate) const IMPLEMENTED: [&str; 27] = [
17 "anyOf",
19 "oneOf",
20 "allOf",
21 "$ref",
22 "const",
23 "enum",
24 "type",
25 "items",
27 "additionalItems",
28 "prefixItems",
29 "minItems",
30 "maxItems",
31 "properties",
33 "additionalProperties",
34 "patternProperties",
35 "required",
36 "minProperties",
37 "maxProperties",
38 "minLength",
40 "maxLength",
41 "pattern",
42 "format",
43 "minimum",
45 "maximum",
46 "exclusiveMinimum",
47 "exclusiveMaximum",
48 "multipleOf",
49];
50
51pub(crate) const META_AND_ANNOTATIONS: [&str; 15] = [
55 "$anchor",
56 "$defs",
57 "definitions",
58 "$schema",
59 "$id",
60 "id",
61 "$comment",
62 "title",
63 "description",
64 "default",
65 "readOnly",
66 "writeOnly",
67 "examples",
68 "contentMediaType",
69 "contentEncoding",
70];
71
72fn limited_str(node: &Value) -> String {
73 let s = node.to_string();
74 if s.len() > 100 {
75 format!("{}...", &s[..100])
76 } else {
77 s
78 }
79}
80
81#[derive(Debug, Clone)]
82pub enum Schema {
83 Any,
84 Unsatisfiable(String),
85 Null,
86 Number(NumberSchema),
87 String(StringSchema),
88 Array(ArraySchema),
89 Object(ObjectSchema),
90 Boolean(Option<bool>),
91 AnyOf(Vec<Schema>),
92 OneOf(Vec<Schema>),
93 Ref(String),
94}
95
96#[derive(Debug, Clone, Default)]
97pub struct NumberSchema {
98 pub minimum: Option<f64>,
99 pub maximum: Option<f64>,
100 pub exclusive_minimum: Option<f64>,
101 pub exclusive_maximum: Option<f64>,
102 pub integer: bool,
103 pub multiple_of: Option<Decimal>,
104}
105
106impl NumberSchema {
107 pub fn get_minimum(&self) -> (Option<f64>, bool) {
108 match (self.minimum, self.exclusive_minimum) {
109 (Some(min), Some(xmin)) => {
110 if xmin >= min {
111 (Some(xmin), true)
112 } else {
113 (Some(min), false)
114 }
115 }
116 (Some(min), None) => (Some(min), false),
117 (None, Some(xmin)) => (Some(xmin), true),
118 (None, None) => (None, false),
119 }
120 }
121
122 pub fn get_maximum(&self) -> (Option<f64>, bool) {
123 match (self.maximum, self.exclusive_maximum) {
124 (Some(max), Some(xmax)) => {
125 if xmax <= max {
126 (Some(xmax), true)
127 } else {
128 (Some(max), false)
129 }
130 }
131 (Some(max), None) => (Some(max), false),
132 (None, Some(xmax)) => (Some(xmax), true),
133 (None, None) => (None, false),
134 }
135 }
136}
137
138#[derive(Debug, Clone)]
139pub struct StringSchema {
140 pub min_length: usize,
141 pub max_length: Option<usize>,
142 pub regex: Option<RegexAst>,
143}
144
145#[derive(Debug, Clone)]
146pub struct ArraySchema {
147 pub min_items: usize,
148 pub max_items: Option<usize>,
149 pub prefix_items: Vec<Schema>,
150 pub items: Option<Box<Schema>>,
151}
152
153#[derive(Debug, Clone)]
154pub struct ObjectSchema {
155 pub properties: IndexMap<String, Schema>,
156 pub pattern_properties: IndexMap<String, Schema>,
157 pub additional_properties: Option<Box<Schema>>,
158 pub required: IndexSet<String>,
159 pub min_properties: usize,
160 pub max_properties: Option<usize>,
161}
162
163pub trait OptSchemaExt {
164 fn schema(&self) -> Schema;
165 fn schema_ref(&self) -> &Schema;
166}
167
168impl OptSchemaExt for Option<Box<Schema>> {
169 fn schema(&self) -> Schema {
170 match self {
171 Some(schema) => schema.as_ref().clone(),
172 None => Schema::Any,
173 }
174 }
175
176 fn schema_ref(&self) -> &Schema {
177 match self {
178 Some(schema) => schema.as_ref(),
179 None => &Schema::Any,
180 }
181 }
182}
183
184impl Schema {
185 pub fn unsat(reason: &str) -> Schema {
186 Schema::Unsatisfiable(reason.to_string())
187 }
188
189 pub fn false_schema() -> Schema {
190 Self::unsat("schema is false")
191 }
192
193 pub fn any_box() -> Option<Box<Schema>> {
194 Some(Box::new(Schema::Any))
195 }
196
197 pub fn is_unsat(&self) -> bool {
198 matches!(self, Schema::Unsatisfiable(_))
199 }
200
201 fn normalize(self, ctx: &Context) -> Schema {
203 match self {
204 Schema::AnyOf(options) => {
205 let mut unsats = Vec::new();
206 let mut valid = Vec::new();
207 for option in options.into_iter() {
208 match option {
209 Schema::Any => {
210 return Schema::Any;
211 }
212 Schema::Unsatisfiable(reason) => unsats.push(Schema::Unsatisfiable(reason)),
213 Schema::AnyOf(nested) => valid.extend(nested),
214 other => valid.push(other),
215 }
216 }
217 if valid.is_empty() {
218 if let Some(unsat) = unsats.into_iter().next() {
220 return unsat;
221 }
222 return Schema::unsat("anyOf is empty");
224 }
225 if valid.len() == 1 {
226 return valid.swap_remove(0);
228 }
229 Schema::AnyOf(valid)
230 }
231 Schema::OneOf(options) => {
232 let mut unsats = Vec::new();
233 let mut valid = Vec::new();
234 for option in options.into_iter() {
235 match option {
236 Schema::Unsatisfiable(reason) => unsats.push(Schema::Unsatisfiable(reason)),
237 Schema::OneOf(nested) => valid.extend(nested),
239 other => valid.push(other),
240 }
241 }
242 if valid.is_empty() {
243 if let Some(unsat) = unsats.into_iter().next() {
245 return unsat;
246 }
247 return Schema::unsat("oneOf is empty");
249 }
250 if valid.len() == 1 {
251 return valid.swap_remove(0);
253 }
254 if valid.iter().enumerate().all(|(i, x)| {
255 valid
256 .iter()
257 .skip(i + 1) .all(|y| x.is_verifiably_disjoint_from(y, ctx))
259 }) {
260 Schema::AnyOf(valid)
261 } else {
262 Schema::OneOf(valid)
263 }
264 }
265 other_schema => other_schema,
266 }
267 }
268
269 fn intersect(self, other: Schema, ctx: &Context, stack_level: usize) -> Result<Schema> {
271 ctx.increment()?;
272 if stack_level > ctx.options.max_stack_level {
273 bail!("Schema intersection stack level exceeded");
274 }
275
276 let merged = match (self, other) {
277 (Schema::Any, schema1) => schema1,
278 (schema0, Schema::Any) => schema0,
279 (Schema::Unsatisfiable(reason), _) => Schema::Unsatisfiable(reason),
280 (_, Schema::Unsatisfiable(reason)) => Schema::Unsatisfiable(reason),
281 (Schema::Ref(uri), schema1) => {
282 intersect_ref(ctx, &uri, schema1, true, stack_level + 1)?
283 }
284 (schema0, Schema::Ref(uri)) => {
285 intersect_ref(ctx, &uri, schema0, false, stack_level + 1)?
286 }
287 (Schema::OneOf(options), schema1) => Schema::OneOf(
288 options
289 .into_iter()
290 .map(|opt| opt.intersect(schema1.clone(), ctx, stack_level + 1))
291 .collect::<Result<Vec<_>>>()?,
292 ),
293
294 (schema0, Schema::OneOf(options)) => Schema::OneOf(
295 options
296 .into_iter()
297 .map(|opt| schema0.clone().intersect(opt, ctx, stack_level + 1))
298 .collect::<Result<Vec<_>>>()?,
299 ),
300 (Schema::AnyOf(options), schema1) => Schema::AnyOf(
301 options
302 .into_iter()
303 .map(|opt| opt.intersect(schema1.clone(), ctx, stack_level + 1))
304 .collect::<Result<Vec<_>>>()?,
305 ),
306 (schema0, Schema::AnyOf(options)) => Schema::AnyOf(
307 options
308 .into_iter()
309 .map(|opt| schema0.clone().intersect(opt, ctx, stack_level + 1))
310 .collect::<Result<Vec<_>>>()?,
311 ),
312 (Schema::Null, Schema::Null) => Schema::Null,
313 (Schema::Boolean(value1), Schema::Boolean(value2)) => {
314 if value1 == value2 || value2.is_none() {
315 Schema::Boolean(value1)
316 } else if value1.is_none() {
317 Schema::Boolean(value2)
318 } else {
319 Schema::unsat("incompatible boolean values")
320 }
321 }
322
323 (Schema::Number(n1), Schema::Number(n2)) => Schema::Number(NumberSchema {
324 minimum: opt_max(n1.minimum, n2.minimum),
325 maximum: opt_min(n1.maximum, n2.maximum),
326 exclusive_minimum: opt_max(n1.exclusive_minimum, n2.exclusive_minimum),
327 exclusive_maximum: opt_min(n1.exclusive_maximum, n2.exclusive_maximum),
328 integer: n1.integer || n2.integer,
329 multiple_of: match (n1.multiple_of, n2.multiple_of) {
330 (None, None) => None,
331 (None, Some(m)) | (Some(m), None) => Some(m),
332 (Some(m1), Some(m2)) => Some(m1.lcm(&m2)),
333 },
334 }),
335
336 (Schema::String(s1), Schema::String(s2)) => Schema::String(StringSchema {
337 min_length: s1.min_length.max(s2.min_length),
338 max_length: opt_min(s1.max_length, s2.max_length),
339 regex: match (s1.regex, s2.regex) {
340 (None, None) => None,
341 (None, Some(r)) | (Some(r), None) => Some(r),
342 (Some(r1), Some(r2)) => Some(RegexAst::And(vec![r1, r2])),
343 },
344 }),
345
346 (Schema::Array(mut a1), Schema::Array(mut a2)) => Schema::Array(ArraySchema {
347 min_items: a1.min_items.max(a2.min_items),
348 max_items: opt_min(a1.max_items, a2.max_items),
349 prefix_items: {
350 let len = a1.prefix_items.len().max(a2.prefix_items.len());
351 a1.prefix_items.resize_with(len, || a1.items.schema());
352 a2.prefix_items.resize_with(len, || a2.items.schema());
353 a1.prefix_items
354 .into_iter()
355 .zip(a2.prefix_items.into_iter())
356 .map(|(item1, item2)| item1.intersect(item2, ctx, stack_level + 1))
357 .collect::<Result<Vec<_>>>()?
358 },
359 items: match (a1.items, a2.items) {
360 (None, None) => None,
361 (None, Some(item)) | (Some(item), None) => Some(item),
362 (Some(item1), Some(item2)) => {
363 Some(Box::new(item1.intersect(*item2, ctx, stack_level + 1)?))
364 }
365 },
366 }),
367
368 (Schema::Object(mut o1), Schema::Object(mut o2)) => {
369 let mut properties = IndexMap::new();
370 for (key, prop1) in std::mem::take(&mut o1.properties).into_iter() {
371 let prop2 = ctx.property_schema(&o2, &key)?;
372 properties.insert(key, prop1.intersect(prop2.clone(), ctx, stack_level + 1)?);
373 }
374 for (key, prop2) in o2.properties.into_iter() {
375 if properties.contains_key(&key) {
376 continue;
377 }
378 let prop1 = ctx.property_schema(&o1, &key)?;
379 properties.insert(key, prop1.clone().intersect(prop2, ctx, stack_level + 1)?);
380 }
381 let mut required = o1.required;
382 required.extend(o2.required);
383
384 let mut pattern_properties = IndexMap::new();
385 for (key, prop1) in o1.pattern_properties.into_iter() {
386 if let Some(prop2) = o2.pattern_properties.get_mut(&key) {
387 let prop2 = std::mem::replace(prop2, Schema::Null);
388 pattern_properties.insert(
389 key.clone(),
390 prop1.intersect(prop2.clone(), ctx, stack_level + 1)?,
391 );
392 } else {
393 pattern_properties.insert(key.clone(), prop1);
394 }
395 }
396 for (key, prop2) in o2.pattern_properties.into_iter() {
397 if pattern_properties.contains_key(&key) {
398 continue;
399 }
400 pattern_properties.insert(key.clone(), prop2);
401 }
402
403 let keys = pattern_properties.keys().collect::<Vec<_>>();
404 if !keys.is_empty() {
405 ctx.check_disjoint_pattern_properties(&keys)?;
406 }
407
408 let additional_properties =
409 match (o1.additional_properties, o2.additional_properties) {
410 (None, None) => None,
411 (None, Some(p)) | (Some(p), None) => Some(p),
412 (Some(p1), Some(p2)) => {
413 Some(Box::new((*p1).intersect(*p2, ctx, stack_level + 1)?))
414 }
415 };
416
417 let min_properties = o1.min_properties.max(o2.min_properties);
418 let max_properties = opt_min(o1.max_properties, o2.max_properties);
419
420 mk_object_schema(ObjectSchema {
421 properties,
422 pattern_properties,
423 additional_properties,
424 required,
425 min_properties,
426 max_properties,
427 })
428 }
429
430 _ => Schema::unsat("incompatible types"),
432 };
433 Ok(merged.normalize(ctx))
434 }
435
436 fn is_verifiably_disjoint_from(&self, other: &Schema, ctx: &Context) -> bool {
437 match (self, other) {
438 (Schema::Unsatisfiable(_), _) => true,
439 (_, Schema::Unsatisfiable(_)) => true,
440 (Schema::Any, _) => false,
441 (_, Schema::Any) => false,
442 (Schema::Ref(_), _) => false, (_, Schema::Ref(_)) => false, (Schema::Boolean(value1), Schema::Boolean(value2)) => {
445 value1.is_some() && value2.is_some() && value1 != value2
446 }
447 (Schema::AnyOf(options), _) => options
448 .iter()
449 .all(|opt| opt.is_verifiably_disjoint_from(other, ctx)),
450 (_, Schema::AnyOf(options)) => options
451 .iter()
452 .all(|opt| self.is_verifiably_disjoint_from(opt, ctx)),
453 (Schema::OneOf(options), _) => options
454 .iter()
455 .all(|opt| opt.is_verifiably_disjoint_from(other, ctx)),
456 (_, Schema::OneOf(options)) => options
457 .iter()
458 .all(|opt| self.is_verifiably_disjoint_from(opt, ctx)),
459 (
461 Schema::String(StringSchema {
462 regex: Some(RegexAst::Literal(lit1)),
463 ..
464 }),
465 Schema::String(StringSchema {
466 regex: Some(RegexAst::Literal(lit2)),
467 ..
468 }),
469 ) => lit1 != lit2,
470 (Schema::Object(o1), Schema::Object(o2)) => {
471 o1.required.union(&o2.required).any(|key| {
472 let prop1 = ctx.property_schema(o1, key).unwrap_or(&Schema::Any);
473 let prop2 = ctx.property_schema(o2, key).unwrap_or(&Schema::Any);
474 prop1.is_verifiably_disjoint_from(prop2, ctx)
475 })
476 }
477 _ => {
478 mem::discriminant(self) != mem::discriminant(other)
480 }
481 }
482 }
483
484 fn apply(self, applicator: (&str, &Value), ctx: &Context) -> Result<Schema> {
485 let mut result = self;
486 let (k, v) = applicator;
487 match k {
488 "const" => {
490 let schema = compile_const(v)?;
491 result = result.intersect(schema, ctx, 0)?
492 }
493 "enum" => {
494 let instances = v
495 .as_array()
496 .ok_or_else(|| anyhow!("enum must be an array"))?;
497 let options = instances
498 .iter()
499 .map(compile_const)
500 .collect::<Result<Vec<_>>>()?;
501 result = result.intersect(Schema::AnyOf(options), ctx, 0)?;
502 }
503 "allOf" => {
504 let all_of = v
505 .as_array()
506 .ok_or_else(|| anyhow!("allOf must be an array"))?;
507 for value in all_of {
508 let schema = compile_resource(ctx, ctx.as_resource_ref(value))?;
509 result = result.intersect(schema, ctx, 0)?;
510 }
511 }
512 "anyOf" => {
513 let any_of = v
514 .as_array()
515 .ok_or_else(|| anyhow!("anyOf must be an array"))?;
516 let options = any_of
517 .iter()
518 .map(|value| compile_resource(ctx, ctx.as_resource_ref(value)))
519 .collect::<Result<Vec<_>>>()?;
520 result = result.intersect(Schema::AnyOf(options), ctx, 0)?;
521 }
522 "oneOf" => {
523 let one_of = v
524 .as_array()
525 .ok_or_else(|| anyhow!("oneOf must be an array"))?;
526 let options = one_of
527 .iter()
528 .map(|value| compile_resource(ctx, ctx.as_resource_ref(value)))
529 .collect::<Result<Vec<_>>>()?;
530 result = result.intersect(Schema::OneOf(options), ctx, 0)?;
531 }
532 "$ref" => {
533 let reference = v
534 .as_str()
535 .ok_or_else(|| anyhow!("$ref must be a string, got {}", limited_str(v)))?
536 .to_string();
537 let uri: String = ctx.normalize_ref(&reference)?;
538 if matches!(result, Schema::Any) {
539 define_ref(ctx, &uri)?;
540 result = Schema::Ref(uri);
541 } else {
542 result = intersect_ref(ctx, &uri, result, false, 0)?;
543 }
544 }
545 _ => bail!("Unknown applicator: {}", applicator.0),
546 };
547 Ok(result)
548 }
549}
550
551#[derive(Clone)]
552pub struct SchemaBuilderOptions {
553 pub max_size: usize,
554 pub max_stack_level: usize,
555 pub lenient: bool,
556}
557
558impl Default for SchemaBuilderOptions {
559 fn default() -> Self {
560 SchemaBuilderOptions {
561 max_size: 50_000,
562 max_stack_level: 128, lenient: false,
564 }
565 }
566}
567
568pub fn build_schema(contents: Value, options: &JsonCompileOptions) -> Result<BuiltSchema> {
569 if let Some(b) = contents.as_bool() {
570 let s = if b {
571 Schema::Any
572 } else {
573 Schema::false_schema()
574 };
575 return Ok(BuiltSchema::simple(s));
576 }
577
578 let pre_ctx = PreContext::new(contents, options.retriever.clone())?;
579 let mut ctx = Context::new(&pre_ctx)?;
580
581 ctx.options.lenient = options.lenient;
582
583 let root_resource = ctx.lookup_resource(&pre_ctx.base_uri)?;
584 let schema = compile_resource(&ctx, root_resource)?;
585 Ok(ctx.into_result(schema))
586}
587
588fn compile_resource(ctx: &Context, resource: ResourceRef) -> Result<Schema> {
589 let ctx = ctx.in_subresource(resource)?;
590 compile_contents(&ctx, resource.contents())
591}
592
593fn compile_contents(ctx: &Context, contents: &Value) -> Result<Schema> {
594 compile_contents_inner(ctx, contents).map(|schema| schema.normalize(ctx))
595}
596
597fn compile_contents_inner(ctx: &Context, contents: &Value) -> Result<Schema> {
598 if let Some(b) = contents.as_bool() {
599 if b {
600 return Ok(Schema::Any);
601 } else {
602 return Ok(Schema::false_schema());
603 }
604 }
605
606 let schemadict = contents
609 .as_object()
610 .ok_or_else(|| anyhow!("schema must be an object or boolean"))?;
611
612 let schemadict = schemadict
614 .iter()
615 .map(|(k, v)| (k.as_str(), v))
616 .collect::<IndexMap<_, _>>();
617
618 compile_contents_map(ctx, schemadict)
619}
620
621fn compile_contents_map(ctx: &Context, schemadict: IndexMap<&str, &Value>) -> Result<Schema> {
622 ctx.increment()?;
623
624 if schemadict
626 .keys()
627 .all(|k| META_AND_ANNOTATIONS.contains(k) || !ctx.draft.is_known_keyword(k))
628 {
629 return Ok(Schema::Any);
630 }
631
632 let mut unimplemented_keys = schemadict
634 .keys()
635 .filter(|k| !ctx.is_valid_keyword(k))
636 .collect::<Vec<_>>();
637 if !unimplemented_keys.is_empty() {
638 unimplemented_keys.sort();
640 let msg = format!("Unimplemented keys: {:?}", unimplemented_keys);
641 if ctx.options.lenient {
642 ctx.record_warning(msg);
643 } else {
644 bail!(msg);
645 }
646 }
647
648 let dummy_properties = match schemadict.get("properties") {
652 Some(properties) => {
653 let properties = properties
654 .as_object()
655 .ok_or_else(|| anyhow!("properties must be an object"))?;
656 Some(Value::from_iter(
657 properties
658 .iter()
659 .map(|(k, _)| (k.as_str(), Value::Bool(true))),
660 ))
661 }
662 None => None,
663 };
664 let dummy_prefix_items = match schemadict.get("prefixItems") {
665 Some(prefix_items) => {
666 let prefix_items = prefix_items
667 .as_array()
668 .ok_or_else(|| anyhow!("prefixItems must be an array"))?;
669 Some(Value::from_iter(
670 prefix_items.iter().map(|_| Value::Bool(true)),
671 ))
672 }
673 None => None,
674 };
675
676 let mut result = Schema::Any;
677 let mut current = HashMap::default();
678 let in_place_applicator_kwds = ["const", "enum", "allOf", "anyOf", "oneOf", "$ref"];
679 for (k, v) in schemadict.iter() {
680 if in_place_applicator_kwds.contains(k) {
681 if !current.is_empty() {
682 if let Some(&types) = schemadict.get("type") {
683 current.insert("type", types);
685 }
686 let current_schema = compile_contents_simple(ctx, std::mem::take(&mut current))?;
687 result = result.intersect(current_schema, ctx, 0)?;
688 }
689 result = result.apply((k, v), ctx)?;
691 } else if !META_AND_ANNOTATIONS.contains(k) {
692 current.insert(k, v);
693 if *k == "additionalProperties" && !current.contains_key("properties") {
694 if let Some(dummy_props) = &dummy_properties {
698 current.insert("properties", dummy_props);
699 }
700 }
701 if *k == "items" && !current.contains_key("prefixItems") {
702 if let Some(dummy_itms) = &dummy_prefix_items {
706 current.insert("prefixItems", dummy_itms);
707 }
708 }
709 }
710 }
711 if !current.is_empty() {
712 if let Some(&types) = schemadict.get("type") {
713 current.insert("type", types);
715 }
716 let current_schema = compile_contents_simple(ctx, std::mem::take(&mut current))?;
717 result = result.intersect(current_schema, ctx, 0)?;
718 }
719 Ok(result)
720}
721
722fn compile_contents_simple(ctx: &Context, schemadict: HashMap<&str, &Value>) -> Result<Schema> {
723 if schemadict.is_empty() {
724 Ok(Schema::Any)
725 } else {
726 let types = schemadict.get("type");
727 match types {
728 Some(Value::String(tp)) => compile_type(ctx, tp, &schemadict),
729 Some(Value::Array(types)) => {
730 let options = types
731 .iter()
732 .map(|type_value| {
733 type_value
734 .as_str()
735 .ok_or_else(|| anyhow!("type must be a string"))
736 })
737 .collect::<Result<Vec<_>>>()?;
738 compile_types(ctx, options, &schemadict)
739 }
740 None => compile_types(ctx, TYPES.to_vec(), &schemadict),
741 Some(_) => {
742 bail!("type must be a string or array of strings");
743 }
744 }
745 }
746}
747
748fn define_ref(ctx: &Context, ref_uri: &str) -> Result<()> {
749 if !ctx.been_seen(ref_uri) {
750 ctx.mark_seen(ref_uri);
751 let resource = ctx.lookup_resource(ref_uri)?;
752 let resolved_schema = compile_resource(ctx, resource)?;
753 ctx.insert_ref(ref_uri, resolved_schema);
754 }
755 Ok(())
756}
757
758fn intersect_ref(
759 ctx: &Context,
760 ref_uri: &str,
761 schema: Schema,
762 ref_first: bool,
763 stack_level: usize,
764) -> Result<Schema> {
765 define_ref(ctx, ref_uri)?;
766 let resolved_schema = ctx
767 .get_ref_cloned(ref_uri)
768 .ok_or_else(|| {
773 anyhow!(
774 "circular references with sibling keys are not supported: {}",
775 ref_uri
776 )
777 })?;
778 if ref_first {
779 resolved_schema.intersect(schema, ctx, stack_level + 1)
780 } else {
781 schema.intersect(resolved_schema, ctx, stack_level + 1)
782 }
783}
784
785fn compile_const(instance: &Value) -> Result<Schema> {
786 match instance {
787 Value::Null => Ok(Schema::Null),
788 Value::Bool(b) => Ok(Schema::Boolean(Some(*b))),
789 Value::Number(n) => {
790 let value = n.as_f64().ok_or_else(|| {
791 anyhow!(
792 "Expected f64 for numeric const, got {}",
793 limited_str(instance)
794 )
795 })?;
796 Ok(Schema::Number(NumberSchema {
797 minimum: Some(value),
798 maximum: Some(value),
799 exclusive_minimum: None,
800 exclusive_maximum: None,
801 integer: n.is_i64(),
802 multiple_of: None,
803 }))
804 }
805 Value::String(s) => Ok(Schema::String(StringSchema {
806 min_length: 0,
807 max_length: None,
808 regex: Some(RegexAst::Literal(s.to_string())),
809 })),
810 Value::Array(items) => {
811 let prefix_items = items
812 .iter()
813 .map(compile_const)
814 .collect::<Result<Vec<Schema>>>()?;
815 Ok(Schema::Array(ArraySchema {
816 min_items: prefix_items.len(),
817 max_items: Some(prefix_items.len()),
818 prefix_items,
819 items: Some(Box::new(Schema::false_schema())),
820 }))
821 }
822 Value::Object(mapping) => {
823 let properties = mapping
824 .iter()
825 .map(|(k, v)| Ok((k.clone(), compile_const(v)?)))
826 .collect::<Result<IndexMap<String, Schema>>>()?;
827 let required = properties.keys().cloned().collect();
828 Ok(Schema::Object(ObjectSchema {
829 properties,
830 pattern_properties: IndexMap::default(),
831 additional_properties: Some(Box::new(Schema::false_schema())),
832 required,
833 min_properties: 0,
834 max_properties: None,
835 }))
836 }
837 }
838}
839
840fn compile_types(
841 ctx: &Context,
842 types: Vec<&str>,
843 schema: &HashMap<&str, &Value>,
844) -> Result<Schema> {
845 let mut options = Vec::new();
846 for tp in types {
847 let option = compile_type(ctx, tp, schema)?;
848 options.push(option);
849 }
850 if options.len() == 1 {
851 Ok(options.swap_remove(0))
852 } else {
853 Ok(Schema::AnyOf(options))
854 }
855}
856
857fn compile_type(ctx: &Context, tp: &str, schema: &HashMap<&str, &Value>) -> Result<Schema> {
858 ctx.increment()?;
859
860 match tp {
861 "null" => Ok(Schema::Null),
862 "boolean" => Ok(Schema::Boolean(None)),
863 "number" | "integer" => compile_numeric(schema, tp == "integer"),
864 "string" => compile_string(ctx, schema),
865 "array" => compile_array(ctx, schema),
866 "object" => compile_object(ctx, schema),
867 _ => bail!("Invalid type: {}", tp),
868 }
869}
870
871fn compile_numeric(schema: &HashMap<&str, &Value>, integer: bool) -> Result<Schema> {
872 let minimum = schema.get("minimum").copied();
873 let maximum = schema.get("maximum").copied();
874 let exclusive_minimum = schema.get("exclusiveMinimum").copied();
875 let exclusive_maximum = schema.get("exclusiveMaximum").copied();
876 let multiple_of = schema.get("multipleOf").copied();
877
878 let minimum = match minimum {
879 None => None,
880 Some(val) => Some(
881 val.as_f64()
882 .ok_or_else(|| anyhow!("Expected f64 for 'minimum', got {}", limited_str(val)))?,
883 ),
884 };
885 let maximum = match maximum {
886 None => None,
887 Some(val) => Some(
888 val.as_f64()
889 .ok_or_else(|| anyhow!("Expected f64 for 'maximum', got {}", limited_str(val)))?,
890 ),
891 };
892 let exclusive_minimum = match exclusive_minimum {
894 None | Some(Value::Bool(false)) => None,
896 Some(Value::Bool(true)) => minimum,
897 Some(value) => Some(value.as_f64().ok_or_else(|| {
899 anyhow!(
900 "Expected f64 for 'exclusiveMinimum', got {}",
901 limited_str(value)
902 )
903 })?),
904 };
905 let exclusive_maximum = match exclusive_maximum {
906 None | Some(Value::Bool(false)) => None,
908 Some(Value::Bool(true)) => maximum,
909 Some(value) => Some(value.as_f64().ok_or_else(|| {
911 anyhow!(
912 "Expected f64 for 'exclusiveMaximum', got {}",
913 limited_str(value)
914 )
915 })?),
916 };
917 let multiple_of = match multiple_of {
918 None => None,
919 Some(val) => {
920 let f = val.as_f64().ok_or_else(|| {
921 anyhow!("Expected f64 for 'multipleOf', got {}", limited_str(val))
922 })?;
923 Some(Decimal::try_from(f.abs())?)
925 }
926 };
927 Ok(Schema::Number(NumberSchema {
928 minimum,
929 maximum,
930 exclusive_minimum,
931 exclusive_maximum,
932 integer,
933 multiple_of,
934 }))
935}
936
937fn compile_string(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
938 let pattern = schema.get("pattern").copied();
939 let format = schema.get("format").copied();
940
941 let min_length = get_usize(schema, "minLength")?.unwrap_or(0);
942 let max_length = get_usize(schema, "maxLength")?;
943
944 let pattern_rx = match pattern {
945 None => None,
946 Some(val) => Some({
947 let s = val
948 .as_str()
949 .ok_or_else(|| anyhow!("Expected string for 'pattern', got {}", limited_str(val)))?
950 .to_string();
951 RegexAst::SearchRegex(regex_to_lark(&s, "dw"))
952 }),
953 };
954 let format_rx = match format {
955 None => None,
956 Some(val) => {
957 let key = val
958 .as_str()
959 .ok_or_else(|| anyhow!("Expected string for 'format', got {}", limited_str(val)))?
960 .to_string();
961
962 if let Some(fmt) = lookup_format(&key) {
963 Some(RegexAst::Regex(fmt.to_string()))
964 } else {
965 let msg = format!("Unknown format: {}", key);
966 if ctx.options.lenient {
967 ctx.record_warning(msg);
968 None
969 } else {
970 bail!(msg);
971 }
972 }
973 }
974 };
975 let regex = match (pattern_rx, format_rx) {
976 (None, None) => None,
977 (None, Some(fmt)) => Some(fmt),
978 (Some(pat), None) => Some(pat),
979 (Some(pat), Some(fmt)) => Some(RegexAst::And(vec![pat, fmt])),
980 };
981 Ok(Schema::String(StringSchema {
982 min_length,
983 max_length,
984 regex,
985 }))
986}
987
988fn compile_array(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
989 let min_items = get_usize(schema, "minItems")?.unwrap_or(0);
990 let max_items = get_usize(schema, "maxItems")?;
991 let prefix_items = schema.get("prefixItems").copied();
992 let items = schema.get("items").copied();
993 let additional_items = schema.get("additionalItems").copied();
994
995 let (prefix_items, items) = {
996 if ctx.draft <= Draft::Draft201909
999 || additional_items.is_some()
1000 || matches!(items, Some(Value::Array(..)))
1001 {
1002 match (items, additional_items) {
1003 (Some(Value::Array(..)), _) => (items, additional_items),
1005 _ => (None, items),
1007 }
1008 } else {
1009 (prefix_items, items)
1010 }
1011 };
1012 let prefix_items = match prefix_items {
1013 None => vec![],
1014 Some(val) => val
1015 .as_array()
1016 .ok_or_else(|| anyhow!("Expected array for 'prefixItems', got {}", limited_str(val)))?
1017 .iter()
1018 .map(|item| compile_resource(ctx, ctx.as_resource_ref(item)))
1019 .collect::<Result<Vec<Schema>>>()?,
1020 };
1021 let items = match items {
1022 None => None,
1023 Some(val) => Some(Box::new(compile_resource(ctx, ctx.as_resource_ref(val))?)),
1024 };
1025 Ok(Schema::Array(ArraySchema {
1026 min_items,
1027 max_items,
1028 prefix_items,
1029 items,
1030 }))
1031}
1032
1033fn compile_prop_map(
1034 ctx: &Context,
1035 lbl: &str,
1036 prop_map: Option<&Value>,
1037) -> Result<IndexMap<String, Schema>> {
1038 match prop_map {
1039 None => Ok(IndexMap::new()),
1040 Some(val) => val
1041 .as_object()
1042 .ok_or_else(|| anyhow!("Expected object for '{lbl}', got {}", limited_str(val)))?
1043 .iter()
1044 .map(|(k, v)| compile_resource(ctx, ctx.as_resource_ref(v)).map(|v| (k.clone(), v)))
1045 .collect(),
1046 }
1047}
1048
1049fn get_usize(schema: &HashMap<&str, &Value>, name: &str) -> Result<Option<usize>> {
1050 if let Some(val) = schema.get(name) {
1051 if let Some(val) = val.as_u64() {
1052 ensure!(
1053 val <= usize::MAX as u64,
1054 "Value {val} for '{name}' is too large"
1055 );
1056 Ok(Some(val as usize))
1057 } else {
1058 bail!(
1059 "Expected positive integer for '{name}', got {}",
1060 limited_str(val)
1061 )
1062 }
1063 } else {
1064 Ok(None)
1065 }
1066}
1067
1068fn compile_object(ctx: &Context, schema: &HashMap<&str, &Value>) -> Result<Schema> {
1069 let properties = schema.get("properties").copied();
1070 let pattern_properties = schema.get("patternProperties").copied();
1071 let additional_properties = schema.get("additionalProperties").copied();
1072 let required = schema.get("required").copied();
1073 let min_properties = get_usize(schema, "minProperties")?.unwrap_or(0);
1074 let max_properties = get_usize(schema, "maxProperties")?;
1075
1076 let properties = compile_prop_map(ctx, "properties", properties)?;
1077 let pattern_properties = compile_prop_map(ctx, "patternProperties", pattern_properties)?;
1078 ctx.check_disjoint_pattern_properties(&pattern_properties.keys().collect::<Vec<_>>())?;
1079 let additional_properties = match additional_properties {
1080 None => None,
1081 Some(val) => Some(Box::new(compile_resource(ctx, ctx.as_resource_ref(val))?)),
1082 };
1083 let required = match required {
1084 None => IndexSet::new(),
1085 Some(val) => val
1086 .as_array()
1087 .ok_or_else(|| anyhow!("Expected array for 'required', got {}", limited_str(val)))?
1088 .iter()
1089 .map(|item| {
1090 item.as_str()
1091 .ok_or_else(|| {
1092 anyhow!(
1093 "Expected string for 'required' item, got {}",
1094 limited_str(item)
1095 )
1096 })
1097 .map(|s| s.to_string())
1098 })
1099 .collect::<Result<IndexSet<String>>>()?,
1100 };
1101
1102 Ok(mk_object_schema(ObjectSchema {
1103 properties,
1104 pattern_properties,
1105 additional_properties,
1106 required,
1107 min_properties,
1108 max_properties,
1109 }))
1110}
1111
1112fn mk_object_schema(obj: ObjectSchema) -> Schema {
1113 if let Some(max) = obj.max_properties {
1114 if obj.min_properties > max {
1115 return Schema::unsat("minProperties > maxProperties");
1116 }
1117 }
1118 if obj.required.len() > obj.max_properties.unwrap_or(usize::MAX) {
1119 return Schema::unsat("required > maxProperties");
1120 }
1121
1122 Schema::Object(obj)
1123}
1124
1125fn opt_max<T: PartialOrd>(a: Option<T>, b: Option<T>) -> Option<T> {
1126 match (a, b) {
1127 (Some(a), Some(b)) => {
1128 if a >= b {
1129 Some(a)
1130 } else {
1131 Some(b)
1132 }
1133 }
1134 (Some(a), None) => Some(a),
1135 (None, Some(b)) => Some(b),
1136 (None, None) => None,
1137 }
1138}
1139
1140fn opt_min<T: PartialOrd>(a: Option<T>, b: Option<T>) -> Option<T> {
1141 match (a, b) {
1142 (Some(a), Some(b)) => {
1143 if a <= b {
1144 Some(a)
1145 } else {
1146 Some(b)
1147 }
1148 }
1149 (Some(a), None) => Some(a),
1150 (None, Some(b)) => Some(b),
1151 (None, None) => None,
1152 }
1153}
1154
1155#[cfg(all(test, feature = "referencing"))]
1156mod test_retriever {
1157 use crate::json::{Retrieve, RetrieveWrapper};
1158 use crate::JsonCompileOptions;
1159
1160 use super::{build_schema, Schema, StringSchema};
1161 use serde_json::{json, Value};
1162 use std::{fmt, sync::Arc};
1163
1164 #[derive(Debug, Clone)]
1165 struct TestRetrieverError(String);
1166 impl fmt::Display for TestRetrieverError {
1167 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1168 write!(f, "Could not retrieve URI: {}", self.0)
1169 }
1170 }
1171 impl std::error::Error for TestRetrieverError {}
1172
1173 struct TestRetriever {
1174 schemas: std::collections::HashMap<String, serde_json::Value>,
1175 }
1176 impl Retrieve for TestRetriever {
1177 fn retrieve(&self, uri: &str) -> Result<Value, Box<dyn std::error::Error + Send + Sync>> {
1178 let key = uri;
1179 match self.schemas.get(key) {
1180 Some(schema) => Ok(schema.clone()),
1181 None => Err(Box::new(TestRetrieverError(key.to_string()))),
1182 }
1183 }
1184 }
1185
1186 #[test]
1187 fn test_retriever() {
1188 let key: &str = "http://example.com/schema";
1189
1190 let schema = json!({
1191 "$ref": key
1192 });
1193 let retriever = TestRetriever {
1194 schemas: vec![(
1195 key.to_string(),
1196 json!({
1197 "type": "string"
1198 }),
1199 )]
1200 .into_iter()
1201 .collect(),
1202 };
1203 let wrapper = RetrieveWrapper::new(Arc::new(retriever));
1204 let options = JsonCompileOptions {
1205 retriever: Some(wrapper.clone()),
1206 ..Default::default()
1207 };
1208 let r = build_schema(schema, &options).unwrap();
1209 let schema = r.schema;
1210 let defs = r.definitions;
1211 match schema {
1212 Schema::Ref(uri) => {
1213 assert_eq!(uri, key);
1214 }
1215 _ => panic!("Unexpected schema: {:?}", schema),
1216 }
1217 assert_eq!(defs.len(), 1);
1218 let val = defs.get(key).unwrap();
1219 match val {
1221 Schema::String(StringSchema {
1222 min_length: 0,
1223 max_length: None,
1224 regex: None,
1225 }) => {}
1226 _ => panic!("Unexpected schema: {:?}", val),
1227 }
1228 }
1229}
1230
1231#[cfg(test)]
1232mod tests {
1233 use super::*;
1234 use serde_json::json;
1235
1236 #[test]
1237 fn test_problem_child() {
1238 let schema = json!({
1239 "allOf" : [
1240 {"$ref": "#/$defs/tree1"},
1241 {"$ref": "#/$defs/tree2"}
1242 ],
1243 "$defs" : {
1244 "tree1": {
1245 "type": "object",
1246 "properties": {
1247 "child": {
1248 "$ref": "#/$defs/tree1"
1249 }
1250 }
1251 },
1252 "tree2": {
1253 "type": "object",
1254 "properties": {
1255 "child": {
1256 "$ref": "#/$defs/tree2"
1257 }
1258 }
1259 }
1260 }
1261 });
1262 let options = JsonCompileOptions::default();
1264 let _ = build_schema(schema, &options);
1265 }
1266}