jsona_schema/
lib.rs

1use either::Either;
2use fancy_regex::Regex;
3use indexmap::IndexMap;
4use jsona::dom::{KeyOrIndex, Keys, Node};
5use once_cell::sync::Lazy;
6use serde::de::DeserializeOwned;
7use serde::{Deserialize, Serialize};
8use serde_json::{Map, Number, Value};
9use std::{cell::RefCell, rc::Rc};
10use std::{collections::HashSet, fmt::Display};
11
12mod error;
13
14pub use error::{SchemaError, SchemaResult, ERROR_SOURCE};
15
/// Prefix placed in front of a definition name to build a `$ref` value
/// (for example `#/$defs/Foo`).
pub const REF_PREFIX: &str = "#/$defs/";

/// Matches a `$ref` of the form `#/$defs/<name>` where `<name>` is one or more
/// `\w` characters; the name is available as capture group 1.
pub static REF_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r#"^#/\$defs/(\w+)$"#).unwrap());
19
/// A schema document node.
///
/// Every keyword is optional and is left out of the serialized output when it
/// is `None`. The serde `rename` attributes give the keyword name used on the
/// wire whenever it differs from the Rust field name.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Default)]
pub struct Schema {
    // --- Core keywords: references, definitions, identification ---
    #[serde(rename = "$ref", skip_serializing_if = "Option::is_none")]
    pub ref_value: Option<String>,
    #[serde(rename = "$defs", skip_serializing_if = "Option::is_none")]
    pub defs: Option<IndexMap<String, Schema>>,
    #[serde(rename = "$id", skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,
    #[serde(rename = "$comment", skip_serializing_if = "Option::is_none")]
    pub comment: Option<String>,

    // --- Human-readable annotations ---
    #[serde(skip_serializing_if = "Option::is_none")]
    pub title: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub description: Option<String>,

    /// Serialized as `type`: either a single type or a list of types.
    #[serde(rename = "type", skip_serializing_if = "Option::is_none")]
    pub schema_type: Option<OneOrMultiTypes>,

    #[serde(rename = "default", skip_serializing_if = "Option::is_none")]
    pub default: Option<Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub deprecated: Option<bool>,

    // --- Numeric keywords ---
    #[serde(skip_serializing_if = "Option::is_none")]
    pub maximum: Option<Number>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub minimum: Option<Number>,
    // NOTE(review): the exclusive bounds are modelled as booleans here, not
    // numbers — confirm this is the intended keyword form.
    #[serde(rename = "exclusiveMaximum", skip_serializing_if = "Option::is_none")]
    pub exclusive_maximum: Option<bool>,
    #[serde(rename = "exclusiveMinimum", skip_serializing_if = "Option::is_none")]
    pub exclusive_minimum: Option<bool>,
    #[serde(rename = "multipleOf", skip_serializing_if = "Option::is_none")]
    pub multiple_of: Option<f64>,

    // --- String keywords ---
    #[serde(rename = "maxLength", skip_serializing_if = "Option::is_none")]
    pub max_length: Option<u32>,
    #[serde(rename = "minLength", skip_serializing_if = "Option::is_none")]
    pub min_length: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub pattern: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub format: Option<String>,
    #[serde(rename = "contentEncoding", skip_serializing_if = "Option::is_none")]
    pub content_encoding: Option<String>,
    #[serde(rename = "contentMediaType", skip_serializing_if = "Option::is_none")]
    pub content_media_type: Option<String>,

    // --- Array keywords ---
    /// Either one schema or a list of schemas.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub items: Option<OneOrMultiSchemas>,
    #[serde(rename = "maxItems", skip_serializing_if = "Option::is_none")]
    pub max_items: Option<u32>,
    #[serde(rename = "minItems", skip_serializing_if = "Option::is_none")]
    pub min_items: Option<u32>,
    #[serde(rename = "uniqueItems", skip_serializing_if = "Option::is_none")]
    pub unique_items: Option<bool>,
    #[serde(rename = "additionalItems", skip_serializing_if = "Option::is_none")]
    pub additional_items: Option<BoolOrSchema>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub contains: Option<Box<Schema>>,
    #[serde(rename = "maxContains", skip_serializing_if = "Option::is_none")]
    pub max_contains: Option<u32>,
    #[serde(rename = "minContains", skip_serializing_if = "Option::is_none")]
    pub min_contains: Option<u32>,
    #[serde(rename = "unevaluatedItems", skip_serializing_if = "Option::is_none")]
    pub unevaluated_items: Option<BoolOrSchema>,

    // --- Object keywords ---
    #[serde(skip_serializing_if = "Option::is_none")]
    pub properties: Option<IndexMap<String, Schema>>,
    #[serde(rename = "maxProperties", skip_serializing_if = "Option::is_none")]
    pub max_properties: Option<u32>,
    #[serde(rename = "minProperties", skip_serializing_if = "Option::is_none")]
    pub min_properties: Option<u32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub required: Option<Vec<String>>,
    /// Keys are regular-expression sources, values the schema for matching names.
    #[serde(rename = "patternProperties", skip_serializing_if = "Option::is_none")]
    pub pattern_properties: Option<IndexMap<String, Schema>>,
    #[serde(
        rename = "additionalProperties",
        skip_serializing_if = "Option::is_none"
    )]
    pub additional_properties: Option<BoolOrSchema>,
    #[serde(rename = "dependentRequired", skip_serializing_if = "Option::is_none")]
    pub dependent_required: Option<IndexMap<String, Vec<String>>>,
    #[serde(rename = "dependentSchemas", skip_serializing_if = "Option::is_none")]
    pub dependent_schemas: Option<IndexMap<String, Schema>>,
    #[serde(rename = "propertyNames", skip_serializing_if = "Option::is_none")]
    pub property_names: Option<Box<Schema>>,
    #[serde(
        rename = "unevaluatedProperties",
        skip_serializing_if = "Option::is_none"
    )]
    pub unevaluated_properties: Option<BoolOrSchema>,

    // --- Fixed-value keywords ---
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(rename = "enum")]
    pub enum_value: Option<Vec<Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(rename = "const")]
    pub const_value: Option<Value>,

    // --- Further annotations ---
    #[serde(skip_serializing_if = "Option::is_none")]
    pub examples: Option<Vec<Value>>,
    #[serde(rename = "readOnly", skip_serializing_if = "Option::is_none")]
    pub read_only: Option<bool>,
    #[serde(rename = "writeOnly", skip_serializing_if = "Option::is_none")]
    pub write_only: Option<bool>,

    // --- Composition and conditional keywords ---
    #[serde(rename = "allOf", skip_serializing_if = "Option::is_none")]
    pub all_of: Option<Vec<Schema>>,
    #[serde(rename = "oneOf", skip_serializing_if = "Option::is_none")]
    pub one_of: Option<Vec<Schema>>,
    #[serde(rename = "anyOf", skip_serializing_if = "Option::is_none")]
    pub any_of: Option<Vec<Schema>>,
    #[serde(rename = "not", skip_serializing_if = "Option::is_none")]
    pub not: Option<Box<Schema>>,
    #[serde(rename = "if", skip_serializing_if = "Option::is_none")]
    pub if_value: Option<Box<Schema>>,
    #[serde(rename = "then", skip_serializing_if = "Option::is_none")]
    pub then_value: Option<Box<Schema>>,
    #[serde(rename = "else", skip_serializing_if = "Option::is_none")]
    pub else_value: Option<Box<Schema>>,

    /// Flattened into the schema object itself rather than nested under an
    /// `unknown` key (serde `flatten`).
    #[serde(flatten, skip_serializing_if = "Option::is_none")]
    pub unknown: Option<Map<String, Value>>,
}
146
147impl Schema {
148    pub fn pointer(&self, keys: &Keys) -> Vec<&Schema> {
149        let mut result = vec![];
150        pointer_impl(&mut result, self, self, keys);
151        result
152    }
153    pub fn maybe_type(&self, schema_type: &SchemaType) -> bool {
154        self.schema_type
155            .as_ref()
156            .map(|v| v.contains(schema_type))
157            .unwrap_or_default()
158    }
159    pub fn one_type(&self) -> Option<SchemaType> {
160        self.schema_type
161            .as_ref()
162            .and_then(|v| v.value.as_ref().left())
163            .cloned()
164    }
165    pub fn types(&self) -> HashSet<SchemaType> {
166        self.schema_type
167            .as_ref()
168            .map(|v| v.types())
169            .unwrap_or_default()
170    }
171    pub fn debug_string(&self) -> String {
172        serde_json::to_string(&self).unwrap_or_default()
173    }
174}
175
176impl TryFrom<&Node> for Schema {
177    type Error = Vec<SchemaError>;
178
179    fn try_from(node: &Node) -> SchemaResult<Self> {
180        let scope = SchemaParser {
181            keys: Keys::default(),
182            node: node.clone(),
183            defs: Default::default(),
184            ref_prefix: Rc::new(REF_PREFIX.to_string()),
185            prefer_optional: true,
186        };
187        let mut schema = scope.parse()?;
188        let defs = scope.defs.take();
189        if !defs.is_empty() {
190            schema.defs = Some(defs);
191        }
192        Ok(schema)
193    }
194}
195
/// A keyword value that is either a boolean or a nested schema.
///
/// The wrapper is serde-transparent and the inner `Either` is (de)serialized
/// untagged, so on the wire this is just the boolean or the schema object.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[serde(transparent)]
pub struct BoolOrSchema {
    /// `Left` is the boolean form, `Right` the boxed schema form.
    #[serde(with = "either::serde_untagged")]
    pub value: Either<bool, Box<Schema>>,
}
202
203impl Default for BoolOrSchema {
204    fn default() -> Self {
205        Self {
206            value: Either::Left(false),
207        }
208    }
209}
210
/// The value types a schema's `type` keyword can name.
///
/// Variants are (de)serialized in lowercase, e.g. `SchemaType::String` as
/// `"string"`.
#[derive(Clone, Debug, Deserialize, Serialize, Eq, PartialEq, Hash)]
#[serde(rename_all = "lowercase")]
pub enum SchemaType {
    String,
    Number,
    Boolean,
    Integer,
    Null,
    Object,
    Array,
}
222
223impl SchemaType {
224    pub fn from_node(node: &Node) -> Option<Self> {
225        let schema_type = match &node {
226            Node::Null(v) => {
227                if v.is_valid() {
228                    SchemaType::Null
229                } else {
230                    return None;
231                }
232            }
233            Node::Bool(_) => SchemaType::Boolean,
234            Node::Number(v) => {
235                if v.is_integer() {
236                    SchemaType::Integer
237                } else {
238                    SchemaType::Number
239                }
240            }
241            Node::String(_) => SchemaType::String,
242            Node::Array(_) => SchemaType::Array,
243            Node::Object(_) => SchemaType::Object,
244        };
245        Some(schema_type)
246    }
247
248    pub fn match_node(&self, node: &Node) -> bool {
249        match self {
250            SchemaType::String => node.is_string(),
251            SchemaType::Number => node.is_number(),
252            SchemaType::Boolean => node.is_bool(),
253            SchemaType::Integer => node.is_integer(),
254            SchemaType::Null => node.is_null(),
255            SchemaType::Object => node.is_object(),
256            SchemaType::Array => node.is_array(),
257        }
258    }
259}
260
261impl Display for SchemaType {
262    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
263        let type_str = match self {
264            SchemaType::String => "string",
265            SchemaType::Number => "number",
266            SchemaType::Integer => "integer",
267            SchemaType::Boolean => "boolean",
268            SchemaType::Null => "null",
269            SchemaType::Object => "object",
270            SchemaType::Array => "array",
271        };
272        f.write_str(type_str)
273    }
274}
275
/// The value of a `type` keyword: a single type or a list of types.
///
/// The wrapper is serde-transparent and the inner `Either` is (de)serialized
/// untagged.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)]
#[serde(transparent)]
pub struct OneOrMultiTypes {
    /// `Left` is the single-type form, `Right` the list form.
    #[serde(with = "either::serde_untagged")]
    pub value: Either<SchemaType, Vec<SchemaType>>,
}
282
283impl OneOrMultiTypes {
284    pub fn new(items: impl Iterator<Item = SchemaType>) -> Self {
285        let mut items: Vec<SchemaType> = items.collect();
286        if items.len() > 1 {
287            Self {
288                value: Either::Right(items),
289            }
290        } else {
291            Self {
292                value: Either::Left(items.remove(0)),
293            }
294        }
295    }
296    pub fn contains(&self, target: &SchemaType) -> bool {
297        match self.value.as_ref() {
298            Either::Left(value) => value == target,
299            Either::Right(values) => values.iter().any(|v| v == target),
300        }
301    }
302    pub fn types(&self) -> HashSet<SchemaType> {
303        match self.value.as_ref() {
304            Either::Left(value) => [value.clone()].into(),
305            Either::Right(values) => values.iter().cloned().collect(),
306        }
307    }
308    pub fn is_empty(&self) -> bool {
309        self.len() == 0
310    }
311    pub fn len(&self) -> usize {
312        match self.value.as_ref() {
313            Either::Left(_) => 1,
314            Either::Right(values) => values.len(),
315        }
316    }
317}
318
319impl From<SchemaType> for OneOrMultiTypes {
320    fn from(schema_type: SchemaType) -> Self {
321        Self {
322            value: Either::Left(schema_type),
323        }
324    }
325}
326
/// A keyword value that is either one schema or a list of schemas.
///
/// The wrapper is serde-transparent and the inner `Either` is (de)serialized
/// untagged.
#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)]
#[serde(transparent)]
pub struct OneOrMultiSchemas {
    /// `Left` is the single boxed schema, `Right` the list of schemas.
    #[serde(with = "either::serde_untagged")]
    pub value: Either<Box<Schema>, Vec<Schema>>,
}
333
334impl OneOrMultiSchemas {
335    pub fn new(mut items: Vec<Schema>) -> Self {
336        if items.len() > 1 {
337            Self {
338                value: Either::Right(items),
339            }
340        } else {
341            Self {
342                value: Either::Left(Box::new(items.remove(0))),
343            }
344        }
345    }
346}
/// State for converting one node (and, recursively, its children) into a
/// [`Schema`].
#[derive(Debug, Clone)]
pub struct SchemaParser {
    /// The node being converted.
    pub node: Node,
    /// Path of `node`, attached to the errors this parser reports.
    pub keys: Keys,
    /// Definitions registered through `@def`; the same map is shared with
    /// every child parser spawned from this one.
    pub defs: Rc<RefCell<IndexMap<String, Schema>>>,
    /// Prefix placed in front of a definition name when emitting a `$ref`.
    pub ref_prefix: Rc<String>,
    /// When `true`, an object member is required only if annotated
    /// `@required`; when `false`, it is required unless annotated `@optional`.
    pub prefer_optional: bool,
}
355
356impl SchemaParser {
357    pub fn parse(&self) -> SchemaResult<Schema> {
358        let mut def_value = String::new();
359        if let Some(def) = self.parse_string_annotation("@def")? {
360            let mut defs = self.defs.borrow_mut();
361            if defs.contains_key(&def) {
362                return Err(vec![SchemaError::ConflictDef {
363                    keys: self.keys.clone(),
364                    name: def,
365                }]);
366            }
367            defs.insert(def.clone(), Default::default());
368            def_value = def;
369        } else if let Some(ref_value) = self.parse_string_annotation("@ref")? {
370            let defs = self.defs.borrow();
371            if !defs.contains_key(&ref_value) {
372                return Err(vec![SchemaError::UnknownRef {
373                    keys: self.keys.clone(),
374                    name: ref_value,
375                }]);
376            }
377            return Ok(Schema {
378                ref_value: Some(format!("{}{}", self.ref_prefix, ref_value)),
379                ..Default::default()
380            });
381        }
382        let mut schema: Schema = self.parse_object_annotation("@schema")?.unwrap_or_default();
383        if let Some(describe) = self.parse_string_annotation("@describe")? {
384            schema.description = Some(describe);
385        }
386        if self.exist_annotation("@default") {
387            schema.default = Some(self.node.to_plain_json())
388        }
389        let schema_types = schema.types();
390        let node_type = SchemaType::from_node(&self.node);
391        if schema_types.is_empty() {
392            schema.schema_type = node_type.map(Into::into);
393        } else if let Some(node_type) = node_type {
394            if !schema_types.contains(&node_type) {
395                return Err(vec![SchemaError::UnmatchedSchemaType {
396                    keys: self.keys.clone(),
397                }]);
398            }
399        }
400        match &self.node {
401            Node::Object(obj) => {
402                let mut errors = vec![];
403                for (key, child) in obj.value().read().iter() {
404                    let child_parser = self.spawn(key.clone(), child.clone());
405                    let key = key.value();
406                    let pattern = child_parser.parse_string_annotation("@pattern")?;
407                    let child_schema = match child_parser.parse() {
408                        Ok(v) => v,
409                        Err(errs) => {
410                            errors.extend(errs);
411                            continue;
412                        }
413                    };
414                    if let Some(pattern) = pattern {
415                        let props = schema.pattern_properties.get_or_insert(Default::default());
416                        props.insert(pattern, child_schema);
417                    } else {
418                        let props = schema.properties.get_or_insert(Default::default());
419                        props.insert(key.to_string(), child_schema);
420                        if (self.prefer_optional && child_parser.exist_annotation("@required"))
421                            || (!self.prefer_optional
422                                && !child_parser.exist_annotation("@optional"))
423                        {
424                            schema
425                                .required
426                                .get_or_insert(Default::default())
427                                .push(key.to_string());
428                        }
429                    }
430                }
431                if !errors.is_empty() {
432                    return Err(errors);
433                }
434            }
435            Node::Array(arr) => {
436                let mut errors = vec![];
437                let arr = arr.value().read();
438                if arr.len() > 0 {
439                    let mut schemas = vec![];
440                    for (i, child) in arr.iter().enumerate() {
441                        let child_parser = self.spawn(i, child.clone());
442                        match child_parser.parse() {
443                            Ok(v) => schemas.push(v),
444                            Err(errs) => {
445                                errors.extend(errs);
446                                continue;
447                            }
448                        }
449                    }
450                    if !errors.is_empty() {
451                        return Err(errors);
452                    }
453                    if let Some(compound) = self.parse_string_annotation("@compound")? {
454                        schema.schema_type = None;
455                        match compound.as_str() {
456                            "anyOf" => schema.any_of = Some(schemas),
457                            "oneOf" => schema.one_of = Some(schemas),
458                            "allOf" => schema.all_of = Some(schemas),
459                            _ => {
460                                return Err(vec![SchemaError::InvalidCompoundValue {
461                                    keys: self.keys.join(KeyOrIndex::annotation("@compound")),
462                                }]);
463                            }
464                        }
465                    } else if arr.len() == 1 {
466                        schema.items = Some(OneOrMultiSchemas::new(schemas));
467                    } else {
468                        schema.items = Some(OneOrMultiSchemas::new(schemas))
469                    }
470                }
471            }
472            _ => {}
473        }
474        if self.exist_annotation("@anytype") {
475            schema.schema_type = None;
476        }
477        if !def_value.is_empty() {
478            self.defs.borrow_mut().insert(def_value.clone(), schema);
479            return Ok(Schema {
480                ref_value: Some(format!("{}{}", self.ref_prefix, def_value)),
481                ..Default::default()
482            });
483        }
484        Ok(schema)
485    }
486
487    fn spawn(&self, key: impl Into<KeyOrIndex>, node: Node) -> Self {
488        Self {
489            node,
490            keys: self.keys.clone().join(key.into()),
491            defs: self.defs.clone(),
492            ref_prefix: self.ref_prefix.clone(),
493            prefer_optional: self.prefer_optional,
494        }
495    }
496
497    fn exist_annotation(&self, name: &str) -> bool {
498        self.node.get(&KeyOrIndex::annotation(name)).is_some()
499    }
500
501    fn parse_object_annotation<T: DeserializeOwned>(&self, name: &str) -> SchemaResult<Option<T>> {
502        match self.node.get_as_object(name) {
503            Some((key, Some(value))) => {
504                let value = Node::from(value).to_plain_json();
505                match serde_json::from_value(value) {
506                    Ok(v) => Ok(Some(v)),
507                    Err(err) => Err(vec![SchemaError::InvalidSchemaValue {
508                        keys: self.keys.clone().join(key),
509                        error: err.to_string(),
510                    }]),
511                }
512            }
513            Some((key, None)) => Err(vec![SchemaError::UnexpectedType {
514                keys: self.keys.clone().join(key),
515            }]),
516            None => Ok(None),
517        }
518    }
519
520    fn parse_string_annotation(&self, name: &str) -> SchemaResult<Option<String>> {
521        match self.node.get_as_string(name) {
522            Some((_, Some(value))) => Ok(Some(value.value().to_string())),
523            Some((key, None)) => Err(vec![SchemaError::UnexpectedType {
524                keys: self.keys.clone().join(key),
525            }]),
526            None => Ok(None),
527        }
528    }
529}
530
531fn pointer_impl<'a>(
532    result: &mut Vec<&'a Schema>,
533    root_schema: &'a Schema,
534    local_schema: &'a Schema,
535    keys: &Keys,
536) {
537    let local_schema = match resolve(root_schema, local_schema) {
538        Some(v) => v,
539        None => return,
540    };
541    if let Some(schemas) = local_schema
542        .one_of
543        .as_ref()
544        .or(local_schema.any_of.as_ref())
545        .or(local_schema.all_of.as_ref())
546    {
547        for schema in schemas.iter() {
548            pointer_impl(result, root_schema, schema, keys);
549        }
550    } else {
551        match keys.shift() {
552            None => {
553                result.push(local_schema);
554            }
555            Some((key, keys)) => match key {
556                KeyOrIndex::Index(index) => {
557                    if let Some(local_schema) = local_schema.items.as_ref() {
558                        match local_schema.value.as_ref() {
559                            Either::Left(local_schema) => {
560                                pointer_impl(result, root_schema, local_schema, &keys)
561                            }
562                            Either::Right(schemas) => {
563                                if let Some(local_schema) = schemas.get(index) {
564                                    pointer_impl(result, root_schema, local_schema, &keys)
565                                }
566                            }
567                        }
568                    }
569                }
570                KeyOrIndex::Key(key) => {
571                    if let Some(local_schema) = local_schema
572                        .properties
573                        .as_ref()
574                        .and_then(|v| v.get(key.value()))
575                    {
576                        pointer_impl(result, root_schema, local_schema, &keys)
577                    }
578                    if let Some(schemas) = local_schema.pattern_properties.as_ref() {
579                        for (pat, local_schema) in schemas.iter() {
580                            if let Ok(re) = Regex::new(pat) {
581                                if let Ok(true) = re.is_match(key.value()) {
582                                    pointer_impl(result, root_schema, local_schema, &keys)
583                                }
584                            }
585                        }
586                    }
587                    if let Some(local_schema) = local_schema
588                        .additional_properties
589                        .as_ref()
590                        .and_then(|v| v.value.as_ref().right())
591                    {
592                        pointer_impl(result, root_schema, local_schema, &keys)
593                    }
594                }
595            },
596        }
597    }
598}
599
600fn resolve<'a>(root_schema: &'a Schema, local_schema: &'a Schema) -> Option<&'a Schema> {
601    let schema = match local_schema.ref_value.as_ref() {
602        Some(ref_value) => {
603            if ref_value == "#" {
604                root_schema
605            } else {
606                match root_schema.defs.as_ref().and_then(|defs| {
607                    REF_REGEX
608                        .captures(ref_value)
609                        .ok()
610                        .flatten()
611                        .and_then(|v| v.get(1))
612                        .and_then(|v| defs.get(v.as_str()))
613                }) {
614                    Some(v) => v,
615                    None => return None,
616                }
617            }
618        }
619        None => local_schema,
620    };
621    Some(schema)
622}