boon/
draft.rs

1use std::{
2    collections::{hash_map::Entry, HashMap},
3    str::FromStr,
4};
5
6use once_cell::sync::Lazy;
7use serde_json::{Map, Value};
8use url::Url;
9
10use crate::{compiler::*, root::Resource, util::*, SchemaIndex, Schemas};
11
/// Position flag: the keyword's value is itself a subschema.
const POS_SELF: u8 = 0b001;
/// Position flag: every property value of the keyword's object is a subschema.
const POS_PROP: u8 = 0b010;
/// Position flag: every element of the keyword's array is a subschema.
const POS_ITEM: u8 = 0b100;
15
16pub(crate) static DRAFT4: Lazy<Draft> = Lazy::new(|| Draft {
17    version: 4,
18    id: "id",
19    url: "http://json-schema.org/draft-04/schema",
20    subschemas: HashMap::from([
21        // type agnostic
22        ("definitions", POS_PROP),
23        ("not", POS_SELF),
24        ("allOf", POS_ITEM),
25        ("anyOf", POS_ITEM),
26        ("oneOf", POS_ITEM),
27        // object
28        ("properties", POS_PROP),
29        ("additionalProperties", POS_SELF),
30        ("patternProperties", POS_PROP),
31        // array
32        ("items", POS_SELF | POS_ITEM),
33        ("additionalItems", POS_SELF),
34        ("dependencies", POS_PROP),
35    ]),
36    vocab_prefix: "",
37    all_vocabs: vec![],
38    default_vocabs: vec![],
39});
40
41pub(crate) static DRAFT6: Lazy<Draft> = Lazy::new(|| {
42    let mut subschemas = DRAFT4.subschemas.clone();
43    subschemas.extend([("propertyNames", POS_SELF), ("contains", POS_SELF)]);
44    Draft {
45        version: 6,
46        id: "$id",
47        url: "http://json-schema.org/draft-06/schema",
48        subschemas,
49        vocab_prefix: "",
50        all_vocabs: vec![],
51        default_vocabs: vec![],
52    }
53});
54
55pub(crate) static DRAFT7: Lazy<Draft> = Lazy::new(|| {
56    let mut subschemas = DRAFT6.subschemas.clone();
57    subschemas.extend([("if", POS_SELF), ("then", POS_SELF), ("else", POS_SELF)]);
58    Draft {
59        version: 7,
60        id: "$id",
61        url: "http://json-schema.org/draft-07/schema",
62        subschemas,
63        vocab_prefix: "",
64        all_vocabs: vec![],
65        default_vocabs: vec![],
66    }
67});
68
69pub(crate) static DRAFT2019: Lazy<Draft> = Lazy::new(|| {
70    let mut subschemas = DRAFT7.subschemas.clone();
71    subschemas.extend([
72        ("$defs", POS_PROP),
73        ("dependentSchemas", POS_PROP),
74        ("unevaluatedProperties", POS_SELF),
75        ("unevaluatedItems", POS_SELF),
76        ("contentSchema", POS_SELF),
77    ]);
78    Draft {
79        version: 2019,
80        id: "$id",
81        url: "https://json-schema.org/draft/2019-09/schema",
82        subschemas,
83        vocab_prefix: "https://json-schema.org/draft/2019-09/vocab/",
84        all_vocabs: vec![
85            "core",
86            "applicator",
87            "validation",
88            "meta-data",
89            "format",
90            "content",
91        ],
92        default_vocabs: vec!["core", "applicator", "validation"],
93    }
94});
95
96pub(crate) static DRAFT2020: Lazy<Draft> = Lazy::new(|| {
97    let mut subschemas = DRAFT2019.subschemas.clone();
98    subschemas.extend([("prefixItems", POS_ITEM)]);
99    Draft {
100        version: 2020,
101        id: "$id",
102        url: "https://json-schema.org/draft/2020-12/schema",
103        subschemas,
104        vocab_prefix: "https://json-schema.org/draft/2020-12/vocab/",
105        all_vocabs: vec![
106            "core",
107            "applicator",
108            "unevaluated",
109            "validation",
110            "meta-data",
111            "format-annotation",
112            "format-assertion",
113            "content",
114        ],
115        default_vocabs: vec!["core", "applicator", "unevaluated", "validation"],
116    }
117});
118
119pub(crate) static STD_METASCHEMAS: Lazy<Schemas> =
120    Lazy::new(|| load_std_metaschemas().expect("std metaschemas must be compilable"));
121
122pub(crate) fn latest() -> &'static Draft {
123    crate::Draft::default().internal()
124}
125
126// --
127
/// Internal, per-draft description used while loading and compiling schemas:
/// which keyword carries the id, where subschemas may appear, and which
/// vocabularies the draft knows about.
pub(crate) struct Draft {
    /// version number of the draft (`4`, `6`, `7`, `2019` or `2020` for the
    /// statics defined in this file)
    pub(crate) version: usize,
    /// url of the draft's metaschema
    pub(crate) url: &'static str,
    /// property name used to represent id
    id: &'static str,
    /// location of subschemas: keyword name mapped to a bitmask of `POS_*` flags
    subschemas: HashMap<&'static str, u8>,
    /// prefix used for vocabulary
    pub(crate) vocab_prefix: &'static str,
    /// names of supported vocabs
    pub(crate) all_vocabs: Vec<&'static str>,
    /// names of default vocabs
    pub(crate) default_vocabs: Vec<&'static str>,
}
137
138impl Draft {
139    pub(crate) fn from_url(url: &str) -> Option<&'static Draft> {
140        let (mut url, frag) = split(url);
141        if !frag.is_empty() {
142            return None;
143        }
144        if let Some(s) = url.strip_prefix("http://") {
145            url = s;
146        }
147        if let Some(s) = url.strip_prefix("https://") {
148            url = s;
149        }
150        match url {
151            "json-schema.org/schema" => Some(latest()),
152            "json-schema.org/draft/2020-12/schema" => Some(&DRAFT2020),
153            "json-schema.org/draft/2019-09/schema" => Some(&DRAFT2019),
154            "json-schema.org/draft-07/schema" => Some(&DRAFT7),
155            "json-schema.org/draft-06/schema" => Some(&DRAFT6),
156            "json-schema.org/draft-04/schema" => Some(&DRAFT4),
157            _ => None,
158        }
159    }
160
161    fn get_schema(&self) -> Option<SchemaIndex> {
162        let url = match self.version {
163            2020 => "https://json-schema.org/draft/2020-12/schema",
164            2019 => "https://json-schema.org/draft/2019-09/schema",
165            7 => "http://json-schema.org/draft-07/schema",
166            6 => "http://json-schema.org/draft-06/schema",
167            4 => "http://json-schema.org/draft-04/schema",
168            _ => return None,
169        };
170        let up = UrlPtr {
171            url: Url::parse(url).unwrap_or_else(|_| panic!("{url} should be valid url")),
172            ptr: "".into(),
173        };
174        STD_METASCHEMAS.get_by_loc(&up).map(|s| s.idx)
175    }
176
177    pub(crate) fn validate(&self, up: &UrlPtr, v: &Value) -> Result<(), CompileError> {
178        let Some(sch) = self.get_schema() else {
179            return Err(CompileError::Bug(
180                format!("no metaschema preloaded for draft {}", self.version).into(),
181            ));
182        };
183        STD_METASCHEMAS
184            .validate(v, sch)
185            .map_err(|src| CompileError::ValidationError {
186                url: up.to_string(),
187                src: src.clone_static(),
188            })
189    }
190
191    fn get_id<'a>(&self, obj: &'a Map<String, Value>) -> Option<&'a str> {
192        if self.version < 2019 && obj.contains_key("$ref") {
193            return None; // All other properties in a "$ref" object MUST be ignored
194        }
195        let Some(Value::String(id)) = obj.get(self.id) else {
196            return None;
197        };
198        let (id, _) = split(id); // ignore fragment
199        Some(id).filter(|id| !id.is_empty())
200    }
201
202    pub(crate) fn get_vocabs(
203        &self,
204        url: &Url,
205        doc: &Value,
206    ) -> Result<Option<Vec<String>>, CompileError> {
207        if self.version < 2019 {
208            return Ok(None);
209        }
210        let Value::Object(obj) = doc else {
211            return Ok(None);
212        };
213
214        let Some(Value::Object(obj)) = obj.get("$vocabulary") else {
215            return Ok(None);
216        };
217
218        let mut vocabs = vec![];
219        for (vocab, reqd) in obj {
220            if let Value::Bool(true) = reqd {
221                let name = vocab
222                    .strip_prefix(self.vocab_prefix)
223                    .filter(|name| self.all_vocabs.contains(name));
224                if let Some(name) = name {
225                    vocabs.push(name.to_owned()); // todo: avoid alloc
226                } else {
227                    return Err(CompileError::UnsupportedVocabulary {
228                        url: url.as_str().to_owned(),
229                        vocabulary: vocab.to_owned(),
230                    });
231                }
232            }
233        }
234        Ok(Some(vocabs))
235    }
236
237    // collects anchors/dynamic_achors from `sch` into `res`.
238    // note this does not collect from subschemas in sch.
239    pub(crate) fn collect_anchors(
240        &self,
241        sch: &Value,
242        sch_ptr: &JsonPointer,
243        res: &mut Resource,
244        url: &Url,
245    ) -> Result<(), CompileError> {
246        let Value::Object(obj) = sch else {
247            return Ok(());
248        };
249
250        let mut add_anchor = |anchor: Anchor| match res.anchors.entry(anchor) {
251            Entry::Occupied(entry) => {
252                if entry.get() == sch_ptr {
253                    // anchor with same root_ptr already exists
254                    return Ok(());
255                }
256                Err(CompileError::DuplicateAnchor {
257                    url: url.as_str().to_owned(),
258                    anchor: entry.key().to_string(),
259                    ptr1: entry.get().to_string(),
260                    ptr2: sch_ptr.to_string(),
261                })
262            }
263            entry => {
264                entry.or_insert(sch_ptr.to_owned());
265                Ok(())
266            }
267        };
268
269        if self.version < 2019 {
270            if obj.contains_key("$ref") {
271                return Ok(()); // All other properties in a "$ref" object MUST be ignored
272            }
273            // anchor is specified in id
274            if let Some(Value::String(id)) = obj.get(self.id) {
275                let Ok((_, frag)) = Fragment::split(id) else {
276                    let loc = UrlFrag::format(url, sch_ptr.as_str());
277                    return Err(CompileError::ParseAnchorError { loc });
278                };
279                if let Fragment::Anchor(anchor) = frag {
280                    add_anchor(anchor)?;
281                };
282                return Ok(());
283            }
284        }
285        if self.version >= 2019 {
286            if let Some(Value::String(anchor)) = obj.get("$anchor") {
287                add_anchor(anchor.as_str().into())?;
288            }
289        }
290        if self.version >= 2020 {
291            if let Some(Value::String(anchor)) = obj.get("$dynamicAnchor") {
292                add_anchor(anchor.as_str().into())?;
293                res.dynamic_anchors.insert(anchor.as_str().into());
294            }
295        }
296        Ok(())
297    }
298
299    // error is json-ptr to invalid id
300    pub(crate) fn collect_resources(
301        &self,
302        sch: &Value,
303        base: &Url,           // base of json
304        sch_ptr: JsonPointer, // ptr of json
305        url: &Url,
306        resources: &mut HashMap<JsonPointer, Resource>,
307    ) -> Result<(), CompileError> {
308        if resources.contains_key(&sch_ptr) {
309            // resources are already collected
310            return Ok(());
311        }
312        if let Value::Bool(_) = sch {
313            if sch_ptr.is_empty() {
314                // root resource
315                resources.insert(sch_ptr.clone(), Resource::new(sch_ptr, base.clone()));
316            }
317            return Ok(());
318        }
319
320        let Value::Object(obj) = sch else {
321            return Ok(());
322        };
323
324        let mut base = base;
325        let tmp;
326        let res = if let Some(id) = self.get_id(obj) {
327            let Ok(id) = UrlFrag::join(base, id) else {
328                let loc = UrlFrag::format(url, sch_ptr.as_str());
329                return Err(CompileError::ParseIdError { loc });
330            };
331            tmp = id.url;
332            base = &tmp;
333            Some(Resource::new(sch_ptr.clone(), base.clone()))
334        } else if sch_ptr.is_empty() {
335            // root resource
336            Some(Resource::new(sch_ptr.clone(), base.clone()))
337        } else {
338            None
339        };
340        if let Some(res) = res {
341            if let Some(dup) = resources.values_mut().find(|res| res.id == *base) {
342                return Err(CompileError::DuplicateId {
343                    url: url.to_string(),
344                    id: base.to_string(),
345                    ptr1: res.ptr.to_string(),
346                    ptr2: dup.ptr.to_string(),
347                });
348            }
349            resources.insert(sch_ptr.clone(), res);
350        }
351
352        // collect anchors into base resource
353        if let Some(res) = resources.values_mut().find(|res| res.id == *base) {
354            self.collect_anchors(sch, &sch_ptr, res, url)?;
355        } else {
356            debug_assert!(false, "base resource must exist");
357        }
358
359        for (&kw, &pos) in &self.subschemas {
360            let Some(v) = obj.get(kw) else {
361                continue;
362            };
363            if pos & POS_SELF != 0 {
364                let ptr = sch_ptr.append(kw);
365                self.collect_resources(v, base, ptr, url, resources)?;
366            }
367            if pos & POS_ITEM != 0 {
368                if let Value::Array(arr) = v {
369                    for (i, item) in arr.iter().enumerate() {
370                        let ptr = sch_ptr.append2(kw, &i.to_string());
371                        self.collect_resources(item, base, ptr, url, resources)?;
372                    }
373                }
374            }
375            if pos & POS_PROP != 0 {
376                if let Value::Object(obj) = v {
377                    for (pname, pvalue) in obj {
378                        let ptr = sch_ptr.append2(kw, pname);
379                        self.collect_resources(pvalue, base, ptr, url, resources)?;
380                    }
381                }
382            }
383        }
384        Ok(())
385    }
386
387    pub(crate) fn is_subschema(&self, ptr: &str) -> bool {
388        if ptr.is_empty() {
389            return true;
390        }
391
392        fn split(mut ptr: &str) -> (&str, &str) {
393            ptr = &ptr[1..]; // rm `/` prefix
394            if let Some(i) = ptr.find('/') {
395                (&ptr[..i], &ptr[i..])
396            } else {
397                (ptr, "")
398            }
399        }
400
401        let (tok, ptr) = split(ptr);
402
403        if let Some(&pos) = self.subschemas.get(tok) {
404            if pos & POS_SELF != 0 && self.is_subschema(ptr) {
405                return true;
406            }
407            if !ptr.is_empty() {
408                if pos & POS_PROP != 0 {
409                    let (_, ptr) = split(ptr);
410                    if self.is_subschema(ptr) {
411                        return true;
412                    }
413                }
414                if pos & POS_ITEM != 0 {
415                    let (tok, ptr) = split(ptr);
416                    if usize::from_str(tok).is_ok() && self.is_subschema(ptr) {
417                        return true;
418                    }
419                }
420            }
421        }
422
423        false
424    }
425}
426
427fn load_std_metaschemas() -> Result<Schemas, CompileError> {
428    let mut schemas = Schemas::new();
429    let mut compiler = Compiler::new();
430    compiler.enable_format_assertions();
431    compiler.compile("https://json-schema.org/draft/2020-12/schema", &mut schemas)?;
432    compiler.compile("https://json-schema.org/draft/2019-09/schema", &mut schemas)?;
433    compiler.compile("http://json-schema.org/draft-07/schema", &mut schemas)?;
434    compiler.compile("http://json-schema.org/draft-06/schema", &mut schemas)?;
435    compiler.compile("http://json-schema.org/draft-04/schema", &mut schemas)?;
436    Ok(schemas)
437}
438
439#[cfg(test)]
440mod tests {
441    use crate::{Compiler, Schemas};
442
443    use super::*;
444
445    #[test]
446    fn test_meta() {
447        let mut schemas = Schemas::default();
448        let mut compiler = Compiler::default();
449        let v: Value = serde_json::from_str(include_str!("metaschemas/draft-04/schema")).unwrap();
450        let url = "https://json-schema.org/draft-04/schema";
451        compiler.add_resource(url, v).unwrap();
452        compiler.compile(url, &mut schemas).unwrap();
453    }
454
455    #[test]
456    fn test_from_url() {
457        let tests = [
458            ("http://json-schema.org/draft/2020-12/schema", Some(2020)), // http url
459            ("https://json-schema.org/draft/2020-12/schema", Some(2020)), // https url
460            ("https://json-schema.org/schema", Some(latest().version)),  // latest
461            ("https://json-schema.org/draft-04/schema", Some(4)),
462        ];
463        for (url, version) in tests {
464            let got = Draft::from_url(url).map(|d| d.version);
465            assert_eq!(got, version, "for {url}");
466        }
467    }
468
469    #[test]
470    fn test_collect_ids() {
471        let url = Url::parse("http://a.com/schema.json").unwrap();
472        let json: Value = serde_json::from_str(
473            r#"{
474                "id": "http://a.com/schemas/schema.json",
475                "definitions": {
476                    "s1": { "id": "http://a.com/definitions/s1" },
477                    "s2": {
478                        "id": "../s2",
479                        "items": [
480                            { "id": "http://c.com/item" },
481                            { "id": "http://d.com/item" }
482                        ]
483                    },
484                    "s3": {
485                        "definitions": {
486                            "s1": {
487                                "id": "s3",
488                                "items": {
489                                    "id": "http://b.com/item"
490                                }
491                            }
492                        }
493                    },
494                    "s4": { "id": "http://e.com/def#abcd" }
495                }
496            }"#,
497        )
498        .unwrap();
499
500        let want = {
501            let mut m = HashMap::new();
502            m.insert("", "http://a.com/schemas/schema.json"); // root with id
503            m.insert("/definitions/s1", "http://a.com/definitions/s1");
504            m.insert("/definitions/s2", "http://a.com/s2"); // relative id
505            m.insert("/definitions/s3/definitions/s1", "http://a.com/schemas/s3");
506            m.insert("/definitions/s3/definitions/s1/items", "http://b.com/item");
507            m.insert("/definitions/s2/items/0", "http://c.com/item");
508            m.insert("/definitions/s2/items/1", "http://d.com/item");
509            m.insert("/definitions/s4", "http://e.com/def"); // id with fragments
510            m
511        };
512        let mut got = HashMap::new();
513        DRAFT4
514            .collect_resources(&json, &url, "".into(), &url, &mut got)
515            .unwrap();
516        let got = got
517            .iter()
518            .map(|(k, v)| (k.as_str(), v.id.as_str()))
519            .collect::<HashMap<&str, &str>>();
520        assert_eq!(got, want);
521    }
522
523    #[test]
524    fn test_collect_anchors() {
525        let url = Url::parse("http://a.com/schema.json").unwrap();
526        let json: Value = serde_json::from_str(
527            r#"{
528                "$defs": {
529                    "s2": {
530                        "$id": "http://b.com",
531                        "$anchor": "b1", 
532                        "items": [
533                            { "$anchor": "b2" },
534                            {
535                                "$id": "http//c.com",
536                                "items": [
537                                    {"$anchor": "c1"},
538                                    {"$dynamicAnchor": "c2"}
539                                ]
540                            },
541                            { "$dynamicAnchor": "b3" }
542                        ]
543                    }
544                }
545            }"#,
546        )
547        .unwrap();
548        let mut resources = HashMap::new();
549        DRAFT2020
550            .collect_resources(&json, &url, "".into(), &url, &mut resources)
551            .unwrap();
552        assert!(resources.get("").unwrap().anchors.is_empty());
553        assert_eq!(resources.get("/$defs/s2").unwrap().anchors, {
554            let mut want = HashMap::new();
555            want.insert("b1".into(), "/$defs/s2".into());
556            want.insert("b2".into(), "/$defs/s2/items/0".into());
557            want.insert("b3".into(), "/$defs/s2/items/2".into());
558            want
559        });
560        assert_eq!(resources.get("/$defs/s2/items/1").unwrap().anchors, {
561            let mut want = HashMap::new();
562            want.insert("c1".into(), "/$defs/s2/items/1/items/0".into());
563            want.insert("c2".into(), "/$defs/s2/items/1/items/1".into());
564            want
565        });
566    }
567
568    #[test]
569    fn test_is_subschema() {
570        let tests = vec![("/allOf/0", true), ("/allOf/$defs", false)];
571        for test in tests {
572            let got = DRAFT2020.is_subschema(test.0);
573            assert_eq!(got, test.1, "{}", test.0);
574        }
575    }
576}