1use std::{
25 collections::{BTreeMap, HashMap, HashSet},
26 str::FromStr,
27};
28
29use crate::{
30 attribute::{self, Attribute},
31 datamodel::DataModel,
32 json::schema::{AnyOfItemType, DataType, Item, Property, ReferenceItemType},
33 markdown::frontmatter::FrontMatter,
34 object::{Enumeration, Object},
35 option::AttrOption,
36 validation::BASIC_TYPES,
37};
38
39use super::schema::{self, PrimitiveType};
40
41const SCHEMA: &str = "https://json-schema.org/draft/2020-12/schema";
42
43pub fn to_json_schema(
55 model: &DataModel,
56 root: &str,
57 openai: bool,
58) -> Result<schema::SchemaObject, String> {
59 let root_object = retrieve_object(model, root)?;
60
61 let mut schema_object = schema::SchemaObject::try_from(root_object)?;
62 let mut used_types = HashSet::new();
63 let mut used_enums = HashSet::new();
64
65 collect_definitions(root_object, model, &mut used_types, &mut used_enums)?;
66
67 let definitions = collect_definitions_from_model(model, &used_types, &used_enums)?;
68
69 schema_object.schema = Some(SCHEMA.to_string());
70 schema_object.definitions = definitions;
71
72 if let Some(config) = model.config.clone() {
73 post_process_schema(&mut schema_object, &config, openai, &used_enums)?;
74 }
75
76 Ok(schema_object)
77}
78
79fn retrieve_object<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Object, String> {
90 model
91 .objects
92 .iter()
93 .find(|obj| obj.name == name)
94 .ok_or(format!("Object {name} not found"))
95}
96
97fn retrieve_enum<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Enumeration, String> {
108 model
109 .enums
110 .iter()
111 .find(|e| e.name == name)
112 .ok_or(format!("Enum {name} not found"))
113}
114
115fn collect_definitions_from_model(
127 model: &DataModel,
128 used_types: &HashSet<String>,
129 used_enums: &HashSet<String>,
130) -> Result<BTreeMap<String, schema::SchemaType>, String> {
131 let mut definitions = BTreeMap::new();
132
133 for obj_name in used_types {
134 let obj = retrieve_object(model, obj_name)?;
135 definitions.insert(obj_name.clone(), schema::SchemaType::try_from(obj)?);
136 }
137
138 for enum_name in used_enums {
139 let enum_object = retrieve_enum(model, enum_name)?;
140 definitions.insert(
141 enum_name.clone(),
142 schema::SchemaType::try_from(enum_object)?,
143 );
144 }
145
146 Ok(definitions)
147}
148
149fn collect_definitions(
162 object: &Object,
163 model: &DataModel,
164 used_types: &mut HashSet<String>,
165 used_enums: &mut HashSet<String>,
166) -> Result<(), String> {
167 for attr in object.attributes.iter() {
168 for dtype in attr.dtypes.iter() {
169 if BASIC_TYPES.contains(&dtype.as_str()) || used_types.contains(dtype) {
170 continue;
171 }
172
173 let object = model.objects.iter().find(|obj| obj.name == *dtype);
174 let enumeration = model.enums.iter().find(|e| e.name == *dtype);
175
176 if let Some(object) = object {
177 used_types.insert(dtype.clone());
178 collect_definitions(object, model, used_types, used_enums)?;
179 } else if let Some(enumeration) = enumeration {
180 used_enums.insert(enumeration.name.clone());
181 } else {
182 return Err(format!("Object or enumeration {dtype} not found"));
183 }
184 }
185 }
186
187 Ok(())
188}
189
190fn resolve_prefixes(schema: &mut schema::SchemaObject, prefixes: &HashMap<String, String>) {
197 for (_, property) in schema.properties.iter_mut() {
198 if let Some(reference) = property.term.clone() {
199 let (prefix, term) = reference.split_once(":").unwrap_or(("", ""));
200 if let Some(prefix) = prefixes.get(prefix) {
201 property.term = Some(format!("{prefix}{term}"));
202 }
203 }
204 }
205}
206
207fn post_process_schema(
215 schema_object: &mut schema::SchemaObject,
216 config: &FrontMatter,
217 openai: bool,
218 used_enums: &HashSet<String>,
219) -> Result<(), String> {
220 schema_object.id = Some(config.repo.clone());
221 post_process_object(schema_object, config, openai, used_enums)?;
222
223 for (_, definition) in schema_object.definitions.iter_mut() {
224 if let schema::SchemaType::Object(definition) = definition {
225 post_process_object(definition, config, openai, used_enums)?;
226 }
227 }
228
229 Ok(())
230}
231
232fn post_process_object(
241 object: &mut schema::SchemaObject,
242 config: &FrontMatter,
243 openai: bool,
244 used_enums: &HashSet<String>,
245) -> Result<(), String> {
246 if let Some(prefixes) = &config.prefixes {
247 resolve_prefixes(object, prefixes);
248 }
249 if openai {
250 object.schema = None;
251 object.id = None;
252 remove_options(object);
253 set_required_and_nullable(object);
254 }
255
256 for (_, property) in object.properties.iter_mut() {
257 if let Some(reference) = &property.reference {
258 if used_enums.contains(
259 reference
260 .split("/")
261 .last()
262 .ok_or(format!("Failed to split reference: {reference}"))?,
263 ) {
264 if openai {
265 property.dtype = None;
266 } else {
267 property.dtype = Some(schema::DataType::String);
268 }
269 }
270 }
271 }
272
273 Ok(())
274}
275
276fn remove_options(schema: &mut schema::SchemaObject) {
282 for (_, property) in schema.properties.iter_mut() {
283 property.options = HashMap::new();
284 }
285}
286
287fn set_required_and_nullable(schema: &mut schema::SchemaObject) {
293 let mut new_required = Vec::new();
294
295 for (name, property) in &mut schema.properties {
296 clean_reference_property(property);
297 convert_one_of_to_any_of(property);
298
299 if !schema.required.contains(name) {
300 new_required.push(name.clone());
301 make_property_nullable(property);
302 }
303 }
304
305 finalize_schema_requirements(schema, new_required);
306}
307
308fn clean_reference_property(property: &mut schema::Property) {
314 if property.reference.is_some() {
315 property.description = None;
316 property.title = None;
317 property.dtype = None;
318 }
319}
320
321fn convert_one_of_to_any_of(property: &mut schema::Property) {
327 if let Some(Item::OneOfItem(one_of)) = &mut property.items {
328 property.items = Some(Item::AnyOfItem(AnyOfItemType {
329 any_of: one_of.one_of.clone(),
330 }));
331 }
332}
333
334fn make_property_nullable(property: &mut schema::Property) {
340 let mut any_of = vec![Item::PropertyItem(Box::new(Property {
341 dtype: Some(DataType::Null),
342 ..Default::default()
343 }))];
344
345 handle_property_data_type(property, &mut any_of);
346 handle_property_reference(property, &mut any_of);
347 handle_property_one_of(property, &mut any_of);
348
349 if !matches!(property.dtype, Some(DataType::Array)) {
350 property.any_of = Some(any_of);
351 }
352}
353
354fn handle_property_data_type(property: &mut schema::Property, any_of: &mut Vec<Item>) {
361 if let Some(dtype) = &property.dtype {
362 let is_array = matches!(dtype, DataType::Array);
363
364 match dtype {
365 DataType::Array => {
366 any_of.push(Item::PropertyItem(Box::new(Property {
367 dtype: Some(DataType::Null),
368 ..Default::default()
369 })));
370 }
371 DataType::Object => {
372 property.dtype = None;
373 }
374 DataType::Multiple(data_types) => {
375 add_multiple_data_types(any_of, data_types);
376 }
377 _ => {
378 any_of.push(Item::PropertyItem(Box::new(Property {
379 dtype: Some(dtype.clone()),
380 ..Default::default()
381 })));
382 }
383 }
384
385 if !is_array {
386 property.dtype = None;
387 }
388 }
389}
390
391fn add_multiple_data_types(any_of: &mut Vec<Item>, data_types: &[DataType]) {
398 for dtype in data_types.iter() {
399 if dtype.is_not_object() || dtype.is_array() {
400 any_of.push(Item::PropertyItem(Box::new(Property {
401 dtype: Some(dtype.clone()),
402 ..Default::default()
403 })));
404 }
405 }
406}
407
408fn handle_property_reference(property: &mut schema::Property, any_of: &mut Vec<Item>) {
415 if let Some(reference) = &property.reference {
416 any_of.push(Item::ReferenceItem(ReferenceItemType {
417 reference: reference.clone(),
418 }));
419 property.reference = None;
420 property.dtype = None;
421 property.title = None;
422 property.description = None;
423 }
424}
425
426fn handle_property_one_of(property: &mut schema::Property, any_of: &mut Vec<Item>) {
433 if let Some(one_of) = &property.one_of {
434 any_of.extend(one_of.clone());
435 property.one_of = None;
436 }
437}
438
439fn finalize_schema_requirements(schema: &mut schema::SchemaObject, new_required: Vec<String>) {
446 schema.additional_properties = false;
447 schema.required.extend(new_required);
448 schema.required.sort();
449}
450
451impl TryFrom<&Enumeration> for schema::SchemaType {
452 type Error = String;
453
454 fn try_from(enumeration: &Enumeration) -> Result<Self, Self::Error> {
464 Ok(schema::SchemaType::Enum(schema::EnumObject::try_from(
465 enumeration,
466 )?))
467 }
468}
469
470impl TryFrom<&Object> for schema::SchemaType {
471 type Error = String;
472
473 fn try_from(obj: &Object) -> Result<Self, Self::Error> {
483 Ok(schema::SchemaType::Object(schema::SchemaObject::try_from(
484 obj,
485 )?))
486 }
487}
488
489impl TryFrom<&Object> for schema::SchemaObject {
490 type Error = String;
491
492 fn try_from(obj: &Object) -> Result<Self, Self::Error> {
502 let properties: Result<BTreeMap<String, schema::Property>, String> = obj
503 .attributes
504 .iter()
505 .map(|attr| -> Result<(String, schema::Property), String> {
506 Ok((attr.name.clone(), schema::Property::try_from(attr)?))
507 })
508 .collect();
509
510 let required: Vec<String> = obj
511 .attributes
512 .iter()
513 .filter(|attr| attr.required)
514 .map(|attr| attr.name.clone())
515 .collect();
516
517 Ok(schema::SchemaObject {
518 title: obj.name.clone(),
519 dtype: Some(schema::DataType::Object),
520 description: Some(obj.docstring.clone()),
521 properties: properties?,
522 definitions: BTreeMap::new(),
523 required,
524 optional: Vec::new(),
525 schema: None,
526 id: None,
527 additional_properties: false,
528 })
529 }
530}
531
532impl TryFrom<&Enumeration> for schema::EnumObject {
533 type Error = String;
534
535 fn try_from(enumeration: &Enumeration) -> Result<Self, Self::Error> {
545 let values = enumeration
546 .mappings
547 .values()
548 .cloned()
549 .collect::<Vec<String>>();
550
551 Ok(schema::EnumObject {
552 title: enumeration.name.clone(),
553 dtype: Some(schema::DataType::String),
554 description: Some(enumeration.docstring.clone()),
555 enum_values: values,
556 })
557 }
558}
559
560impl TryFrom<&Attribute> for schema::Property {
561 type Error = String;
562
563 fn try_from(attr: &Attribute) -> Result<Self, Self::Error> {
573 let mut dtype = (!attr.is_enum)
574 .then(|| schema::DataType::try_from(attr))
575 .transpose()?;
576
577 let options: HashMap<String, PrimitiveType> = attr
578 .options
579 .iter()
580 .map(|o| -> Result<(String, PrimitiveType), String> {
581 Ok((o.key().to_string(), o.try_into()?))
582 })
583 .collect::<Result<HashMap<String, PrimitiveType>, String>>()?;
584
585 let reference: Option<String> = if (attr.is_enum
586 || matches!(dtype, Some(schema::DataType::Object)))
587 && attr.dtypes.len() == 1
588 {
589 Some(format!("#/$defs/{}", attr.dtypes[0]))
590 } else {
591 None
592 };
593
594 let items: Option<schema::Item> = attr.into();
595 let one_of = (!attr.is_array).then(|| attr.into());
596 let description = (!attr.docstring.is_empty()).then(|| attr.docstring.clone());
597 let enum_values = if attr.is_enum { Some(Vec::new()) } else { None };
598
599 if attr.dtypes.len() > 1 && !attr.is_array {
600 dtype = None;
602 }
603
604 let default: Option<PrimitiveType> = if let Some(default) = attr.default.clone() {
606 process_default(default, &dtype)
607 } else {
608 None
609 };
610
611 Ok(schema::Property {
612 title: Some(attr.name.clone()),
613 dtype,
614 default,
615 description,
616 term: attr.term.clone(),
617 reference,
618 options,
619 one_of,
620 items,
621 enum_values,
622 any_of: None,
623 all_of: None,
624 examples: Vec::new(),
625 ..Default::default()
626 })
627 }
628}
629
630fn process_default(
641 default: attribute::DataType,
642 dtype: &Option<schema::DataType>,
643) -> Option<PrimitiveType> {
644 if matches!(dtype, Some(schema::DataType::String)) {
645 default
646 .as_string()
647 .map(|d| PrimitiveType::String(d.trim_matches('"').to_string()))
648 } else {
649 Some(default.into())
650 }
651}
652
653impl TryFrom<&Attribute> for schema::DataType {
654 type Error = String;
655
656 fn try_from(attr: &Attribute) -> Result<Self, Self::Error> {
670 if attr.is_array {
671 return Ok(schema::DataType::Array);
672 }
673
674 schema::DataType::try_from(
675 attr.dtypes
676 .first()
677 .ok_or(format!("No data types found for attribute: {}", attr.name))?,
678 )
679 }
680}
681
682impl From<&Attribute> for Option<schema::Item> {
684 fn from(attr: &Attribute) -> Self {
694 if !attr.is_array {
695 return None;
698 }
699
700 let one_of: Vec<schema::Item> = attr.into();
702
703 if one_of.is_empty() {
704 Some(process_dtype(&attr.dtypes[0]))
706 } else {
707 Some(schema::Item::OneOfItem(schema::OneOfItemType { one_of }))
708 }
709 }
710}
711
712impl From<&Attribute> for Vec<schema::Item> {
713 fn from(attr: &Attribute) -> Self {
723 if attr.dtypes.len() == 1 {
724 return Vec::new();
725 }
726
727 let mut items = Vec::new();
728 for dtype in attr.dtypes.iter() {
729 items.push(process_dtype(dtype));
730 }
731
732 items
733 }
734}
735
736fn process_dtype(dtype: &str) -> schema::Item {
746 match schema::DataType::from_str(dtype) {
747 Ok(basic_type) => schema::Item::PropertyItem(Box::new(schema::Property {
748 dtype: Some(basic_type),
749 ..Default::default()
750 })),
751 Err(_) => schema::Item::ReferenceItem(schema::ReferenceItemType {
752 reference: format!("#/$defs/{dtype}"),
753 }),
754 }
755}
756
757impl TryFrom<&AttrOption> for PrimitiveType {
758 type Error = String;
759
760 fn try_from(option: &AttrOption) -> Result<Self, Self::Error> {
761 let value = option.value();
762
763 if let Ok(float_val) = value.parse::<f64>() {
765 return Ok(PrimitiveType::Number(float_val));
766 }
767
768 if let Ok(bool_val) = value.parse::<bool>() {
769 return Ok(PrimitiveType::Boolean(bool_val));
770 }
771
772 if let Ok(int_val) = value.parse::<i64>() {
773 return Ok(PrimitiveType::Integer(int_val));
774 }
775
776 Ok(PrimitiveType::String(value))
778 }
779}
780
#[cfg(test)]
mod tests {
    use serde_json::{json, Value};

    use super::*;
    use crate::attribute::Attribute;

    /// Builds an optional, undocumented attribute named `test_attribute`
    /// whose types are `string` and `RefType`.
    fn string_or_ref_attribute(is_array: bool) -> Attribute {
        Attribute {
            name: "test_attribute".to_string(),
            is_array,
            is_id: false,
            dtypes: vec!["string".to_string(), "RefType".to_string()],
            docstring: "".to_string(),
            options: vec![],
            term: None,
            required: false,
            default: None,
            xml: None,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    }

    /// Converts the attribute to a property and serializes it to JSON.
    fn property_json(attr: &Attribute) -> Value {
        let property: schema::Property =
            schema::Property::try_from(attr).expect("Failed to convert Attribute to Property");

        serde_json::to_value(&property).expect("Failed to serialize Property to JSON")
    }

    #[test]
    fn test_attribute_with_multiple_types() {
        let serialized_property = property_json(&string_or_ref_attribute(false));

        let expected_json = json!({
            "title": "test_attribute",
            "oneOf": [
                {"type": "string"},
                {"$ref": "#/$defs/RefType"},
            ]
        });

        assert_eq!(serialized_property, expected_json);
    }

    #[test]
    fn test_array_attribute() {
        let serialized_property = property_json(&string_or_ref_attribute(true));

        let expected_json = json!({
            "title": "test_attribute",
            "type": "array",
            "items": {
                "oneOf": [
                    {"type": "string"},
                    {"$ref": "#/$defs/RefType"}
                ]
            }
        });

        assert_eq!(serialized_property, expected_json);
    }
}