1use std::{
25 collections::{BTreeMap, HashMap, HashSet},
26 str::FromStr,
27};
28
29use crate::{
30 attribute::{self, Attribute},
31 datamodel::DataModel,
32 json::schema::{AnyOfItemType, DataType, DataTypeItemType, Item, ReferenceItemType},
33 markdown::frontmatter::FrontMatter,
34 object::{Enumeration, Object},
35 option::AttrOption,
36 validation::BASIC_TYPES,
37};
38
39use super::schema::{self, PrimitiveType};
40
41const SCHEMA: &str = "https://json-schema.org/draft/2020-12/schema";
42
43pub fn to_json_schema(
55 model: &DataModel,
56 root: &str,
57 openai: bool,
58) -> Result<schema::SchemaObject, String> {
59 let root_object = retrieve_object(model, root)?;
60
61 let mut schema_object = schema::SchemaObject::try_from(root_object)?;
62 let mut used_types = HashSet::new();
63 let mut used_enums = HashSet::new();
64
65 collect_definitions(root_object, model, &mut used_types, &mut used_enums)?;
66
67 let definitions = collect_definitions_from_model(model, &used_types, &used_enums)?;
68
69 schema_object.schema = Some(SCHEMA.to_string());
70 schema_object.definitions = definitions;
71
72 if let Some(config) = model.config.clone() {
73 post_process_schema(&mut schema_object, &config, openai, &used_enums)?;
74 }
75
76 Ok(schema_object)
77}
78
79fn retrieve_object<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Object, String> {
90 model
91 .objects
92 .iter()
93 .find(|obj| obj.name == name)
94 .ok_or(format!("Object {name} not found"))
95}
96
97fn retrieve_enum<'a>(model: &'a DataModel, name: &'a str) -> Result<&'a Enumeration, String> {
108 model
109 .enums
110 .iter()
111 .find(|e| e.name == name)
112 .ok_or(format!("Enum {name} not found"))
113}
114
115fn collect_definitions_from_model(
127 model: &DataModel,
128 used_types: &HashSet<String>,
129 used_enums: &HashSet<String>,
130) -> Result<BTreeMap<String, schema::SchemaType>, String> {
131 let mut definitions = BTreeMap::new();
132
133 for obj_name in used_types {
134 let obj = retrieve_object(model, obj_name)?;
135 definitions.insert(obj_name.clone(), schema::SchemaType::try_from(obj)?);
136 }
137
138 for enum_name in used_enums {
139 let enum_object = retrieve_enum(model, enum_name)?;
140 definitions.insert(
141 enum_name.clone(),
142 schema::SchemaType::try_from(enum_object)?,
143 );
144 }
145
146 Ok(definitions)
147}
148
149fn collect_definitions(
162 object: &Object,
163 model: &DataModel,
164 used_types: &mut HashSet<String>,
165 used_enums: &mut HashSet<String>,
166) -> Result<(), String> {
167 for attr in object.attributes.iter() {
168 for dtype in attr.dtypes.iter() {
169 if BASIC_TYPES.contains(&dtype.as_str()) || used_types.contains(dtype) {
170 continue;
171 }
172
173 let object = model.objects.iter().find(|obj| obj.name == *dtype);
174 let enumeration = model.enums.iter().find(|e| e.name == *dtype);
175
176 if let Some(object) = object {
177 used_types.insert(dtype.clone());
178 collect_definitions(object, model, used_types, used_enums)?;
179 } else if let Some(enumeration) = enumeration {
180 used_enums.insert(enumeration.name.clone());
181 } else {
182 return Err(format!("Object or enumeration {dtype} not found"));
183 }
184 }
185 }
186
187 Ok(())
188}
189
190fn resolve_prefixes(schema: &mut schema::SchemaObject, prefixes: &HashMap<String, String>) {
197 for (_, property) in schema.properties.iter_mut() {
198 if let Some(reference) = property.term.clone() {
199 let (prefix, term) = reference.split_once(":").unwrap_or(("", ""));
200 if let Some(prefix) = prefixes.get(prefix) {
201 property.term = Some(format!("{prefix}{term}"));
202 }
203 }
204 }
205}
206
207fn post_process_schema(
215 schema_object: &mut schema::SchemaObject,
216 config: &FrontMatter,
217 openai: bool,
218 used_enums: &HashSet<String>,
219) -> Result<(), String> {
220 schema_object.id = Some(config.repo.clone());
221 post_process_object(schema_object, config, openai, used_enums)?;
222
223 for (_, definition) in schema_object.definitions.iter_mut() {
224 if let schema::SchemaType::Object(definition) = definition {
225 post_process_object(definition, config, openai, used_enums)?;
226 }
227 }
228
229 Ok(())
230}
231
232fn post_process_object(
241 object: &mut schema::SchemaObject,
242 config: &FrontMatter,
243 openai: bool,
244 used_enums: &HashSet<String>,
245) -> Result<(), String> {
246 if let Some(prefixes) = &config.prefixes {
247 resolve_prefixes(object, prefixes);
248 }
249 if openai {
250 object.schema = None;
251 object.id = None;
252 remove_options(object);
253 set_required_and_nullable(object);
254 }
255
256 for (_, property) in object.properties.iter_mut() {
257 if let Some(reference) = &property.reference {
258 if used_enums.contains(
259 reference
260 .split("/")
261 .last()
262 .ok_or(format!("Failed to split reference: {reference}"))?,
263 ) {
264 if openai {
265 property.dtype = None;
266 } else {
267 property.dtype = Some(schema::DataType::String);
268 }
269 }
270 }
271 }
272
273 Ok(())
274}
275
276fn remove_options(schema: &mut schema::SchemaObject) {
282 for (_, property) in schema.properties.iter_mut() {
283 property.options = HashMap::new();
284 }
285}
286
287fn set_required_and_nullable(schema: &mut schema::SchemaObject) {
293 let mut new_required = Vec::new();
294
295 for (name, property) in &mut schema.properties {
296 clean_reference_property(property);
297 convert_one_of_to_any_of(property);
298
299 if !schema.required.contains(name) {
300 new_required.push(name.clone());
301 make_property_nullable(property);
302 }
303 }
304
305 finalize_schema_requirements(schema, new_required);
306}
307
308fn clean_reference_property(property: &mut schema::Property) {
314 if property.reference.is_some() {
315 property.description = None;
316 property.title = None;
317 property.dtype = None;
318 }
319}
320
321fn convert_one_of_to_any_of(property: &mut schema::Property) {
327 if let Some(Item::OneOfItem(one_of)) = &mut property.items {
328 property.items = Some(Item::AnyOfItem(AnyOfItemType {
329 any_of: one_of.one_of.clone(),
330 }));
331 }
332}
333
334fn make_property_nullable(property: &mut schema::Property) {
340 let mut any_of = vec![Item::DataTypeItem(DataTypeItemType {
341 dtype: DataType::Null,
342 })];
343
344 handle_property_data_type(property, &mut any_of);
345 handle_property_reference(property, &mut any_of);
346 handle_property_one_of(property, &mut any_of);
347
348 if !matches!(property.dtype, Some(DataType::Array)) {
349 property.any_of = Some(any_of);
350 }
351}
352
353fn handle_property_data_type(property: &mut schema::Property, any_of: &mut Vec<Item>) {
360 if let Some(dtype) = &property.dtype {
361 let is_array = matches!(dtype, DataType::Array);
362
363 match dtype {
364 DataType::Array => {
365 any_of.push(Item::DataTypeItem(DataTypeItemType {
366 dtype: DataType::Null,
367 }));
368 }
369 DataType::Object => {
370 property.dtype = None;
371 }
372 DataType::Multiple(data_types) => {
373 add_multiple_data_types(any_of, data_types);
374 }
375 _ => {
376 any_of.push(Item::DataTypeItem(DataTypeItemType {
377 dtype: dtype.clone(),
378 }));
379 }
380 }
381
382 if !is_array {
383 property.dtype = None;
384 }
385 }
386}
387
388fn add_multiple_data_types(any_of: &mut Vec<Item>, data_types: &[DataType]) {
395 for dtype in data_types.iter() {
396 if dtype.is_not_object() || dtype.is_array() {
397 any_of.push(Item::DataTypeItem(DataTypeItemType {
398 dtype: dtype.clone(),
399 }));
400 }
401 }
402}
403
404fn handle_property_reference(property: &mut schema::Property, any_of: &mut Vec<Item>) {
411 if let Some(reference) = &property.reference {
412 any_of.push(Item::ReferenceItem(ReferenceItemType {
413 reference: reference.clone(),
414 }));
415 property.reference = None;
416 property.dtype = None;
417 property.title = None;
418 property.description = None;
419 }
420}
421
422fn handle_property_one_of(property: &mut schema::Property, any_of: &mut Vec<Item>) {
429 if let Some(one_of) = &property.one_of {
430 any_of.extend(one_of.clone());
431 property.one_of = None;
432 }
433}
434
435fn finalize_schema_requirements(schema: &mut schema::SchemaObject, new_required: Vec<String>) {
442 schema.additional_properties = false;
443 schema.required.extend(new_required);
444 schema.required.sort();
445}
446
447impl TryFrom<&Enumeration> for schema::SchemaType {
448 type Error = String;
449
450 fn try_from(enumeration: &Enumeration) -> Result<Self, Self::Error> {
460 Ok(schema::SchemaType::Enum(schema::EnumObject::try_from(
461 enumeration,
462 )?))
463 }
464}
465
466impl TryFrom<&Object> for schema::SchemaType {
467 type Error = String;
468
469 fn try_from(obj: &Object) -> Result<Self, Self::Error> {
479 Ok(schema::SchemaType::Object(schema::SchemaObject::try_from(
480 obj,
481 )?))
482 }
483}
484
485impl TryFrom<&Object> for schema::SchemaObject {
486 type Error = String;
487
488 fn try_from(obj: &Object) -> Result<Self, Self::Error> {
498 let properties: Result<BTreeMap<String, schema::Property>, String> = obj
499 .attributes
500 .iter()
501 .map(|attr| -> Result<(String, schema::Property), String> {
502 Ok((attr.name.clone(), schema::Property::try_from(attr)?))
503 })
504 .collect();
505
506 let required: Vec<String> = obj
507 .attributes
508 .iter()
509 .filter(|attr| attr.required)
510 .map(|attr| attr.name.clone())
511 .collect();
512
513 Ok(schema::SchemaObject {
514 title: obj.name.clone(),
515 dtype: Some(schema::DataType::Object),
516 description: Some(obj.docstring.clone()),
517 properties: properties?,
518 definitions: BTreeMap::new(),
519 required,
520 schema: None,
521 id: None,
522 additional_properties: false,
523 })
524 }
525}
526
527impl TryFrom<&Enumeration> for schema::EnumObject {
528 type Error = String;
529
530 fn try_from(enumeration: &Enumeration) -> Result<Self, Self::Error> {
540 let values = enumeration
541 .mappings
542 .values()
543 .cloned()
544 .collect::<Vec<String>>();
545
546 Ok(schema::EnumObject {
547 title: enumeration.name.clone(),
548 dtype: Some(schema::DataType::String),
549 description: Some(enumeration.docstring.clone()),
550 enum_values: values,
551 })
552 }
553}
554
555impl TryFrom<&Attribute> for schema::Property {
556 type Error = String;
557
558 fn try_from(attr: &Attribute) -> Result<Self, Self::Error> {
568 let mut dtype = (!attr.is_enum)
569 .then(|| schema::DataType::try_from(attr))
570 .transpose()?;
571
572 let options: HashMap<String, PrimitiveType> = attr
573 .options
574 .iter()
575 .map(|o| -> Result<(String, PrimitiveType), String> {
576 Ok((o.key().to_string(), o.try_into()?))
577 })
578 .collect::<Result<HashMap<String, PrimitiveType>, String>>()?;
579
580 let reference: Option<String> = if (attr.is_enum
581 || matches!(dtype, Some(schema::DataType::Object)))
582 && attr.dtypes.len() == 1
583 {
584 Some(format!("#/$defs/{}", attr.dtypes[0]))
585 } else {
586 None
587 };
588
589 let items: Option<schema::Item> = attr.into();
590 let one_of = (!attr.is_array).then(|| attr.into());
591 let description = (!attr.docstring.is_empty()).then(|| attr.docstring.clone());
592 let enum_values = if attr.is_enum { Some(Vec::new()) } else { None };
593
594 if attr.dtypes.len() > 1 && !attr.is_array {
595 dtype = None;
597 }
598
599 let default: Option<PrimitiveType> = if let Some(default) = attr.default.clone() {
601 process_default(default, &dtype)
602 } else {
603 None
604 };
605
606 Ok(schema::Property {
607 title: Some(attr.name.clone()),
608 dtype,
609 default,
610 description,
611 term: attr.term.clone(),
612 reference,
613 options,
614 one_of,
615 items,
616 enum_values,
617 any_of: None,
618 all_of: None,
619 examples: Vec::new(),
620 })
621 }
622}
623
624fn process_default(
635 default: attribute::DataType,
636 dtype: &Option<schema::DataType>,
637) -> Option<PrimitiveType> {
638 if matches!(dtype, Some(schema::DataType::String)) {
639 default
640 .as_string()
641 .map(|d| PrimitiveType::String(d.trim_matches('"').to_string()))
642 } else {
643 Some(default.into())
644 }
645}
646
647impl TryFrom<&Attribute> for schema::DataType {
648 type Error = String;
649
650 fn try_from(attr: &Attribute) -> Result<Self, Self::Error> {
664 if attr.is_array {
665 return Ok(schema::DataType::Array);
666 }
667
668 schema::DataType::try_from(
669 attr.dtypes
670 .first()
671 .ok_or(format!("No data types found for attribute: {}", attr.name))?,
672 )
673 }
674}
675
676impl From<&Attribute> for Option<schema::Item> {
678 fn from(attr: &Attribute) -> Self {
688 if !attr.is_array {
689 return None;
692 }
693
694 let one_of: Vec<schema::Item> = attr.into();
696
697 if one_of.is_empty() {
698 Some(process_dtype(&attr.dtypes[0]))
700 } else {
701 Some(schema::Item::OneOfItem(schema::OneOfItemType { one_of }))
702 }
703 }
704}
705
706impl From<&Attribute> for Vec<schema::Item> {
707 fn from(attr: &Attribute) -> Self {
717 if attr.dtypes.len() == 1 {
718 return Vec::new();
719 }
720
721 let mut items = Vec::new();
722 for dtype in attr.dtypes.iter() {
723 items.push(process_dtype(dtype));
724 }
725
726 items
727 }
728}
729
730fn process_dtype(dtype: &str) -> schema::Item {
740 match schema::DataType::from_str(dtype) {
741 Ok(basic_type) => {
742 schema::Item::DataTypeItem(schema::DataTypeItemType { dtype: basic_type })
743 }
744 Err(_) => schema::Item::ReferenceItem(schema::ReferenceItemType {
745 reference: format!("#/$defs/{dtype}"),
746 }),
747 }
748}
749
750impl TryFrom<&AttrOption> for PrimitiveType {
751 type Error = String;
752
753 fn try_from(option: &AttrOption) -> Result<Self, Self::Error> {
754 let value = option.value();
755
756 if let Ok(float_val) = value.parse::<f64>() {
758 return Ok(PrimitiveType::Number(float_val));
759 }
760
761 if let Ok(bool_val) = value.parse::<bool>() {
762 return Ok(PrimitiveType::Boolean(bool_val));
763 }
764
765 if let Ok(int_val) = value.parse::<i64>() {
766 return Ok(PrimitiveType::Integer(int_val));
767 }
768
769 Ok(PrimitiveType::String(value))
771 }
772}
773
#[cfg(test)]
mod tests {
    use serde_json::{json, Value};

    use super::*;
    use crate::attribute::Attribute;

    /// Builds an optional attribute typed as `string | RefType`, either as a
    /// scalar or as an array.
    fn multi_type_attribute(is_array: bool) -> Attribute {
        Attribute {
            name: "test_attribute".to_string(),
            is_array,
            is_id: false,
            dtypes: vec!["string".to_string(), "RefType".to_string()],
            docstring: "".to_string(),
            options: vec![],
            term: None,
            required: false,
            default: None,
            xml: None,
            is_enum: false,
            position: None,
            import_prefix: None,
        }
    }

    /// Converts the attribute into a property and serializes it to JSON.
    fn property_json(attr: &Attribute) -> Value {
        let property: schema::Property =
            schema::Property::try_from(attr).expect("Failed to convert Attribute to Property");

        serde_json::to_value(&property).expect("Failed to serialize Property to JSON")
    }

    /// A scalar attribute with several types serializes to a `oneOf`.
    #[test]
    fn test_attribute_with_multiple_types() {
        let serialized_property = property_json(&multi_type_attribute(false));

        let expected_json = json!({
            "title": "test_attribute",
            "oneOf": [
                {"type": "string"},
                {"$ref": "#/$defs/RefType"},
            ]
        });

        assert_eq!(serialized_property, expected_json);
    }

    /// An array attribute with several types puts the `oneOf` under `items`.
    #[test]
    fn test_array_attribute() {
        let serialized_property = property_json(&multi_type_attribute(true));

        let expected_json = json!({
            "title": "test_attribute",
            "type": "array",
            "items": {
                "oneOf": [
                    {"type": "string"},
                    {"$ref": "#/$defs/RefType"}
                ]
            }
        });

        assert_eq!(serialized_property, expected_json);
    }
}