1use crate::ingest::{
2 IngestDType, IngestJsonNameKeyedStructure, IngestJsonStructure, IngestJsonStructureList,
3 IngestSchema, IngestStructureProperty,
4};
5use serde_json_schema::Schema;
6use serde_json_schema::property::{Property, PropertyInstance};
7use std::collections::{BTreeMap, BTreeSet};
8
9#[derive(Debug, thiserror::Error)]
10pub enum JSONSchemaLoadError {
11 #[error("json schema parse error: {0}")]
12 Parse(#[from] serde_json_schema::error::Error),
13 #[error("json schema conversion error: {0}")]
14 Convert(String),
15}
16
17impl From<serde_json::Error> for JSONSchemaLoadError {
18 fn from(e: serde_json::Error) -> Self {
19 Self::Parse(e.into())
20 }
21}
22
23#[derive(Debug, Clone)]
24pub struct JSONSchemaIngestSchema(pub Schema);
25
26impl JSONSchemaIngestSchema {
27 pub fn from_json_utf8(bytes: &[u8]) -> Result<Self, JSONSchemaLoadError> {
29 Ok(Self(serde_json::from_slice(bytes)?))
30 }
31
32 pub fn from_json_str(s: &str) -> Result<Self, JSONSchemaLoadError> {
33 Self::from_json_utf8(s.as_bytes())
34 }
35}
36
37fn infer_dtype_from_property_instance(
38 pi: &PropertyInstance,
39) -> Result<Option<IngestDType>, String> {
40 match pi {
41 PropertyInstance::String => Ok(Some(IngestDType::String("str".into()))),
42 PropertyInstance::Integer { .. } => Ok(Some(IngestDType::String("int".into()))),
43 PropertyInstance::Number { .. } => Ok(Some(IngestDType::String("float".into()))),
44 PropertyInstance::Boolean => Ok(Some(IngestDType::String("bool".into()))),
45 PropertyInstance::Null => Ok(None),
46 PropertyInstance::Object { .. } => {
47 Err("object properties are not supported in this conversion".to_string())
48 }
49 PropertyInstance::Array { .. } => {
50 Err("array properties are not supported in this conversion".to_string())
51 }
52 }
53}
54
55fn infer_dtype_from_property(
56 prop: &Property,
57 schema: &Schema,
58) -> Result<Option<IngestDType>, String> {
59 match prop {
60 Property::Value(pi) => infer_dtype_from_property_instance(pi),
61 Property::Ref(r) => r.deref(schema).map_or_else(
62 || Err("unresolvable property reference".to_string()),
63 infer_dtype_from_property_instance,
64 ),
65 }
66}
67
68impl TryFrom<JSONSchemaIngestSchema> for IngestSchema {
69 type Error = JSONSchemaLoadError;
70
71 fn try_from(v: JSONSchemaIngestSchema) -> Result<Self, Self::Error> {
72 let schema = v.0;
73 let props = schema.properties().ok_or_else(|| {
74 JSONSchemaLoadError::Convert(
75 "root JSON Schema must be an object with 'properties'".to_string(),
76 )
77 })?;
78 let required: BTreeSet<String> = schema
79 .required_properties()
80 .cloned()
81 .unwrap_or_default()
82 .into_iter()
83 .collect();
84
85 let mut structure_props = BTreeMap::new();
86 for (name, prop) in props {
87 let dtype = infer_dtype_from_property(prop, &schema).map_err(|message| {
88 JSONSchemaLoadError::Convert(format!(
89 "unsupported JSON Schema for property {name:?}: {message}"
90 ))
91 })?;
92 let description = if required.contains(name) {
93 Some("required field".to_string())
94 } else {
95 None
96 };
97
98 structure_props.insert(
99 name.clone(),
100 IngestStructureProperty {
101 choices: None,
102 description,
103 value: None,
104 dtype,
105 validator: None,
106 threshold: None,
107 },
108 );
109 }
110
111 let structure_name = schema
112 .id()
113 .map(|id| id.to_string())
114 .unwrap_or_else(|| "Model".to_string());
115 let props_value = serde_json::to_value(structure_props).map_err(|e| {
116 JSONSchemaLoadError::Convert(format!(
117 "failed to materialize converted structure properties: {e}"
118 ))
119 })?;
120 let mut keyed = BTreeMap::new();
121 keyed.insert(structure_name, props_value);
122 Ok(IngestSchema {
123 entities: None,
124 json_structures: Some(IngestJsonStructureList::Single(
125 IngestJsonStructure::JsonNameKeyedStructure(IngestJsonNameKeyedStructure(keyed)),
126 )),
127 classifications: None,
128 relations: None,
129 })
130 }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as a JSON Schema (panicking if parsing fails) and runs
    /// the conversion under test.
    fn convert(raw: &str) -> Result<IngestSchema, JSONSchemaLoadError> {
        let parsed = JSONSchemaIngestSchema::from_json_str(raw).unwrap();
        IngestSchema::try_from(parsed)
    }

    /// The schema `$id` names the structure, and only required properties
    /// carry the "required field" description.
    #[test]
    fn json_schema_uses_id_and_required() {
        let raw = r#"
        {
            "$id": "BusinessRecord",
            "type": "object",
            "required": ["business_name"],
            "properties": {
                "business_name": { "type": "string" },
                "status": { "type": "string" }
            }
        }
        "#;

        let ingest = convert(raw).unwrap();
        let ns = match ingest.json_structures.unwrap() {
            IngestJsonStructureList::Single(IngestJsonStructure::JsonNameKeyedStructure(ns)) => ns,
            _ => panic!("expected one name-keyed structure"),
        };

        let entry = ns.0.get("BusinessRecord").expect("businessrecord entry");
        let props: BTreeMap<String, IngestStructureProperty> =
            serde_json::from_value(entry.clone()).expect("properties");

        let required_description = props["business_name"].description.as_deref();
        assert_eq!(required_description, Some("required field"));
        assert!(props["status"].description.is_none());
    }

    /// Nested object properties must surface as a conversion error.
    #[test]
    fn json_schema_rejects_non_scalar_property_types() {
        let raw = r#"
        {
            "type": "object",
            "properties": {
                "nested": { "type": "object", "properties": { "a": { "type": "string" } } }
            }
        }
        "#;

        let err = convert(raw).expect_err("nested object should be rejected");
        let msg = match err {
            JSONSchemaLoadError::Convert(msg) => msg,
            other => panic!("unexpected error: {other:?}"),
        };
        assert!(msg.contains("unsupported JSON Schema"));
    }
}