Skip to main content

// ie_schema/json_schema.rs

1use crate::ingest::{
2    IngestDType, IngestJsonNameKeyedStructure, IngestJsonStructure, IngestJsonStructureList,
3    IngestSchema, IngestStructureProperty,
4};
5use serde_json_schema::Schema;
6use serde_json_schema::property::{Property, PropertyInstance};
7use std::collections::{BTreeMap, BTreeSet};
8
9#[derive(Debug, thiserror::Error)]
10pub enum JSONSchemaLoadError {
11    #[error("json schema parse error: {0}")]
12    Parse(#[from] serde_json_schema::error::Error),
13    #[error("json schema conversion error: {0}")]
14    Convert(String),
15}
16
17impl From<serde_json::Error> for JSONSchemaLoadError {
18    fn from(e: serde_json::Error) -> Self {
19        Self::Parse(e.into())
20    }
21}
22
23#[derive(Debug, Clone)]
24pub struct JSONSchemaIngestSchema(pub Schema);
25
26impl JSONSchemaIngestSchema {
27    /// Parse JSON Schema from UTF-8 JSON bytes without an intermediate [`serde_json::Value`].
28    pub fn from_json_utf8(bytes: &[u8]) -> Result<Self, JSONSchemaLoadError> {
29        Ok(Self(serde_json::from_slice(bytes)?))
30    }
31
32    pub fn from_json_str(s: &str) -> Result<Self, JSONSchemaLoadError> {
33        Self::from_json_utf8(s.as_bytes())
34    }
35}
36
37fn infer_dtype_from_property_instance(
38    pi: &PropertyInstance,
39) -> Result<Option<IngestDType>, String> {
40    match pi {
41        PropertyInstance::String => Ok(Some(IngestDType::String("str".into()))),
42        PropertyInstance::Integer { .. } => Ok(Some(IngestDType::String("int".into()))),
43        PropertyInstance::Number { .. } => Ok(Some(IngestDType::String("float".into()))),
44        PropertyInstance::Boolean => Ok(Some(IngestDType::String("bool".into()))),
45        PropertyInstance::Null => Ok(None),
46        PropertyInstance::Object { .. } => {
47            Err("object properties are not supported in this conversion".to_string())
48        }
49        PropertyInstance::Array { .. } => {
50            Err("array properties are not supported in this conversion".to_string())
51        }
52    }
53}
54
55fn infer_dtype_from_property(
56    prop: &Property,
57    schema: &Schema,
58) -> Result<Option<IngestDType>, String> {
59    match prop {
60        Property::Value(pi) => infer_dtype_from_property_instance(pi),
61        Property::Ref(r) => r.deref(schema).map_or_else(
62            || Err("unresolvable property reference".to_string()),
63            infer_dtype_from_property_instance,
64        ),
65    }
66}
67
68impl TryFrom<JSONSchemaIngestSchema> for IngestSchema {
69    type Error = JSONSchemaLoadError;
70
71    fn try_from(v: JSONSchemaIngestSchema) -> Result<Self, Self::Error> {
72        let schema = v.0;
73        let props = schema.properties().ok_or_else(|| {
74            JSONSchemaLoadError::Convert(
75                "root JSON Schema must be an object with 'properties'".to_string(),
76            )
77        })?;
78        let required: BTreeSet<String> = schema
79            .required_properties()
80            .cloned()
81            .unwrap_or_default()
82            .into_iter()
83            .collect();
84
85        let mut structure_props = BTreeMap::new();
86        for (name, prop) in props {
87            let dtype = infer_dtype_from_property(prop, &schema).map_err(|message| {
88                JSONSchemaLoadError::Convert(format!(
89                    "unsupported JSON Schema for property {name:?}: {message}"
90                ))
91            })?;
92            let description = if required.contains(name) {
93                Some("required field".to_string())
94            } else {
95                None
96            };
97
98            structure_props.insert(
99                name.clone(),
100                IngestStructureProperty {
101                    choices: None,
102                    description,
103                    value: None,
104                    dtype,
105                    validator: None,
106                    threshold: None,
107                },
108            );
109        }
110
111        let structure_name = schema
112            .id()
113            .map(|id| id.to_string())
114            .unwrap_or_else(|| "Model".to_string());
115        let props_value = serde_json::to_value(structure_props).map_err(|e| {
116            JSONSchemaLoadError::Convert(format!(
117                "failed to materialize converted structure properties: {e}"
118            ))
119        })?;
120        let mut keyed = BTreeMap::new();
121        keyed.insert(structure_name, props_value);
122        Ok(IngestSchema {
123            entities: None,
124            json_structures: Some(IngestJsonStructureList::Single(
125                IngestJsonStructure::JsonNameKeyedStructure(IngestJsonNameKeyedStructure(keyed)),
126            )),
127            classifications: None,
128            relations: None,
129        })
130    }
131}
132
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as a JSON Schema (panicking if parsing fails) and attempts the
    /// conversion into an `IngestSchema`.
    fn convert(raw: &str) -> Result<IngestSchema, JSONSchemaLoadError> {
        let parsed = JSONSchemaIngestSchema::from_json_str(raw).unwrap();
        IngestSchema::try_from(parsed)
    }

    /// The structure is keyed by `$id`, and only required properties carry the
    /// "required field" description.
    #[test]
    fn json_schema_uses_id_and_required() {
        let raw = r#"
        {
            "$id": "BusinessRecord",
            "type": "object",
            "required": ["business_name"],
            "properties": {
                "business_name": { "type": "string" },
                "status": { "type": "string" }
            }
        }
        "#;

        let ingest = convert(raw).unwrap();
        let ns = match ingest.json_structures.unwrap() {
            IngestJsonStructureList::Single(IngestJsonStructure::JsonNameKeyedStructure(ns)) => ns,
            _ => panic!("expected one name-keyed structure"),
        };
        let entry = ns.0.get("BusinessRecord").expect("businessrecord entry");
        let props: BTreeMap<String, IngestStructureProperty> =
            serde_json::from_value(entry.clone()).expect("properties");

        let business_name = &props["business_name"];
        assert_eq!(
            business_name.description.as_deref(),
            Some("required field")
        );
        let status = &props["status"];
        assert_eq!(status.description, None);
    }

    /// A nested object property must be rejected with a conversion error.
    #[test]
    fn json_schema_rejects_non_scalar_property_types() {
        let raw = r#"
        {
            "type": "object",
            "properties": {
                "nested": { "type": "object", "properties": { "a": { "type": "string" } } }
            }
        }
        "#;

        let err = convert(raw).expect_err("nested object should be rejected");
        let msg = match err {
            JSONSchemaLoadError::Convert(msg) => msg,
            other => panic!("unexpected error: {other:?}"),
        };
        assert!(msg.contains("unsupported JSON Schema"));
    }
}