// ie_schema/expanded.rs

use crate::normalized::{
    Classification, DType, EntitySpec, ExpandedName, JsonStructure, NamedStructure,
    NormalizedSchema, Relation, RelationAcquired, StructureProperty, Validator,
};
use serde::Serialize;
use std::collections::BTreeMap;
use std::convert::TryFrom;
8
// Expanded schema.
//
// Goals:
// - slug-only names
// - defaults filled
// - no top-level unions
// - shared concepts use one canonical representation

/// Free-text description attached to expanded schema items.
pub type Description = String;

/// Pattern text stored as a plain string.
///
/// NOTE(review): presumably a regular-expression source; nothing visible in
/// this file references the alias — confirm its users before changing it.
pub type Regex = String;

/// Numeric threshold carried by entities, structure properties and
/// classifications.
pub type Threshold = f64;
19
20#[derive(Debug, Clone, PartialEq, Serialize)]
21pub struct ExpandedEntity {
22    pub name: ExpandedName,
23    pub dtype: Option<DType>,
24    pub validator: Option<Validator>,
25    pub threshold: Option<Threshold>,
26    pub description: Option<Description>,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize)]
30pub struct ExpandedStructureProperty {
31    pub choices: Vec<ExpandedEntity>,
32    pub description: Option<Description>,
33    pub value: Option<String>,
34    pub dtype: Option<DType>,
35    pub validator: Option<Validator>,
36    pub threshold: Option<Threshold>,
37}
38
39#[derive(Debug, Clone, PartialEq, Serialize)]
40pub struct ExpandedJsonStructure {
41    pub name: ExpandedName,
42    pub props: BTreeMap<ExpandedName, ExpandedStructureProperty>,
43}
44
45#[derive(Debug, Clone, PartialEq, Serialize)]
46pub struct ExpandedClassification {
47    pub task: ExpandedEntity,
48    pub labels: Vec<ExpandedEntity>,
49    pub threshold: Option<Threshold>,
50    pub multi_label: bool,
51    pub label_descriptions: BTreeMap<ExpandedName, ExpandedEntity>,
52}
53
54#[derive(Debug, Clone, PartialEq, Serialize)]
55pub enum ExpandedRelation {
56    EmptyAcquired {
57        name: ExpandedName,
58        description: Option<Description>,
59    },
60    EntityAcquired {
61        name: ExpandedName,
62        description: Option<Description>,
63        head: Box<ExpandedEntity>,
64        tail: Box<ExpandedEntity>,
65    },
66}
67
68#[derive(Debug, Clone, PartialEq, Serialize, Default)]
69pub struct ExpandedSchema {
70    pub entities: Vec<ExpandedEntity>,
71    pub json_structures: Vec<ExpandedJsonStructure>,
72    pub relations: Vec<ExpandedRelation>,
73    pub classifications: Vec<ExpandedClassification>,
74}
75
76#[derive(Debug, thiserror::Error)]
77pub enum SchemaExpandError {
78    #[error("relation without acquired form cannot be expanded at index {index}: {name}")]
79    RelationWithoutAcquired { index: usize, name: String },
80}
81
82fn entity2_to_3(v: EntitySpec) -> ExpandedEntity {
83    ExpandedEntity {
84        name: v.name,
85        dtype: v.dtype,
86        validator: v.validator,
87        threshold: v.threshold,
88        description: v.description,
89    }
90}
91
92fn entity_spec_to_structure_property(spec: EntitySpec) -> ExpandedStructureProperty {
93    ExpandedStructureProperty {
94        choices: Vec::new(),
95        description: spec.description,
96        value: None,
97        dtype: spec.dtype,
98        validator: spec.validator,
99        threshold: spec.threshold,
100    }
101}
102
103fn structure_property2_to_3(v: StructureProperty) -> ExpandedStructureProperty {
104    ExpandedStructureProperty {
105        choices: v.choices.into_iter().map(entity2_to_3).collect(),
106        description: v.description,
107        value: v.value,
108        dtype: v.dtype,
109        validator: v.validator,
110        threshold: v.threshold,
111    }
112}
113
114fn json_structure2_to_3(
115    v: JsonStructure,
116    index: usize,
117) -> Result<ExpandedJsonStructure, SchemaExpandError> {
118    match v {
119        JsonStructure::NamedStructure(NamedStructure { name, props }) => {
120            Ok(ExpandedJsonStructure {
121                name,
122                props: props
123                    .into_iter()
124                    .map(|(k, v)| (k, structure_property2_to_3(v)))
125                    .collect(),
126            })
127        }
128        JsonStructure::NameKeyedStructure { name, props } => Ok(ExpandedJsonStructure {
129            name,
130            props: props
131                .into_iter()
132                .map(|(k, v)| (k, structure_property2_to_3(v)))
133                .collect(),
134        }),
135        JsonStructure::EntityList(list) => {
136            let props = list
137                .into_iter()
138                .map(|spec| {
139                    let key = spec.name.clone();
140                    (key, entity_spec_to_structure_property(spec))
141                })
142                .collect();
143            Ok(ExpandedJsonStructure {
144                name: ExpandedName::new(format!("unnamed_{index}")),
145                props,
146            })
147        }
148    }
149}
150
151fn classification2_to_3(v: Classification) -> ExpandedClassification {
152    ExpandedClassification {
153        task: entity2_to_3(v.task),
154        labels: v.labels.into_iter().map(entity2_to_3).collect(),
155        threshold: v.threshold,
156        multi_label: v.multi_label,
157        label_descriptions: v
158            .label_descriptions
159            .into_iter()
160            .map(|(k, v)| (k, entity2_to_3(v)))
161            .collect(),
162    }
163}
164
165fn relation2_to_3(v: Relation, index: usize) -> Result<ExpandedRelation, SchemaExpandError> {
166    match v.acquired {
167        Some(RelationAcquired::Empty) => Ok(ExpandedRelation::EmptyAcquired {
168            name: v.name,
169            description: v.description,
170        }),
171        Some(RelationAcquired::Entity { head, tail }) => Ok(ExpandedRelation::EntityAcquired {
172            name: v.name,
173            description: v.description,
174            head: Box::new(entity2_to_3(*head)),
175            tail: Box::new(entity2_to_3(*tail)),
176        }),
177        None => Err(SchemaExpandError::RelationWithoutAcquired {
178            index,
179            name: v.name.to_string(),
180        }),
181    }
182}
183
184impl TryFrom<NormalizedSchema> for ExpandedSchema {
185    type Error = SchemaExpandError;
186
187    fn try_from(v: NormalizedSchema) -> Result<Self, Self::Error> {
188        let entities = v.entities.into_iter().map(entity2_to_3).collect();
189
190        let json_structures = v
191            .json_structures
192            .into_iter()
193            .enumerate()
194            .map(|(i, js)| json_structure2_to_3(js, i))
195            .collect::<Result<Vec<_>, _>>()?;
196
197        let relations = v
198            .relations
199            .into_iter()
200            .enumerate()
201            .map(|(i, rel)| relation2_to_3(rel, i))
202            .collect::<Result<Vec<_>, _>>()?;
203
204        let classifications = v
205            .classifications
206            .into_iter()
207            .map(classification2_to_3)
208            .collect();
209
210        Ok(Self {
211            entities,
212            json_structures,
213            relations,
214            classifications,
215        })
216    }
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::normalized::NormalizedSchema;

    /// A named structure keeps its (slugged) name and a relation with
    /// head/tail expands successfully.
    #[test]
    fn expanded_expands_named_structure() {
        let s = r#"
        {
            "json_structures": [
                {
                    "name": "Patient Record",
                    "id": { "description": "identifier", "dtype": "str" }
                }
            ],
            "relations": [
                { "contains": { "head": "patient", "tail": "record" } }
            ]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let s3 = ExpandedSchema::try_from(s2).unwrap();

        assert_eq!(s3.json_structures.len(), 1);
        assert_eq!(s3.relations.len(), 1);
        assert_eq!(s3.json_structures[0].name.as_str(), "patient_record");
    }

    /// A relation given only by name has no acquired form and must be
    /// rejected, reporting its position in the list.
    #[test]
    fn expanded_rejects_relation_without_acquired_form() {
        let s = r#"
        {
            "relations": ["interacts_with"]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let err = ExpandedSchema::try_from(s2).unwrap_err();

        match err {
            SchemaExpandError::RelationWithoutAcquired { index, .. } => {
                // The only relation in the fixture sits at position 0.
                assert_eq!(index, 0);
            }
        }
    }

    /// A bare entity list becomes a structure named `unnamed_{index}` whose
    /// properties are keyed by entity name.
    #[test]
    fn expanded_entity_list_becomes_unnamed_structure() {
        let s = r#"
        {
            "json_structures": [
                ["gene::str", "score::float::0.9"]
            ]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let s3 = ExpandedSchema::try_from(s2).unwrap();

        assert_eq!(s3.json_structures.len(), 1);
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.entities.len(), 0);

        let props = &s3.json_structures[0].props;
        assert_eq!(props.len(), 2);

        let gene = props.get(&ExpandedName::new("gene".to_string())).unwrap();
        assert_eq!(gene.dtype, Some(DType::String));
        assert_eq!(gene.threshold, None);

        let score = props.get(&ExpandedName::new("score".to_string())).unwrap();
        assert_eq!(score.dtype, Some(DType::Float));
        assert_eq!(score.threshold, Some(0.9));
    }

    /// Several entity lists are named after their respective positions.
    #[test]
    fn expanded_multiple_entity_lists_get_sequential_names() {
        let s = r#"
        {
            "json_structures": [
                ["a::str"],
                ["b::float"],
                ["c::bool"]
            ]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let s3 = ExpandedSchema::try_from(s2).unwrap();

        assert_eq!(s3.json_structures.len(), 3);
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.json_structures[1].name.as_str(), "unnamed_1");
        assert_eq!(s3.json_structures[2].name.as_str(), "unnamed_2");
    }

    /// Entity lists and named structures can be mixed; each keeps its own
    /// naming rule and top-level entities are unaffected.
    #[test]
    fn expanded_mixed_entity_list_and_named_structure() {
        let s = r#"
        {
            "entities": ["gene::str"],
            "json_structures": [
                ["patient::str"],
                {
                    "name": "Patient Record",
                    "id": { "dtype": "str" }
                }
            ]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let s3 = ExpandedSchema::try_from(s2).unwrap();

        assert_eq!(s3.entities.len(), 1);
        assert_eq!(s3.entities[0].name.as_str(), "gene");
        assert_eq!(s3.json_structures.len(), 2);
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.json_structures[1].name.as_str(), "patient_record");
    }

    /// Entities that appear inside an entity list stay inside the structure
    /// and are not added to the top-level entity list.
    #[test]
    fn expanded_entity_list_entities_not_promoted_to_top_level() {
        let s = r#"
        {
            "entities": ["gene::str"],
            "json_structures": [
                ["gene::str", "score::float::0.9"]
            ]
        }
        "#;

        let s2 = NormalizedSchema::from_json_str(s).unwrap();
        let s3 = ExpandedSchema::try_from(s2).unwrap();

        assert_eq!(s3.entities.len(), 1);
        assert_eq!(s3.entities[0].name.as_str(), "gene");
        assert_eq!(s3.json_structures.len(), 1);

        let props = &s3.json_structures[0].props;
        assert!(props.contains_key(&ExpandedName::new("gene".to_string())));
        assert!(props.contains_key(&ExpandedName::new("score".to_string())));
    }
}