1use crate::normalized::{
2 Classification, DType, EntitySpec, ExpandedName, JsonStructure, NamedStructure,
3 NormalizedSchema, Relation, RelationAcquired, StructureProperty, Validator,
4};
5use serde::Serialize;
6use std::collections::BTreeMap;
7use std::convert::TryFrom;
8
/// Free-form description text attached to entities, structure properties and relations.
pub type Description = String;

/// Pattern text stored as a plain `String`.
///
/// NOTE(review): presumably a regular-expression source; nothing visible in
/// this file compiles or validates it — confirm against the users of this alias.
pub type Regex = String;

/// Numeric threshold carried by entities, structure properties and classifications.
///
/// NOTE(review): the valid range is not established in this file — confirm upstream.
pub type Threshold = f64;
19
20#[derive(Debug, Clone, PartialEq, Serialize)]
21pub struct ExpandedEntity {
22 pub name: ExpandedName,
23 pub dtype: Option<DType>,
24 pub validator: Option<Validator>,
25 pub threshold: Option<Threshold>,
26 pub description: Option<Description>,
27}
28
29#[derive(Debug, Clone, PartialEq, Serialize)]
30pub struct ExpandedStructureProperty {
31 pub choices: Vec<ExpandedEntity>,
32 pub description: Option<Description>,
33 pub value: Option<String>,
34 pub dtype: Option<DType>,
35 pub validator: Option<Validator>,
36 pub threshold: Option<Threshold>,
37}
38
39#[derive(Debug, Clone, PartialEq, Serialize)]
40pub struct ExpandedJsonStructure {
41 pub name: ExpandedName,
42 pub props: BTreeMap<ExpandedName, ExpandedStructureProperty>,
43}
44
45#[derive(Debug, Clone, PartialEq, Serialize)]
46pub struct ExpandedClassification {
47 pub task: ExpandedEntity,
48 pub labels: Vec<ExpandedEntity>,
49 pub threshold: Option<Threshold>,
50 pub multi_label: bool,
51 pub label_descriptions: BTreeMap<ExpandedName, ExpandedEntity>,
52}
53
54#[derive(Debug, Clone, PartialEq, Serialize)]
55pub enum ExpandedRelation {
56 EmptyAcquired {
57 name: ExpandedName,
58 description: Option<Description>,
59 },
60 EntityAcquired {
61 name: ExpandedName,
62 description: Option<Description>,
63 head: Box<ExpandedEntity>,
64 tail: Box<ExpandedEntity>,
65 },
66}
67
68#[derive(Debug, Clone, PartialEq, Serialize, Default)]
69pub struct ExpandedSchema {
70 pub entities: Vec<ExpandedEntity>,
71 pub json_structures: Vec<ExpandedJsonStructure>,
72 pub relations: Vec<ExpandedRelation>,
73 pub classifications: Vec<ExpandedClassification>,
74}
75
76#[derive(Debug, thiserror::Error)]
77pub enum SchemaExpandError {
78 #[error("relation without acquired form cannot be expanded at index {index}: {name}")]
79 RelationWithoutAcquired { index: usize, name: String },
80}
81
82fn entity2_to_3(v: EntitySpec) -> ExpandedEntity {
83 ExpandedEntity {
84 name: v.name,
85 dtype: v.dtype,
86 validator: v.validator,
87 threshold: v.threshold,
88 description: v.description,
89 }
90}
91
92fn entity_spec_to_structure_property(spec: EntitySpec) -> ExpandedStructureProperty {
93 ExpandedStructureProperty {
94 choices: Vec::new(),
95 description: spec.description,
96 value: None,
97 dtype: spec.dtype,
98 validator: spec.validator,
99 threshold: spec.threshold,
100 }
101}
102
103fn structure_property2_to_3(v: StructureProperty) -> ExpandedStructureProperty {
104 ExpandedStructureProperty {
105 choices: v.choices.into_iter().map(entity2_to_3).collect(),
106 description: v.description,
107 value: v.value,
108 dtype: v.dtype,
109 validator: v.validator,
110 threshold: v.threshold,
111 }
112}
113
114fn json_structure2_to_3(
115 v: JsonStructure,
116 index: usize,
117) -> Result<ExpandedJsonStructure, SchemaExpandError> {
118 match v {
119 JsonStructure::NamedStructure(NamedStructure { name, props }) => {
120 Ok(ExpandedJsonStructure {
121 name,
122 props: props
123 .into_iter()
124 .map(|(k, v)| (k, structure_property2_to_3(v)))
125 .collect(),
126 })
127 }
128 JsonStructure::NameKeyedStructure { name, props } => Ok(ExpandedJsonStructure {
129 name,
130 props: props
131 .into_iter()
132 .map(|(k, v)| (k, structure_property2_to_3(v)))
133 .collect(),
134 }),
135 JsonStructure::EntityList(list) => {
136 let props = list
137 .into_iter()
138 .map(|spec| {
139 let key = spec.name.clone();
140 (key, entity_spec_to_structure_property(spec))
141 })
142 .collect();
143 Ok(ExpandedJsonStructure {
144 name: ExpandedName::new(format!("unnamed_{index}")),
145 props,
146 })
147 }
148 }
149}
150
151fn classification2_to_3(v: Classification) -> ExpandedClassification {
152 ExpandedClassification {
153 task: entity2_to_3(v.task),
154 labels: v.labels.into_iter().map(entity2_to_3).collect(),
155 threshold: v.threshold,
156 multi_label: v.multi_label,
157 label_descriptions: v
158 .label_descriptions
159 .into_iter()
160 .map(|(k, v)| (k, entity2_to_3(v)))
161 .collect(),
162 }
163}
164
165fn relation2_to_3(v: Relation, index: usize) -> Result<ExpandedRelation, SchemaExpandError> {
166 match v.acquired {
167 Some(RelationAcquired::Empty) => Ok(ExpandedRelation::EmptyAcquired {
168 name: v.name,
169 description: v.description,
170 }),
171 Some(RelationAcquired::Entity { head, tail }) => Ok(ExpandedRelation::EntityAcquired {
172 name: v.name,
173 description: v.description,
174 head: Box::new(entity2_to_3(*head)),
175 tail: Box::new(entity2_to_3(*tail)),
176 }),
177 None => Err(SchemaExpandError::RelationWithoutAcquired {
178 index,
179 name: v.name.to_string(),
180 }),
181 }
182}
183
184impl TryFrom<NormalizedSchema> for ExpandedSchema {
185 type Error = SchemaExpandError;
186
187 fn try_from(v: NormalizedSchema) -> Result<Self, Self::Error> {
188 let entities = v.entities.into_iter().map(entity2_to_3).collect();
189
190 let json_structures = v
191 .json_structures
192 .into_iter()
193 .enumerate()
194 .map(|(i, js)| json_structure2_to_3(js, i))
195 .collect::<Result<Vec<_>, _>>()?;
196
197 let relations = v
198 .relations
199 .into_iter()
200 .enumerate()
201 .map(|(i, rel)| relation2_to_3(rel, i))
202 .collect::<Result<Vec<_>, _>>()?;
203
204 let classifications = v
205 .classifications
206 .into_iter()
207 .map(classification2_to_3)
208 .collect();
209
210 Ok(Self {
211 entities,
212 json_structures,
213 relations,
214 classifications,
215 })
216 }
217}
218
#[cfg(test)]
mod tests {
    use super::*;
    use crate::normalized::NormalizedSchema;

    /// Parses a JSON fixture into a normalized schema, panicking on a bad fixture.
    fn normalize(json: &str) -> NormalizedSchema {
        NormalizedSchema::from_json_str(json).expect("fixture must parse as a normalized schema")
    }

    /// Parses and expands a JSON fixture that is expected to expand successfully.
    fn expand(json: &str) -> ExpandedSchema {
        ExpandedSchema::try_from(normalize(json)).expect("fixture must expand")
    }

    /// Builds the lookup key for a property called `raw`.
    fn key(raw: &str) -> ExpandedName {
        ExpandedName::new(raw.to_string())
    }

    #[test]
    fn expanded_expands_named_structure() {
        let s3 = expand(
            r#"
            {
                "json_structures": [
                    {
                        "name": "Patient Record",
                        "id": { "description": "identifier", "dtype": "str" }
                    }
                ],
                "relations": [
                    { "contains": { "head": "patient", "tail": "record" } }
                ]
            }
            "#,
        );

        assert_eq!(s3.json_structures.len(), 1);
        assert_eq!(s3.relations.len(), 1);
        assert_eq!(s3.json_structures[0].name.as_str(), "patient_record");
    }

    #[test]
    fn expanded_rejects_relation_without_acquired_form() {
        let s2 = normalize(
            r#"
            {
                "relations": ["interacts_with"]
            }
            "#,
        );
        let err = ExpandedSchema::try_from(s2).unwrap_err();

        // Matching the variant alone proves nothing while the enum has a single
        // variant, so also pin the reported position of the offending relation.
        match &err {
            SchemaExpandError::RelationWithoutAcquired { index, .. } => assert_eq!(*index, 0),
        }
        assert!(err
            .to_string()
            .starts_with("relation without acquired form cannot be expanded at index 0: "));
    }

    #[test]
    fn expanded_entity_list_becomes_unnamed_structure() {
        let s3 = expand(
            r#"
            {
                "json_structures": [
                    ["gene::str", "score::float::0.9"]
                ]
            }
            "#,
        );

        assert_eq!(s3.json_structures.len(), 1);
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.entities.len(), 0);

        let props = &s3.json_structures[0].props;
        assert_eq!(props.len(), 2);

        let gene = props.get(&key("gene")).unwrap();
        assert_eq!(gene.dtype, Some(DType::String));
        assert_eq!(gene.threshold, None);

        let score = props.get(&key("score")).unwrap();
        assert_eq!(score.dtype, Some(DType::Float));
        assert_eq!(score.threshold, Some(0.9));
    }

    #[test]
    fn expanded_multiple_entity_lists_get_sequential_names() {
        let s3 = expand(
            r#"
            {
                "json_structures": [
                    ["a::str"],
                    ["b::float"],
                    ["c::bool"]
                ]
            }
            "#,
        );

        assert_eq!(s3.json_structures.len(), 3);
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.json_structures[1].name.as_str(), "unnamed_1");
        assert_eq!(s3.json_structures[2].name.as_str(), "unnamed_2");
    }

    #[test]
    fn expanded_mixed_entity_list_and_named_structure() {
        let s3 = expand(
            r#"
            {
                "entities": ["gene::str"],
                "json_structures": [
                    ["patient::str"],
                    {
                        "name": "Patient Record",
                        "id": { "dtype": "str" }
                    }
                ]
            }
            "#,
        );

        assert_eq!(s3.entities.len(), 1);
        assert_eq!(s3.entities[0].name.as_str(), "gene");
        assert_eq!(s3.json_structures.len(), 2);
        // The synthesized name uses the structure's position in the list.
        assert_eq!(s3.json_structures[0].name.as_str(), "unnamed_0");
        assert_eq!(s3.json_structures[1].name.as_str(), "patient_record");
    }

    #[test]
    fn expanded_entity_list_entities_not_promoted_to_top_level() {
        let s3 = expand(
            r#"
            {
                "entities": ["gene::str"],
                "json_structures": [
                    ["gene::str", "score::float::0.9"]
                ]
            }
            "#,
        );

        // Only the explicitly declared entity is top-level; `score` stays a property.
        assert_eq!(s3.entities.len(), 1);
        assert_eq!(s3.entities[0].name.as_str(), "gene");
        assert_eq!(s3.json_structures.len(), 1);

        let props = &s3.json_structures[0].props;
        assert!(props.contains_key(&key("gene")));
        assert!(props.contains_key(&key("score")));
    }
}