integrationos_domain/domain/schema/
json_schema.rs

1use super::{
2    common_model::{CommonModel, DataType, Expandable},
3    json_mapper::Field,
4};
5use crate::{IntegrationOSError, InternalError};
6use serde::{Deserialize, Serialize};
7use serde_json::{json, Map, Value};
8use std::collections::HashMap;
9
10#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone, Default)]
11#[cfg_attr(feature = "dummy", derive(fake::Dummy))]
12pub struct JsonSchema {
13    #[serde(rename = "type")]
14    pub type_name: String,
15    #[serde(default = "HashMap::new")]
16    pub properties: HashMap<String, Property>,
17    pub required: Option<Vec<String>>,
18    pub path: Option<String>,
19
20    #[serde(skip_serializing_if = "Option::is_none")]
21    pub items: Option<Box<Property>>,
22}
23
24impl JsonSchema {
25    pub fn new(type_name: String) -> Self {
26        Self {
27            type_name,
28            properties: HashMap::new(),
29            required: None,
30            path: None,
31            items: None,
32        }
33    }
34
35    pub fn empty() -> Self {
36        Self {
37            type_name: "object".to_string(),
38            properties: HashMap::new(),
39            required: None,
40            path: None,
41            items: None,
42        }
43    }
44
45    pub fn from_value(value: Value) -> Result<Self, IntegrationOSError> {
46        serde_json::from_value::<Self>(value.clone())
47            .map_err(|e| InternalError::invalid_argument(&e.to_string(), Some(&value.to_string())))
48    }
49
50    pub fn to_value(&self) -> Result<Value, IntegrationOSError> {
51        serde_json::to_value(self)
52            .map_err(|e| InternalError::invalid_argument(&e.to_string(), None))
53    }
54
55    pub fn filter(mut self, keys_to_remove: &[String]) -> Self {
56        self.properties.retain(|name, _| {
57            let retain = !keys_to_remove.contains(name);
58
59            if !retain {
60                if let Some(ref mut required) = self.required {
61                    required.retain(|n| n != name);
62                }
63            }
64
65            retain
66        });
67        self
68    }
69
70    pub fn keys_at_path(&self, search_path: &str) -> Vec<String> {
71        if search_path == "$" {
72            return self.properties.keys().cloned().collect();
73        }
74
75        self.properties
76            .iter()
77            .flat_map(|(_, property)| self.collect_keys(property, search_path))
78            .collect()
79    }
80
81    pub fn keys(&self) -> String {
82        self.properties
83            .keys()
84            .cloned()
85            .collect::<Vec<String>>()
86            .join(", ")
87    }
88
89    #[allow(clippy::only_used_in_recursion)]
90    fn collect_keys(&self, property: &Property, search_path: &str) -> Vec<String> {
91        let mut keys = vec![];
92
93        if let Some(ref actual_path) = property.path {
94            if actual_path == search_path {
95                if let Some(nested_properties) = &property.properties {
96                    keys.extend(nested_properties.keys().cloned());
97                }
98                if let Some(nested_items) = &property.items {
99                    if let Some(nested_properties) = &nested_items.properties {
100                        keys.extend(nested_properties.keys().cloned());
101                    }
102                }
103                return keys;
104            }
105        }
106
107        if let Some(nested_properties) = &property.properties {
108            keys.extend(
109                nested_properties
110                    .iter()
111                    .flat_map(|(_, nested_property)| {
112                        self.collect_keys(nested_property, search_path)
113                    })
114                    .collect::<Vec<String>>(),
115            );
116        }
117        if let Some(nested_items) = &property.items {
118            keys.extend(self.collect_keys(nested_items, search_path));
119        }
120
121        keys
122    }
123
124    pub fn remove_expandables(mut self) -> JsonSchema {
125        self.properties.retain(|name, value| {
126            let retain = !matches!(value.r#type.as_str(), "array" | "object");
127
128            if !retain {
129                if let Some(ref mut required) = self.required {
130                    required.retain(|n| n != name);
131                }
132            }
133
134            retain
135        });
136        self
137    }
138
139    pub fn remove_primitives(mut self) -> JsonSchema {
140        self.properties.retain(|name, value| {
141            let retain = matches!(value.r#type.as_str(), "array" | "object");
142
143            if !retain {
144                if let Some(ref mut required) = self.required {
145                    required.retain(|n| n != name);
146                }
147            }
148
149            retain
150        });
151        self
152    }
153
154    pub fn flatten(mut self) -> JsonSchema {
155        self.properties
156            .iter_mut()
157            .for_each(|(_, value)| match value.r#type.as_str() {
158                "array" => {
159                    value.properties = None;
160                    value.items = None;
161                }
162                "object" => {
163                    value.properties = None;
164                    value.items = None;
165                }
166                _ => {}
167            });
168
169        self
170    }
171
172    pub fn extract_expandables(&self) -> Vec<JsonSchema> {
173        let mut schemas = vec![];
174
175        for (k, v) in &self.properties {
176            let path = format!("$.{k}");
177
178            match v.r#type.as_str() {
179                "array" => schemas.push(JsonSchema {
180                    type_name: v.r#type.clone(),
181                    properties: v.properties.clone().unwrap_or_default(),
182                    required: None,
183                    path: Some(path),
184                    items: None,
185                }),
186                "object" => schemas.push(JsonSchema {
187                    type_name: v.r#type.clone(),
188                    properties: v.properties.clone().unwrap_or_default(),
189                    required: None,
190                    path: Some(path),
191                    items: None,
192                }),
193                _ => {}
194            }
195        }
196
197        schemas
198    }
199
200    pub fn insert(&mut self, name: String, r#type: String, path: String) {
201        self.properties.insert(
202            name,
203            Property {
204                r#type,
205                path: Some(path),
206                description: None,
207                properties: None,
208                items: None,
209                r#enum: None,
210            },
211        );
212    }
213}
214
215impl TryFrom<CommonModel> for JsonSchema {
216    type Error = IntegrationOSError;
217
218    fn try_from(common_model: CommonModel) -> std::prelude::v1::Result<Self, Self::Error> {
219        let mut properties = HashMap::new();
220        for field in common_model.fields {
221            properties.insert(field.name, field.datatype.try_into()?);
222        }
223
224        Ok(JsonSchema {
225            type_name: "object".to_string(),
226            properties,
227            required: None,
228            path: None,
229            items: None,
230        })
231    }
232}
233
234#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
235#[cfg_attr(feature = "dummy", derive(fake::Dummy))]
236pub struct Property {
237    #[serde(rename = "type")]
238    pub r#type: String,
239    #[serde(skip_serializing_if = "Option::is_none")]
240    pub path: Option<String>,
241    #[serde(skip_serializing_if = "Option::is_none")]
242    pub description: Option<String>,
243    #[serde(skip_serializing_if = "Option::is_none")]
244    #[cfg_attr(feature = "dummy", dummy(default))]
245    pub properties: Option<HashMap<String, Property>>,
246    #[serde(skip_serializing_if = "Option::is_none")]
247    #[cfg_attr(feature = "dummy", dummy(default))]
248    pub items: Option<Box<Property>>,
249    #[serde(skip_serializing_if = "Option::is_none")]
250    #[cfg_attr(feature = "dummy", dummy(default))]
251    pub r#enum: Option<Vec<String>>,
252}
253
254impl Property {
255    pub fn new(r#type: &str, desc: Option<&str>) -> Self {
256        Self {
257            r#type: r#type.to_string(),
258            path: None,
259            description: desc.map(|d| d.to_string()),
260            properties: None,
261            items: None,
262            r#enum: None,
263        }
264    }
265
266    pub fn retain_recursive(&mut self, name: &str, map: &HashMap<String, Field>) -> bool {
267        match self.r#type.as_str() {
268            "object" => {
269                let Some(ref mut props) = self.properties else {
270                    return true;
271                };
272                props.retain(|sub_name, prop| {
273                    if let Some(Field::Object { fields, .. }) = map.get(name) {
274                        prop.retain_recursive(sub_name, fields)
275                    } else if let Some(Field::Array { items, .. }) = map.get(name) {
276                        if let Field::Object { fields, .. } = items.as_ref() {
277                            prop.retain_recursive(sub_name, fields)
278                        } else {
279                            true
280                        }
281                    } else {
282                        true
283                    }
284                });
285                !props.is_empty()
286            }
287            "array" => {
288                if let Some(ref mut items) = self.items {
289                    items.retain_recursive(name, map)
290                } else {
291                    true
292                }
293            }
294            _ => !map.contains_key(name),
295        }
296    }
297}
298
299impl TryFrom<DataType> for Property {
300    type Error = IntegrationOSError;
301
302    fn try_from(data_type: DataType) -> std::prelude::v1::Result<Self, Self::Error> {
303        match data_type {
304            DataType::String => Ok(Property::new("string", None)),
305            DataType::Number => Ok(Property::new("number", None)),
306            DataType::Boolean => Ok(Property::new("boolean", None)),
307            DataType::Date => Ok(Property::new("number", None)),
308            DataType::Enum { options, .. } => {
309                let options = options
310                    .unwrap_or_default()
311                    .into_iter()
312                    .map(|o| o.to_string())
313                    .collect::<Vec<String>>();
314                Ok(Property {
315                    r#type: "string".to_string(),
316                    path: None,
317                    description: None,
318                    properties: None,
319                    items: None,
320                    r#enum: Some(options),
321                })
322            }
323
324            DataType::Expandable(expandable) => match expandable {
325                Expandable::Expanded { model, .. } => {
326                    let mut map = HashMap::new();
327                    for field in model.fields {
328                        map.insert(field.name, field.datatype.try_into()?);
329                    }
330                    Ok(Property {
331                        r#type: "object".to_string(),
332                        path: None,
333                        description: None,
334                        properties: Some(map),
335                        items: None,
336                        r#enum: None,
337                    })
338                }
339                _ => Ok(Property {
340                    r#type: "object".to_string(),
341                    path: None,
342                    description: None,
343                    properties: None,
344                    items: None,
345                    r#enum: None,
346                }),
347            },
348            DataType::Array { element_type } => Ok(Property {
349                r#type: "array".to_string(),
350                path: None,
351                description: None,
352                properties: None,
353                items: Some(Box::new(Property::try_from(*element_type)?)),
354                r#enum: None,
355            }),
356            DataType::Unknown => Ok(Property {
357                r#type: "unknown".to_string(),
358                path: None,
359                description: None,
360                properties: None,
361                items: None,
362                r#enum: None,
363            }),
364        }
365    }
366}
367
368pub fn generate_schema(input: &Value, json_path: &str) -> Value {
369    match input {
370        Value::Object(map) => {
371            let mut properties: Map<String, Value> = Map::new();
372
373            for (key, value) in map.iter() {
374                let new_path = format!("{}.{}", json_path, key);
375                properties.insert(key.to_string(), generate_value_schema(value, &new_path));
376            }
377
378            json!({
379                "type": "object",
380                "path": json_path,
381                "properties": properties,
382            })
383        }
384        Value::Array(arr) => {
385            let item_schema = if let Some(item) = arr.first() {
386                generate_value_schema(item, &format!("{}[0]", json_path))
387            } else {
388                json!({ "type": "unknown", "path": format!("{}[0]", json_path) })
389            };
390
391            json!({
392                "type": "array",
393                "path": json_path,
394                "items": item_schema,
395            })
396        }
397        _ => json!({
398            "type": "unknown",
399            "path": json_path,
400        }),
401    }
402}
403
404pub fn extract_flat_primitive_keys(input: &Value) -> Value {
405    // Initialize a JSON Schema object
406    let mut properties: Map<String, Value> = Map::new();
407
408    // Iterate over the object's keys and values if it's an object
409    if let Value::Object(map) = input {
410        for (key, value) in map.iter() {
411            // Determine the type based on the JSON value
412            let type_name = match value {
413                Value::String(_) => "string",
414                Value::Number(_) => "number",
415                Value::Bool(_) => "boolean",
416                Value::Null => "unknown",
417                _ => continue, // Skip non-primitive types
418            };
419
420            // Add the property to the schema
421            properties.insert(
422                key.to_string(),
423                json!({ "type": type_name, "path": format!("$.{}", key) }),
424            );
425        }
426    }
427
428    // Return the final JSON Schema object
429    json!({
430        "type": "object",
431        "properties": properties,
432    })
433}
434
435pub fn extract_nested_keys(input: &Value, json_path: &str) -> Value {
436    let mut properties: Map<String, Value> = Map::new();
437
438    if let Value::Object(map) = input {
439        for (key, value) in map.iter() {
440            let new_path = format!("{}.{}", json_path, key);
441
442            // Check for objects or arrays
443            match value {
444                Value::Object(_) => {
445                    properties.insert(
446                        key.to_string(),
447                        json!({ "type": "object", "path": new_path }),
448                    );
449                }
450                Value::Array(_) => {
451                    properties.insert(
452                        key.to_string(),
453                        json!({ "type": "array", "path": new_path }),
454                    );
455                }
456                _ => continue,
457            };
458        }
459    }
460
461    json!({
462        "type": "object",
463        "properties": properties,
464    })
465}
466
467pub fn generate_value_schema(value: &Value, json_path: &str) -> Value {
468    match value {
469        Value::String(_) => json!({ "type": "string", "path": json_path }),
470        Value::Number(_) => json!({ "type": "number", "path": json_path }),
471        Value::Bool(_) => json!({ "type": "boolean", "path": json_path }),
472        Value::Null => json!({ "type": "unknown", "path": json_path }),
473        Value::Object(_) => generate_schema(value, json_path),
474        Value::Array(arr) => {
475            let item_schema = if let Some(item) = arr.first() {
476                generate_value_schema(item, &format!("{}[0]", json_path))
477            } else {
478                json!({ "type": "unknown", "path": format!("{}[0]", json_path) })
479            };
480
481            json!({
482                "type": "array",
483                "path": json_path,
484                "items": item_schema,
485            })
486        }
487    }
488}
489
/// Unit tests for the schema model and the schema-generation helpers.
#[cfg(test)]
mod tests {
    use super::*;

    use serde_json::json;
    use tracing::{info, metadata::LevelFilter};
    use tracing_subscriber::EnvFilter;

    /// Installs a DEBUG-level subscriber for test output.
    ///
    /// Uses `try_init` so that several tests running in the same process do not
    /// panic when the global subscriber has already been installed.
    fn init_tracing() {
        let filter = EnvFilter::builder()
            .with_default_directive(LevelFilter::DEBUG.into())
            .from_env_lossy();
        let _ = tracing_subscriber::fmt().with_env_filter(filter).try_init();
    }

    /// Sorts keys so assertions do not depend on `HashMap` iteration order.
    fn sorted(mut keys: Vec<String>) -> Vec<String> {
        keys.sort();
        keys
    }

    #[test]
    fn test_keys_at_path() {
        init_tracing();

        let schema_json = json!({
            "type": "object",
            "properties": {
                "name": { "type": "string", "path": "$.name" },
                "age": { "type": "number", "path": "$.age" },
                "email": { "type": "string", "path": "$.email" },
                "address": {
                    "type": "object",
                    "path": "$.address",
                    "properties": {
                        "street": { "type": "string", "path": "$.address.street" },
                        "city": { "type": "string", "path": "$.address.city" },
                        "state": { "type": "string", "path": "$.address.state" },
                        "postalCode": { "type": "string", "path": "$.address.postalCode" }
                    }
                },
                "phoneNumbers": {
                    "type": "array",
                    "path": "$.phoneNumbers",
                    "items": {
                        "type": "object",
                        "path": "$.phoneNumbers",
                        "properties": {
                            "type": { "type": "string", "path": "$.phoneNumbers.type" },
                            "number": { "type": "string", "path": "$.phoneNumbers.number" }
                        }
                    }
                },
                "emails": {
                    "type": "array",
                    "path": "$.emails",
                    "items": {
                        "type": "string",
                        "path": "$.emails[0]"
                    }
                }
            }
        });

        // Deserialize the JSON value into a JsonSchema instance
        let schema: JsonSchema = serde_json::from_value(schema_json).unwrap();

        let root_keys = sorted(schema.keys_at_path("$"));
        info!("Keys: {:#?}", root_keys);
        assert_eq!(
            root_keys,
            vec!["address", "age", "email", "emails", "name", "phoneNumbers"]
        );

        // A nested object exposes the names of its own properties.
        assert_eq!(
            sorted(schema.keys_at_path("$.address")),
            vec!["city", "postalCode", "state", "street"]
        );

        // An array exposes the names of its items' properties.
        assert_eq!(
            sorted(schema.keys_at_path("$.phoneNumbers")),
            vec!["number", "type"]
        );
    }

    #[test]
    fn test_generate_schema_on_object() {
        init_tracing();

        let input = json!({
            "name": "John",
            "profile": { "city": "New York", "age": 25 },
            "scores": [10, 20, 30],
            "address": {
                "city": "New York",
                "postalCodes": [10001, 10002]
            },
            "phoneNumbers": [
                {
                    "type": "home",
                    "number": "212 555-1234"
                },
                {
                    "type": "office",
                    "number": "646 555-4567"
                }
            ],
            "nullValue": null,
            "emptyArray": [],
        });

        let json_path = "$";

        let result = generate_schema(&input, json_path);
        info!("result: {:#?}", result);

        assert_eq!(result["type"], json!("object"));
        assert_eq!(result["path"], json!("$"));

        let properties = &result["properties"];
        assert_eq!(
            properties["name"],
            json!({ "type": "string", "path": "$.name" })
        );
        assert_eq!(
            properties["profile"],
            json!({
                "type": "object",
                "path": "$.profile",
                "properties": {
                    "city": { "type": "string", "path": "$.profile.city" },
                    "age": { "type": "number", "path": "$.profile.age" }
                }
            })
        );
        assert_eq!(
            properties["scores"],
            json!({
                "type": "array",
                "path": "$.scores",
                "items": { "type": "number", "path": "$.scores[0]" }
            })
        );
        assert_eq!(
            properties["address"]["properties"]["postalCodes"],
            json!({
                "type": "array",
                "path": "$.address.postalCodes",
                "items": { "type": "number", "path": "$.address.postalCodes[0]" }
            })
        );
        assert_eq!(
            properties["phoneNumbers"],
            json!({
                "type": "array",
                "path": "$.phoneNumbers",
                "items": {
                    "type": "object",
                    "path": "$.phoneNumbers[0]",
                    "properties": {
                        "type": { "type": "string", "path": "$.phoneNumbers[0].type" },
                        "number": { "type": "string", "path": "$.phoneNumbers[0].number" }
                    }
                }
            })
        );
        assert_eq!(
            properties["nullValue"],
            json!({ "type": "unknown", "path": "$.nullValue" })
        );
        assert_eq!(
            properties["emptyArray"],
            json!({
                "type": "array",
                "path": "$.emptyArray",
                "items": { "type": "unknown", "path": "$.emptyArray[0]" }
            })
        );
    }

    #[test]
    fn test_generate_schema_on_array() {
        init_tracing();

        // Only the first element of an array drives the generated item schema,
        // so two entries in `data` are enough to cover the behaviour.
        let input = json!([{
            "channel_id": 1,
            "enabled_currencies": [
                "USD"
            ],
            "default_currency": "USD",
            "meta": {
                "responseJSON": {
                    "data": [
                        {
                            "channel_id": 1,
                            "enabled_currencies": [
                                "USD"
                            ],
                            "default_currency": "USD"
                        },
                        {
                            "channel_id": 664177,
                            "enabled_currencies": [
                                "USD",
                                "GBP"
                            ],
                            "default_currency": "USD"
                        }
                    ]
                }
            }
        }]);

        let json_path = "$";

        let result = generate_schema(&input, json_path);
        info!("result: {:#?}", result);

        assert_eq!(result["type"], json!("array"));
        assert_eq!(result["path"], json!("$"));
        assert_eq!(result["items"]["type"], json!("object"));
        assert_eq!(result["items"]["path"], json!("$[0]"));

        let properties = &result["items"]["properties"];
        assert_eq!(
            properties["channel_id"],
            json!({ "type": "number", "path": "$[0].channel_id" })
        );
        assert_eq!(
            properties["enabled_currencies"],
            json!({
                "type": "array",
                "path": "$[0].enabled_currencies",
                "items": { "type": "string", "path": "$[0].enabled_currencies[0]" }
            })
        );

        let data = result
            .pointer("/items/properties/meta/properties/responseJSON/properties/data")
            .expect("schema for meta.responseJSON.data should be generated");
        assert_eq!(data["type"], json!("array"));
        assert_eq!(data["path"], json!("$[0].meta.responseJSON.data"));
        assert_eq!(
            data["items"]["path"],
            json!("$[0].meta.responseJSON.data[0]")
        );
        assert_eq!(
            data["items"]["properties"]["default_currency"],
            json!({
                "type": "string",
                "path": "$[0].meta.responseJSON.data[0].default_currency"
            })
        );
    }

    #[test]
    fn test_extract_flat_primitive_keys() {
        init_tracing();

        let input = json!({
            "name": "John",
            "age": 30,
            "is_student": false,
            "score": null,
            "address": {
                "city": "New York",
                "postalCodes": [10001, 10002]
            },
        });

        // `null` values are reported as "unknown"; nested objects are skipped.
        let expected_output = json!({
            "type": "object",
            "properties": {
                "name": { "type": "string", "path": "$.name" },
                "age": { "type": "number", "path": "$.age" },
                "is_student": { "type": "boolean", "path": "$.is_student" },
                "score": { "type": "unknown", "path": "$.score" },
            }
        });

        let result = extract_flat_primitive_keys(&input);
        info!("result: {:#?}", result);

        assert_eq!(result, expected_output);
    }

    #[test]
    fn test_extract_nested_keys() {
        init_tracing();

        let input = json!({
            "name": "John",
            "profile": { "city": "New York" },
            "scores": [10, 20, 30]
        });

        let json_path = "$";

        let expected_output = json!({
            "type": "object",
            "properties": {
                "profile": { "type": "object", "path": "$.profile" },
                "scores": { "type": "array", "path": "$.scores" }
            }
        });

        let result = extract_nested_keys(&input, json_path);
        info!("result: {:#?}", result);

        assert_eq!(result, expected_output);
    }

    #[test]
    fn test_schemars() {
        use schemars::schema_for;
        use schemars::JsonSchema;

        #[derive(Debug, Serialize, Deserialize, JsonSchema)]
        struct Response {
            /// The map array for all fields
            map: Vec<Map>,

            /// The comments for the map
            comments: String,

            /// The potential issues with the map
            potential_issues: String,
        }

        #[derive(Debug, Serialize, Deserialize, JsonSchema)]
        struct Map {
            /// The name of the field in the source model
            source_field_name: String,

            /// The name of the field in the destination model, empty if no match found
            destination_field_name: String,

            /// Whether a match was found or not
            match_found: bool,

            /// The confidence score, a number between 0 and 1.
            confidence_score: f64,

            /// The transformation function needed, if not needed then identity.
            source_to_destination_transformation: String,

            /// The transformation function needed, if not needed then identity.
            destination_to_source_transformation: String,

            /// The reasoning for the match
            reasoning: String,

            /// The potential issues with this mapping when confidenceScore is low, this could be empty, or a text to explain potential issues with the map or the transformation function
            potential_issues: String,
        }

        let schema = schema_for!(Response);
        let value = serde_json::to_value(&schema).unwrap();

        println!("{:#?}", value);

        // Every field of `Response` must show up in the generated schema.
        for field in ["map", "comments", "potential_issues"] {
            assert!(
                value["properties"].get(field).is_some(),
                "generated schema is missing property `{field}`"
            );
        }
    }
}