mold_cli/parser/
json.rs

1use crate::types::{Field, MoldError, NestedType, ObjectType, Schema, SchemaType};
2use crate::utils::{path_to_type_name, to_pascal_case};
3use anyhow::Result;
4use serde_json::Value;
5use std::collections::HashSet;
6use std::path::Path;
7
8/// Parse a JSON file into a Schema
9pub fn parse_json_file(path: &Path, name: Option<&str>, flat_mode: bool) -> Result<Schema> {
10    let content = std::fs::read_to_string(path)?;
11    let root_name = name
12        .map(|s| to_pascal_case(s))
13        .unwrap_or_else(|| to_pascal_case(crate::utils::get_file_stem(path).as_str()));
14
15    parse_json_string(&content, &root_name, flat_mode)
16}
17
18/// Parse a JSON string into a Schema
19pub fn parse_json_string(json: &str, name: &str, flat_mode: bool) -> Result<Schema> {
20    let value: Value = serde_json::from_str(json)?;
21    parse_json_value(&value, name, flat_mode)
22}
23
24/// Parse a JSON Value into a Schema
25pub fn parse_json_value(value: &Value, name: &str, flat_mode: bool) -> Result<Schema> {
26    let mut nested_types = Vec::new();
27    let mut path = vec![name.to_string()];
28
29    let root_type = if flat_mode {
30        infer_type_flat(value)
31    } else {
32        infer_type_with_extraction(value, &mut path, &mut nested_types)
33    };
34
35    // Ensure root is an object
36    if !matches!(root_type, SchemaType::Object(_)) {
37        return Err(MoldError::InvalidRoot(format!("{:?}", value)).into());
38    }
39
40    Ok(Schema::new(name, root_type).with_nested_types(nested_types))
41}
42
43/// Infer type from JSON value (flat mode - no extraction)
44fn infer_type_flat(value: &Value) -> SchemaType {
45    match value {
46        Value::Null => SchemaType::Null,
47        Value::Bool(_) => SchemaType::Boolean,
48        Value::Number(n) => {
49            if n.is_i64() || n.is_u64() {
50                // Check if it's a whole number
51                if let Some(f) = n.as_f64() {
52                    if f.fract() == 0.0 {
53                        return SchemaType::Integer;
54                    }
55                }
56            }
57            SchemaType::Number
58        }
59        Value::String(_) => SchemaType::String,
60        Value::Array(arr) => {
61            if arr.is_empty() {
62                SchemaType::Array(Box::new(SchemaType::Any))
63            } else {
64                let types: Vec<SchemaType> = arr.iter().map(infer_type_flat).collect();
65                let unified = unify_types(&types);
66                SchemaType::Array(Box::new(unified))
67            }
68        }
69        Value::Object(obj) => {
70            let fields: Vec<Field> = obj
71                .iter()
72                .map(|(key, val)| Field::new(key.clone(), infer_type_flat(val)))
73                .collect();
74            SchemaType::Object(ObjectType::new(fields))
75        }
76    }
77}
78
79/// Infer type from JSON value with nested type extraction
80fn infer_type_with_extraction(
81    value: &Value,
82    path: &mut Vec<String>,
83    nested_types: &mut Vec<NestedType>,
84) -> SchemaType {
85    match value {
86        Value::Null => SchemaType::Null,
87        Value::Bool(_) => SchemaType::Boolean,
88        Value::Number(n) => {
89            if n.is_i64() || n.is_u64() {
90                if let Some(f) = n.as_f64() {
91                    if f.fract() == 0.0 {
92                        return SchemaType::Integer;
93                    }
94                }
95            }
96            SchemaType::Number
97        }
98        Value::String(_) => SchemaType::String,
99        Value::Array(arr) => {
100            if arr.is_empty() {
101                SchemaType::Array(Box::new(SchemaType::Any))
102            } else {
103                // For arrays, we need to handle object extraction differently
104                let types: Vec<SchemaType> = arr
105                    .iter()
106                    .map(|val| {
107                        if val.is_object() {
108                            // For objects in arrays, use singular form of parent + "Item"
109                            path.push("Item".to_string());
110                            let t = infer_type_with_extraction(val, path, nested_types);
111                            path.pop();
112                            t
113                        } else {
114                            infer_type_with_extraction(val, path, nested_types)
115                        }
116                    })
117                    .collect();
118                let unified = unify_types(&types);
119                SchemaType::Array(Box::new(unified))
120            }
121        }
122        Value::Object(obj) => {
123            let fields: Vec<Field> = obj
124                .iter()
125                .map(|(key, val)| {
126                    let field_type = if val.is_object() && !val.as_object().unwrap().is_empty() {
127                        // This is a nested object - extract it
128                        path.push(key.clone());
129                        let nested_type =
130                            infer_type_with_extraction(val, path, nested_types);
131
132                        // Extract to nested_types if it's an object
133                        if let SchemaType::Object(ref obj_type) = nested_type {
134                            let type_name = path_to_type_name(path);
135                            nested_types.push(NestedType::new(type_name.clone(), obj_type.clone()));
136                            path.pop();
137                            // Return a reference to the extracted type
138                            // We'll use Object with empty fields as a marker, and store the name
139                            // Actually, let's create a special handling for this
140                            return Field::new(key.clone(), SchemaType::Object(obj_type.clone()));
141                        }
142                        path.pop();
143                        nested_type
144                    } else {
145                        infer_type_with_extraction(val, path, nested_types)
146                    };
147                    Field::new(key.clone(), field_type)
148                })
149                .collect();
150            SchemaType::Object(ObjectType::new(fields))
151        }
152    }
153}
154
155/// Unify multiple types into a single type
156fn unify_types(types: &[SchemaType]) -> SchemaType {
157    if types.is_empty() {
158        return SchemaType::Any;
159    }
160
161    // Deduplicate types
162    let unique: Vec<&SchemaType> = {
163        let mut seen = HashSet::new();
164        types
165            .iter()
166            .filter(|t| {
167                let key = format!("{:?}", t);
168                seen.insert(key)
169            })
170            .collect()
171    };
172
173    if unique.len() == 1 {
174        return unique[0].clone();
175    }
176
177    // If we have Integer and Number, prefer Number
178    let has_integer = unique.iter().any(|t| matches!(t, SchemaType::Integer));
179    let has_number = unique.iter().any(|t| matches!(t, SchemaType::Number));
180    if has_integer && has_number {
181        let filtered: Vec<SchemaType> = unique
182            .iter()
183            .filter(|t| !matches!(t, SchemaType::Integer))
184            .map(|t| (*t).clone())
185            .collect();
186        if filtered.len() == 1 {
187            return filtered[0].clone();
188        }
189        return SchemaType::Union(filtered);
190    }
191
192    // Multiple different types - create a union
193    SchemaType::Union(unique.iter().map(|t| (*t).clone()).collect())
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the root object of `schema`, failing the test if the root is
    /// not an object. Using this instead of a bare `if let` keeps assertions
    /// from being skipped silently when the parser returns the wrong shape.
    fn root_object(schema: &Schema) -> &ObjectType {
        match &schema.root_type {
            SchemaType::Object(obj) => obj,
            other => panic!("Expected Object type, got {:?}", other),
        }
    }

    /// Returns the element type of an array type, failing the test if
    /// `field_type` is not an array.
    fn array_element_type(field_type: &SchemaType) -> &SchemaType {
        match field_type {
            SchemaType::Array(inner) => &**inner,
            other => panic!("Expected Array type, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_simple_object() {
        let json = r#"{"name": "John", "age": 30}"#;
        let schema = parse_json_string(json, "User", true).unwrap();

        assert_eq!(schema.name, "User");
        assert_eq!(root_object(&schema).fields.len(), 2);
    }

    #[test]
    fn test_infer_integer_vs_number() {
        let int_json = r#"{"count": 42}"#;
        let float_json = r#"{"price": 19.99}"#;

        let int_schema = parse_json_string(int_json, "Test", true).unwrap();
        let float_schema = parse_json_string(float_json, "Test", true).unwrap();

        assert_eq!(
            root_object(&int_schema).fields[0].field_type,
            SchemaType::Integer
        );
        assert_eq!(
            root_object(&float_schema).fields[0].field_type,
            SchemaType::Number
        );
    }

    #[test]
    fn test_parse_array() {
        let json = r#"{"tags": ["a", "b", "c"]}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        let inner = array_element_type(&root_object(&schema).fields[0].field_type);
        assert_eq!(*inner, SchemaType::String);
    }

    #[test]
    fn test_parse_empty_array() {
        let json = r#"{"items": []}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        let inner = array_element_type(&root_object(&schema).fields[0].field_type);
        assert_eq!(*inner, SchemaType::Any);
    }

    #[test]
    fn test_parse_mixed_array() {
        let json = r#"{"mixed": [1, "two", true]}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        let inner = array_element_type(&root_object(&schema).fields[0].field_type);
        assert!(matches!(*inner, SchemaType::Union(_)));
    }

    #[test]
    fn test_integer_and_float_array_unifies_to_number() {
        // Integer is dropped in favour of Number when both appear in an array.
        let json = r#"{"vals": [1, 2.5]}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        let inner = array_element_type(&root_object(&schema).fields[0].field_type);
        assert_eq!(*inner, SchemaType::Number);
    }

    #[test]
    fn test_nested_extraction() {
        let json = r#"{
            "user": {
                "profile": {
                    "name": "John"
                }
            }
        }"#;
        let schema = parse_json_string(json, "Root", false).unwrap();

        // Should have extracted nested types
        assert!(!schema.nested_types.is_empty());
    }

    #[test]
    fn test_non_object_root_is_rejected() {
        // The root must be an object in both inference modes.
        assert!(parse_json_string("[1, 2, 3]", "Root", true).is_err());
        assert!(parse_json_string("[1, 2, 3]", "Root", false).is_err());
        assert!(parse_json_string("42", "Root", true).is_err());
    }

    #[test]
    fn test_invalid_json_is_rejected() {
        assert!(parse_json_string("{not json", "Root", true).is_err());
    }
}