1use crate::types::{Field, MoldError, NestedType, ObjectType, Schema, SchemaType};
2use crate::utils::{path_to_type_name, to_pascal_case};
3use anyhow::Result;
4use serde_json::Value;
5use std::collections::HashSet;
6use std::path::Path;
7
8pub fn parse_json_file(path: &Path, name: Option<&str>, flat_mode: bool) -> Result<Schema> {
10 let content = std::fs::read_to_string(path)?;
11 let root_name = name
12 .map(|s| to_pascal_case(s))
13 .unwrap_or_else(|| to_pascal_case(crate::utils::get_file_stem(path).as_str()));
14
15 parse_json_string(&content, &root_name, flat_mode)
16}
17
18pub fn parse_json_string(json: &str, name: &str, flat_mode: bool) -> Result<Schema> {
20 let value: Value = serde_json::from_str(json)?;
21 parse_json_value(&value, name, flat_mode)
22}
23
24pub fn parse_json_value(value: &Value, name: &str, flat_mode: bool) -> Result<Schema> {
26 let mut nested_types = Vec::new();
27 let mut path = vec![name.to_string()];
28
29 let root_type = if flat_mode {
30 infer_type_flat(value)
31 } else {
32 infer_type_with_extraction(value, &mut path, &mut nested_types)
33 };
34
35 if !matches!(root_type, SchemaType::Object(_)) {
37 return Err(MoldError::InvalidRoot(format!("{:?}", value)).into());
38 }
39
40 Ok(Schema::new(name, root_type).with_nested_types(nested_types))
41}
42
43fn infer_type_flat(value: &Value) -> SchemaType {
45 match value {
46 Value::Null => SchemaType::Null,
47 Value::Bool(_) => SchemaType::Boolean,
48 Value::Number(n) => {
49 if n.is_i64() || n.is_u64() {
50 if let Some(f) = n.as_f64() {
52 if f.fract() == 0.0 {
53 return SchemaType::Integer;
54 }
55 }
56 }
57 SchemaType::Number
58 }
59 Value::String(_) => SchemaType::String,
60 Value::Array(arr) => {
61 if arr.is_empty() {
62 SchemaType::Array(Box::new(SchemaType::Any))
63 } else {
64 let types: Vec<SchemaType> = arr.iter().map(infer_type_flat).collect();
65 let unified = unify_types(&types);
66 SchemaType::Array(Box::new(unified))
67 }
68 }
69 Value::Object(obj) => {
70 let fields: Vec<Field> = obj
71 .iter()
72 .map(|(key, val)| Field::new(key.clone(), infer_type_flat(val)))
73 .collect();
74 SchemaType::Object(ObjectType::new(fields))
75 }
76 }
77}
78
79fn infer_type_with_extraction(
81 value: &Value,
82 path: &mut Vec<String>,
83 nested_types: &mut Vec<NestedType>,
84) -> SchemaType {
85 match value {
86 Value::Null => SchemaType::Null,
87 Value::Bool(_) => SchemaType::Boolean,
88 Value::Number(n) => {
89 if n.is_i64() || n.is_u64() {
90 if let Some(f) = n.as_f64() {
91 if f.fract() == 0.0 {
92 return SchemaType::Integer;
93 }
94 }
95 }
96 SchemaType::Number
97 }
98 Value::String(_) => SchemaType::String,
99 Value::Array(arr) => {
100 if arr.is_empty() {
101 SchemaType::Array(Box::new(SchemaType::Any))
102 } else {
103 let types: Vec<SchemaType> = arr
105 .iter()
106 .map(|val| {
107 if val.is_object() {
108 path.push("Item".to_string());
110 let t = infer_type_with_extraction(val, path, nested_types);
111 path.pop();
112 t
113 } else {
114 infer_type_with_extraction(val, path, nested_types)
115 }
116 })
117 .collect();
118 let unified = unify_types(&types);
119 SchemaType::Array(Box::new(unified))
120 }
121 }
122 Value::Object(obj) => {
123 let fields: Vec<Field> = obj
124 .iter()
125 .map(|(key, val)| {
126 let field_type = if val.is_object() && !val.as_object().unwrap().is_empty() {
127 path.push(key.clone());
129 let nested_type =
130 infer_type_with_extraction(val, path, nested_types);
131
132 if let SchemaType::Object(ref obj_type) = nested_type {
134 let type_name = path_to_type_name(path);
135 nested_types.push(NestedType::new(type_name.clone(), obj_type.clone()));
136 path.pop();
137 return Field::new(key.clone(), SchemaType::Object(obj_type.clone()));
141 }
142 path.pop();
143 nested_type
144 } else {
145 infer_type_with_extraction(val, path, nested_types)
146 };
147 Field::new(key.clone(), field_type)
148 })
149 .collect();
150 SchemaType::Object(ObjectType::new(fields))
151 }
152 }
153}
154
155fn unify_types(types: &[SchemaType]) -> SchemaType {
157 if types.is_empty() {
158 return SchemaType::Any;
159 }
160
161 let unique: Vec<&SchemaType> = {
163 let mut seen = HashSet::new();
164 types
165 .iter()
166 .filter(|t| {
167 let key = format!("{:?}", t);
168 seen.insert(key)
169 })
170 .collect()
171 };
172
173 if unique.len() == 1 {
174 return unique[0].clone();
175 }
176
177 let has_integer = unique.iter().any(|t| matches!(t, SchemaType::Integer));
179 let has_number = unique.iter().any(|t| matches!(t, SchemaType::Number));
180 if has_integer && has_number {
181 let filtered: Vec<SchemaType> = unique
182 .iter()
183 .filter(|t| !matches!(t, SchemaType::Integer))
184 .map(|t| (*t).clone())
185 .collect();
186 if filtered.len() == 1 {
187 return filtered[0].clone();
188 }
189 return SchemaType::Union(filtered);
190 }
191
192 SchemaType::Union(unique.iter().map(|t| (*t).clone()).collect())
194}
195
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the root object of `schema`, failing the test if the root is not an object.
    fn root_object(schema: &Schema) -> &ObjectType {
        match &schema.root_type {
            SchemaType::Object(obj) => obj,
            other => panic!("Expected Object type, got {:?}", other),
        }
    }

    /// Returns the element type of the first root field, failing the test if that field is
    /// not an array.
    fn first_field_item_type(schema: &Schema) -> &SchemaType {
        match &root_object(schema).fields[0].field_type {
            SchemaType::Array(inner) => &**inner,
            other => panic!("Expected Array type, got {:?}", other),
        }
    }

    #[test]
    fn test_parse_simple_object() {
        let json = r#"{"name": "John", "age": 30}"#;
        let schema = parse_json_string(json, "User", true).unwrap();

        assert_eq!(schema.name, "User");
        assert_eq!(root_object(&schema).fields.len(), 2);
    }

    #[test]
    fn test_infer_integer_vs_number() {
        let int_schema = parse_json_string(r#"{"count": 42}"#, "Test", true).unwrap();
        let float_schema = parse_json_string(r#"{"price": 19.99}"#, "Test", true).unwrap();

        assert_eq!(
            root_object(&int_schema).fields[0].field_type,
            SchemaType::Integer
        );
        assert_eq!(
            root_object(&float_schema).fields[0].field_type,
            SchemaType::Number
        );
    }

    #[test]
    fn test_parse_array() {
        let json = r#"{"tags": ["a", "b", "c"]}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        assert_eq!(*first_field_item_type(&schema), SchemaType::String);
    }

    #[test]
    fn test_parse_empty_array() {
        let json = r#"{"items": []}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        assert_eq!(*first_field_item_type(&schema), SchemaType::Any);
    }

    #[test]
    fn test_parse_mixed_array() {
        let json = r#"{"mixed": [1, "two", true]}"#;
        let schema = parse_json_string(json, "Test", true).unwrap();

        assert!(matches!(
            first_field_item_type(&schema),
            SchemaType::Union(_)
        ));
    }

    #[test]
    fn test_nested_extraction() {
        let json = r#"{
            "user": {
                "profile": {
                    "name": "John"
                }
            }
        }"#;
        let schema = parse_json_string(json, "Root", false).unwrap();

        assert!(!schema.nested_types.is_empty());
    }

    #[test]
    fn test_non_object_root_is_rejected() {
        // The root must infer to an object in both modes.
        assert!(parse_json_string("[1, 2, 3]", "Root", true).is_err());
        assert!(parse_json_string("42", "Root", false).is_err());
    }

    #[test]
    fn test_invalid_json_is_rejected() {
        assert!(parse_json_string("{", "Root", true).is_err());
    }
}