// base_d/encoders/algorithms/schema/parsers/json.rs

1use crate::encoders::algorithms::schema::fiche::NEST_SEP;
2use crate::encoders::algorithms::schema::parsers::InputParser;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value};
5use std::collections::HashMap;
6
7pub struct JsonParser;
8
9impl InputParser for JsonParser {
10    type Error = SchemaError;
11
12    fn parse(input: &str) -> Result<IntermediateRepresentation, Self::Error> {
13        let parsed: Value = serde_json::from_str(input).map_err(|e| {
14            SchemaError::InvalidInput(format!(
15                "Invalid JSON syntax: {}\n\
16                 Ensure the input is valid JSON.",
17                e
18            ))
19        })?;
20
21        match parsed {
22            Value::Array(arr) => parse_array(arr),
23            Value::Object(obj) => parse_object(obj),
24            _ => Err(SchemaError::InvalidInput(
25                "Expected JSON object or array at root level.\n\
26                 Schema encoding works with:\n\
27                 - Single object: {\"name\": \"value\"}\n\
28                 - Array of objects: [{\"id\": 1}, {\"id\": 2}]\n\
29                 - Object with array: {\"users\": [{\"id\": 1}]}"
30                    .to_string(),
31            )),
32        }
33    }
34}
35
36/// Parse array of objects (tabular data)
37fn parse_array(arr: Vec<Value>) -> Result<IntermediateRepresentation, SchemaError> {
38    if arr.is_empty() {
39        return Err(SchemaError::InvalidInput(
40            "Empty array - cannot infer schema from zero rows.\n\
41             Provide at least one object in the array."
42                .to_string(),
43        ));
44    }
45
46    let row_count = arr.len();
47    let mut all_rows: Vec<Map<String, Value>> = Vec::new();
48
49    // Extract objects from array
50    for (idx, item) in arr.into_iter().enumerate() {
51        match item {
52            Value::Object(obj) => all_rows.push(obj),
53            other => {
54                let type_name = match other {
55                    Value::Null => "null",
56                    Value::Bool(_) => "boolean",
57                    Value::Number(_) => "number",
58                    Value::String(_) => "string",
59                    Value::Array(_) => "array",
60                    Value::Object(_) => unreachable!(),
61                };
62                return Err(SchemaError::InvalidInput(format!(
63                    "Array must contain only objects (tabular data). Found {} at index {}.\n\
64                     Schema encoding expects arrays of objects like: [{{\"id\": 1}}, {{\"id\": 2}}]",
65                    type_name, idx
66                )));
67            }
68        }
69    }
70
71    // Flatten all objects and collect field names
72    let mut flattened_rows: Vec<HashMap<String, Value>> = Vec::new();
73    let mut all_field_names = std::collections::BTreeSet::new();
74    let mut array_markers = std::collections::BTreeSet::new();
75
76    for obj in &all_rows {
77        let flattened = flatten_object(obj, "");
78        for key in flattened.keys() {
79            if key.ends_with("[]") {
80                // This is an array marker, track it separately
81                array_markers.insert(key.clone());
82            } else {
83                all_field_names.insert(key.clone());
84            }
85        }
86        flattened_rows.push(flattened);
87    }
88
89    // Add array markers as fields with special marker type
90    let mut field_names: Vec<String> = all_field_names.into_iter().collect();
91    let array_marker_names: Vec<String> = array_markers.into_iter().collect();
92    field_names.extend(array_marker_names);
93
94    // Infer types and build fields
95    let mut fields = Vec::new();
96    let mut has_nulls = false;
97
98    for field_name in &field_names {
99        if field_name.ends_with("[]") {
100            // Array marker - use a special type to indicate this is metadata
101            fields.push(FieldDef::new(field_name.clone(), FieldType::Null));
102        } else {
103            let field_type = infer_field_type(&flattened_rows, field_name, &mut has_nulls)?;
104            fields.push(FieldDef::new(field_name.clone(), field_type));
105        }
106    }
107
108    // Build values and null bitmap
109    let mut values = Vec::new();
110    let total_values = row_count * fields.len();
111    let bitmap_bytes = total_values.div_ceil(8);
112    let mut null_bitmap = vec![0u8; bitmap_bytes];
113
114    for (row_idx, row) in flattened_rows.iter().enumerate() {
115        for (field_idx, field) in fields.iter().enumerate() {
116            let value_idx = row_idx * fields.len() + field_idx;
117
118            // Handle array markers - always null
119            if field.name.ends_with("[]") {
120                values.push(SchemaValue::Null);
121                set_null_bit(&mut null_bitmap, value_idx);
122                has_nulls = true;
123                continue;
124            }
125
126            if let Some(json_value) = row.get(&field.name)
127                && json_value.is_null()
128            {
129                values.push(SchemaValue::Null);
130                set_null_bit(&mut null_bitmap, value_idx);
131                has_nulls = true;
132            } else if let Some(json_value) = row.get(&field.name) {
133                values.push(json_to_schema_value(json_value, &field.field_type)?);
134            } else {
135                // Missing field = null
136                values.push(SchemaValue::Null);
137                set_null_bit(&mut null_bitmap, value_idx);
138                has_nulls = true;
139            }
140        }
141    }
142
143    // Build header
144    let mut header = SchemaHeader::new(row_count, fields);
145    if has_nulls {
146        header.null_bitmap = Some(null_bitmap);
147        header.set_flag(FLAG_HAS_NULLS);
148    }
149
150    IntermediateRepresentation::new(header, values)
151}
152
153/// Parse single object (may have root key)
154fn parse_object(obj: Map<String, Value>) -> Result<IntermediateRepresentation, SchemaError> {
155    // Check for common pagination wrapper keys
156    const WRAPPER_KEYS: &[&str] = &["results", "data", "items", "records"];
157
158    // Check for metadata pattern: scalar fields + one array field
159    let mut array_field: Option<(String, Vec<Value>)> = None;
160    let mut scalar_fields: std::collections::HashMap<String, String> =
161        std::collections::HashMap::new();
162
163    for (key, value) in &obj {
164        match value {
165            Value::Array(arr)
166                if !arr.is_empty() && arr.iter().all(|item| matches!(item, Value::Object(_))) =>
167            {
168                if array_field.is_none() {
169                    array_field = Some((key.clone(), arr.clone()));
170                } else {
171                    // Multiple arrays - not metadata pattern
172                    array_field = None;
173                    scalar_fields.clear();
174                    break;
175                }
176            }
177            Value::String(s) => {
178                scalar_fields.insert(key.clone(), s.clone());
179            }
180            Value::Number(n) => {
181                scalar_fields.insert(key.clone(), n.to_string());
182            }
183            Value::Bool(b) => {
184                scalar_fields.insert(key.clone(), b.to_string());
185            }
186            Value::Null => {
187                // Encode null metadata as ∅ symbol
188                scalar_fields.insert(key.clone(), "∅".to_string());
189            }
190            _ => {
191                // Non-scalar or nested object - not metadata pattern
192                scalar_fields.clear();
193                array_field = None;
194                break;
195            }
196        }
197    }
198
199    // If we have exactly one array field and at least one scalar field, extract metadata
200    if let Some((array_key, arr)) = array_field
201        && !scalar_fields.is_empty()
202    {
203        let mut ir = parse_array(arr)?;
204        ir.header.root_key = Some(array_key);
205        ir.header.set_flag(FLAG_HAS_ROOT_KEY);
206        ir.header.metadata = Some(scalar_fields);
207        return Ok(ir);
208    }
209
210    // Check if this is a wrapper object with one of the known keys
211    if obj.len() == 1 {
212        // Check if value is an array of objects before consuming
213        let is_root_key_pattern = obj
214            .values()
215            .next()
216            .map(|v| {
217                if let Value::Array(arr) = v {
218                    // Only treat as root key if array contains objects (tabular data)
219                    !arr.is_empty() && arr.iter().all(|item| matches!(item, Value::Object(_)))
220                } else {
221                    false
222                }
223            })
224            .unwrap_or(false);
225
226        if is_root_key_pattern {
227            // Extract key and value by consuming the map
228            let (key, value) = obj.into_iter().next().unwrap();
229            // We already checked it's an array
230            let arr = match value {
231                Value::Array(a) => a,
232                _ => unreachable!(),
233            };
234
235            // Parse as array with root key
236            let mut ir = parse_array(arr)?;
237            ir.header.root_key = Some(key);
238            ir.header.set_flag(FLAG_HAS_ROOT_KEY);
239            return Ok(ir);
240        }
241    }
242
243    // Check for known wrapper patterns and unwrap them
244    for wrapper_key in WRAPPER_KEYS {
245        if let Some(Value::Array(arr)) = obj.get(*wrapper_key)
246            && !arr.is_empty()
247            && arr.iter().all(|item| matches!(item, Value::Object(_)))
248        {
249            // Found a wrapper key - unwrap and parse the array
250            let arr = arr.clone();
251            let mut ir = parse_array(arr)?;
252            ir.header.root_key = Some((*wrapper_key).to_string());
253            ir.header.set_flag(FLAG_HAS_ROOT_KEY);
254            return Ok(ir);
255        }
256    }
257
258    // Single object - treat as single row
259    let flattened = flatten_object(&obj, "");
260    // Preserve field order from original object (serde_json preserves insertion order)
261    let mut field_names = Vec::new();
262    let mut array_markers = Vec::new();
263    collect_field_names_ordered(&obj, "", &mut field_names);
264
265    // Separate array markers from regular fields
266    let mut regular_fields = Vec::new();
267    for name in field_names {
268        if name.ends_with("[]") {
269            array_markers.push(name);
270        } else {
271            regular_fields.push(name);
272        }
273    }
274    // Add array markers at the end
275    regular_fields.extend(array_markers);
276    let field_names = regular_fields;
277
278    let mut fields = Vec::new();
279    let mut has_nulls = false;
280
281    for field_name in &field_names {
282        if field_name.ends_with("[]") {
283            // Array marker
284            fields.push(FieldDef::new(field_name.clone(), FieldType::Null));
285            has_nulls = true;
286        } else {
287            let value = &flattened[field_name];
288            let field_type = infer_type(value);
289            if value.is_null() {
290                has_nulls = true;
291            }
292            fields.push(FieldDef::new(field_name.clone(), field_type));
293        }
294    }
295
296    // Build values and null bitmap
297    let mut values = Vec::new();
298    let total_values = fields.len();
299    let bitmap_bytes = total_values.div_ceil(8);
300    let mut null_bitmap = vec![0u8; bitmap_bytes];
301
302    for (field_idx, field) in fields.iter().enumerate() {
303        // Handle array markers
304        if field.name.ends_with("[]") {
305            values.push(SchemaValue::Null);
306            set_null_bit(&mut null_bitmap, field_idx);
307            continue;
308        }
309
310        let json_value = &flattened[&field.name];
311        if json_value.is_null() {
312            values.push(SchemaValue::Null);
313            set_null_bit(&mut null_bitmap, field_idx);
314        } else {
315            values.push(json_to_schema_value(json_value, &field.field_type)?);
316        }
317    }
318
319    // Build header
320    let mut header = SchemaHeader::new(1, fields);
321    if has_nulls {
322        header.null_bitmap = Some(null_bitmap);
323        header.set_flag(FLAG_HAS_NULLS);
324    }
325
326    IntermediateRepresentation::new(header, values)
327}
328
329/// Collect field names in order from nested object
330fn collect_field_names_ordered(obj: &Map<String, Value>, prefix: &str, names: &mut Vec<String>) {
331    for (key, value) in obj {
332        let full_key = if prefix.is_empty() {
333            key.clone()
334        } else {
335            format!("{}{}{}", prefix, NEST_SEP, key)
336        };
337
338        match value {
339            Value::Object(nested) => {
340                collect_field_names_ordered(nested, &full_key, names);
341            }
342            Value::Array(arr) => {
343                // Mark this as an array
344                names.push(format!("{}[]", full_key));
345
346                // Collect indexed field names for array elements
347                for (idx, item) in arr.iter().enumerate() {
348                    let indexed_key = format!("{}{}{}", full_key, NEST_SEP, idx);
349                    collect_field_names_from_value(item, &indexed_key, names);
350                }
351            }
352            _ => {
353                names.push(full_key);
354            }
355        }
356    }
357}
358
359/// Helper to collect field names from any value type
360fn collect_field_names_from_value(value: &Value, prefix: &str, names: &mut Vec<String>) {
361    match value {
362        Value::Object(obj) => {
363            collect_field_names_ordered(obj, prefix, names);
364        }
365        Value::Array(arr) => {
366            // Mark this as an array
367            names.push(format!("{}[]", prefix));
368
369            for (idx, item) in arr.iter().enumerate() {
370                let indexed_key = format!("{}{}{}", prefix, NEST_SEP, idx);
371                collect_field_names_from_value(item, &indexed_key, names);
372            }
373        }
374        _ => {
375            names.push(prefix.to_string());
376        }
377    }
378}
379
380/// Flatten nested object with NEST_SEP delimiter
381/// Returns (flattened_map, array_paths) where array_paths tracks which keys are arrays
382fn flatten_object(obj: &Map<String, Value>, prefix: &str) -> HashMap<String, Value> {
383    let mut result = HashMap::new();
384
385    for (key, value) in obj {
386        let full_key = if prefix.is_empty() {
387            key.clone()
388        } else {
389            format!("{}{}{}", prefix, NEST_SEP, key)
390        };
391
392        match value {
393            Value::Object(nested) => {
394                result.extend(flatten_object(nested, &full_key));
395            }
396            Value::Array(arr) => {
397                // Mark this key as an array by inserting a marker
398                result.insert(format!("{}[]", full_key), Value::Null);
399
400                // Flatten array elements with indexed keys
401                for (idx, item) in arr.iter().enumerate() {
402                    let indexed_key = format!("{}{}{}", full_key, NEST_SEP, idx);
403                    match item {
404                        Value::Object(nested_obj) => {
405                            // Recursively flatten nested object
406                            result.extend(flatten_object(nested_obj, &indexed_key));
407                        }
408                        Value::Array(nested_arr) => {
409                            // Recursively handle nested arrays
410                            for (nested_idx, nested_item) in nested_arr.iter().enumerate() {
411                                let nested_indexed_key =
412                                    format!("{}{}{}", indexed_key, NEST_SEP, nested_idx);
413                                flatten_value(&nested_indexed_key, nested_item, &mut result);
414                            }
415                        }
416                        _ => {
417                            // Primitive values get direct insertion
418                            result.insert(indexed_key, item.clone());
419                        }
420                    }
421                }
422            }
423            _ => {
424                result.insert(full_key, value.clone());
425            }
426        }
427    }
428
429    result
430}
431
432/// Helper function to recursively flatten any value type
433fn flatten_value(key: &str, value: &Value, result: &mut HashMap<String, Value>) {
434    match value {
435        Value::Object(obj) => {
436            result.extend(flatten_object(obj, key));
437        }
438        Value::Array(arr) => {
439            // Mark this key as an array
440            result.insert(format!("{}[]", key), Value::Null);
441
442            for (idx, item) in arr.iter().enumerate() {
443                let indexed_key = format!("{}{}{}", key, NEST_SEP, idx);
444                flatten_value(&indexed_key, item, result);
445            }
446        }
447        _ => {
448            result.insert(key.to_string(), value.clone());
449        }
450    }
451}
452
453/// Infer type from a single JSON value
454fn infer_type(value: &Value) -> FieldType {
455    match value {
456        Value::Null => FieldType::Null,
457        Value::Bool(_) => FieldType::Bool,
458        Value::Number(n) => {
459            if n.is_f64() {
460                // Check if it has a fractional part
461                if let Some(f) = n.as_f64()
462                    && (f.fract() != 0.0 || f.is_infinite() || f.is_nan())
463                {
464                    return FieldType::F64;
465                }
466            }
467
468            if let Some(i) = n.as_i64() {
469                if i < 0 {
470                    FieldType::I64
471                } else {
472                    FieldType::U64
473                }
474            } else if n.as_u64().is_some() {
475                FieldType::U64
476            } else {
477                FieldType::F64
478            }
479        }
480        Value::String(_) => FieldType::String,
481        Value::Array(arr) => {
482            if arr.is_empty() {
483                FieldType::Array(Box::new(FieldType::Null))
484            } else {
485                // Infer from first non-null element
486                let element_type = arr
487                    .iter()
488                    .find(|v| !v.is_null())
489                    .map(infer_type)
490                    .unwrap_or(FieldType::Null);
491                FieldType::Array(Box::new(element_type))
492            }
493        }
494        Value::Object(_) => {
495            // This shouldn't happen after flattening
496            FieldType::String
497        }
498    }
499}
500
501/// Infer field type across multiple rows
502fn infer_field_type(
503    rows: &[HashMap<String, Value>],
504    field_name: &str,
505    has_nulls: &mut bool,
506) -> Result<FieldType, SchemaError> {
507    let mut inferred_type: Option<FieldType> = None;
508
509    for row in rows {
510        if let Some(value) = row.get(field_name) {
511            if value.is_null() {
512                *has_nulls = true;
513                continue;
514            }
515
516            let current_type = infer_type(value);
517
518            if let Some(ref existing_type) = inferred_type {
519                // Special case: Array(Null) unifies with Array(T) → Array(T)
520                if let (FieldType::Array(existing_inner), FieldType::Array(current_inner)) =
521                    (existing_type, &current_type)
522                {
523                    if **existing_inner == FieldType::Null && **current_inner != FieldType::Null {
524                        // Upgrade from Array(Null) to Array(T)
525                        inferred_type = Some(current_type.clone());
526                        continue;
527                    } else if **current_inner == FieldType::Null
528                        && **existing_inner != FieldType::Null
529                    {
530                        // Keep existing Array(T), ignore Array(Null)
531                        continue;
532                    }
533                }
534
535                if *existing_type != current_type {
536                    // Type conflict - use Any
537                    return Ok(FieldType::Any);
538                }
539            } else {
540                inferred_type = Some(current_type);
541            }
542        } else {
543            *has_nulls = true;
544        }
545    }
546
547    Ok(inferred_type.unwrap_or(FieldType::Null))
548}
549
550/// Convert JSON value to SchemaValue
551fn json_to_schema_value(
552    value: &Value,
553    expected_type: &FieldType,
554) -> Result<SchemaValue, SchemaError> {
555    match value {
556        Value::Null => Ok(SchemaValue::Null),
557        Value::Bool(b) => Ok(SchemaValue::Bool(*b)),
558        Value::Number(n) => match expected_type {
559            FieldType::U64 | FieldType::Any => {
560                if let Some(u) = n.as_u64() {
561                    Ok(SchemaValue::U64(u))
562                } else if let Some(i) = n.as_i64() {
563                    Ok(SchemaValue::I64(i))
564                } else {
565                    Ok(SchemaValue::F64(n.as_f64().unwrap()))
566                }
567            }
568            FieldType::I64 => {
569                if let Some(i) = n.as_i64() {
570                    Ok(SchemaValue::I64(i))
571                } else {
572                    Ok(SchemaValue::I64(n.as_f64().unwrap() as i64))
573                }
574            }
575            FieldType::F64 => Ok(SchemaValue::F64(n.as_f64().unwrap())),
576            _ => Err(SchemaError::InvalidInput(format!(
577                "Type mismatch: expected {}, but found number.\n\
578                 The field type was inferred or specified as {}, which doesn't accept numeric values.",
579                expected_type.display_name(),
580                expected_type.display_name()
581            ))),
582        },
583        Value::String(s) => Ok(SchemaValue::String(s.clone())),
584        Value::Array(arr) => {
585            let element_type = if let FieldType::Array(et) = expected_type {
586                et.as_ref()
587            } else {
588                return Err(SchemaError::InvalidInput(format!(
589                    "Internal error: Expected array type but found {}. This is a bug in type inference.",
590                    expected_type.display_name()
591                )));
592            };
593
594            let mut schema_values = Vec::new();
595            for item in arr {
596                schema_values.push(json_to_schema_value(item, element_type)?);
597            }
598            Ok(SchemaValue::Array(schema_values))
599        }
600        Value::Object(_) => Err(SchemaError::InvalidInput(
601            "Internal error: Encountered nested object that wasn't flattened. This is a bug in the JSON parser."
602                .to_string(),
603        )),
604    }
605}
606
/// Marks position `index` as null in `bitmap`.
///
/// Bits are packed least-significant-bit first: bit `index % 8` of byte `index / 8`.
///
/// # Panics
///
/// Panics if `index / 8` is out of bounds for `bitmap`.
fn set_null_bit(bitmap: &mut [u8], index: usize) {
    let mask = 1u8 << (index % 8);
    let byte = &mut bitmap[index / 8];
    *byte |= mask;
}
613
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `input`, panicking if the parser rejects it.
    fn parse_ok(input: &str) -> IntermediateRepresentation {
        JsonParser::parse(input).unwrap()
    }

    /// Field names of `ir` in header order.
    fn names_of(ir: &IntermediateRepresentation) -> Vec<&str> {
        ir.header
            .fields
            .iter()
            .map(|field| field.name.as_str())
            .collect()
    }

    #[test]
    fn test_simple_object() {
        let ir = parse_ok(r#"{"id":1,"name":"alice"}"#);

        assert_eq!(ir.header.row_count, 1);
        assert_eq!(ir.header.fields.len(), 2);
        assert_eq!(ir.values.len(), 2);
    }

    #[test]
    fn test_array_of_objects() {
        let ir = parse_ok(r#"[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]"#);

        assert_eq!(ir.header.row_count, 2);
        assert_eq!(ir.header.fields.len(), 2);
        assert_eq!(ir.values.len(), 4);
    }

    #[test]
    fn test_nested_object() {
        let ir = parse_ok(r#"{"user":{"profile":{"name":"alice"}}}"#);

        assert_eq!(ir.header.row_count, 1);
        assert_eq!(ir.header.fields.len(), 1);
        assert_eq!(ir.header.fields[0].name, "user჻profile჻name");
    }

    #[test]
    fn test_root_key() {
        let ir = parse_ok(r#"{"users":[{"id":1}]}"#);

        assert_eq!(ir.header.root_key, Some("users".to_string()));
        assert!(ir.header.has_flag(FLAG_HAS_ROOT_KEY));
    }

    #[test]
    fn test_all_types() {
        let ir = parse_ok(r#"{"u":1,"i":-1,"f":3.14,"s":"test","b":true,"n":null}"#);

        assert_eq!(ir.header.fields.len(), 6);
        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
    }

    #[test]
    fn test_null_handling() {
        let ir = parse_ok(r#"{"name":"alice","age":null}"#);

        assert!(ir.header.has_flag(FLAG_HAS_NULLS));

        // Locate the "age" column, then check that its single cell is null.
        let age_idx = names_of(&ir)
            .iter()
            .position(|name| *name == "age")
            .unwrap();
        assert!(ir.is_null(0, age_idx));
    }

    #[test]
    fn test_homogeneous_array() {
        // Arrays flatten to one indexed field per element plus a trailing marker.
        let ir = parse_ok(r#"{"scores":[1,2,3]}"#);

        // Three indexed element fields followed by the `scores[]` marker.
        assert_eq!(ir.header.fields.len(), 4);
        assert_eq!(ir.header.fields[0].name, "scores჻0");
        assert_eq!(ir.header.fields[0].field_type, FieldType::U64);
        assert_eq!(ir.header.fields[1].name, "scores჻1");
        assert_eq!(ir.header.fields[2].name, "scores჻2");
        assert_eq!(ir.header.fields[3].name, "scores[]");
    }

    #[test]
    fn test_empty_array() {
        // An empty array contributes nothing but its marker field.
        let ir = parse_ok(r#"{"items":[]}"#);

        assert_eq!(ir.header.fields.len(), 1);
        assert_eq!(ir.header.fields[0].name, "items[]");
    }

    #[test]
    fn test_deep_nesting() {
        let ir = parse_ok(r#"{"a":{"b":{"c":{"d":1}}}}"#);

        assert_eq!(ir.header.fields[0].name, "a჻b჻c჻d");
    }

    #[test]
    fn test_flatten_object() {
        let source: Map<String, Value> = serde_json::from_str(r#"{"a":{"b":1}}"#).unwrap();
        let flat = flatten_object(&source, "");

        assert_eq!(flat.len(), 1);
        assert!(flat.contains_key("a჻b"));
    }

    #[test]
    fn test_single_level_nesting() {
        let ir = parse_ok(
            r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#,
        );

        assert_eq!(ir.header.row_count, 1);
        assert_eq!(ir.header.fields.len(), 5);

        // Every scalar, top-level or nested, must surface as a field.
        let names = names_of(&ir);
        assert!(names.contains(&"id"));
        assert!(names.contains(&"name"));
        assert!(names.contains(&"grade჻math"));
        assert!(names.contains(&"grade჻physics"));
        assert!(names.contains(&"grade჻chemistry"));
    }

    #[test]
    fn test_array_of_nested_objects() {
        let ir = parse_ok(
            r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}}]}"#,
        );

        assert_eq!(ir.header.row_count, 1);
        assert_eq!(ir.header.root_key, Some("students".to_string()));

        let names = names_of(&ir);
        assert!(names.contains(&"id"));
        assert!(names.contains(&"name"));
        assert!(names.contains(&"grade჻math"));
        assert!(names.contains(&"grade჻physics"));
    }

    #[test]
    fn test_multiple_nested_levels() {
        let ir = parse_ok(r#"{"data":{"user":{"profile":{"address":{"city":"Boston"}}}}}"#);

        assert_eq!(ir.header.fields.len(), 1);
        assert_eq!(ir.header.fields[0].name, "data჻user჻profile჻address჻city");
    }

    #[test]
    fn test_mixed_arrays_and_objects() {
        // Arrays inside nested objects flatten to indexed fields as well.
        let ir = parse_ok(
            r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"NYC"}}}"#,
        );

        let names = names_of(&ir);
        assert!(names.contains(&"person჻name"));
        // The tags array becomes one field per element.
        assert!(names.contains(&"person჻tags჻0"));
        assert!(names.contains(&"person჻tags჻1"));
        assert!(names.contains(&"person჻address჻city"));

        // The first tag element is typed as a string, not as an array.
        let first_tag = ir
            .header
            .fields
            .iter()
            .find(|field| field.name == "person჻tags჻0")
            .unwrap();
        assert_eq!(first_tag.field_type, FieldType::String);
    }

    #[test]
    fn test_metadata_pattern() {
        let ir = parse_ok(
            r#"{"school_name": "Springfield High", "class": "Year 1", "students": [{"id": "A1"}, {"id": "B2"}]}"#,
        );

        // Scalars next to the array are captured as metadata.
        assert!(ir.header.metadata.is_some());
        let metadata = ir.header.metadata.as_ref().unwrap();
        assert_eq!(
            metadata.get("school_name").map(String::as_str),
            Some("Springfield High")
        );
        assert_eq!(metadata.get("class").map(String::as_str), Some("Year 1"));

        // The array supplies the data rows.
        assert_eq!(ir.header.root_key, Some("students".to_string()));
        assert_eq!(ir.header.row_count, 2);
        assert_eq!(ir.header.fields.len(), 1);
        assert_eq!(ir.header.fields[0].name, "id");
    }

    #[test]
    fn test_metadata_with_null() {
        let ir = parse_ok(r#"{"note": null, "total": 2, "users": [{"id": 1}, {"id": 2}]}"#);

        // Null metadata is kept, encoded as the ∅ symbol.
        assert!(ir.header.metadata.is_some());
        let metadata = ir.header.metadata.as_ref().unwrap();
        assert_eq!(metadata.get("note").map(String::as_str), Some("∅"));
        assert_eq!(metadata.get("total").map(String::as_str), Some("2"));

        // The array supplies the data rows.
        assert_eq!(ir.header.root_key, Some("users".to_string()));
        assert_eq!(ir.header.row_count, 2);
        assert_eq!(ir.header.fields.len(), 1);
        assert_eq!(ir.header.fields[0].name, "id");
    }
}
827        assert_eq!(ir.header.fields[0].name, "id");
828    }
829}