base_d/encoders/algorithms/schema/parsers/
json.rs

1use crate::encoders::algorithms::schema::parsers::InputParser;
2use crate::encoders::algorithms::schema::stele::NEST_SEP;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value};
5use std::collections::HashMap;
6
/// Stateless [`InputParser`] implementation that reads JSON text and produces
/// the schema encoder's `IntermediateRepresentation`.
pub struct JsonParser;
8
9impl InputParser for JsonParser {
10    type Error = SchemaError;
11
12    fn parse(input: &str) -> Result<IntermediateRepresentation, Self::Error> {
13        let parsed: Value = serde_json::from_str(input).map_err(|e| {
14            SchemaError::InvalidInput(format!(
15                "Invalid JSON syntax: {}\n\
16                 Ensure the input is valid JSON.",
17                e
18            ))
19        })?;
20
21        match parsed {
22            Value::Array(arr) => parse_array(arr),
23            Value::Object(obj) => parse_object(obj),
24            _ => Err(SchemaError::InvalidInput(
25                "Expected JSON object or array at root level.\n\
26                 Schema encoding works with:\n\
27                 - Single object: {\"name\": \"value\"}\n\
28                 - Array of objects: [{\"id\": 1}, {\"id\": 2}]\n\
29                 - Object with array: {\"users\": [{\"id\": 1}]}"
30                    .to_string(),
31            )),
32        }
33    }
34}
35
36/// Parse array of objects (tabular data)
37fn parse_array(arr: Vec<Value>) -> Result<IntermediateRepresentation, SchemaError> {
38    if arr.is_empty() {
39        return Err(SchemaError::InvalidInput(
40            "Empty array - cannot infer schema from zero rows.\n\
41             Provide at least one object in the array."
42                .to_string(),
43        ));
44    }
45
46    let row_count = arr.len();
47    let mut all_rows: Vec<Map<String, Value>> = Vec::new();
48
49    // Extract objects from array
50    for (idx, item) in arr.into_iter().enumerate() {
51        match item {
52            Value::Object(obj) => all_rows.push(obj),
53            other => {
54                let type_name = match other {
55                    Value::Null => "null",
56                    Value::Bool(_) => "boolean",
57                    Value::Number(_) => "number",
58                    Value::String(_) => "string",
59                    Value::Array(_) => "array",
60                    Value::Object(_) => unreachable!(),
61                };
62                return Err(SchemaError::InvalidInput(format!(
63                    "Array must contain only objects (tabular data). Found {} at index {}.\n\
64                     Schema encoding expects arrays of objects like: [{{\"id\": 1}}, {{\"id\": 2}}]",
65                    type_name, idx
66                )));
67            }
68        }
69    }
70
71    // Flatten all objects and collect field names
72    let mut flattened_rows: Vec<HashMap<String, Value>> = Vec::new();
73    let mut all_field_names = std::collections::BTreeSet::new();
74    let mut array_markers = std::collections::BTreeSet::new();
75
76    for obj in &all_rows {
77        let flattened = flatten_object(obj, "");
78        for key in flattened.keys() {
79            if key.ends_with("⟦⟧") {
80                // This is an array marker, track it separately
81                array_markers.insert(key.clone());
82            } else {
83                all_field_names.insert(key.clone());
84            }
85        }
86        flattened_rows.push(flattened);
87    }
88
89    // Add array markers as fields with special marker type
90    let mut field_names: Vec<String> = all_field_names.into_iter().collect();
91    let array_marker_names: Vec<String> = array_markers.into_iter().collect();
92    field_names.extend(array_marker_names);
93
94    // Infer types and build fields
95    let mut fields = Vec::new();
96    let mut has_nulls = false;
97
98    for field_name in &field_names {
99        if field_name.ends_with("⟦⟧") {
100            // Array marker - use a special type to indicate this is metadata
101            fields.push(FieldDef::new(field_name.clone(), FieldType::Null));
102        } else {
103            let field_type = infer_field_type(&flattened_rows, field_name, &mut has_nulls)?;
104            fields.push(FieldDef::new(field_name.clone(), field_type));
105        }
106    }
107
108    // Build values and null bitmap
109    let mut values = Vec::new();
110    let total_values = row_count * fields.len();
111    let bitmap_bytes = total_values.div_ceil(8);
112    let mut null_bitmap = vec![0u8; bitmap_bytes];
113
114    for (row_idx, row) in flattened_rows.iter().enumerate() {
115        for (field_idx, field) in fields.iter().enumerate() {
116            let value_idx = row_idx * fields.len() + field_idx;
117
118            // Handle array markers - always null
119            if field.name.ends_with("⟦⟧") {
120                values.push(SchemaValue::Null);
121                set_null_bit(&mut null_bitmap, value_idx);
122                has_nulls = true;
123                continue;
124            }
125
126            if let Some(json_value) = row.get(&field.name)
127                && json_value.is_null()
128            {
129                values.push(SchemaValue::Null);
130                set_null_bit(&mut null_bitmap, value_idx);
131                has_nulls = true;
132            } else if let Some(json_value) = row.get(&field.name) {
133                values.push(json_to_schema_value(json_value, &field.field_type)?);
134            } else {
135                // Missing field = null
136                values.push(SchemaValue::Null);
137                set_null_bit(&mut null_bitmap, value_idx);
138                has_nulls = true;
139            }
140        }
141    }
142
143    // Build header
144    let mut header = SchemaHeader::new(row_count, fields);
145    if has_nulls {
146        header.null_bitmap = Some(null_bitmap);
147        header.set_flag(FLAG_HAS_NULLS);
148    }
149
150    IntermediateRepresentation::new(header, values)
151}
152
153/// Parse single object (may have root key)
154fn parse_object(obj: Map<String, Value>) -> Result<IntermediateRepresentation, SchemaError> {
155    // Check for common pagination wrapper keys
156    const WRAPPER_KEYS: &[&str] = &["results", "data", "items", "records"];
157
158    // Check for metadata pattern: scalar fields + one array field
159    let mut array_field: Option<(String, Vec<Value>)> = None;
160    let mut scalar_fields: std::collections::HashMap<String, String> =
161        std::collections::HashMap::new();
162
163    for (key, value) in &obj {
164        match value {
165            // Primitive arrays are NOT metadata - they become inline fields with ◈
166            // Skip them here so they get handled by flatten_object()
167            Value::Array(arr) if is_primitive_array(arr) => {
168                // Don't treat as metadata - will be handled as inline array field
169            }
170            Value::Array(arr)
171                if !arr.is_empty() && arr.iter().all(|item| matches!(item, Value::Object(_))) =>
172            {
173                if array_field.is_none() {
174                    array_field = Some((key.clone(), arr.clone()));
175                } else {
176                    // Multiple object arrays - not metadata pattern
177                    array_field = None;
178                    scalar_fields.clear();
179                    break;
180                }
181            }
182            Value::String(s) => {
183                scalar_fields.insert(key.clone(), s.clone());
184            }
185            Value::Number(n) => {
186                scalar_fields.insert(key.clone(), n.to_string());
187            }
188            Value::Bool(b) => {
189                scalar_fields.insert(key.clone(), b.to_string());
190            }
191            Value::Null => {
192                // Encode null metadata as ∅ symbol
193                scalar_fields.insert(key.clone(), "∅".to_string());
194            }
195            _ => {
196                // Non-scalar or nested object - not metadata pattern
197                scalar_fields.clear();
198                array_field = None;
199                break;
200            }
201        }
202    }
203
204    // If we have exactly one array field and at least one scalar field, extract metadata
205    if let Some((array_key, arr)) = array_field
206        && !scalar_fields.is_empty()
207    {
208        let mut ir = parse_array(arr)?;
209        ir.header.root_key = Some(array_key);
210        ir.header.set_flag(FLAG_HAS_ROOT_KEY);
211        ir.header.metadata = Some(scalar_fields);
212        return Ok(ir);
213    }
214
215    // Check if this is a wrapper object with one of the known keys
216    if obj.len() == 1 {
217        // Check if value is an array of objects before consuming
218        let is_root_key_pattern = obj
219            .values()
220            .next()
221            .map(|v| {
222                if let Value::Array(arr) = v {
223                    // Only treat as root key if array contains objects (tabular data)
224                    !arr.is_empty() && arr.iter().all(|item| matches!(item, Value::Object(_)))
225                } else {
226                    false
227                }
228            })
229            .unwrap_or(false);
230
231        if is_root_key_pattern {
232            // Extract key and value by consuming the map
233            let (key, value) = obj.into_iter().next().unwrap();
234            // We already checked it's an array
235            let arr = match value {
236                Value::Array(a) => a,
237                _ => unreachable!(),
238            };
239
240            // Parse as array with root key
241            let mut ir = parse_array(arr)?;
242            ir.header.root_key = Some(key);
243            ir.header.set_flag(FLAG_HAS_ROOT_KEY);
244            return Ok(ir);
245        }
246    }
247
248    // Check for known wrapper patterns and unwrap them
249    // Only unwrap if the wrapper key is the ONLY field (or with scalar metadata)
250    // If there are other arrays (primitive or object), don't unwrap
251    let has_other_arrays = obj
252        .iter()
253        .any(|(k, v)| matches!(v, Value::Array(_)) && !WRAPPER_KEYS.contains(&k.as_str()));
254
255    if !has_other_arrays {
256        for wrapper_key in WRAPPER_KEYS {
257            if let Some(Value::Array(arr)) = obj.get(*wrapper_key)
258                && !arr.is_empty()
259                && arr.iter().all(|item| matches!(item, Value::Object(_)))
260            {
261                // Found a wrapper key - unwrap and parse the array
262                let arr = arr.clone();
263                let mut ir = parse_array(arr)?;
264                ir.header.root_key = Some((*wrapper_key).to_string());
265                ir.header.set_flag(FLAG_HAS_ROOT_KEY);
266                return Ok(ir);
267            }
268        }
269    }
270
271    // Single object - treat as single row
272    let flattened = flatten_object(&obj, "");
273    // Preserve field order from original object (serde_json preserves insertion order)
274    let mut field_names = Vec::new();
275    let mut array_markers = Vec::new();
276    collect_field_names_ordered(&obj, "", &mut field_names);
277
278    // Separate array markers from regular fields
279    let mut regular_fields = Vec::new();
280    for name in field_names {
281        if name.ends_with("⟦⟧") {
282            array_markers.push(name);
283        } else {
284            regular_fields.push(name);
285        }
286    }
287    // Add array markers at the end
288    regular_fields.extend(array_markers);
289    let field_names = regular_fields;
290
291    let mut fields = Vec::new();
292    let mut has_nulls = false;
293
294    for field_name in &field_names {
295        if field_name.ends_with("⟦⟧") {
296            // Array marker
297            fields.push(FieldDef::new(field_name.clone(), FieldType::Null));
298            has_nulls = true;
299        } else if let Some(value) = flattened.get(field_name) {
300            let field_type = infer_type(value);
301            if value.is_null() {
302                has_nulls = true;
303            }
304            fields.push(FieldDef::new(field_name.clone(), field_type));
305        }
306        // Skip fields that don't exist in flattened (shouldn't happen but defensive)
307    }
308
309    // Build values and null bitmap
310    let mut values = Vec::new();
311    let total_values = fields.len();
312    let bitmap_bytes = total_values.div_ceil(8);
313    let mut null_bitmap = vec![0u8; bitmap_bytes];
314
315    for (field_idx, field) in fields.iter().enumerate() {
316        // Handle array markers
317        if field.name.ends_with("⟦⟧") {
318            values.push(SchemaValue::Null);
319            set_null_bit(&mut null_bitmap, field_idx);
320            continue;
321        }
322
323        let json_value = &flattened[&field.name];
324        if json_value.is_null() {
325            values.push(SchemaValue::Null);
326            set_null_bit(&mut null_bitmap, field_idx);
327        } else {
328            values.push(json_to_schema_value(json_value, &field.field_type)?);
329        }
330    }
331
332    // Build header
333    let mut header = SchemaHeader::new(1, fields);
334    if has_nulls {
335        header.null_bitmap = Some(null_bitmap);
336        header.set_flag(FLAG_HAS_NULLS);
337    }
338
339    IntermediateRepresentation::new(header, values)
340}
341
342/// Collect field names in order from nested object
343fn collect_field_names_ordered(obj: &Map<String, Value>, prefix: &str, names: &mut Vec<String>) {
344    for (key, value) in obj {
345        let full_key = if prefix.is_empty() {
346            key.clone()
347        } else {
348            format!("{}{}{}", prefix, NEST_SEP, key)
349        };
350
351        match value {
352            Value::Object(nested) => {
353                collect_field_names_ordered(nested, &full_key, names);
354            }
355            Value::Array(arr) => {
356                // Check if this is a primitive array
357                if is_primitive_array(arr) {
358                    // Inline primitive array: single field name (no marker suffix)
359                    names.push(full_key);
360                } else {
361                    // Arrays of objects: use marker + indexed paths
362                    // Mark this as an array
363                    names.push(format!("{}⟦⟧", full_key));
364
365                    // Collect indexed field names for array elements
366                    for (idx, item) in arr.iter().enumerate() {
367                        let indexed_key = format!("{}{}{}", full_key, NEST_SEP, idx);
368                        collect_field_names_from_value(item, &indexed_key, names);
369                    }
370                }
371            }
372            _ => {
373                names.push(full_key);
374            }
375        }
376    }
377}
378
379/// Helper to collect field names from any value type
380fn collect_field_names_from_value(value: &Value, prefix: &str, names: &mut Vec<String>) {
381    match value {
382        Value::Object(obj) => {
383            collect_field_names_ordered(obj, prefix, names);
384        }
385        Value::Array(arr) => {
386            // Check if this is a primitive array
387            if is_primitive_array(arr) {
388                // Inline primitive array: single field name
389                names.push(prefix.to_string());
390            } else {
391                // Arrays of objects: use marker + indexed paths
392                // Mark this as an array
393                names.push(format!("{}⟦⟧", prefix));
394
395                for (idx, item) in arr.iter().enumerate() {
396                    let indexed_key = format!("{}{}{}", prefix, NEST_SEP, idx);
397                    collect_field_names_from_value(item, &indexed_key, names);
398                }
399            }
400        }
401        _ => {
402            names.push(prefix.to_string());
403        }
404    }
405}
406
407/// Check if array contains only primitive values (not objects/arrays)
408fn is_primitive_array(arr: &[Value]) -> bool {
409    arr.iter().all(|v| {
410        matches!(
411            v,
412            Value::String(_) | Value::Number(_) | Value::Bool(_) | Value::Null
413        )
414    })
415}
416
417/// Flatten nested object with NEST_SEP delimiter
418/// Returns (flattened_map, array_paths) where array_paths tracks which keys are arrays
419fn flatten_object(obj: &Map<String, Value>, prefix: &str) -> HashMap<String, Value> {
420    let mut result = HashMap::new();
421
422    for (key, value) in obj {
423        let full_key = if prefix.is_empty() {
424            key.clone()
425        } else {
426            format!("{}{}{}", prefix, NEST_SEP, key)
427        };
428
429        match value {
430            Value::Object(nested) => {
431                result.extend(flatten_object(nested, &full_key));
432            }
433            Value::Array(arr) => {
434                // Check if this is a primitive array
435                if is_primitive_array(arr) {
436                    // Inline primitive array: store as single field with type⟦⟧
437                    // Store as Value::Array to preserve array type even when empty
438                    result.insert(full_key, Value::Array(arr.clone()));
439                } else {
440                    // Arrays of objects/arrays: use indexed paths (current behavior)
441                    // Mark this key as an array by inserting a marker
442                    result.insert(format!("{}⟦⟧", full_key), Value::Null);
443
444                    // Flatten array elements with indexed keys
445                    for (idx, item) in arr.iter().enumerate() {
446                        let indexed_key = format!("{}{}{}", full_key, NEST_SEP, idx);
447                        match item {
448                            Value::Object(nested_obj) => {
449                                // Recursively flatten nested object
450                                result.extend(flatten_object(nested_obj, &indexed_key));
451                            }
452                            Value::Array(nested_arr) => {
453                                // Check if nested array is primitive
454                                if is_primitive_array(nested_arr) {
455                                    // Store primitive array directly at indexed position
456                                    result.insert(indexed_key, Value::Array(nested_arr.clone()));
457                                } else {
458                                    // Mark this indexed element as an array
459                                    result.insert(format!("{}⟦⟧", indexed_key), Value::Null);
460                                    // Recursively handle nested arrays
461                                    for (nested_idx, nested_item) in nested_arr.iter().enumerate() {
462                                        let nested_indexed_key =
463                                            format!("{}{}{}", indexed_key, NEST_SEP, nested_idx);
464                                        flatten_value(
465                                            &nested_indexed_key,
466                                            nested_item,
467                                            &mut result,
468                                        );
469                                    }
470                                }
471                            }
472                            _ => {
473                                // Primitive values get direct insertion
474                                result.insert(indexed_key, item.clone());
475                            }
476                        }
477                    }
478                }
479            }
480            _ => {
481                result.insert(full_key, value.clone());
482            }
483        }
484    }
485
486    result
487}
488
489/// Helper function to recursively flatten any value type
490fn flatten_value(key: &str, value: &Value, result: &mut HashMap<String, Value>) {
491    match value {
492        Value::Object(obj) => {
493            result.extend(flatten_object(obj, key));
494        }
495        Value::Array(arr) => {
496            // Check if this is a primitive array
497            if is_primitive_array(arr) {
498                // Inline primitive array - store as Value::Array to preserve type
499                result.insert(key.to_string(), Value::Array(arr.clone()));
500            } else {
501                // Arrays of objects/arrays: use indexed paths
502                // Mark this key as an array
503                result.insert(format!("{}⟦⟧", key), Value::Null);
504
505                for (idx, item) in arr.iter().enumerate() {
506                    let indexed_key = format!("{}{}{}", key, NEST_SEP, idx);
507                    flatten_value(&indexed_key, item, result);
508                }
509            }
510        }
511        _ => {
512            result.insert(key.to_string(), value.clone());
513        }
514    }
515}
516
517/// Infer type from a single JSON value
518fn infer_type(value: &Value) -> FieldType {
519    match value {
520        Value::Null => FieldType::Null,
521        Value::Bool(_) => FieldType::Bool,
522        Value::Number(n) => {
523            if n.is_f64() {
524                // Check if it has a fractional part
525                if let Some(f) = n.as_f64()
526                    && (f.fract() != 0.0 || f.is_infinite() || f.is_nan())
527                {
528                    return FieldType::F64;
529                }
530            }
531
532            if let Some(i) = n.as_i64() {
533                if i < 0 {
534                    FieldType::I64
535                } else {
536                    FieldType::U64
537                }
538            } else if n.as_u64().is_some() {
539                FieldType::U64
540            } else {
541                FieldType::F64
542            }
543        }
544        Value::String(_) => FieldType::String,
545        Value::Array(arr) => {
546            if arr.is_empty() {
547                FieldType::Array(Box::new(FieldType::Null))
548            } else {
549                // Infer from first non-null element
550                let element_type = arr
551                    .iter()
552                    .find(|v| !v.is_null())
553                    .map(infer_type)
554                    .unwrap_or(FieldType::Null);
555                FieldType::Array(Box::new(element_type))
556            }
557        }
558        Value::Object(_) => {
559            // This shouldn't happen after flattening
560            FieldType::String
561        }
562    }
563}
564
565/// Infer field type across multiple rows
566fn infer_field_type(
567    rows: &[HashMap<String, Value>],
568    field_name: &str,
569    has_nulls: &mut bool,
570) -> Result<FieldType, SchemaError> {
571    let mut inferred_type: Option<FieldType> = None;
572
573    for row in rows {
574        if let Some(value) = row.get(field_name) {
575            if value.is_null() {
576                *has_nulls = true;
577                continue;
578            }
579
580            let current_type = infer_type(value);
581
582            if let Some(ref existing_type) = inferred_type {
583                // Special case: Array(Null) unifies with Array(T) → Array(T)
584                if let (FieldType::Array(existing_inner), FieldType::Array(current_inner)) =
585                    (existing_type, &current_type)
586                {
587                    if **existing_inner == FieldType::Null && **current_inner != FieldType::Null {
588                        // Upgrade from Array(Null) to Array(T)
589                        inferred_type = Some(current_type.clone());
590                        continue;
591                    } else if **current_inner == FieldType::Null
592                        && **existing_inner != FieldType::Null
593                    {
594                        // Keep existing Array(T), ignore Array(Null)
595                        continue;
596                    }
597                }
598
599                if *existing_type != current_type {
600                    // Type conflict - use Any
601                    return Ok(FieldType::Any);
602                }
603            } else {
604                inferred_type = Some(current_type);
605            }
606        } else {
607            *has_nulls = true;
608        }
609    }
610
611    Ok(inferred_type.unwrap_or(FieldType::Null))
612}
613
614/// Convert JSON value to SchemaValue
615fn json_to_schema_value(
616    value: &Value,
617    expected_type: &FieldType,
618) -> Result<SchemaValue, SchemaError> {
619    match value {
620        Value::Null => Ok(SchemaValue::Null),
621        Value::Bool(b) => Ok(SchemaValue::Bool(*b)),
622        Value::Number(n) => match expected_type {
623            FieldType::U64 | FieldType::Any => {
624                if let Some(u) = n.as_u64() {
625                    Ok(SchemaValue::U64(u))
626                } else if let Some(i) = n.as_i64() {
627                    Ok(SchemaValue::I64(i))
628                } else {
629                    Ok(SchemaValue::F64(n.as_f64().unwrap()))
630                }
631            }
632            FieldType::I64 => {
633                if let Some(i) = n.as_i64() {
634                    Ok(SchemaValue::I64(i))
635                } else {
636                    Ok(SchemaValue::I64(n.as_f64().unwrap() as i64))
637                }
638            }
639            FieldType::F64 => Ok(SchemaValue::F64(n.as_f64().unwrap())),
640            _ => Err(SchemaError::InvalidInput(format!(
641                "Type mismatch: expected {}, but found number.\n\
642                 The field type was inferred or specified as {}, which doesn't accept numeric values.",
643                expected_type.display_name(),
644                expected_type.display_name()
645            ))),
646        },
647        Value::String(s) => Ok(SchemaValue::String(s.clone())),
648        Value::Array(arr) => {
649            let element_type = if let FieldType::Array(et) = expected_type {
650                et.as_ref()
651            } else {
652                return Err(SchemaError::InvalidInput(format!(
653                    "Internal error: Expected array type but found {}. This is a bug in type inference.",
654                    expected_type.display_name()
655                )));
656            };
657
658            let mut schema_values = Vec::new();
659            for item in arr {
660                schema_values.push(json_to_schema_value(item, element_type)?);
661            }
662            Ok(SchemaValue::Array(schema_values))
663        }
664        Value::Object(_) => Err(SchemaError::InvalidInput(
665            "Internal error: Encountered nested object that wasn't flattened. This is a bug in the JSON parser."
666                .to_string(),
667        )),
668    }
669}
670
/// Mark entry `index` as null in `bitmap`.
///
/// Bits are packed eight per byte, least-significant bit first: entry `i`
/// lives in byte `i / 8` at bit `i % 8`.
///
/// # Panics
/// Panics if `index / 8` is out of bounds for `bitmap`.
fn set_null_bit(bitmap: &mut [u8], index: usize) {
    let mask = 1u8 << (index & 7);
    bitmap[index >> 3] |= mask;
}
677
678#[cfg(test)]
679mod tests {
680    use super::*;
681
682    #[test]
683    fn test_simple_object() {
684        let input = r#"{"id":1,"name":"alice"}"#;
685        let ir = JsonParser::parse(input).unwrap();
686
687        assert_eq!(ir.header.row_count, 1);
688        assert_eq!(ir.header.fields.len(), 2);
689        assert_eq!(ir.values.len(), 2);
690    }
691
692    #[test]
693    fn test_array_of_objects() {
694        let input = r#"[{"id":1,"name":"alice"},{"id":2,"name":"bob"}]"#;
695        let ir = JsonParser::parse(input).unwrap();
696
697        assert_eq!(ir.header.row_count, 2);
698        assert_eq!(ir.header.fields.len(), 2);
699        assert_eq!(ir.values.len(), 4);
700    }
701
702    #[test]
703    fn test_nested_object() {
704        let input = r#"{"user":{"profile":{"name":"alice"}}}"#;
705        let ir = JsonParser::parse(input).unwrap();
706
707        assert_eq!(ir.header.row_count, 1);
708        assert_eq!(ir.header.fields.len(), 1);
709        assert_eq!(ir.header.fields[0].name, "user჻profile჻name");
710    }
711
712    #[test]
713    fn test_root_key() {
714        let input = r#"{"users":[{"id":1}]}"#;
715        let ir = JsonParser::parse(input).unwrap();
716
717        assert_eq!(ir.header.root_key, Some("users".to_string()));
718        assert!(ir.header.has_flag(FLAG_HAS_ROOT_KEY));
719    }
720
721    #[test]
722    fn test_all_types() {
723        let input = r#"{"u":1,"i":-1,"f":3.14,"s":"test","b":true,"n":null}"#;
724        let ir = JsonParser::parse(input).unwrap();
725
726        assert_eq!(ir.header.fields.len(), 6);
727        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
728    }
729
730    #[test]
731    fn test_null_handling() {
732        let input = r#"{"name":"alice","age":null}"#;
733        let ir = JsonParser::parse(input).unwrap();
734
735        assert!(ir.header.has_flag(FLAG_HAS_NULLS));
736
737        // Find which field is "age"
738        let age_idx = ir
739            .header
740            .fields
741            .iter()
742            .position(|f| f.name == "age")
743            .unwrap();
744        assert!(ir.is_null(0, age_idx)); // age field is null
745    }
746
747    #[test]
748    fn test_homogeneous_array() {
749        // Primitive arrays now stored inline with single field
750        let input = r#"{"scores":[1,2,3]}"#;
751        let ir = JsonParser::parse(input).unwrap();
752
753        // Should have 1 field: scores with Array type
754        assert_eq!(ir.header.fields.len(), 1);
755        assert_eq!(ir.header.fields[0].name, "scores");
756        assert!(matches!(
757            ir.header.fields[0].field_type,
758            FieldType::Array(_)
759        ));
760
761        // Verify the array values
762        if let Some(SchemaValue::Array(arr)) = ir.get_value(0, 0) {
763            assert_eq!(arr.len(), 3);
764        } else {
765            panic!("Expected array value");
766        }
767    }
768
769    #[test]
770    fn test_empty_array() {
771        // Empty primitive arrays stored inline as empty SchemaValue::Array
772        let input = r#"{"items":[]}"#;
773        let ir = JsonParser::parse(input).unwrap();
774
775        // Empty array produces single field with Array type
776        assert_eq!(ir.header.fields.len(), 1);
777        assert_eq!(ir.header.fields[0].name, "items");
778        assert!(matches!(
779            ir.header.fields[0].field_type,
780            FieldType::Array(_)
781        ));
782
783        // Value should be empty array
784        if let Some(SchemaValue::Array(arr)) = ir.get_value(0, 0) {
785            assert_eq!(arr.len(), 0);
786        } else {
787            panic!("Expected empty array");
788        }
789    }
790
791    #[test]
792    fn test_deep_nesting() {
793        let input = r#"{"a":{"b":{"c":{"d":1}}}}"#;
794        let ir = JsonParser::parse(input).unwrap();
795
796        assert_eq!(ir.header.fields[0].name, "a჻b჻c჻d");
797    }
798
799    #[test]
800    fn test_flatten_object() {
801        let obj: Map<String, Value> = serde_json::from_str(r#"{"a":{"b":1}}"#).unwrap();
802        let flattened = flatten_object(&obj, "");
803
804        assert_eq!(flattened.len(), 1);
805        assert!(flattened.contains_key("a჻b"));
806    }
807
808    #[test]
809    fn test_single_level_nesting() {
810        let input = r#"{"id":"A1","name":"Jim","grade":{"math":60,"physics":66,"chemistry":61}}"#;
811        let ir = JsonParser::parse(input).unwrap();
812
813        assert_eq!(ir.header.row_count, 1);
814        assert_eq!(ir.header.fields.len(), 5);
815
816        // Check field names
817        let field_names: Vec<String> = ir.header.fields.iter().map(|f| f.name.clone()).collect();
818        assert!(field_names.contains(&"id".to_string()));
819        assert!(field_names.contains(&"name".to_string()));
820        assert!(field_names.contains(&"grade჻math".to_string()));
821        assert!(field_names.contains(&"grade჻physics".to_string()));
822        assert!(field_names.contains(&"grade჻chemistry".to_string()));
823    }
824
825    #[test]
826    fn test_array_of_nested_objects() {
827        let input = r#"{"students":[{"id":"A1","name":"Jim","grade":{"math":60,"physics":66}}]}"#;
828        let ir = JsonParser::parse(input).unwrap();
829
830        assert_eq!(ir.header.row_count, 1);
831        assert_eq!(ir.header.root_key, Some("students".to_string()));
832
833        let field_names: Vec<String> = ir.header.fields.iter().map(|f| f.name.clone()).collect();
834        assert!(field_names.contains(&"id".to_string()));
835        assert!(field_names.contains(&"name".to_string()));
836        assert!(field_names.contains(&"grade჻math".to_string()));
837        assert!(field_names.contains(&"grade჻physics".to_string()));
838    }
839
840    #[test]
841    fn test_multiple_nested_levels() {
842        let input = r#"{"data":{"user":{"profile":{"address":{"city":"Boston"}}}}}"#;
843        let ir = JsonParser::parse(input).unwrap();
844
845        assert_eq!(ir.header.fields.len(), 1);
846        assert_eq!(ir.header.fields[0].name, "data჻user჻profile჻address჻city");
847    }
848
849    #[test]
850    fn test_mixed_arrays_and_objects() {
851        // Primitive arrays now stored inline as single field
852        let input =
853            r#"{"person":{"name":"Alice","tags":["admin","user"],"address":{"city":"NYC"}}}"#;
854        let ir = JsonParser::parse(input).unwrap();
855
856        let field_names: Vec<String> = ir.header.fields.iter().map(|f| f.name.clone()).collect();
857        assert!(field_names.contains(&"person჻name".to_string()));
858        // tags array now a single inline field
859        assert!(field_names.contains(&"person჻tags".to_string()));
860        assert!(field_names.contains(&"person჻address჻city".to_string()));
861
862        // Verify tags is an Array type
863        let tags_field = ir
864            .header
865            .fields
866            .iter()
867            .find(|f| f.name == "person჻tags")
868            .unwrap();
869        assert!(matches!(tags_field.field_type, FieldType::Array(_)));
870    }
871
872    #[test]
873    fn test_metadata_pattern() {
874        let input = r#"{"school_name": "Springfield High", "class": "Year 1", "students": [{"id": "A1"}, {"id": "B2"}]}"#;
875        let ir = JsonParser::parse(input).unwrap();
876
877        // Should extract metadata
878        assert!(ir.header.metadata.is_some());
879        let metadata = ir.header.metadata.as_ref().unwrap();
880        assert_eq!(
881            metadata.get("school_name"),
882            Some(&"Springfield High".to_string())
883        );
884        assert_eq!(metadata.get("class"), Some(&"Year 1".to_string()));
885
886        // Array becomes the data rows
887        assert_eq!(ir.header.root_key, Some("students".to_string()));
888        assert_eq!(ir.header.row_count, 2);
889        assert_eq!(ir.header.fields.len(), 1);
890        assert_eq!(ir.header.fields[0].name, "id");
891    }
892
893    #[test]
894    fn test_metadata_with_null() {
895        let input = r#"{"note": null, "total": 2, "users": [{"id": 1}, {"id": 2}]}"#;
896        let ir = JsonParser::parse(input).unwrap();
897
898        // Should extract metadata including null
899        assert!(ir.header.metadata.is_some());
900        let metadata = ir.header.metadata.as_ref().unwrap();
901        assert_eq!(metadata.get("note"), Some(&"∅".to_string()));
902        assert_eq!(metadata.get("total"), Some(&"2".to_string()));
903
904        // Array data
905        assert_eq!(ir.header.root_key, Some("users".to_string()));
906        assert_eq!(ir.header.row_count, 2);
907        assert_eq!(ir.header.fields.len(), 1);
908        assert_eq!(ir.header.fields[0].name, "id");
909    }
910}