base_d/encoders/algorithms/schema/serializers/
json.rs

1use crate::encoders::algorithms::schema::serializers::OutputSerializer;
2use crate::encoders::algorithms::schema::stele::NEST_SEP;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value, json};
5use std::collections::HashMap;
6
7pub struct JsonSerializer;
8
9impl OutputSerializer for JsonSerializer {
10    type Error = SchemaError;
11
12    fn serialize(ir: &IntermediateRepresentation, pretty: bool) -> Result<String, Self::Error> {
13        if ir.header.row_count == 0 {
14            return Err(SchemaError::InvalidInput(
15                "No rows to serialize".to_string(),
16            ));
17        }
18
19        // Build rows
20        let mut rows = Vec::new();
21        for row_idx in 0..ir.header.row_count {
22            let mut row_map = HashMap::new();
23
24            for (field_idx, field) in ir.header.fields.iter().enumerate() {
25                let value = ir
26                    .get_value(row_idx, field_idx)
27                    .ok_or_else(|| SchemaError::InvalidInput("Missing value".to_string()))?;
28
29                let json_value = if ir.is_null(row_idx, field_idx) {
30                    // Check if field type is Array - null in array field means empty array
31                    if matches!(field.field_type, FieldType::Array(_)) {
32                        Value::Array(vec![])
33                    } else {
34                        Value::Null
35                    }
36                } else {
37                    schema_value_to_json(value)?
38                };
39
40                row_map.insert(field.name.clone(), json_value);
41            }
42
43            rows.push(row_map);
44        }
45
46        // Unflatten each row
47        let mut unflattened_rows = Vec::new();
48        for row_map in rows {
49            let unflattened = unflatten_object(row_map);
50            unflattened_rows.push(unflattened);
51        }
52
53        // Determine output format
54        let result = if ir.header.row_count == 1
55            && ir.header.metadata.is_none()
56            && ir.header.root_key.is_none()
57        {
58            // Single row without metadata and no root key - output as object
59            unflattened_rows.into_iter().next().unwrap()
60        } else {
61            // Multiple rows OR single row with metadata OR root key present - output as array
62            Value::Array(unflattened_rows)
63        };
64
65        // Apply root key and metadata if present
66        let final_result = if let Some(root_key) = &ir.header.root_key {
67            let mut obj = Map::new();
68
69            // Add metadata fields first (if present)
70            if let Some(ref metadata) = ir.header.metadata {
71                for (key, value) in metadata {
72                    // Convert ∅ symbol back to JSON null
73                    let json_value = if value == "∅" {
74                        Value::Null
75                    } else if value.starts_with('[') && value.ends_with(']') {
76                        // Try to parse as JSON array (for inline primitive arrays in metadata)
77                        serde_json::from_str(value).unwrap_or_else(|_| json!(value))
78                    } else {
79                        // Try to parse as number, bool, or keep as string
80                        if let Ok(num) = value.parse::<i64>() {
81                            json!(num)
82                        } else if let Ok(num) = value.parse::<f64>() {
83                            json!(num)
84                        } else if value == "true" {
85                            json!(true)
86                        } else if value == "false" {
87                            json!(false)
88                        } else {
89                            json!(value)
90                        }
91                    };
92                    obj.insert(key.clone(), json_value);
93                }
94            }
95
96            // Add array data under root key
97            obj.insert(root_key.clone(), result);
98            Value::Object(obj)
99        } else {
100            result
101        };
102
103        // Serialize to JSON string
104        if pretty {
105            serde_json::to_string_pretty(&final_result)
106                .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
107        } else {
108            serde_json::to_string(&final_result)
109                .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
110        }
111    }
112}
113
114/// Convert SchemaValue to JSON Value
115fn schema_value_to_json(value: &SchemaValue) -> Result<Value, SchemaError> {
116    match value {
117        SchemaValue::U64(n) => Ok(json!(*n)),
118        SchemaValue::I64(n) => Ok(json!(*n)),
119        SchemaValue::F64(n) => Ok(json!(*n)),
120        SchemaValue::String(s) => Ok(json!(s)),
121        SchemaValue::Bool(b) => Ok(json!(*b)),
122        SchemaValue::Null => Ok(Value::Null),
123        SchemaValue::Array(arr) => {
124            let mut json_arr = Vec::new();
125            for item in arr {
126                json_arr.push(schema_value_to_json(item)?);
127            }
128            Ok(Value::Array(json_arr))
129        }
130    }
131}
132
133/// Unflatten nested keys back to nested objects
134fn unflatten_object(flat: HashMap<String, Value>) -> Value {
135    // First pass: identify array markers (keep them for nested reconstruction)
136    let mut array_paths = std::collections::HashSet::new();
137    let mut array_markers = Vec::new();
138    for key in flat.keys() {
139        if key.ends_with("⟦⟧") {
140            // This marks an array path
141            let array_path = key.trim_end_matches("⟦⟧");
142            array_paths.insert(array_path.to_string());
143            array_markers.push(key.clone());
144        }
145    }
146
147    // Second pass: group indexed fields by their array path
148    // Sort array paths by length (SHORTEST first) to match outermost arrays first
149    let mut sorted_array_paths: Vec<String> = array_paths.into_iter().collect();
150    sorted_array_paths.sort_by_key(|a| a.len());
151
152    let mut array_elements: HashMap<String, Vec<(usize, String, Value)>> = HashMap::new();
153    let mut non_array_fields = HashMap::new();
154
155    for (key, value) in flat {
156        // Skip array markers themselves (but we've saved them)
157        if key.ends_with("⟦⟧") {
158            continue;
159        }
160
161        // Check if this key belongs to an array (shortest path first)
162        let mut belongs_to_array = false;
163        for array_path in &sorted_array_paths {
164            // Special case: empty array path (root-level array)
165            if array_path.is_empty() {
166                // Key should be a numeric index (no prefix)
167                let parts: Vec<&str> = key.split(NEST_SEP).collect();
168                if let Ok(idx) = parts[0].parse::<usize>() {
169                    let remaining = if parts.len() > 1 {
170                        parts[1..].join(&NEST_SEP.to_string())
171                    } else {
172                        String::new()
173                    };
174                    array_elements.entry(array_path.clone()).or_default().push((
175                        idx,
176                        remaining,
177                        value.clone(),
178                    ));
179                    belongs_to_array = true;
180                    break;
181                }
182            } else {
183                // Non-empty array path: match with separator
184                let separator = NEST_SEP.to_string();
185                let expected_prefix = format!("{}{}", array_path, separator);
186                if key.starts_with(&expected_prefix) {
187                    // Extract index and remaining path
188                    let after_array = &key[expected_prefix.len()..];
189                    let parts: Vec<&str> = after_array.split(NEST_SEP).collect();
190                    if let Ok(idx) = parts[0].parse::<usize>() {
191                        // This is an array element
192                        let remaining = if parts.len() > 1 {
193                            parts[1..].join(&NEST_SEP.to_string())
194                        } else {
195                            String::new()
196                        };
197                        array_elements.entry(array_path.clone()).or_default().push((
198                            idx,
199                            remaining,
200                            value.clone(),
201                        ));
202                        belongs_to_array = true;
203                        break;
204                    }
205                }
206            }
207        }
208
209        if !belongs_to_array {
210            non_array_fields.insert(key, value);
211        }
212    }
213
214    // Third pass: reconstruct arrays (longest paths first = innermost arrays first)
215    #[allow(clippy::type_complexity)]
216    let mut array_entries: Vec<(String, Vec<(usize, String, Value)>)> =
217        array_elements.into_iter().collect();
218    array_entries.sort_by(|(a, _), (b, _)| b.len().cmp(&a.len()));
219
220    for (array_path, mut elements) in array_entries {
221        // Sort by index
222        elements.sort_by_key(|(idx, _, _)| *idx);
223
224        // Find max index to determine array length
225        let max_idx = elements.iter().map(|(idx, _, _)| *idx).max().unwrap_or(0);
226        let mut arr = vec![Value::Null; max_idx + 1];
227
228        // Group elements by index
229        let mut by_index: HashMap<usize, Vec<(String, Value)>> = HashMap::new();
230        for (idx, remaining, value) in elements {
231            by_index.entry(idx).or_default().push((remaining, value));
232        }
233
234        // Build array elements
235        for (idx, fields) in by_index {
236            if fields.len() == 1 && fields[0].0.is_empty() {
237                // Simple value
238                arr[idx] = fields[0].1.clone();
239            } else {
240                // Nested object - reconstruct with relevant array markers
241                let mut obj_map = HashMap::new();
242                for (remaining, value) in fields {
243                    // Skip null values when building objects
244                    if !value.is_null() {
245                        obj_map.insert(remaining, value);
246                    }
247                }
248
249                // Include array markers that apply to this nested context
250                let nested_elem_path = if array_path.is_empty() {
251                    idx.to_string()
252                } else {
253                    format!("{}{}{}", array_path, NEST_SEP, idx)
254                };
255                let nested_prefix_with_sep = format!("{}{}", nested_elem_path, NEST_SEP);
256
257                for marker in &array_markers {
258                    if !marker.ends_with("⟦⟧") {
259                        continue;
260                    }
261
262                    // Remove the "⟦⟧" suffix to get the path
263                    let marker_path = marker.trim_end_matches("⟦⟧");
264
265                    // Check if this marker applies to nested context
266                    if marker_path.starts_with(&nested_prefix_with_sep) {
267                        // Nested marker like deep჻0჻field⟦⟧ -> relative: field⟦⟧
268                        let relative_path = &marker_path[nested_prefix_with_sep.len()..];
269                        obj_map.insert(format!("{}⟦⟧", relative_path), Value::Null);
270                    } else if marker_path == nested_elem_path {
271                        // Marker equals nested element path: deep჻0⟦⟧ where we're building deep[0]
272                        // This means the element itself is an array at the root level
273                        // Add empty-path array marker
274                        obj_map.insert("⟦⟧".to_string(), Value::Null);
275                    }
276                }
277
278                arr[idx] = unflatten_object(obj_map);
279            }
280        }
281
282        // Trim trailing nulls and empty objects from array
283        while !arr.is_empty() {
284            let last = &arr[arr.len() - 1];
285            let should_remove = last.is_null()
286                || (last.is_object() && last.as_object().is_some_and(|o| o.is_empty()));
287            if should_remove {
288                arr.pop();
289            } else {
290                break;
291            }
292        }
293
294        non_array_fields.insert(array_path, Value::Array(arr));
295    }
296
297    // Handle empty arrays - markers with no indexed fields
298    // Check which arrays actually got reconstructed
299    let reconstructed_arrays: std::collections::HashSet<String> = non_array_fields
300        .keys()
301        .filter(|k| non_array_fields.get(*k).is_some_and(|v| v.is_array()))
302        .cloned()
303        .collect();
304
305    // For arrays that have markers but weren't reconstructed, create empty arrays
306    for array_path in &sorted_array_paths {
307        if !reconstructed_arrays.contains(array_path) && !non_array_fields.contains_key(array_path)
308        {
309            // Check if this is nested inside another array element
310            // If so, don't insert - it will be handled by recursive unflatten_object calls
311            let is_nested_in_array = sorted_array_paths.iter().any(|parent| {
312                if parent.len() >= array_path.len() {
313                    return false;
314                }
315                let prefix = if parent.is_empty() {
316                    String::new()
317                } else {
318                    format!("{}{}", parent, NEST_SEP)
319                };
320                if !array_path.starts_with(&prefix) {
321                    return false;
322                }
323                let after = if prefix.is_empty() {
324                    array_path.as_str()
325                } else {
326                    &array_path[prefix.len()..]
327                };
328                after
329                    .split(NEST_SEP)
330                    .next()
331                    .unwrap_or("")
332                    .parse::<usize>()
333                    .is_ok()
334            });
335
336            if !is_nested_in_array {
337                non_array_fields.insert(array_path.clone(), Value::Array(vec![]));
338            }
339        }
340    }
341
342    // Fourth pass: build final object
343    // Special case: if there's only one field with empty key, return it directly
344    if non_array_fields.len() == 1 && non_array_fields.contains_key("") {
345        return non_array_fields.into_iter().next().unwrap().1;
346    }
347
348    let mut result = Map::new();
349    for (key, value) in non_array_fields {
350        let parts: Vec<&str> = key.split(NEST_SEP).collect();
351        insert_nested_simple(&mut result, &parts, value);
352    }
353
354    Value::Object(result)
355}
356
357/// Insert a value into nested structure (simple version without array handling)
358fn insert_nested_simple(obj: &mut Map<String, Value>, parts: &[&str], value: Value) {
359    if parts.is_empty() {
360        return;
361    }
362
363    if parts.len() == 1 {
364        obj.insert(parts[0].to_string(), value);
365        return;
366    }
367
368    let key = parts[0];
369    let remaining = &parts[1..];
370
371    let nested = obj
372        .entry(key.to_string())
373        .or_insert_with(|| Value::Object(Map::new()));
374
375    if let Value::Object(nested_obj) = nested {
376        insert_nested_simple(nested_obj, remaining, value);
377    }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an IR from a header and its values, panicking if they are rejected.
    fn build_ir(header: SchemaHeader, values: Vec<SchemaValue>) -> IntermediateRepresentation {
        IntermediateRepresentation::new(header, values).unwrap()
    }

    /// Serialize compactly and parse the text back into a JSON value.
    fn serialize_and_parse(ir: &IntermediateRepresentation) -> Value {
        let text = JsonSerializer::serialize(ir, false).unwrap();
        serde_json::from_str(&text).unwrap()
    }

    /// One row with the fields `id: U64 = 1` and `name: String = "alice"`.
    fn id_name_ir() -> IntermediateRepresentation {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        build_ir(
            header,
            vec![
                SchemaValue::U64(1),
                SchemaValue::String("alice".to_string()),
            ],
        )
    }

    #[test]
    fn test_simple_object() {
        let doc = serialize_and_parse(&id_name_ir());

        assert_eq!(doc["id"], json!(1));
        assert_eq!(doc["name"], json!("alice"));
    }

    #[test]
    fn test_array_of_objects() {
        let header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        let ir = build_ir(header, vec![SchemaValue::U64(1), SchemaValue::U64(2)]);

        let doc = serialize_and_parse(&ir);

        assert!(doc.is_array());
        assert_eq!(doc[0]["id"], json!(1));
        assert_eq!(doc[1]["id"], json!(2));
    }

    #[test]
    fn test_nested_object() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new("user჻profile჻name", FieldType::String)],
        );
        let ir = build_ir(header, vec![SchemaValue::String("alice".to_string())]);

        let doc = serialize_and_parse(&ir);

        assert_eq!(doc["user"]["profile"]["name"], json!("alice"));
    }

    #[test]
    fn test_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        let ir = build_ir(header, vec![SchemaValue::U64(1)]);

        let doc = serialize_and_parse(&ir);

        // A root key forces array output, even for a single row.
        assert!(doc["users"].is_array());
        assert_eq!(doc["users"][0]["id"], json!(1));
    }

    #[test]
    fn test_null_handling() {
        let mut header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("name", FieldType::String),
                FieldDef::new("age", FieldType::U64),
            ],
        );

        // Bit 1 set: the second field (`age`) of the only row is null.
        header.null_bitmap = Some(vec![0b0000_0010u8]);
        header.set_flag(FLAG_HAS_NULLS);

        let ir = build_ir(
            header,
            vec![SchemaValue::String("alice".to_string()), SchemaValue::Null],
        );

        let doc = serialize_and_parse(&ir);

        assert_eq!(doc["name"], json!("alice"));
        assert_eq!(doc["age"], Value::Null);
    }

    #[test]
    fn test_homogeneous_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "scores",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let scores = SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ]);
        let ir = build_ir(header, vec![scores]);

        let doc = serialize_and_parse(&ir);

        assert_eq!(doc["scores"], json!([1, 2, 3]));
    }

    #[test]
    fn test_empty_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "items",
                FieldType::Array(Box::new(FieldType::Null)),
            )],
        );
        let ir = build_ir(header, vec![SchemaValue::Array(vec![])]);

        let doc = serialize_and_parse(&ir);

        assert_eq!(doc["items"], json!([]));
    }

    #[test]
    fn test_deep_nesting() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("a჻b჻c჻d", FieldType::U64)]);
        let ir = build_ir(header, vec![SchemaValue::U64(1)]);

        let doc = serialize_and_parse(&ir);

        assert_eq!(doc["a"]["b"]["c"]["d"], json!(1));
    }

    #[test]
    fn test_unflatten_object() {
        let flat = HashMap::from([("a჻b".to_string(), json!(1))]);

        let nested = unflatten_object(flat);

        assert_eq!(nested["a"]["b"], json!(1));
    }

    #[test]
    fn test_unflatten_nested_array() {
        // Flattened form of deep: [[3,4],[5,6]]: one marker for the outer
        // array, one per inner array, then the four leaf values.
        let flat = HashMap::from([
            ("deep⟦⟧".to_string(), Value::Null),
            ("deep჻0⟦⟧".to_string(), Value::Null),
            ("deep჻1⟦⟧".to_string(), Value::Null),
            ("deep჻0჻0".to_string(), json!(3)),
            ("deep჻0჻1".to_string(), json!(4)),
            ("deep჻1჻0".to_string(), json!(5)),
            ("deep჻1჻1".to_string(), json!(6)),
        ]);

        let nested = unflatten_object(flat);

        assert_eq!(nested["deep"][0][0], json!(3));
        assert_eq!(nested["deep"][0][1], json!(4));
        assert_eq!(nested["deep"][1][0], json!(5));
        assert_eq!(nested["deep"][1][1], json!(6));
    }

    #[test]
    fn test_pretty_output() {
        let ir = id_name_ir();

        // Compact form: single line, exact text.
        let compact = JsonSerializer::serialize(&ir, false).unwrap();
        assert!(!compact.contains('\n'));
        assert_eq!(compact, r#"{"id":1,"name":"alice"}"#);

        // Pretty form: multi-line and indented.
        let pretty = JsonSerializer::serialize(&ir, true).unwrap();
        assert!(pretty.contains('\n'));
        assert!(pretty.contains("  "));

        // Both renderings must describe the same JSON value.
        let from_compact: Value = serde_json::from_str(&compact).unwrap();
        let from_pretty: Value = serde_json::from_str(&pretty).unwrap();
        assert_eq!(from_compact, from_pretty);
    }

    #[test]
    fn test_metadata_with_null() {
        let mut header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        header.metadata = Some(HashMap::from([
            ("note".to_string(), "∅".to_string()),
            ("total".to_string(), "2".to_string()),
        ]));

        let ir = build_ir(header, vec![SchemaValue::U64(1), SchemaValue::U64(2)]);

        let doc = serialize_and_parse(&ir);

        // Metadata is decoded next to the root key.
        assert_eq!(doc["note"], Value::Null);
        assert_eq!(doc["total"], json!(2));

        // Row data sits under the root key.
        assert!(doc["users"].is_array());
        assert_eq!(doc["users"][0]["id"], json!(1));
        assert_eq!(doc["users"][1]["id"], json!(2));
    }
}