base_d/encoders/algorithms/schema/serializers/
json.rs

1use crate::encoders::algorithms::schema::fiche::NEST_SEP;
2use crate::encoders::algorithms::schema::serializers::OutputSerializer;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value, json};
5use std::collections::HashMap;
6
7pub struct JsonSerializer;
8
9impl OutputSerializer for JsonSerializer {
10    type Error = SchemaError;
11
12    fn serialize(ir: &IntermediateRepresentation, pretty: bool) -> Result<String, Self::Error> {
13        if ir.header.row_count == 0 {
14            return Err(SchemaError::InvalidInput(
15                "No rows to serialize".to_string(),
16            ));
17        }
18
19        // Build rows
20        let mut rows = Vec::new();
21        for row_idx in 0..ir.header.row_count {
22            let mut row_map = HashMap::new();
23
24            for (field_idx, field) in ir.header.fields.iter().enumerate() {
25                let value = ir
26                    .get_value(row_idx, field_idx)
27                    .ok_or_else(|| SchemaError::InvalidInput("Missing value".to_string()))?;
28
29                let json_value = if ir.is_null(row_idx, field_idx) {
30                    Value::Null
31                } else {
32                    schema_value_to_json(value)?
33                };
34
35                row_map.insert(field.name.clone(), json_value);
36            }
37
38            rows.push(row_map);
39        }
40
41        // Unflatten each row
42        let mut unflattened_rows = Vec::new();
43        for row_map in rows {
44            let unflattened = unflatten_object(row_map);
45            unflattened_rows.push(unflattened);
46        }
47
48        // Determine output format
49        let result = if ir.header.row_count == 1 && ir.header.metadata.is_none() {
50            // Single row without metadata - output as object
51            unflattened_rows.into_iter().next().unwrap()
52        } else {
53            // Multiple rows OR single row with metadata - output as array
54            Value::Array(unflattened_rows)
55        };
56
57        // Apply root key and metadata if present
58        let final_result = if let Some(root_key) = &ir.header.root_key {
59            let mut obj = Map::new();
60
61            // Add metadata fields first (if present)
62            if let Some(ref metadata) = ir.header.metadata {
63                for (key, value) in metadata {
64                    // Convert ∅ symbol back to JSON null
65                    let json_value = if value == "∅" {
66                        Value::Null
67                    } else {
68                        // Try to parse as number, bool, or keep as string
69                        if let Ok(num) = value.parse::<i64>() {
70                            json!(num)
71                        } else if let Ok(num) = value.parse::<f64>() {
72                            json!(num)
73                        } else if value == "true" {
74                            json!(true)
75                        } else if value == "false" {
76                            json!(false)
77                        } else {
78                            json!(value)
79                        }
80                    };
81                    obj.insert(key.clone(), json_value);
82                }
83            }
84
85            // Add array data under root key
86            obj.insert(root_key.clone(), result);
87            Value::Object(obj)
88        } else {
89            result
90        };
91
92        // Serialize to JSON string
93        if pretty {
94            serde_json::to_string_pretty(&final_result)
95                .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
96        } else {
97            serde_json::to_string(&final_result)
98                .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
99        }
100    }
101}
102
103/// Convert SchemaValue to JSON Value
104fn schema_value_to_json(value: &SchemaValue) -> Result<Value, SchemaError> {
105    match value {
106        SchemaValue::U64(n) => Ok(json!(*n)),
107        SchemaValue::I64(n) => Ok(json!(*n)),
108        SchemaValue::F64(n) => Ok(json!(*n)),
109        SchemaValue::String(s) => Ok(json!(s)),
110        SchemaValue::Bool(b) => Ok(json!(*b)),
111        SchemaValue::Null => Ok(Value::Null),
112        SchemaValue::Array(arr) => {
113            let mut json_arr = Vec::new();
114            for item in arr {
115                json_arr.push(schema_value_to_json(item)?);
116            }
117            Ok(Value::Array(json_arr))
118        }
119    }
120}
121
122/// Unflatten nested keys back to nested objects
123fn unflatten_object(flat: HashMap<String, Value>) -> Value {
124    // First pass: identify array markers (keep them for nested reconstruction)
125    let mut array_paths = std::collections::HashSet::new();
126    let mut array_markers = Vec::new();
127    for key in flat.keys() {
128        if key.ends_with("⟦⟧") {
129            // This marks an array path
130            let array_path = key.trim_end_matches("⟦⟧");
131            array_paths.insert(array_path.to_string());
132            array_markers.push(key.clone());
133        }
134    }
135
136    // Second pass: group indexed fields by their array path
137    // Sort array paths by length (SHORTEST first) to match outermost arrays first
138    let mut sorted_array_paths: Vec<String> = array_paths.into_iter().collect();
139    sorted_array_paths.sort_by_key(|a| a.len());
140
141    let mut array_elements: HashMap<String, Vec<(usize, String, Value)>> = HashMap::new();
142    let mut non_array_fields = HashMap::new();
143
144    for (key, value) in flat {
145        // Skip array markers themselves (but we've saved them)
146        if key.ends_with("⟦⟧") {
147            continue;
148        }
149
150        // Check if this key belongs to an array (shortest path first)
151        let mut belongs_to_array = false;
152        for array_path in &sorted_array_paths {
153            // Special case: empty array path (root-level array)
154            if array_path.is_empty() {
155                // Key should be a numeric index (no prefix)
156                let parts: Vec<&str> = key.split(NEST_SEP).collect();
157                if let Ok(idx) = parts[0].parse::<usize>() {
158                    let remaining = if parts.len() > 1 {
159                        parts[1..].join(&NEST_SEP.to_string())
160                    } else {
161                        String::new()
162                    };
163                    array_elements.entry(array_path.clone()).or_default().push((
164                        idx,
165                        remaining,
166                        value.clone(),
167                    ));
168                    belongs_to_array = true;
169                    break;
170                }
171            } else {
172                // Non-empty array path: match with separator
173                let separator = NEST_SEP.to_string();
174                let expected_prefix = format!("{}{}", array_path, separator);
175                if key.starts_with(&expected_prefix) {
176                    // Extract index and remaining path
177                    let after_array = &key[expected_prefix.len()..];
178                    let parts: Vec<&str> = after_array.split(NEST_SEP).collect();
179                    if let Ok(idx) = parts[0].parse::<usize>() {
180                        // This is an array element
181                        let remaining = if parts.len() > 1 {
182                            parts[1..].join(&NEST_SEP.to_string())
183                        } else {
184                            String::new()
185                        };
186                        array_elements.entry(array_path.clone()).or_default().push((
187                            idx,
188                            remaining,
189                            value.clone(),
190                        ));
191                        belongs_to_array = true;
192                        break;
193                    }
194                }
195            }
196        }
197
198        if !belongs_to_array {
199            non_array_fields.insert(key, value);
200        }
201    }
202
203    // Third pass: reconstruct arrays (longest paths first = innermost arrays first)
204    #[allow(clippy::type_complexity)]
205    let mut array_entries: Vec<(String, Vec<(usize, String, Value)>)> =
206        array_elements.into_iter().collect();
207    array_entries.sort_by(|(a, _), (b, _)| b.len().cmp(&a.len()));
208
209    for (array_path, mut elements) in array_entries {
210        // Sort by index
211        elements.sort_by_key(|(idx, _, _)| *idx);
212
213        // Find max index to determine array length
214        let max_idx = elements.iter().map(|(idx, _, _)| *idx).max().unwrap_or(0);
215        let mut arr = vec![Value::Null; max_idx + 1];
216
217        // Group elements by index
218        let mut by_index: HashMap<usize, Vec<(String, Value)>> = HashMap::new();
219        for (idx, remaining, value) in elements {
220            by_index.entry(idx).or_default().push((remaining, value));
221        }
222
223        // Build array elements
224        for (idx, fields) in by_index {
225            if fields.len() == 1 && fields[0].0.is_empty() {
226                // Simple value
227                arr[idx] = fields[0].1.clone();
228            } else {
229                // Nested object - reconstruct with relevant array markers
230                let mut obj_map = HashMap::new();
231                for (remaining, value) in fields {
232                    // Skip null values when building objects
233                    if !value.is_null() {
234                        obj_map.insert(remaining, value);
235                    }
236                }
237
238                // Include array markers that apply to this nested context
239                let nested_elem_path = if array_path.is_empty() {
240                    idx.to_string()
241                } else {
242                    format!("{}{}{}", array_path, NEST_SEP, idx)
243                };
244                let nested_prefix_with_sep = format!("{}{}", nested_elem_path, NEST_SEP);
245
246                for marker in &array_markers {
247                    if !marker.ends_with("⟦⟧") {
248                        continue;
249                    }
250
251                    // Remove the "⟦⟧" suffix to get the path
252                    let marker_path = marker.trim_end_matches("⟦⟧");
253
254                    // Check if this marker applies to nested context
255                    if marker_path.starts_with(&nested_prefix_with_sep) {
256                        // Nested marker like deep჻0჻field⟦⟧ -> relative: field⟦⟧
257                        let relative_path = &marker_path[nested_prefix_with_sep.len()..];
258                        obj_map.insert(format!("{}⟦⟧", relative_path), Value::Null);
259                    } else if marker_path == nested_elem_path {
260                        // Marker equals nested element path: deep჻0⟦⟧ where we're building deep[0]
261                        // This means the element itself is an array at the root level
262                        // Add empty-path array marker
263                        obj_map.insert("⟦⟧".to_string(), Value::Null);
264                    }
265                }
266
267                arr[idx] = unflatten_object(obj_map);
268            }
269        }
270
271        // Trim trailing nulls and empty objects from array
272        while !arr.is_empty() {
273            let last = &arr[arr.len() - 1];
274            let should_remove = last.is_null()
275                || (last.is_object() && last.as_object().is_some_and(|o| o.is_empty()));
276            if should_remove {
277                arr.pop();
278            } else {
279                break;
280            }
281        }
282
283        non_array_fields.insert(array_path, Value::Array(arr));
284    }
285
286    // Handle empty arrays - markers with no indexed fields
287    // Check which arrays actually got reconstructed
288    let reconstructed_arrays: std::collections::HashSet<String> = non_array_fields
289        .keys()
290        .filter(|k| non_array_fields.get(*k).is_some_and(|v| v.is_array()))
291        .cloned()
292        .collect();
293
294    // For arrays that have markers but weren't reconstructed, create empty arrays
295    for array_path in &sorted_array_paths {
296        if !reconstructed_arrays.contains(array_path) && !non_array_fields.contains_key(array_path)
297        {
298            // Check if this is nested inside another array element
299            // If so, don't insert - it will be handled by recursive unflatten_object calls
300            let is_nested_in_array = sorted_array_paths.iter().any(|parent| {
301                if parent.len() >= array_path.len() {
302                    return false;
303                }
304                let prefix = if parent.is_empty() {
305                    String::new()
306                } else {
307                    format!("{}{}", parent, NEST_SEP)
308                };
309                if !array_path.starts_with(&prefix) {
310                    return false;
311                }
312                let after = if prefix.is_empty() {
313                    array_path.as_str()
314                } else {
315                    &array_path[prefix.len()..]
316                };
317                after
318                    .split(NEST_SEP)
319                    .next()
320                    .unwrap_or("")
321                    .parse::<usize>()
322                    .is_ok()
323            });
324
325            if !is_nested_in_array {
326                non_array_fields.insert(array_path.clone(), Value::Array(vec![]));
327            }
328        }
329    }
330
331    // Fourth pass: build final object
332    // Special case: if there's only one field with empty key, return it directly
333    if non_array_fields.len() == 1 && non_array_fields.contains_key("") {
334        return non_array_fields.into_iter().next().unwrap().1;
335    }
336
337    let mut result = Map::new();
338    for (key, value) in non_array_fields {
339        let parts: Vec<&str> = key.split(NEST_SEP).collect();
340        insert_nested_simple(&mut result, &parts, value);
341    }
342
343    Value::Object(result)
344}
345
346/// Insert a value into nested structure (simple version without array handling)
347fn insert_nested_simple(obj: &mut Map<String, Value>, parts: &[&str], value: Value) {
348    if parts.is_empty() {
349        return;
350    }
351
352    if parts.len() == 1 {
353        obj.insert(parts[0].to_string(), value);
354        return;
355    }
356
357    let key = parts[0];
358    let remaining = &parts[1..];
359
360    let nested = obj
361        .entry(key.to_string())
362        .or_insert_with(|| Value::Object(Map::new()));
363
364    if let Value::Object(nested_obj) = nested {
365        insert_nested_simple(nested_obj, remaining, value);
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Build an IR from `header` and `values`, serialize it compactly and
    /// parse the output back into a JSON value.
    fn roundtrip(header: SchemaHeader, values: Vec<SchemaValue>) -> Value {
        let ir = IntermediateRepresentation::new(header, values).unwrap();
        let text = JsonSerializer::serialize(&ir, false).unwrap();
        serde_json::from_str(&text).unwrap()
    }

    /// Fields and values shared by the flat two-column tests.
    fn id_name_fixture() -> (Vec<FieldDef>, Vec<SchemaValue>) {
        (
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
            vec![
                SchemaValue::U64(1),
                SchemaValue::String("alice".to_string()),
            ],
        )
    }

    #[test]
    fn test_simple_object() {
        let (fields, values) = id_name_fixture();
        let parsed = roundtrip(SchemaHeader::new(1, fields), values);

        assert_eq!(parsed["id"], json!(1));
        assert_eq!(parsed["name"], json!("alice"));
    }

    #[test]
    fn test_array_of_objects() {
        let header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        let parsed = roundtrip(header, vec![SchemaValue::U64(1), SchemaValue::U64(2)]);

        assert!(parsed.is_array());
        assert_eq!(parsed[0]["id"], json!(1));
        assert_eq!(parsed[1]["id"], json!(2));
    }

    #[test]
    fn test_nested_object() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new("user჻profile჻name", FieldType::String)],
        );
        let parsed = roundtrip(header, vec![SchemaValue::String("alice".to_string())]);

        assert_eq!(parsed["user"]["profile"]["name"], json!("alice"));
    }

    #[test]
    fn test_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);

        let parsed = roundtrip(header, vec![SchemaValue::U64(1)]);

        assert!(parsed["users"].is_object());
        assert_eq!(parsed["users"]["id"], json!(1));
    }

    #[test]
    fn test_null_handling() {
        let mut header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("name", FieldType::String),
                FieldDef::new("age", FieldType::U64),
            ],
        );

        // Bit 1 of the first bitmap byte marks the second field (age) as null.
        header.null_bitmap = Some(vec![0b0000_0010u8]);
        header.set_flag(FLAG_HAS_NULLS);

        let parsed = roundtrip(
            header,
            vec![SchemaValue::String("alice".to_string()), SchemaValue::Null],
        );

        assert_eq!(parsed["name"], json!("alice"));
        assert_eq!(parsed["age"], Value::Null);
    }

    #[test]
    fn test_homogeneous_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "scores",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let scores = SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ]);

        let parsed = roundtrip(header, vec![scores]);

        assert_eq!(parsed["scores"], json!([1, 2, 3]));
    }

    #[test]
    fn test_empty_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "items",
                FieldType::Array(Box::new(FieldType::Null)),
            )],
        );

        let parsed = roundtrip(header, vec![SchemaValue::Array(vec![])]);

        assert_eq!(parsed["items"], json!([]));
    }

    #[test]
    fn test_deep_nesting() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("a჻b჻c჻d", FieldType::U64)]);
        let parsed = roundtrip(header, vec![SchemaValue::U64(1)]);

        assert_eq!(parsed["a"]["b"]["c"]["d"], json!(1));
    }

    #[test]
    fn test_unflatten_object() {
        let flat: HashMap<String, Value> =
            std::iter::once(("a჻b".to_string(), json!(1))).collect();

        let unflattened = unflatten_object(flat);

        assert_eq!(unflattened["a"]["b"], json!(1));
    }

    #[test]
    fn test_pretty_output() {
        let (fields, values) = id_name_fixture();
        let ir = IntermediateRepresentation::new(SchemaHeader::new(1, fields), values).unwrap();

        // Compact output: one line, keys in sorted order.
        let compact = JsonSerializer::serialize(&ir, false).unwrap();
        assert!(!compact.contains('\n'));
        assert_eq!(compact, r#"{"id":1,"name":"alice"}"#);

        // Pretty output: multi-line and indented.
        let pretty = JsonSerializer::serialize(&ir, true).unwrap();
        assert!(pretty.contains('\n'));
        assert!(pretty.contains("  "));

        // Both forms must describe the same JSON value.
        let compact_value: Value = serde_json::from_str(&compact).unwrap();
        let pretty_value: Value = serde_json::from_str(&pretty).unwrap();
        assert_eq!(compact_value, pretty_value);
    }

    #[test]
    fn test_metadata_with_null() {
        let mut header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        header.metadata = Some(HashMap::from([
            ("note".to_string(), "∅".to_string()),
            ("total".to_string(), "2".to_string()),
        ]));

        let parsed = roundtrip(header, vec![SchemaValue::U64(1), SchemaValue::U64(2)]);

        // Metadata is reconstructed next to the root key.
        assert_eq!(parsed["note"], Value::Null);
        assert_eq!(parsed["total"], json!(2));

        // Row data lives under the root key as an array.
        assert!(parsed["users"].is_array());
        assert_eq!(parsed["users"][0]["id"], json!(1));
        assert_eq!(parsed["users"][1]["id"], json!(2));
    }
}