base_d/encoders/algorithms/schema/serializers/
json.rs1use crate::encoders::algorithms::schema::fiche::NEST_SEP;
2use crate::encoders::algorithms::schema::serializers::OutputSerializer;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value, json};
5use std::collections::HashMap;
6
/// Stateless serializer that renders an `IntermediateRepresentation` as JSON text.
///
/// The type carries no data; all behavior lives in its `OutputSerializer`
/// implementation, whose `serialize` function is called on the type itself.
#[derive(Debug, Clone, Copy, Default)]
pub struct JsonSerializer;
8
9impl OutputSerializer for JsonSerializer {
10 type Error = SchemaError;
11
12 fn serialize(ir: &IntermediateRepresentation, pretty: bool) -> Result<String, Self::Error> {
13 if ir.header.row_count == 0 {
14 return Err(SchemaError::InvalidInput(
15 "No rows to serialize".to_string(),
16 ));
17 }
18
19 let mut rows = Vec::new();
21 for row_idx in 0..ir.header.row_count {
22 let mut row_map = HashMap::new();
23
24 for (field_idx, field) in ir.header.fields.iter().enumerate() {
25 let value = ir
26 .get_value(row_idx, field_idx)
27 .ok_or_else(|| SchemaError::InvalidInput("Missing value".to_string()))?;
28
29 let json_value = if ir.is_null(row_idx, field_idx) {
30 Value::Null
31 } else {
32 schema_value_to_json(value)?
33 };
34
35 row_map.insert(field.name.clone(), json_value);
36 }
37
38 rows.push(row_map);
39 }
40
41 let mut unflattened_rows = Vec::new();
43 for row_map in rows {
44 let unflattened = unflatten_object(row_map);
45 unflattened_rows.push(unflattened);
46 }
47
48 let result = if ir.header.row_count == 1 && ir.header.metadata.is_none() {
50 unflattened_rows.into_iter().next().unwrap()
52 } else {
53 Value::Array(unflattened_rows)
55 };
56
57 let final_result = if let Some(root_key) = &ir.header.root_key {
59 let mut obj = Map::new();
60
61 if let Some(ref metadata) = ir.header.metadata {
63 for (key, value) in metadata {
64 let json_value = if value == "∅" {
66 Value::Null
67 } else {
68 if let Ok(num) = value.parse::<i64>() {
70 json!(num)
71 } else if let Ok(num) = value.parse::<f64>() {
72 json!(num)
73 } else if value == "true" {
74 json!(true)
75 } else if value == "false" {
76 json!(false)
77 } else {
78 json!(value)
79 }
80 };
81 obj.insert(key.clone(), json_value);
82 }
83 }
84
85 obj.insert(root_key.clone(), result);
87 Value::Object(obj)
88 } else {
89 result
90 };
91
92 if pretty {
94 serde_json::to_string_pretty(&final_result)
95 .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
96 } else {
97 serde_json::to_string(&final_result)
98 .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
99 }
100 }
101}
102
103fn schema_value_to_json(value: &SchemaValue) -> Result<Value, SchemaError> {
105 match value {
106 SchemaValue::U64(n) => Ok(json!(*n)),
107 SchemaValue::I64(n) => Ok(json!(*n)),
108 SchemaValue::F64(n) => Ok(json!(*n)),
109 SchemaValue::String(s) => Ok(json!(s)),
110 SchemaValue::Bool(b) => Ok(json!(*b)),
111 SchemaValue::Null => Ok(Value::Null),
112 SchemaValue::Array(arr) => {
113 let mut json_arr = Vec::new();
114 for item in arr {
115 json_arr.push(schema_value_to_json(item)?);
116 }
117 Ok(Value::Array(json_arr))
118 }
119 }
120}
121
122fn unflatten_object(flat: HashMap<String, Value>) -> Value {
124 let mut array_paths = std::collections::HashSet::new();
126 let mut array_markers = Vec::new();
127 for key in flat.keys() {
128 if key.ends_with("⟦⟧") {
129 let array_path = key.trim_end_matches("⟦⟧");
131 array_paths.insert(array_path.to_string());
132 array_markers.push(key.clone());
133 }
134 }
135
136 let mut sorted_array_paths: Vec<String> = array_paths.into_iter().collect();
139 sorted_array_paths.sort_by_key(|a| a.len());
140
141 let mut array_elements: HashMap<String, Vec<(usize, String, Value)>> = HashMap::new();
142 let mut non_array_fields = HashMap::new();
143
144 for (key, value) in flat {
145 if key.ends_with("⟦⟧") {
147 continue;
148 }
149
150 let mut belongs_to_array = false;
152 for array_path in &sorted_array_paths {
153 if array_path.is_empty() {
155 let parts: Vec<&str> = key.split(NEST_SEP).collect();
157 if let Ok(idx) = parts[0].parse::<usize>() {
158 let remaining = if parts.len() > 1 {
159 parts[1..].join(&NEST_SEP.to_string())
160 } else {
161 String::new()
162 };
163 array_elements.entry(array_path.clone()).or_default().push((
164 idx,
165 remaining,
166 value.clone(),
167 ));
168 belongs_to_array = true;
169 break;
170 }
171 } else {
172 let separator = NEST_SEP.to_string();
174 let expected_prefix = format!("{}{}", array_path, separator);
175 if key.starts_with(&expected_prefix) {
176 let after_array = &key[expected_prefix.len()..];
178 let parts: Vec<&str> = after_array.split(NEST_SEP).collect();
179 if let Ok(idx) = parts[0].parse::<usize>() {
180 let remaining = if parts.len() > 1 {
182 parts[1..].join(&NEST_SEP.to_string())
183 } else {
184 String::new()
185 };
186 array_elements.entry(array_path.clone()).or_default().push((
187 idx,
188 remaining,
189 value.clone(),
190 ));
191 belongs_to_array = true;
192 break;
193 }
194 }
195 }
196 }
197
198 if !belongs_to_array {
199 non_array_fields.insert(key, value);
200 }
201 }
202
203 #[allow(clippy::type_complexity)]
205 let mut array_entries: Vec<(String, Vec<(usize, String, Value)>)> =
206 array_elements.into_iter().collect();
207 array_entries.sort_by(|(a, _), (b, _)| b.len().cmp(&a.len()));
208
209 for (array_path, mut elements) in array_entries {
210 elements.sort_by_key(|(idx, _, _)| *idx);
212
213 let max_idx = elements.iter().map(|(idx, _, _)| *idx).max().unwrap_or(0);
215 let mut arr = vec![Value::Null; max_idx + 1];
216
217 let mut by_index: HashMap<usize, Vec<(String, Value)>> = HashMap::new();
219 for (idx, remaining, value) in elements {
220 by_index.entry(idx).or_default().push((remaining, value));
221 }
222
223 for (idx, fields) in by_index {
225 if fields.len() == 1 && fields[0].0.is_empty() {
226 arr[idx] = fields[0].1.clone();
228 } else {
229 let mut obj_map = HashMap::new();
231 for (remaining, value) in fields {
232 if !value.is_null() {
234 obj_map.insert(remaining, value);
235 }
236 }
237
238 let nested_elem_path = if array_path.is_empty() {
240 idx.to_string()
241 } else {
242 format!("{}{}{}", array_path, NEST_SEP, idx)
243 };
244 let nested_prefix_with_sep = format!("{}{}", nested_elem_path, NEST_SEP);
245
246 for marker in &array_markers {
247 if !marker.ends_with("⟦⟧") {
248 continue;
249 }
250
251 let marker_path = marker.trim_end_matches("⟦⟧");
253
254 if marker_path.starts_with(&nested_prefix_with_sep) {
256 let relative_path = &marker_path[nested_prefix_with_sep.len()..];
258 obj_map.insert(format!("{}⟦⟧", relative_path), Value::Null);
259 } else if marker_path == nested_elem_path {
260 obj_map.insert("⟦⟧".to_string(), Value::Null);
264 }
265 }
266
267 arr[idx] = unflatten_object(obj_map);
268 }
269 }
270
271 while !arr.is_empty() {
273 let last = &arr[arr.len() - 1];
274 let should_remove = last.is_null()
275 || (last.is_object() && last.as_object().is_some_and(|o| o.is_empty()));
276 if should_remove {
277 arr.pop();
278 } else {
279 break;
280 }
281 }
282
283 non_array_fields.insert(array_path, Value::Array(arr));
284 }
285
286 let reconstructed_arrays: std::collections::HashSet<String> = non_array_fields
289 .keys()
290 .filter(|k| non_array_fields.get(*k).is_some_and(|v| v.is_array()))
291 .cloned()
292 .collect();
293
294 for array_path in &sorted_array_paths {
296 if !reconstructed_arrays.contains(array_path) && !non_array_fields.contains_key(array_path)
297 {
298 let is_nested_in_array = sorted_array_paths.iter().any(|parent| {
301 if parent.len() >= array_path.len() {
302 return false;
303 }
304 let prefix = if parent.is_empty() {
305 String::new()
306 } else {
307 format!("{}{}", parent, NEST_SEP)
308 };
309 if !array_path.starts_with(&prefix) {
310 return false;
311 }
312 let after = if prefix.is_empty() {
313 array_path.as_str()
314 } else {
315 &array_path[prefix.len()..]
316 };
317 after
318 .split(NEST_SEP)
319 .next()
320 .unwrap_or("")
321 .parse::<usize>()
322 .is_ok()
323 });
324
325 if !is_nested_in_array {
326 non_array_fields.insert(array_path.clone(), Value::Array(vec![]));
327 }
328 }
329 }
330
331 if non_array_fields.len() == 1 && non_array_fields.contains_key("") {
334 return non_array_fields.into_iter().next().unwrap().1;
335 }
336
337 let mut result = Map::new();
338 for (key, value) in non_array_fields {
339 let parts: Vec<&str> = key.split(NEST_SEP).collect();
340 insert_nested_simple(&mut result, &parts, value);
341 }
342
343 Value::Object(result)
344}
345
346fn insert_nested_simple(obj: &mut Map<String, Value>, parts: &[&str], value: Value) {
348 if parts.is_empty() {
349 return;
350 }
351
352 if parts.len() == 1 {
353 obj.insert(parts[0].to_string(), value);
354 return;
355 }
356
357 let key = parts[0];
358 let remaining = &parts[1..];
359
360 let nested = obj
361 .entry(key.to_string())
362 .or_insert_with(|| Value::Object(Map::new()));
363
364 if let Value::Object(nested_obj) = nested {
365 insert_nested_simple(nested_obj, remaining, value);
366 }
367}
368
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `ir` in compact mode and parses the text back into a `Value`.
    fn serialize_and_parse(ir: &IntermediateRepresentation) -> Value {
        let output = JsonSerializer::serialize(ir, false).unwrap();
        serde_json::from_str(&output).unwrap()
    }

    /// A single row without metadata is emitted as a bare object.
    #[test]
    fn test_simple_object() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let ir = IntermediateRepresentation::new(
            header,
            vec![
                SchemaValue::U64(1),
                SchemaValue::String("alice".to_string()),
            ],
        )
        .unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["id"], json!(1));
        assert_eq!(parsed["name"], json!("alice"));
    }

    /// Several rows are emitted as an array of objects.
    #[test]
    fn test_array_of_objects() {
        let header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        let ir = IntermediateRepresentation::new(
            header,
            vec![SchemaValue::U64(1), SchemaValue::U64(2)],
        )
        .unwrap();

        let parsed = serialize_and_parse(&ir);

        assert!(parsed.is_array());
        assert_eq!(parsed[0]["id"], json!(1));
        assert_eq!(parsed[1]["id"], json!(2));
    }

    /// Separator-delimited field names are expanded into nested objects.
    #[test]
    fn test_nested_object() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new("user჻profile჻name", FieldType::String)],
        );
        let ir = IntermediateRepresentation::new(
            header,
            vec![SchemaValue::String("alice".to_string())],
        )
        .unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["user"]["profile"]["name"], json!("alice"));
    }

    /// A root key wraps the row data in an outer object.
    #[test]
    fn test_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);

        let ir = IntermediateRepresentation::new(header, vec![SchemaValue::U64(1)]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert!(parsed["users"].is_object());
        assert_eq!(parsed["users"]["id"], json!(1));
    }

    /// A cell flagged in the null bitmap is emitted as JSON `null`.
    #[test]
    fn test_null_handling() {
        let mut header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("name", FieldType::String),
                FieldDef::new("age", FieldType::U64),
            ],
        );

        // Bit 1 of the first byte is set; presumably this addresses the second
        // field ("age") of the only row - confirm against the bitmap layout.
        header.null_bitmap = Some(vec![1u8 << 1]);
        header.set_flag(FLAG_HAS_NULLS);

        let ir = IntermediateRepresentation::new(
            header,
            vec![SchemaValue::String("alice".to_string()), SchemaValue::Null],
        )
        .unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["name"], json!("alice"));
        assert_eq!(parsed["age"], Value::Null);
    }

    /// A typed array value is emitted as a JSON array of its elements.
    #[test]
    fn test_homogeneous_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "scores",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let scores = SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ]);
        let ir = IntermediateRepresentation::new(header, vec![scores]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["scores"], json!([1, 2, 3]));
    }

    /// An empty array value is emitted as `[]`.
    #[test]
    fn test_empty_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "items",
                FieldType::Array(Box::new(FieldType::Null)),
            )],
        );
        let ir =
            IntermediateRepresentation::new(header, vec![SchemaValue::Array(vec![])]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["items"], json!([]));
    }

    /// Nesting works across more than two levels.
    #[test]
    fn test_deep_nesting() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("a჻b჻c჻d", FieldType::U64)]);
        let ir = IntermediateRepresentation::new(header, vec![SchemaValue::U64(1)]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["a"]["b"]["c"]["d"], json!(1));
    }

    /// `unflatten_object` expands a separator-delimited key on its own.
    #[test]
    fn test_unflatten_object() {
        let flat = HashMap::from([("a჻b".to_string(), json!(1))]);

        let unflattened = unflatten_object(flat);

        assert_eq!(unflattened["a"]["b"], json!(1));
    }

    /// Compact and pretty output differ in layout only, not in content.
    #[test]
    fn test_pretty_output() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let ir = IntermediateRepresentation::new(
            header,
            vec![
                SchemaValue::U64(1),
                SchemaValue::String("alice".to_string()),
            ],
        )
        .unwrap();

        let compact = JsonSerializer::serialize(&ir, false).unwrap();
        assert!(!compact.contains('\n'));
        assert_eq!(compact, r#"{"id":1,"name":"alice"}"#);

        let pretty = JsonSerializer::serialize(&ir, true).unwrap();
        assert!(pretty.contains('\n'));
        assert!(pretty.contains(" "));

        let compact_value: Value = serde_json::from_str(&compact).unwrap();
        let pretty_value: Value = serde_json::from_str(&pretty).unwrap();
        assert_eq!(compact_value, pretty_value);
    }

    /// Metadata entries sit beside the root key; `∅` becomes `null` and
    /// numeric text becomes a number.
    #[test]
    fn test_metadata_with_null() {
        let mut header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        header.metadata = Some(HashMap::from([
            ("note".to_string(), "∅".to_string()),
            ("total".to_string(), "2".to_string()),
        ]));

        let ir = IntermediateRepresentation::new(
            header,
            vec![SchemaValue::U64(1), SchemaValue::U64(2)],
        )
        .unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["note"], Value::Null);
        assert_eq!(parsed["total"], json!(2));

        assert!(parsed["users"].is_array());
        assert_eq!(parsed["users"][0]["id"], json!(1));
        assert_eq!(parsed["users"][1]["id"], json!(2));
    }
}