1use crate::encoders::algorithms::schema::serializers::OutputSerializer;
2use crate::encoders::algorithms::schema::stele::NEST_SEP;
3use crate::encoders::algorithms::schema::types::*;
4use serde_json::{Map, Value, json};
5use std::collections::HashMap;
6
7pub struct JsonSerializer;
8
9impl OutputSerializer for JsonSerializer {
10 type Error = SchemaError;
11
12 fn serialize(ir: &IntermediateRepresentation, pretty: bool) -> Result<String, Self::Error> {
13 if ir.header.row_count == 0 {
14 return Err(SchemaError::InvalidInput(
15 "No rows to serialize".to_string(),
16 ));
17 }
18
19 let mut rows = Vec::new();
21 for row_idx in 0..ir.header.row_count {
22 let mut row_map = HashMap::new();
23
24 for (field_idx, field) in ir.header.fields.iter().enumerate() {
25 let value = ir
26 .get_value(row_idx, field_idx)
27 .ok_or_else(|| SchemaError::InvalidInput("Missing value".to_string()))?;
28
29 let json_value = if ir.is_null(row_idx, field_idx) {
30 if matches!(field.field_type, FieldType::Array(_)) {
32 Value::Array(vec![])
33 } else {
34 Value::Null
35 }
36 } else {
37 schema_value_to_json(value)?
38 };
39
40 row_map.insert(field.name.clone(), json_value);
41 }
42
43 rows.push(row_map);
44 }
45
46 let mut unflattened_rows = Vec::new();
48 for row_map in rows {
49 let unflattened = unflatten_object(row_map);
50 unflattened_rows.push(unflattened);
51 }
52
53 let result = if ir.header.row_count == 1
55 && ir.header.metadata.is_none()
56 && ir.header.root_key.is_none()
57 {
58 unflattened_rows.into_iter().next().unwrap()
60 } else {
61 Value::Array(unflattened_rows)
63 };
64
65 let final_result = if let Some(root_key) = &ir.header.root_key {
67 let mut obj = Map::new();
68
69 if let Some(ref metadata) = ir.header.metadata {
71 for (key, value) in metadata {
72 let json_value = if value == "∅" {
74 Value::Null
75 } else if value.starts_with('[') && value.ends_with(']') {
76 serde_json::from_str(value).unwrap_or_else(|_| json!(value))
78 } else {
79 if let Ok(num) = value.parse::<i64>() {
81 json!(num)
82 } else if let Ok(num) = value.parse::<f64>() {
83 json!(num)
84 } else if value == "true" {
85 json!(true)
86 } else if value == "false" {
87 json!(false)
88 } else {
89 json!(value)
90 }
91 };
92 obj.insert(key.clone(), json_value);
93 }
94 }
95
96 obj.insert(root_key.clone(), result);
98 Value::Object(obj)
99 } else {
100 result
101 };
102
103 if pretty {
105 serde_json::to_string_pretty(&final_result)
106 .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
107 } else {
108 serde_json::to_string(&final_result)
109 .map_err(|e| SchemaError::InvalidInput(format!("JSON serialization failed: {}", e)))
110 }
111 }
112}
113
114fn schema_value_to_json(value: &SchemaValue) -> Result<Value, SchemaError> {
116 match value {
117 SchemaValue::U64(n) => Ok(json!(*n)),
118 SchemaValue::I64(n) => Ok(json!(*n)),
119 SchemaValue::F64(n) => Ok(json!(*n)),
120 SchemaValue::String(s) => Ok(json!(s)),
121 SchemaValue::Bool(b) => Ok(json!(*b)),
122 SchemaValue::Null => Ok(Value::Null),
123 SchemaValue::Array(arr) => {
124 let mut json_arr = Vec::new();
125 for item in arr {
126 json_arr.push(schema_value_to_json(item)?);
127 }
128 Ok(Value::Array(json_arr))
129 }
130 }
131}
132
133fn unflatten_object(flat: HashMap<String, Value>) -> Value {
135 let mut array_paths = std::collections::HashSet::new();
137 let mut array_markers = Vec::new();
138 for key in flat.keys() {
139 if key.ends_with("⟦⟧") {
140 let array_path = key.trim_end_matches("⟦⟧");
142 array_paths.insert(array_path.to_string());
143 array_markers.push(key.clone());
144 }
145 }
146
147 let mut sorted_array_paths: Vec<String> = array_paths.into_iter().collect();
150 sorted_array_paths.sort_by_key(|a| a.len());
151
152 let mut array_elements: HashMap<String, Vec<(usize, String, Value)>> = HashMap::new();
153 let mut non_array_fields = HashMap::new();
154
155 for (key, value) in flat {
156 if key.ends_with("⟦⟧") {
158 continue;
159 }
160
161 let mut belongs_to_array = false;
163 for array_path in &sorted_array_paths {
164 if array_path.is_empty() {
166 let parts: Vec<&str> = key.split(NEST_SEP).collect();
168 if let Ok(idx) = parts[0].parse::<usize>() {
169 let remaining = if parts.len() > 1 {
170 parts[1..].join(&NEST_SEP.to_string())
171 } else {
172 String::new()
173 };
174 array_elements.entry(array_path.clone()).or_default().push((
175 idx,
176 remaining,
177 value.clone(),
178 ));
179 belongs_to_array = true;
180 break;
181 }
182 } else {
183 let separator = NEST_SEP.to_string();
185 let expected_prefix = format!("{}{}", array_path, separator);
186 if key.starts_with(&expected_prefix) {
187 let after_array = &key[expected_prefix.len()..];
189 let parts: Vec<&str> = after_array.split(NEST_SEP).collect();
190 if let Ok(idx) = parts[0].parse::<usize>() {
191 let remaining = if parts.len() > 1 {
193 parts[1..].join(&NEST_SEP.to_string())
194 } else {
195 String::new()
196 };
197 array_elements.entry(array_path.clone()).or_default().push((
198 idx,
199 remaining,
200 value.clone(),
201 ));
202 belongs_to_array = true;
203 break;
204 }
205 }
206 }
207 }
208
209 if !belongs_to_array {
210 non_array_fields.insert(key, value);
211 }
212 }
213
214 #[allow(clippy::type_complexity)]
216 let mut array_entries: Vec<(String, Vec<(usize, String, Value)>)> =
217 array_elements.into_iter().collect();
218 array_entries.sort_by(|(a, _), (b, _)| b.len().cmp(&a.len()));
219
220 for (array_path, mut elements) in array_entries {
221 elements.sort_by_key(|(idx, _, _)| *idx);
223
224 let max_idx = elements.iter().map(|(idx, _, _)| *idx).max().unwrap_or(0);
226 let mut arr = vec![Value::Null; max_idx + 1];
227
228 let mut by_index: HashMap<usize, Vec<(String, Value)>> = HashMap::new();
230 for (idx, remaining, value) in elements {
231 by_index.entry(idx).or_default().push((remaining, value));
232 }
233
234 for (idx, fields) in by_index {
236 if fields.len() == 1 && fields[0].0.is_empty() {
237 arr[idx] = fields[0].1.clone();
239 } else {
240 let mut obj_map = HashMap::new();
242 for (remaining, value) in fields {
243 if !value.is_null() {
245 obj_map.insert(remaining, value);
246 }
247 }
248
249 let nested_elem_path = if array_path.is_empty() {
251 idx.to_string()
252 } else {
253 format!("{}{}{}", array_path, NEST_SEP, idx)
254 };
255 let nested_prefix_with_sep = format!("{}{}", nested_elem_path, NEST_SEP);
256
257 for marker in &array_markers {
258 if !marker.ends_with("⟦⟧") {
259 continue;
260 }
261
262 let marker_path = marker.trim_end_matches("⟦⟧");
264
265 if marker_path.starts_with(&nested_prefix_with_sep) {
267 let relative_path = &marker_path[nested_prefix_with_sep.len()..];
269 obj_map.insert(format!("{}⟦⟧", relative_path), Value::Null);
270 } else if marker_path == nested_elem_path {
271 obj_map.insert("⟦⟧".to_string(), Value::Null);
275 }
276 }
277
278 arr[idx] = unflatten_object(obj_map);
279 }
280 }
281
282 while !arr.is_empty() {
284 let last = &arr[arr.len() - 1];
285 let should_remove = last.is_null()
286 || (last.is_object() && last.as_object().is_some_and(|o| o.is_empty()));
287 if should_remove {
288 arr.pop();
289 } else {
290 break;
291 }
292 }
293
294 non_array_fields.insert(array_path, Value::Array(arr));
295 }
296
297 let reconstructed_arrays: std::collections::HashSet<String> = non_array_fields
300 .keys()
301 .filter(|k| non_array_fields.get(*k).is_some_and(|v| v.is_array()))
302 .cloned()
303 .collect();
304
305 for array_path in &sorted_array_paths {
307 if !reconstructed_arrays.contains(array_path) && !non_array_fields.contains_key(array_path)
308 {
309 let is_nested_in_array = sorted_array_paths.iter().any(|parent| {
312 if parent.len() >= array_path.len() {
313 return false;
314 }
315 let prefix = if parent.is_empty() {
316 String::new()
317 } else {
318 format!("{}{}", parent, NEST_SEP)
319 };
320 if !array_path.starts_with(&prefix) {
321 return false;
322 }
323 let after = if prefix.is_empty() {
324 array_path.as_str()
325 } else {
326 &array_path[prefix.len()..]
327 };
328 after
329 .split(NEST_SEP)
330 .next()
331 .unwrap_or("")
332 .parse::<usize>()
333 .is_ok()
334 });
335
336 if !is_nested_in_array {
337 non_array_fields.insert(array_path.clone(), Value::Array(vec![]));
338 }
339 }
340 }
341
342 if non_array_fields.len() == 1 && non_array_fields.contains_key("") {
345 return non_array_fields.into_iter().next().unwrap().1;
346 }
347
348 let mut result = Map::new();
349 for (key, value) in non_array_fields {
350 let parts: Vec<&str> = key.split(NEST_SEP).collect();
351 insert_nested_simple(&mut result, &parts, value);
352 }
353
354 Value::Object(result)
355}
356
357fn insert_nested_simple(obj: &mut Map<String, Value>, parts: &[&str], value: Value) {
359 if parts.is_empty() {
360 return;
361 }
362
363 if parts.len() == 1 {
364 obj.insert(parts[0].to_string(), value);
365 return;
366 }
367
368 let key = parts[0];
369 let remaining = &parts[1..];
370
371 let nested = obj
372 .entry(key.to_string())
373 .or_insert_with(|| Value::Object(Map::new()));
374
375 if let Value::Object(nested_obj) = nested {
376 insert_nested_simple(nested_obj, remaining, value);
377 }
378}
379
#[cfg(test)]
mod tests {
    use super::*;

    /// Serializes `ir` in compact form and parses the output back into a JSON value.
    fn serialize_and_parse(ir: &IntermediateRepresentation) -> Value {
        let output = JsonSerializer::serialize(ir, false).unwrap();
        serde_json::from_str(&output).unwrap()
    }

    /// Builds a flat key/value map from `(key, value)` pairs.
    fn flat_map(pairs: Vec<(&str, Value)>) -> HashMap<String, Value> {
        pairs
            .into_iter()
            .map(|(key, value)| (key.to_string(), value))
            .collect()
    }

    #[test]
    fn test_simple_object() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let row = vec![
            SchemaValue::U64(1),
            SchemaValue::String("alice".to_string()),
        ];
        let ir = IntermediateRepresentation::new(header, row).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["id"], json!(1));
        assert_eq!(parsed["name"], json!("alice"));
    }

    #[test]
    fn test_array_of_objects() {
        let header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        let cells = vec![SchemaValue::U64(1), SchemaValue::U64(2)];
        let ir = IntermediateRepresentation::new(header, cells).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert!(parsed.is_array());
        assert_eq!(parsed[0]["id"], json!(1));
        assert_eq!(parsed[1]["id"], json!(2));
    }

    #[test]
    fn test_nested_object() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new("user჻profile჻name", FieldType::String)],
        );
        let cells = vec![SchemaValue::String("alice".to_string())];
        let ir = IntermediateRepresentation::new(header, cells).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["user"]["profile"]["name"], json!("alice"));
    }

    #[test]
    fn test_root_key() {
        let mut header = SchemaHeader::new(1, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        let ir = IntermediateRepresentation::new(header, vec![SchemaValue::U64(1)]).unwrap();

        let parsed = serialize_and_parse(&ir);

        // With a root key, even a single row is wrapped in an array.
        assert!(parsed["users"].is_array());
        assert_eq!(parsed["users"][0]["id"], json!(1));
    }

    #[test]
    fn test_null_handling() {
        let mut header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("name", FieldType::String),
                FieldDef::new("age", FieldType::U64),
            ],
        );
        // Bit 1 is set; the assertions below expect it to flag `age` (row 0, field 1).
        header.null_bitmap = Some(vec![0b0000_0010u8]);
        header.set_flag(FLAG_HAS_NULLS);

        let cells = vec![SchemaValue::String("alice".to_string()), SchemaValue::Null];
        let ir = IntermediateRepresentation::new(header, cells).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["name"], json!("alice"));
        assert_eq!(parsed["age"], Value::Null);
    }

    #[test]
    fn test_homogeneous_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "scores",
                FieldType::Array(Box::new(FieldType::U64)),
            )],
        );
        let scores = SchemaValue::Array(vec![
            SchemaValue::U64(1),
            SchemaValue::U64(2),
            SchemaValue::U64(3),
        ]);
        let ir = IntermediateRepresentation::new(header, vec![scores]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["scores"], json!([1, 2, 3]));
    }

    #[test]
    fn test_empty_array() {
        let header = SchemaHeader::new(
            1,
            vec![FieldDef::new(
                "items",
                FieldType::Array(Box::new(FieldType::Null)),
            )],
        );
        let cells = vec![SchemaValue::Array(vec![])];
        let ir = IntermediateRepresentation::new(header, cells).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["items"], json!([]));
    }

    #[test]
    fn test_deep_nesting() {
        let header = SchemaHeader::new(1, vec![FieldDef::new("a჻b჻c჻d", FieldType::U64)]);
        let ir = IntermediateRepresentation::new(header, vec![SchemaValue::U64(1)]).unwrap();

        let parsed = serialize_and_parse(&ir);

        assert_eq!(parsed["a"]["b"]["c"]["d"], json!(1));
    }

    #[test]
    fn test_unflatten_object() {
        let flat = flat_map(vec![("a჻b", json!(1))]);

        let unflattened = unflatten_object(flat);

        assert_eq!(unflattened["a"]["b"], json!(1));
    }

    #[test]
    fn test_unflatten_nested_array() {
        let flat = flat_map(vec![
            // Markers: `deep` is an array, and so are its elements 0 and 1.
            ("deep⟦⟧", Value::Null),
            ("deep჻0⟦⟧", Value::Null),
            ("deep჻1⟦⟧", Value::Null),
            ("deep჻0჻0", json!(3)),
            ("deep჻0჻1", json!(4)),
            ("deep჻1჻0", json!(5)),
            ("deep჻1჻1", json!(6)),
        ]);

        let unflattened = unflatten_object(flat);

        assert_eq!(unflattened["deep"][0][0], json!(3));
        assert_eq!(unflattened["deep"][0][1], json!(4));
        assert_eq!(unflattened["deep"][1][0], json!(5));
        assert_eq!(unflattened["deep"][1][1], json!(6));
    }

    #[test]
    fn test_pretty_output() {
        let header = SchemaHeader::new(
            1,
            vec![
                FieldDef::new("id", FieldType::U64),
                FieldDef::new("name", FieldType::String),
            ],
        );
        let row = vec![
            SchemaValue::U64(1),
            SchemaValue::String("alice".to_string()),
        ];
        let ir = IntermediateRepresentation::new(header, row).unwrap();

        // Compact output is a single line with no extra whitespace.
        let compact = JsonSerializer::serialize(&ir, false).unwrap();
        assert!(!compact.contains('\n'));
        assert_eq!(compact, r#"{"id":1,"name":"alice"}"#);

        // Pretty output spans several lines and contains spacing.
        let pretty = JsonSerializer::serialize(&ir, true).unwrap();
        assert!(pretty.contains('\n'));
        assert!(pretty.contains(" "));

        // Both renderings must describe the same document.
        let compact_value: Value = serde_json::from_str(&compact).unwrap();
        let pretty_value: Value = serde_json::from_str(&pretty).unwrap();
        assert_eq!(compact_value, pretty_value);
    }

    #[test]
    fn test_metadata_with_null() {
        let mut header = SchemaHeader::new(2, vec![FieldDef::new("id", FieldType::U64)]);
        header.root_key = Some("users".to_string());
        header.set_flag(FLAG_HAS_ROOT_KEY);
        header.metadata = Some(HashMap::from([
            ("note".to_string(), "∅".to_string()),
            ("total".to_string(), "2".to_string()),
        ]));

        let cells = vec![SchemaValue::U64(1), SchemaValue::U64(2)];
        let ir = IntermediateRepresentation::new(header, cells).unwrap();

        let parsed = serialize_and_parse(&ir);

        // Metadata entries sit next to the root key: `∅` decodes to null, `2` to a number.
        assert_eq!(parsed["note"], Value::Null);
        assert_eq!(parsed["total"], json!(2));

        assert!(parsed["users"].is_array());
        assert_eq!(parsed["users"][0]["id"], json!(1));
        assert_eq!(parsed["users"][1]["id"], json!(2));
    }
}