1use std::collections::HashMap;
8
9use panproto_schema::{Edge, Schema};
10use serde_json::json;
11
12use crate::error::ParseError;
13use crate::metadata::Node;
14use crate::value::{FieldPresence, Value};
15use crate::wtype::WInstance;
16
17struct ParseState {
19 nodes: HashMap<u32, Node>,
20 arcs: Vec<(u32, u32, Edge)>,
21 next_id: u32,
22}
23
24impl ParseState {
25 fn new() -> Self {
26 Self {
27 nodes: HashMap::new(),
28 arcs: Vec::new(),
29 next_id: 0,
30 }
31 }
32
33 const fn alloc_id(&mut self) -> u32 {
34 let id = self.next_id;
35 self.next_id += 1;
36 id
37 }
38}
39
40pub fn parse_json(
52 schema: &Schema,
53 root_vertex: &str,
54 json_val: &serde_json::Value,
55) -> Result<WInstance, ParseError> {
56 if !schema.has_vertex(root_vertex) {
57 return Err(ParseError::RootVertexNotFound(root_vertex.to_string()));
58 }
59
60 let mut state = ParseState::new();
61 let root_id = state.alloc_id();
62
63 walk_json(schema, root_vertex, json_val, root_id, &mut state, "$")?;
64
65 Ok(WInstance::new(
66 state.nodes,
67 state.arcs,
68 Vec::new(),
69 root_id,
70 panproto_gat::Name::from(root_vertex),
71 ))
72}
73
74fn walk_json(
76 schema: &Schema,
77 vertex_id: &str,
78 json_val: &serde_json::Value,
79 node_id: u32,
80 state: &mut ParseState,
81 path: &str,
82) -> Result<(), ParseError> {
83 let _vertex = schema
84 .vertex(vertex_id)
85 .ok_or_else(|| ParseError::RootVertexNotFound(vertex_id.to_string()))?;
86
87 match json_val {
88 serde_json::Value::Object(map) => {
89 parse_object(schema, vertex_id, map, node_id, state, path)?;
90 }
91 serde_json::Value::Array(arr) => {
92 parse_array(schema, vertex_id, arr, node_id, state, path)?;
93 }
94 _ => {
95 let value = json_to_field_presence(json_val);
97 let node = Node::new(node_id, vertex_id).with_value(value);
98 state.nodes.insert(node_id, node);
99 }
100 }
101
102 Ok(())
103}
104
105fn parse_object(
107 schema: &Schema,
108 vertex_id: &str,
109 map: &serde_json::Map<String, serde_json::Value>,
110 node_id: u32,
111 state: &mut ParseState,
112 path: &str,
113) -> Result<(), ParseError> {
114 let mut node = Node::new(node_id, vertex_id);
115
116 if let Some(serde_json::Value::String(disc)) = map.get("$type") {
118 node.discriminator = Some(panproto_gat::Name::from(disc.as_str()));
119 }
120
121 let outgoing: Vec<Edge> = schema.outgoing_edges(vertex_id).to_vec();
123
124 let mut handled_fields = std::collections::HashSet::new();
126
127 for edge in &outgoing {
128 let field_name = edge.name.as_deref().unwrap_or(&*edge.tgt);
129 handled_fields.insert(field_name.to_string());
130
131 if let Some(field_val) = map.get(field_name) {
132 let child_id = state.alloc_id();
133 let child_path = format!("{path}.{field_name}");
134 walk_json(schema, &edge.tgt, field_val, child_id, state, &child_path)?;
135 state.arcs.push((node_id, child_id, edge.clone()));
136 }
137 }
138
139 for (key, val) in map {
141 if key == "$type" || handled_fields.contains(key.as_str()) {
142 continue;
143 }
144 node.extra_fields
145 .insert(key.clone(), json_value_to_value(val));
146 }
147
148 state.nodes.insert(node_id, node);
149 Ok(())
150}
151
152fn parse_array(
162 schema: &Schema,
163 vertex_id: &str,
164 arr: &[serde_json::Value],
165 node_id: u32,
166 state: &mut ParseState,
167 path: &str,
168) -> Result<(), ParseError> {
169 let node = Node::new(node_id, vertex_id);
170 state.nodes.insert(node_id, node);
171
172 let outgoing: Vec<Edge> = schema.outgoing_edges(vertex_id).to_vec();
173 let item_edge = outgoing.iter().find(|e| e.name.is_none());
178
179 if let Some(edge) = item_edge {
180 for (i, item) in arr.iter().enumerate() {
181 let child_id = state.alloc_id();
182 let child_path = format!("{path}[{i}]");
183 walk_json(schema, &edge.tgt, item, child_id, state, &child_path)?;
184 state.arcs.push((node_id, child_id, edge.clone()));
185 }
186 }
187 Ok(())
188}
189
190fn json_to_field_presence(val: &serde_json::Value) -> FieldPresence {
192 match val {
193 serde_json::Value::Null => FieldPresence::Null,
194 serde_json::Value::Bool(b) => FieldPresence::Present(Value::Bool(*b)),
195 serde_json::Value::Number(n) => n.as_i64().map_or_else(
196 || {
197 n.as_f64().map_or_else(
198 || FieldPresence::Present(Value::Str(n.to_string())),
199 |f| FieldPresence::Present(Value::Float(f)),
200 )
201 },
202 |i| FieldPresence::Present(Value::Int(i)),
203 ),
204 serde_json::Value::String(s) => FieldPresence::Present(Value::Str(s.clone())),
205 serde_json::Value::Array(_) | serde_json::Value::Object(_) => {
206 FieldPresence::Present(json_value_to_value(val))
207 }
208 }
209}
210
211fn json_value_to_value(val: &serde_json::Value) -> Value {
221 match val {
222 serde_json::Value::Null => Value::Null,
223 serde_json::Value::Bool(b) => Value::Bool(*b),
224 serde_json::Value::Number(n) => n.as_i64().map_or_else(
225 || {
226 n.as_f64()
227 .map_or_else(|| Value::Str(n.to_string()), Value::Float)
228 },
229 Value::Int,
230 ),
231 serde_json::Value::String(s) => Value::Str(s.clone()),
232 serde_json::Value::Array(arr) => Value::List(arr.iter().map(json_value_to_value).collect()),
233 serde_json::Value::Object(map) => {
234 let fields: HashMap<String, Value> = map
235 .iter()
236 .map(|(k, v)| (k.clone(), json_value_to_value(v)))
237 .collect();
238 Value::Unknown(fields)
239 }
240 }
241}
242
243#[must_use]
248pub fn to_json(schema: &Schema, instance: &WInstance) -> serde_json::Value {
249 node_to_json(schema, instance, instance.root)
250}
251
252fn node_to_json(schema: &Schema, instance: &WInstance, node_id: u32) -> serde_json::Value {
254 let Some(node) = instance.node(node_id) else {
255 return serde_json::Value::Null;
256 };
257
258 if let Some(ref presence) = node.value {
260 return match presence {
261 FieldPresence::Present(val) => value_to_json(val),
262 FieldPresence::Null | FieldPresence::Absent => serde_json::Value::Null,
263 };
264 }
265
266 let list_via_schema = is_list_vertex(schema, &node.anchor);
280 let list_via_annotation = node.is_list();
281 let list_via_instance_arcs = is_list_via_instance_arcs(instance, node_id);
282 let object_only_signals = !node.extra_fields.is_empty() || node.discriminator.is_some();
292 let is_list =
293 (list_via_schema && !object_only_signals) || list_via_annotation || list_via_instance_arcs;
294 if is_list {
295 let children = instance.children(node_id);
296 let items: Vec<serde_json::Value> = children
297 .iter()
298 .map(|&child_id| node_to_json(schema, instance, child_id))
299 .collect();
300 return serde_json::Value::Array(items);
301 }
302
303 let mut map = serde_json::Map::new();
305
306 if let Some(ref disc) = node.discriminator {
308 map.insert("$type".to_string(), json!(&**disc));
309 }
310
311 for &(parent, child, ref edge) in &instance.arcs {
313 if parent == node_id {
314 let field_name = edge.name.as_deref().unwrap_or(&*edge.tgt);
315 map.insert(
316 field_name.to_string(),
317 node_to_json(schema, instance, child),
318 );
319 }
320 }
321
322 for (key, val) in &node.extra_fields {
329 map.insert(key.clone(), value_to_json(val));
330 }
331
332 serde_json::Value::Object(map)
333}
334
335fn value_to_json(val: &Value) -> serde_json::Value {
343 match val {
344 Value::Bool(b) => json!(b),
345 Value::Int(i) => json!(i),
346 Value::Float(f) => json!(f),
347 Value::Str(s) => json!(s),
348 Value::Bytes(b) => serde_json::Value::String(base64_encode(b)),
349 Value::CidLink(s) => json!({"$link": s}),
350 Value::Blob { ref_, mime, size } => {
351 json!({"$type": "blob", "ref": ref_, "mimeType": mime, "size": size})
352 }
353 Value::Token(t) => json!(t),
354 Value::Null => serde_json::Value::Null,
355 Value::Opaque { type_, fields } => {
356 let mut map = serde_json::Map::new();
357 map.insert("$type".to_string(), json!(type_));
358 for (k, v) in fields {
359 map.insert(k.clone(), value_to_json(v));
360 }
361 serde_json::Value::Object(map)
362 }
363 Value::Unknown(fields) => {
364 let map: serde_json::Map<String, serde_json::Value> = fields
365 .iter()
366 .map(|(k, v)| (k.clone(), value_to_json(v)))
367 .collect();
368 serde_json::Value::Object(map)
369 }
370 Value::List(items) => serde_json::Value::Array(items.iter().map(value_to_json).collect()),
371 }
372}
373
374fn is_list_vertex(schema: &Schema, vertex_id: &str) -> bool {
390 let outgoing = schema.outgoing_edges(vertex_id);
391 !outgoing.is_empty() && outgoing.iter().all(|e| e.name.is_none())
392}
393
394fn is_list_via_instance_arcs(instance: &WInstance, node_id: u32) -> bool {
403 let mut signature: Option<(panproto_gat::Name, Option<panproto_gat::Name>)> = None;
404 let mut count = 0_usize;
405 for &(parent, _, ref edge) in &instance.arcs {
406 if parent != node_id {
407 continue;
408 }
409 let key = (edge.kind.clone(), edge.name.clone());
410 match &signature {
411 Some(existing) if existing != &key => return false,
412 Some(_) => {}
413 None => signature = Some(key),
414 }
415 count += 1;
416 }
417 count >= 2
418}
419
/// Encodes `bytes` with the standard base64 alphabet (`A–Z a–z 0–9 + /`).
///
/// No `=` padding is emitted: a trailing group of one byte produces two
/// characters and a trailing group of two bytes produces three.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    const SEXTET_SHIFTS: [u32; 4] = [18, 12, 6, 0];

    let mut encoded = String::new();
    for group in bytes.chunks(3) {
        // Pack up to three bytes into the top of a 24-bit word; missing
        // trailing bytes count as zero.
        let packed = group
            .iter()
            .zip([16_u32, 8, 0])
            .fold(0_u32, |acc, (&byte, shift)| acc | (u32::from(byte) << shift));

        // An n-byte group carries n + 1 significant sextets.
        for &shift in SEXTET_SHIFTS.iter().take(group.len() + 1) {
            let index = ((packed >> shift) & 0x3F) as usize;
            encoded.push(char::from(ALPHABET[index]));
        }
    }
    encoded
}
441
#[cfg(test)]
// Tests unwrap freely: a panic on an unexpected `Err`/`None` is exactly the
// failure signal we want.
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use panproto_schema::{Protocol, SchemaBuilder};
    use smallvec::smallvec;

    /// Builds, by hand, a schema with an object vertex `post:body` and two
    /// string vertices reached through the named `prop` edges `text` and
    /// `createdAt`.
    fn test_schema() -> Schema {
        let mut vertices = HashMap::new();
        vertices.insert(
            "post:body".into(),
            panproto_schema::Vertex {
                id: "post:body".into(),
                kind: "object".into(),
                nsid: None,
            },
        );
        vertices.insert(
            "post:body.text".into(),
            panproto_schema::Vertex {
                id: "post:body.text".into(),
                kind: "string".into(),
                nsid: None,
            },
        );
        vertices.insert(
            "post:body.createdAt".into(),
            panproto_schema::Vertex {
                id: "post:body.createdAt".into(),
                kind: "string".into(),
                nsid: None,
            },
        );

        let text_edge = Edge {
            src: "post:body".into(),
            tgt: "post:body.text".into(),
            kind: "prop".into(),
            name: Some("text".into()),
        };
        let date_edge = Edge {
            src: "post:body".into(),
            tgt: "post:body.createdAt".into(),
            kind: "prop".into(),
            name: Some("createdAt".into()),
        };

        let mut edges = HashMap::new();
        edges.insert(text_edge.clone(), "prop".into());
        edges.insert(date_edge.clone(), "prop".into());

        // The three adjacency indexes are filled in by hand to mirror `edges`.
        let mut outgoing = HashMap::new();
        outgoing.insert(
            "post:body".into(),
            smallvec![text_edge.clone(), date_edge.clone()],
        );

        let mut incoming = HashMap::new();
        incoming.insert("post:body.text".into(), smallvec![text_edge.clone()]);
        incoming.insert("post:body.createdAt".into(), smallvec![date_edge.clone()]);

        let mut between = HashMap::new();
        between.insert(
            ("post:body".into(), "post:body.text".into()),
            smallvec![text_edge],
        );
        between.insert(
            ("post:body".into(), "post:body.createdAt".into()),
            smallvec![date_edge],
        );

        Schema {
            protocol: "test".into(),
            vertices,
            edges,
            hyper_edges: HashMap::new(),
            constraints: HashMap::new(),
            required: HashMap::new(),
            nsids: HashMap::new(),
            entries: Vec::new(),
            variants: HashMap::new(),
            orderings: HashMap::new(),
            recursion_points: HashMap::new(),
            spans: HashMap::new(),
            usage_modes: HashMap::new(),
            nominal: HashMap::new(),
            coercions: HashMap::new(),
            mergers: HashMap::new(),
            defaults: HashMap::new(),
            policies: HashMap::new(),
            outgoing,
            incoming,
            between,
        }
    }

    /// A two-field object yields the root node plus one child per field.
    #[test]
    fn parse_json_simple_object() {
        let schema = test_schema();
        let json_val = json!({
            "text": "hello world",
            "createdAt": "2024-01-01T00:00:00Z"
        });

        let inst = parse_json(&schema, "post:body", &json_val).unwrap();
        assert_eq!(inst.node_count(), 3);
        assert_eq!(inst.arc_count(), 2);
    }

    /// Schema-declared fields survive a parse → serialize round trip.
    #[test]
    fn json_round_trip() {
        let schema = test_schema();
        let json_val = json!({
            "text": "hello world",
            "createdAt": "2024-01-01T00:00:00Z"
        });

        let inst = parse_json(&schema, "post:body", &json_val).unwrap();

        let output = to_json(&schema, &inst);
        assert!(output.is_object());
        assert_eq!(output["text"], "hello world");
        assert_eq!(output["createdAt"], "2024-01-01T00:00:00Z");
    }

    /// Parsing against a root vertex the schema does not contain is an error.
    #[test]
    fn parse_json_missing_root_vertex() {
        let schema = test_schema();
        let json_val = json!({"text": "hello"});
        let result = parse_json(&schema, "nonexistent", &json_val);
        assert!(result.is_err());
    }

    /// Array elements reached through an anonymous edge of kind `items` are
    /// all kept, in order.
    #[test]
    fn parse_array_with_items_edge_kind() {
        let proto = Protocol {
            name: "test".into(),
            schema_theory: "ThTest".into(),
            instance_theory: "ThWType".into(),
            edge_rules: vec![],
            obj_kinds: vec!["object".into(), "string".into(), "array".into()],
            constraint_sorts: vec![],
            ..Protocol::default()
        };
        let schema = SchemaBuilder::new(&proto)
            .vertex("root", "object", None::<&str>)
            .unwrap()
            .vertex("root.tags", "array", None::<&str>)
            .unwrap()
            .vertex("tag", "string", None::<&str>)
            .unwrap()
            .edge("root", "root.tags", "prop", Some("tags"))
            .unwrap()
            .edge("root.tags", "tag", "items", None::<&str>)
            .unwrap()
            .build()
            .unwrap();

        let json_val = json!({"tags": ["alpha", "beta", "gamma"]});
        let inst = parse_json(&schema, "root", &json_val).unwrap();

        let output = to_json(&schema, &inst);
        assert!(output["tags"].is_array());
        let tags = output["tags"].as_array().unwrap();
        assert_eq!(tags.len(), 3, "array elements should not be dropped");
        assert_eq!(tags[0], "alpha");
        assert_eq!(tags[1], "beta");
        assert_eq!(tags[2], "gamma");
    }

    /// Builds a schema `root --items--> root.items --(anonymous)--> item`
    /// where the container vertex has the caller-chosen kind and the
    /// anonymous edge has an arbitrary kind string.
    fn list_schema_with_kind(list_vertex_kind: &str) -> Schema {
        let proto = Protocol {
            name: "generic".into(),
            schema_theory: "ThTest".into(),
            instance_theory: "ThWType".into(),
            edge_rules: vec![],
            obj_kinds: vec!["object".into(), "string".into(), list_vertex_kind.into()],
            constraint_sorts: vec![],
            ..Protocol::default()
        };
        SchemaBuilder::new(&proto)
            .vertex("root", "object", None::<&str>)
            .unwrap()
            .vertex("root.items", list_vertex_kind, None::<&str>)
            .unwrap()
            .vertex("item", "string", None::<&str>)
            .unwrap()
            .edge("root", "root.items", "prop", Some("items"))
            .unwrap()
            .edge("root.items", "item", "anonymous-edge-kind", None::<&str>)
            .unwrap()
            .build()
            .unwrap()
    }

    /// List rendering must not depend on what the container vertex kind is
    /// called.
    #[test]
    fn to_json_emits_list_regardless_of_kind_string() {
        for kind in ["sequence", "list", "bag", "ordered-multi"] {
            let schema = list_schema_with_kind(kind);
            let input = json!({"items": ["alpha", "beta"]});
            let inst = parse_json(&schema, "root", &input).unwrap();
            let output = to_json(&schema, &inst);
            assert!(
                output["items"].is_array(),
                "kind={kind}: expected JSON array, got {}",
                output["items"]
            );
            assert_eq!(output["items"][0], "alpha", "kind={kind}");
            assert_eq!(output["items"][1], "beta", "kind={kind}");
        }
    }

    /// `is_list_vertex` is true only for vertices whose outgoing edges exist
    /// and are all anonymous.
    #[test]
    fn is_list_vertex_detects_by_anonymous_edges() {
        let list_schema = list_schema_with_kind("whatever");
        assert!(
            is_list_vertex(&list_schema, "root.items"),
            "a vertex with only anonymous outgoing edges is a list vertex"
        );

        assert!(
            !is_list_vertex(&list_schema, "root"),
            "a vertex with named outgoing edges is a record vertex, not a list"
        );

        assert!(
            !is_list_vertex(&list_schema, "item"),
            "a leaf vertex with no outgoing edges is not a list vertex"
        );
    }

    /// An empty array has no child arcs, so only the schema shape can tell
    /// that it is a list; it must still come back as `[]`.
    #[test]
    fn to_json_empty_list_vertex_renders_as_empty_json_array() {
        let schema = list_schema_with_kind("collection");
        let input = json!({"items": []});
        let inst = parse_json(&schema, "root", &input).unwrap();
        let output = to_json(&schema, &inst);
        assert_eq!(output["items"], json!([]));
    }

    /// A JSON array converts to `Value::List`, element by element.
    #[test]
    fn json_value_to_value_preserves_array_as_list() {
        let input = json!([1, "two", true, null]);
        let v = json_value_to_value(&input);
        match v {
            Value::List(items) => {
                assert_eq!(items.len(), 4);
                assert_eq!(items[0], Value::Int(1));
                assert_eq!(items[1], Value::Str("two".into()));
                assert_eq!(items[2], Value::Bool(true));
                assert_eq!(items[3], Value::Null);
            }
            other => panic!("expected Value::List, got {other:?}"),
        }
    }

    /// `Value::List` converts back to a JSON array.
    #[test]
    fn value_to_json_renders_list_as_json_array() {
        let v = Value::List(vec![
            Value::Int(1),
            Value::Str("two".into()),
            Value::Bool(true),
            Value::Null,
        ]);
        let j = value_to_json(&v);
        assert_eq!(j, json!([1, "two", true, null]));
    }

    /// JSON → `Value` → JSON is the identity for flat, nested and
    /// object-embedded arrays.
    #[test]
    fn value_json_round_trip_is_faithful_for_arrays() {
        let cases = vec![
            json!([]),
            json!(["en"]),
            json!(["panproto", "atproto", "schemas"]),
            json!([[1, 2], [3, 4]]),
            json!([{"a": 1}, {"b": 2}]),
            json!({"tags": ["x", "y"]}),
            json!({"nested": {"tags": ["x", "y"]}}),
        ];
        for original in cases {
            let roundtrip = value_to_json(&json_value_to_value(&original));
            assert_eq!(
                roundtrip, original,
                "round trip should be faithful for {original}"
            );
        }
    }

    /// Array-valued keys that the schema does not declare travel through
    /// `extra_fields` and must come back as JSON arrays.
    #[test]
    fn to_json_extra_field_array_round_trips_via_value_list() {
        let schema = test_schema();
        let input = json!({
            "text": "Hello",
            "createdAt": "2024-01-15T12:00:00.000Z",
            "langs": ["en"],
            "tags": ["panproto", "atproto", "schemas"]
        });

        let inst = parse_json(&schema, "post:body", &input).unwrap();
        let output = to_json(&schema, &inst);

        assert_eq!(output["text"], "Hello");
        assert_eq!(output["createdAt"], "2024-01-15T12:00:00.000Z");

        assert!(
            output["langs"].is_array(),
            "langs should be a JSON array, got {}",
            output["langs"]
        );
        assert_eq!(output["langs"], json!(["en"]));

        assert!(
            output["tags"].is_array(),
            "tags should be a JSON array, got {}",
            output["tags"]
        );
        assert_eq!(output["tags"], json!(["panproto", "atproto", "schemas"]));
    }

    /// A record vertex whose outgoing edges are all anonymous looks like a
    /// list in the schema; once the node carries extra fields it must still
    /// be rendered as an object.
    #[test]
    fn to_json_record_with_anonymous_edges_emits_extra_fields_not_empty_array() {
        let proto = Protocol {
            name: "test".into(),
            schema_theory: "ThTestSchema".into(),
            instance_theory: "ThTestInstance".into(),
            edge_rules: vec![],
            obj_kinds: vec!["record".into(), "field".into(), "long".into()],
            constraint_sorts: vec![],
            ..Protocol::default()
        };
        let schema = SchemaBuilder::new(&proto)
            .vertex("event", "record", Some("Event"))
            .unwrap()
            .vertex("event.tick", "field", Some("tick"))
            .unwrap()
            .vertex("event.tick:t", "long", None::<&str>)
            .unwrap()
            .edge("event", "event.tick", "field-of", None::<&str>)
            .unwrap()
            .edge("event.tick", "event.tick:t", "type-of", None::<&str>)
            .unwrap()
            .build()
            .unwrap();

        // "tick" matches no edge field name (the anonymous edge is keyed by
        // its target, "event.tick"), so it lands in `extra_fields`.
        let input = json!({"tick": 480});
        let inst = parse_json(&schema, "event", &input).unwrap();

        let output = to_json(&schema, &inst);
        assert!(
            output.is_object(),
            "node with extra_fields must emit as an object, not an array; got {output}"
        );
        assert_eq!(
            output["tick"], 480,
            "extra_fields content must round-trip through to_json"
        );
    }
}