Skip to main content

panproto_inst/
parse.rs

1//! JSON parsing for W-type instances.
2//!
3//! Converts JSON data into a [`WInstance`] guided by a schema, and
4//! serializes instances back to JSON. The parser recursively walks
5//! the JSON structure, matching properties to schema edges.
6
7use std::collections::HashMap;
8
9use panproto_schema::{Edge, Schema};
10use serde_json::json;
11
12use crate::error::ParseError;
13use crate::metadata::Node;
14use crate::value::{FieldPresence, Value};
15use crate::wtype::WInstance;
16
/// Accumulated state during JSON parsing.
///
/// A fresh `ParseState` is created by [`parse_json`]; once the walk
/// finishes, its `nodes` and `arcs` are moved into the resulting
/// [`WInstance`].
struct ParseState {
    /// Every node created so far, keyed by its node id.
    nodes: HashMap<u32, Node>,
    /// `(parent id, child id, schema edge)` triples linking a node to each
    /// child parsed beneath it.
    arcs: Vec<(u32, u32, Edge)>,
    /// The id that the next call to `alloc_id` will hand out.
    next_id: u32,
}
23
24impl ParseState {
25    fn new() -> Self {
26        Self {
27            nodes: HashMap::new(),
28            arcs: Vec::new(),
29            next_id: 0,
30        }
31    }
32
33    const fn alloc_id(&mut self) -> u32 {
34        let id = self.next_id;
35        self.next_id += 1;
36        id
37    }
38}
39
40/// Parse JSON into a W-type instance, guided by a schema.
41///
42/// The parser starts at `root_vertex` in the schema and recursively
43/// walks the JSON structure, creating nodes for each schema vertex
44/// encountered. Property edges guide which JSON fields become child
45/// nodes.
46///
47/// # Errors
48///
49/// Returns `ParseError` if the JSON structure doesn't match the
50/// schema or contains invalid values.
51pub fn parse_json(
52    schema: &Schema,
53    root_vertex: &str,
54    json_val: &serde_json::Value,
55) -> Result<WInstance, ParseError> {
56    if !schema.has_vertex(root_vertex) {
57        return Err(ParseError::RootVertexNotFound(root_vertex.to_string()));
58    }
59
60    let mut state = ParseState::new();
61    let root_id = state.alloc_id();
62
63    walk_json(schema, root_vertex, json_val, root_id, &mut state, "$")?;
64
65    Ok(WInstance::new(
66        state.nodes,
67        state.arcs,
68        Vec::new(),
69        root_id,
70        panproto_gat::Name::from(root_vertex),
71    ))
72}
73
74/// Recursive JSON walker.
75fn walk_json(
76    schema: &Schema,
77    vertex_id: &str,
78    json_val: &serde_json::Value,
79    node_id: u32,
80    state: &mut ParseState,
81    path: &str,
82) -> Result<(), ParseError> {
83    let _vertex = schema
84        .vertex(vertex_id)
85        .ok_or_else(|| ParseError::RootVertexNotFound(vertex_id.to_string()))?;
86
87    match json_val {
88        serde_json::Value::Object(map) => {
89            parse_object(schema, vertex_id, map, node_id, state, path)?;
90        }
91        serde_json::Value::Array(arr) => {
92            parse_array(schema, vertex_id, arr, node_id, state, path)?;
93        }
94        _ => {
95            // Leaf value
96            let value = json_to_field_presence(json_val);
97            let node = Node::new(node_id, vertex_id).with_value(value);
98            state.nodes.insert(node_id, node);
99        }
100    }
101
102    Ok(())
103}
104
105/// Parse a JSON object into a node with children.
106fn parse_object(
107    schema: &Schema,
108    vertex_id: &str,
109    map: &serde_json::Map<String, serde_json::Value>,
110    node_id: u32,
111    state: &mut ParseState,
112    path: &str,
113) -> Result<(), ParseError> {
114    let mut node = Node::new(node_id, vertex_id);
115
116    // Check for discriminator ($type field)
117    if let Some(serde_json::Value::String(disc)) = map.get("$type") {
118        node.discriminator = Some(panproto_gat::Name::from(disc.as_str()));
119    }
120
121    // Get outgoing edges from schema for this vertex
122    let outgoing: Vec<Edge> = schema.outgoing_edges(vertex_id).to_vec();
123
124    // Track which fields we've handled
125    let mut handled_fields = std::collections::HashSet::new();
126
127    for edge in &outgoing {
128        let field_name = edge.name.as_deref().unwrap_or(&*edge.tgt);
129        handled_fields.insert(field_name.to_string());
130
131        if let Some(field_val) = map.get(field_name) {
132            let child_id = state.alloc_id();
133            let child_path = format!("{path}.{field_name}");
134            walk_json(schema, &edge.tgt, field_val, child_id, state, &child_path)?;
135            state.arcs.push((node_id, child_id, edge.clone()));
136        }
137    }
138
139    // Preserve unhandled fields as extra_fields
140    for (key, val) in map {
141        if key == "$type" || handled_fields.contains(key.as_str()) {
142            continue;
143        }
144        node.extra_fields
145            .insert(key.clone(), json_value_to_value(val));
146    }
147
148    state.nodes.insert(node_id, node);
149    Ok(())
150}
151
152/// Parse a JSON array into a node with item children.
153///
154/// The item edge is identified by the generic free-schema rule: a list
155/// vertex has a single anonymous outgoing edge (`name == None`), which
156/// is the item slot. This is protocol-agnostic and matches the rule in
157/// [`is_list_vertex`] used by `to_json`; the two must agree so that
158/// parse/serialize is a round-trip on list vertices regardless of which
159/// string the protocol uses to name the edge kind ("items", "line-of",
160/// "element", etc.).
161fn parse_array(
162    schema: &Schema,
163    vertex_id: &str,
164    arr: &[serde_json::Value],
165    node_id: u32,
166    state: &mut ParseState,
167    path: &str,
168) -> Result<(), ParseError> {
169    let node = Node::new(node_id, vertex_id);
170    state.nodes.insert(node_id, node);
171
172    let outgoing: Vec<Edge> = schema.outgoing_edges(vertex_id).to_vec();
173    // Prefer the first anonymous outgoing edge. In a well-formed list
174    // vertex there is exactly one, but if the schema happens to carry
175    // additional named edges we still pick the anonymous one as the
176    // item slot (named edges would be field projections, not items).
177    let item_edge = outgoing.iter().find(|e| e.name.is_none());
178
179    if let Some(edge) = item_edge {
180        for (i, item) in arr.iter().enumerate() {
181            let child_id = state.alloc_id();
182            let child_path = format!("{path}[{i}]");
183            walk_json(schema, &edge.tgt, item, child_id, state, &child_path)?;
184            state.arcs.push((node_id, child_id, edge.clone()));
185        }
186    }
187    Ok(())
188}
189
190/// Convert a JSON value to a `FieldPresence`.
191fn json_to_field_presence(val: &serde_json::Value) -> FieldPresence {
192    match val {
193        serde_json::Value::Null => FieldPresence::Null,
194        serde_json::Value::Bool(b) => FieldPresence::Present(Value::Bool(*b)),
195        serde_json::Value::Number(n) => n.as_i64().map_or_else(
196            || {
197                n.as_f64().map_or_else(
198                    || FieldPresence::Present(Value::Str(n.to_string())),
199                    |f| FieldPresence::Present(Value::Float(f)),
200                )
201            },
202            |i| FieldPresence::Present(Value::Int(i)),
203        ),
204        serde_json::Value::String(s) => FieldPresence::Present(Value::Str(s.clone())),
205        serde_json::Value::Array(_) | serde_json::Value::Object(_) => {
206            FieldPresence::Present(json_value_to_value(val))
207        }
208    }
209}
210
211/// Convert a `serde_json::Value` to our `Value` type.
212///
213/// JSON arrays map to [`Value::List`] and JSON objects map to
214/// [`Value::Unknown`]. The two branches are the categorical
215/// constructors for the free JSON-like term algebra, so
216/// `json_value_to_value` is a faithful embedding: every
217/// `serde_json::Value` has a unique preimage (up to the numeric
218/// `Int`/`Float` split dictated by the source) and survives a
219/// `value_to_json` round trip.
220fn json_value_to_value(val: &serde_json::Value) -> Value {
221    match val {
222        serde_json::Value::Null => Value::Null,
223        serde_json::Value::Bool(b) => Value::Bool(*b),
224        serde_json::Value::Number(n) => n.as_i64().map_or_else(
225            || {
226                n.as_f64()
227                    .map_or_else(|| Value::Str(n.to_string()), Value::Float)
228            },
229            Value::Int,
230        ),
231        serde_json::Value::String(s) => Value::Str(s.clone()),
232        serde_json::Value::Array(arr) => Value::List(arr.iter().map(json_value_to_value).collect()),
233        serde_json::Value::Object(map) => {
234            let fields: HashMap<String, Value> = map
235                .iter()
236                .map(|(k, v)| (k.clone(), json_value_to_value(v)))
237                .collect();
238            Value::Unknown(fields)
239        }
240    }
241}
242
243/// Serialize a W-type instance to JSON.
244///
245/// Reconstructs the JSON structure by walking the instance tree
246/// from the root, using schema edges as property names.
247#[must_use]
248pub fn to_json(schema: &Schema, instance: &WInstance) -> serde_json::Value {
249    node_to_json(schema, instance, instance.root)
250}
251
252/// Recursively convert a node to JSON.
253fn node_to_json(schema: &Schema, instance: &WInstance, node_id: u32) -> serde_json::Value {
254    let Some(node) = instance.node(node_id) else {
255        return serde_json::Value::Null;
256    };
257
258    // Leaf node: return value directly
259    if let Some(ref presence) = node.value {
260        return match presence {
261            FieldPresence::Present(val) => value_to_json(val),
262            FieldPresence::Null | FieldPresence::Absent => serde_json::Value::Null,
263        };
264    }
265
266    // List (ordered-collection) node. We treat a node as a list when
267    // any of three signals fires:
268    //
269    // 1. The schema marks the anchor vertex as a list (its outgoing
270    //    edges are nonempty and all anonymous), or
271    // 2. The node carries the `$list` annotation set by the CST
272    //    extractor at parse time (catches empty and singleton arrays
273    //    that the instance-arc heuristic cannot distinguish from
274    //    plain `{ "item": x }` objects), or
275    // 3. The instance carries multiple outgoing arcs that share the
276    //    same `(kind, name)` pair (recovers list shape when the
277    //    annotation is absent, e.g. instances built by callers that
278    //    bypass the CST extraction path).
279    let list_via_schema = is_list_vertex(schema, &node.anchor);
280    let list_via_annotation = node.is_list();
281    let list_via_instance_arcs = is_list_via_instance_arcs(instance, node_id);
282    // The schema-shape signal (`list_via_schema`) is a heuristic: it fires
283    // whenever every outgoing edge is anonymous, which is also true of a
284    // hand-built record whose author didn't supply edge names. Object-only
285    // signals on the *node* (a discriminator or extra_fields populated by
286    // the parser when the JSON was an object) are direct evidence the data
287    // is map-shaped, so they veto the schema heuristic. The CST `$list`
288    // annotation and the structural same-name-arcs signal are not vetoed:
289    // both are positive evidence about the data, not the schema, and
290    // cannot coexist with object content.
291    let object_only_signals = !node.extra_fields.is_empty() || node.discriminator.is_some();
292    let is_list =
293        (list_via_schema && !object_only_signals) || list_via_annotation || list_via_instance_arcs;
294    if is_list {
295        let children = instance.children(node_id);
296        let items: Vec<serde_json::Value> = children
297            .iter()
298            .map(|&child_id| node_to_json(schema, instance, child_id))
299            .collect();
300        return serde_json::Value::Array(items);
301    }
302
303    // Object node: reconstruct as JSON object
304    let mut map = serde_json::Map::new();
305
306    // Add discriminator if present
307    if let Some(ref disc) = node.discriminator {
308        map.insert("$type".to_string(), json!(&**disc));
309    }
310
311    // Add children as properties
312    for &(parent, child, ref edge) in &instance.arcs {
313        if parent == node_id {
314            let field_name = edge.name.as_deref().unwrap_or(&*edge.tgt);
315            map.insert(
316                field_name.to_string(),
317                node_to_json(schema, instance, child),
318            );
319        }
320    }
321
322    // Add extra fields. These are serialized AFTER children: when both
323    // contain the same key, extra_fields take precedence. This is by
324    // design: field transforms (`ComputeField` deriving from child
325    // scalars, `ApplyExpr` transforming a child scalar value) write
326    // results to extra_fields, and the transform output must be
327    // authoritative over the original child value.
328    for (key, val) in &node.extra_fields {
329        map.insert(key.clone(), value_to_json(val));
330    }
331
332    serde_json::Value::Object(map)
333}
334
335/// Convert a `Value` to a `serde_json::Value`.
336///
337/// This is the right inverse of [`json_value_to_value`] on the image
338/// of that map: if `v = json_value_to_value(j)`, then
339/// `value_to_json(&v)` returns `j` up to the `Int`/`Float` normalization
340/// performed on numeric literals. In particular, [`Value::List`]
341/// round-trips to a JSON array and [`Value::Unknown`] to a JSON object.
342fn value_to_json(val: &Value) -> serde_json::Value {
343    match val {
344        Value::Bool(b) => json!(b),
345        Value::Int(i) => json!(i),
346        Value::Float(f) => json!(f),
347        Value::Str(s) => json!(s),
348        Value::Bytes(b) => serde_json::Value::String(base64_encode(b)),
349        Value::CidLink(s) => json!({"$link": s}),
350        Value::Blob { ref_, mime, size } => {
351            json!({"$type": "blob", "ref": ref_, "mimeType": mime, "size": size})
352        }
353        Value::Token(t) => json!(t),
354        Value::Null => serde_json::Value::Null,
355        Value::Opaque { type_, fields } => {
356            let mut map = serde_json::Map::new();
357            map.insert("$type".to_string(), json!(type_));
358            for (k, v) in fields {
359                map.insert(k.clone(), value_to_json(v));
360            }
361            serde_json::Value::Object(map)
362        }
363        Value::Unknown(fields) => {
364            let map: serde_json::Map<String, serde_json::Value> = fields
365                .iter()
366                .map(|(k, v)| (k.clone(), value_to_json(v)))
367                .collect();
368            serde_json::Value::Object(map)
369        }
370        Value::List(items) => serde_json::Value::Array(items.iter().map(value_to_json).collect()),
371    }
372}
373
374/// Decide whether a schema vertex should be rendered as a JSON list.
375///
376/// A vertex is a **list vertex** iff its outgoing edges in the schema
377/// are nonempty and all anonymous (i.e. every edge has `name == None`).
378/// Intuitively, a record sort has one projection per named field, while
379/// a list sort has a single anonymous "item" edge (possibly repeated in
380/// the instance). Anonymous edges therefore identify exactly the free
381/// list / free monoid constructor in the schema theory, independent of
382/// the protocol-specific spelling of the vertex kind.
383///
384/// This is the category-theoretically generic rule: it does not depend
385/// on string-matching the vertex kind (`"array"`, `"list"`,
386/// `"sequence"`, etc.) or on any particular protocol's edge-kind
387/// convention. Any schema that encodes ordered collections via repeated
388/// unnamed edges is covered.
389fn is_list_vertex(schema: &Schema, vertex_id: &str) -> bool {
390    let outgoing = schema.outgoing_edges(vertex_id);
391    !outgoing.is_empty() && outgoing.iter().all(|e| e.name.is_none())
392}
393
394/// Detect a list-shaped node from its instance arcs.
395///
396/// Returns `true` when the node has at least two outgoing arcs that
397/// all share the same `(kind, name)` pair. Two same-named children
398/// cannot be expressed as a JSON object (duplicate keys are
399/// disallowed), so the only consistent serialization is a JSON array.
400/// In particular, this catches the synthetic `"item"` arcs that the
401/// open-schema CST extractor emits for every array element.
402fn is_list_via_instance_arcs(instance: &WInstance, node_id: u32) -> bool {
403    let mut signature: Option<(panproto_gat::Name, Option<panproto_gat::Name>)> = None;
404    let mut count = 0_usize;
405    for &(parent, _, ref edge) in &instance.arcs {
406        if parent != node_id {
407            continue;
408        }
409        let key = (edge.kind.clone(), edge.name.clone());
410        match &signature {
411            Some(existing) if existing != &key => return false,
412            Some(_) => {}
413            None => signature = Some(key),
414        }
415        count += 1;
416    }
417    count >= 2
418}
419
/// Simple base64 encoding (no padding).
///
/// Uses the standard `A-Z a-z 0-9 + /` alphabet. Input is consumed in
/// groups of up to three bytes; a group of `n` bytes yields `n + 1`
/// output symbols, so a trailing partial group produces two or three
/// symbols and no `=` padding is ever appended.
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Upper bound on the output length (exact when the input length is a
    // multiple of three).
    let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4);

    for chunk in bytes.chunks(3) {
        // Zero-fill the missing bytes of a short final chunk.
        let mut group = [0_u8; 3];
        group[..chunk.len()].copy_from_slice(chunk);
        let [high, middle, low] = group.map(u32::from);
        let triple = (high << 16) | (middle << 8) | low;

        // Emit the leading `chunk.len() + 1` sextets, most significant first.
        for position in 0..=chunk.len() {
            let sextet = (triple >> (18 - 6 * position)) & 0x3F;
            encoded.push(char::from(ALPHABET[sextet as usize]));
        }
    }
    encoded
}
441
442#[cfg(test)]
443#[allow(clippy::unwrap_used)]
444mod tests {
445    use super::*;
446    use panproto_schema::{Protocol, SchemaBuilder};
447    use smallvec::smallvec;
448
449    /// Build a minimal schema for testing.
450    fn test_schema() -> Schema {
451        let mut vertices = HashMap::new();
452        vertices.insert(
453            "post:body".into(),
454            panproto_schema::Vertex {
455                id: "post:body".into(),
456                kind: "object".into(),
457                nsid: None,
458            },
459        );
460        vertices.insert(
461            "post:body.text".into(),
462            panproto_schema::Vertex {
463                id: "post:body.text".into(),
464                kind: "string".into(),
465                nsid: None,
466            },
467        );
468        vertices.insert(
469            "post:body.createdAt".into(),
470            panproto_schema::Vertex {
471                id: "post:body.createdAt".into(),
472                kind: "string".into(),
473                nsid: None,
474            },
475        );
476
477        let text_edge = Edge {
478            src: "post:body".into(),
479            tgt: "post:body.text".into(),
480            kind: "prop".into(),
481            name: Some("text".into()),
482        };
483        let date_edge = Edge {
484            src: "post:body".into(),
485            tgt: "post:body.createdAt".into(),
486            kind: "prop".into(),
487            name: Some("createdAt".into()),
488        };
489
490        let mut edges = HashMap::new();
491        edges.insert(text_edge.clone(), "prop".into());
492        edges.insert(date_edge.clone(), "prop".into());
493
494        let mut outgoing = HashMap::new();
495        outgoing.insert(
496            "post:body".into(),
497            smallvec![text_edge.clone(), date_edge.clone()],
498        );
499
500        let mut incoming = HashMap::new();
501        incoming.insert("post:body.text".into(), smallvec![text_edge.clone()]);
502        incoming.insert("post:body.createdAt".into(), smallvec![date_edge.clone()]);
503
504        let mut between = HashMap::new();
505        between.insert(
506            ("post:body".into(), "post:body.text".into()),
507            smallvec![text_edge],
508        );
509        between.insert(
510            ("post:body".into(), "post:body.createdAt".into()),
511            smallvec![date_edge],
512        );
513
514        Schema {
515            protocol: "test".into(),
516            vertices,
517            edges,
518            hyper_edges: HashMap::new(),
519            constraints: HashMap::new(),
520            required: HashMap::new(),
521            nsids: HashMap::new(),
522            entries: Vec::new(),
523            variants: HashMap::new(),
524            orderings: HashMap::new(),
525            recursion_points: HashMap::new(),
526            spans: HashMap::new(),
527            usage_modes: HashMap::new(),
528            nominal: HashMap::new(),
529            coercions: HashMap::new(),
530            mergers: HashMap::new(),
531            defaults: HashMap::new(),
532            policies: HashMap::new(),
533            outgoing,
534            incoming,
535            between,
536        }
537    }
538
539    #[test]
540    fn parse_json_simple_object() {
541        let schema = test_schema();
542        let json_val = json!({
543            "text": "hello world",
544            "createdAt": "2024-01-01T00:00:00Z"
545        });
546
547        let result = parse_json(&schema, "post:body", &json_val);
548        assert!(result.is_ok(), "parse failed: {result:?}");
549
550        let inst = result.unwrap_or_else(|_| {
551            WInstance::new(
552                HashMap::new(),
553                vec![],
554                vec![],
555                0,
556                panproto_gat::Name::default(),
557            )
558        });
559        assert_eq!(inst.node_count(), 3);
560        assert_eq!(inst.arc_count(), 2);
561    }
562
563    #[test]
564    fn json_round_trip() {
565        let schema = test_schema();
566        let json_val = json!({
567            "text": "hello world",
568            "createdAt": "2024-01-01T00:00:00Z"
569        });
570
571        let inst = parse_json(&schema, "post:body", &json_val);
572        assert!(inst.is_ok());
573        let inst = inst.unwrap_or_else(|_| {
574            WInstance::new(
575                HashMap::new(),
576                vec![],
577                vec![],
578                0,
579                panproto_gat::Name::default(),
580            )
581        });
582
583        let output = to_json(&schema, &inst);
584        assert!(output.is_object());
585        assert_eq!(output["text"], "hello world");
586        assert_eq!(output["createdAt"], "2024-01-01T00:00:00Z");
587    }
588
589    #[test]
590    fn parse_json_missing_root_vertex() {
591        let schema = test_schema();
592        let json_val = json!({"text": "hello"});
593        let result = parse_json(&schema, "nonexistent", &json_val);
594        assert!(result.is_err());
595    }
596
597    #[test]
598    fn parse_array_with_items_edge_kind() {
599        // Protocols declare the array edge kind as "items" (plural);
600        // the parser must match that spelling rather than the singular
601        // "item", which is a plausible typo.
602        let proto = Protocol {
603            name: "test".into(),
604            schema_theory: "ThTest".into(),
605            instance_theory: "ThWType".into(),
606            edge_rules: vec![],
607            obj_kinds: vec!["object".into(), "string".into(), "array".into()],
608            constraint_sorts: vec![],
609            ..Protocol::default()
610        };
611        let schema = SchemaBuilder::new(&proto)
612            .vertex("root", "object", None::<&str>)
613            .unwrap()
614            .vertex("root.tags", "array", None::<&str>)
615            .unwrap()
616            .vertex("tag", "string", None::<&str>)
617            .unwrap()
618            .edge("root", "root.tags", "prop", Some("tags"))
619            .unwrap()
620            .edge("root.tags", "tag", "items", None::<&str>)
621            .unwrap()
622            .build()
623            .unwrap();
624
625        let json_val = json!({"tags": ["alpha", "beta", "gamma"]});
626        let inst = parse_json(&schema, "root", &json_val).unwrap();
627
628        let output = to_json(&schema, &inst);
629        assert!(output["tags"].is_array());
630        let tags = output["tags"].as_array().unwrap();
631        assert_eq!(tags.len(), 3, "array elements should not be dropped");
632        assert_eq!(tags[0], "alpha");
633        assert_eq!(tags[1], "beta");
634        assert_eq!(tags[2], "gamma");
635    }
636
637    // ── Tests for issue #27: generic list detection + Value::List ──────
638
639    /// Build a minimal schema with an object vertex that carries a
640    /// list-vertex as a property, where the list-vertex is distinguished
641    /// only by having a single anonymous outgoing edge. The vertex kind
642    /// is deliberately NOT named `"array"` to prove the generic rule is
643    /// not relying on any particular protocol's kind string.
644    fn list_schema_with_kind(list_vertex_kind: &str) -> Schema {
645        let proto = Protocol {
646            name: "generic".into(),
647            schema_theory: "ThTest".into(),
648            instance_theory: "ThWType".into(),
649            edge_rules: vec![],
650            obj_kinds: vec!["object".into(), "string".into(), list_vertex_kind.into()],
651            constraint_sorts: vec![],
652            ..Protocol::default()
653        };
654        SchemaBuilder::new(&proto)
655            .vertex("root", "object", None::<&str>)
656            .unwrap()
657            .vertex("root.items", list_vertex_kind, None::<&str>)
658            .unwrap()
659            .vertex("item", "string", None::<&str>)
660            .unwrap()
661            .edge("root", "root.items", "prop", Some("items"))
662            .unwrap()
663            // Anonymous edge from list-vertex to element type:
664            // this is what marks the vertex as a list.
665            .edge("root.items", "item", "anonymous-edge-kind", None::<&str>)
666            .unwrap()
667            .build()
668            .unwrap()
669    }
670
671    #[test]
672    fn to_json_emits_list_regardless_of_kind_string() {
673        // The generic rule is "all outgoing edges are anonymous," not
674        // "vertex.kind == array". Prove it by using a vertex kind
675        // that isn't `"array"`: `"sequence"`, `"list"`, `"bag"`, etc.
676        for kind in ["sequence", "list", "bag", "ordered-multi"] {
677            let schema = list_schema_with_kind(kind);
678            let input = json!({"items": ["alpha", "beta"]});
679            let inst = parse_json(&schema, "root", &input).unwrap();
680            let output = to_json(&schema, &inst);
681            assert!(
682                output["items"].is_array(),
683                "kind={kind}: expected JSON array, got {}",
684                output["items"]
685            );
686            assert_eq!(output["items"][0], "alpha", "kind={kind}");
687            assert_eq!(output["items"][1], "beta", "kind={kind}");
688        }
689    }
690
691    #[test]
692    fn is_list_vertex_detects_by_anonymous_edges() {
693        // All outgoing edges anonymous → list.
694        let list_schema = list_schema_with_kind("whatever");
695        assert!(
696            is_list_vertex(&list_schema, "root.items"),
697            "a vertex with only anonymous outgoing edges is a list vertex"
698        );
699
700        // Named outgoing edges → not a list (it's a record).
701        assert!(
702            !is_list_vertex(&list_schema, "root"),
703            "a vertex with named outgoing edges is a record vertex, not a list"
704        );
705
706        // No outgoing edges → not a list (it's a leaf).
707        assert!(
708            !is_list_vertex(&list_schema, "item"),
709            "a leaf vertex with no outgoing edges is not a list vertex"
710        );
711    }
712
713    #[test]
714    fn to_json_empty_list_vertex_renders_as_empty_json_array() {
715        // A list-vertex with zero children must render as `[]`. The
716        // schema-based rule for picking array vs object renderings has
717        // to run before the child-count check, otherwise an empty list
718        // renders as `{}`.
719        let schema = list_schema_with_kind("collection");
720        let input = json!({"items": []});
721        let inst = parse_json(&schema, "root", &input).unwrap();
722        let output = to_json(&schema, &inst);
723        assert_eq!(output["items"], json!([]));
724    }
725
726    #[test]
727    fn json_value_to_value_preserves_array_as_list() {
728        // json_value_to_value is the embedding serde_json::Value ↪ Value;
729        // it must map arrays to Value::List (not Value::Unknown with
730        // stringly keys) or the embedding is not faithful.
731        let input = json!([1, "two", true, null]);
732        let v = json_value_to_value(&input);
733        match v {
734            Value::List(items) => {
735                assert_eq!(items.len(), 4);
736                assert_eq!(items[0], Value::Int(1));
737                assert_eq!(items[1], Value::Str("two".into()));
738                assert_eq!(items[2], Value::Bool(true));
739                assert_eq!(items[3], Value::Null);
740            }
741            other => panic!("expected Value::List, got {other:?}"),
742        }
743    }
744
745    #[test]
746    fn value_to_json_renders_list_as_json_array() {
747        let v = Value::List(vec![
748            Value::Int(1),
749            Value::Str("two".into()),
750            Value::Bool(true),
751            Value::Null,
752        ]);
753        let j = value_to_json(&v);
754        assert_eq!(j, json!([1, "two", true, null]));
755    }
756
757    #[test]
758    fn value_json_round_trip_is_faithful_for_arrays() {
759        // The composition value_to_json ∘ json_value_to_value should be
760        // the identity on the JSON subalgebra (up to the Int/Float
761        // numeric normalization). Exercise it on nested arrays and
762        // arrays-of-objects to confirm faithfulness.
763        let cases = vec![
764            json!([]),
765            json!(["en"]),
766            json!(["panproto", "atproto", "schemas"]),
767            json!([[1, 2], [3, 4]]),
768            json!([{"a": 1}, {"b": 2}]),
769            json!({"tags": ["x", "y"]}),
770            json!({"nested": {"tags": ["x", "y"]}}),
771        ];
772        for original in cases {
773            let roundtrip = value_to_json(&json_value_to_value(&original));
774            assert_eq!(
775                roundtrip, original,
776                "round trip should be faithful for {original}"
777            );
778        }
779    }
780
781    #[test]
782    fn to_json_extra_field_array_round_trips_via_value_list() {
783        // A field that lands in `extra_fields` (no matching schema
784        // edge) carries a JSON array. The emitter must preserve the
785        // array shape rather than flatten it to a stringly-keyed
786        // object like `{"0": "en"}`.
787        let schema = test_schema();
788        let input = json!({
789            "text": "Hello",
790            "createdAt": "2024-01-15T12:00:00.000Z",
791            "langs": ["en"],
792            "tags": ["panproto", "atproto", "schemas"]
793        });
794
795        let inst = parse_json(&schema, "post:body", &input).unwrap();
796        let output = to_json(&schema, &inst);
797
798        // Schema-anchored fields survive as usual.
799        assert_eq!(output["text"], "Hello");
800        assert_eq!(output["createdAt"], "2024-01-15T12:00:00.000Z");
801
802        // Extra-field arrays must come out as JSON arrays, not objects
803        // with numeric string keys.
804        assert!(
805            output["langs"].is_array(),
806            "langs should be a JSON array, got {}",
807            output["langs"]
808        );
809        assert_eq!(output["langs"], json!(["en"]));
810
811        assert!(
812            output["tags"].is_array(),
813            "tags should be a JSON array, got {}",
814            output["tags"]
815        );
816        assert_eq!(output["tags"], json!(["panproto", "atproto", "schemas"]));
817    }
818
819    #[test]
820    fn to_json_record_with_anonymous_edges_emits_extra_fields_not_empty_array() {
821        // Regression for issues #54 and #55: a hand-built schema whose
822        // record vertex happens to have only anonymous outgoing edges
823        // (e.g. a Python caller using SchemaBuilder.edge(..., name=None))
824        // was being classified as a list by the schema heuristic. The
825        // parser correctly preserves unhandled JSON keys in
826        // `extra_fields`, but the emitter then dropped them and rendered
827        // the node as `[]`. Object-only signals on the node (extra_fields
828        // populated, or a discriminator) must veto the schema heuristic.
829        let proto = Protocol {
830            name: "test".into(),
831            schema_theory: "ThTestSchema".into(),
832            instance_theory: "ThTestInstance".into(),
833            edge_rules: vec![],
834            obj_kinds: vec!["record".into(), "field".into(), "long".into()],
835            constraint_sorts: vec![],
836            ..Protocol::default()
837        };
838        let schema = SchemaBuilder::new(&proto)
839            .vertex("event", "record", Some("Event"))
840            .unwrap()
841            .vertex("event.tick", "field", Some("tick"))
842            .unwrap()
843            .vertex("event.tick:t", "long", None::<&str>)
844            .unwrap()
845            // Anonymous edges (name=None). This is what the user's
846            // didactic-style hand-built schema looks like.
847            .edge("event", "event.tick", "field-of", None::<&str>)
848            .unwrap()
849            .edge("event.tick", "event.tick:t", "type-of", None::<&str>)
850            .unwrap()
851            .build()
852            .unwrap();
853
854        let input = json!({"tick": 480});
855        let inst = parse_json(&schema, "event", &input).unwrap();
856
857        // The data is preserved on the parse side as an extra_field on
858        // the root: there is no schema edge named "tick", so the parser
859        // routes it to extra_fields rather than dropping it.
860        let output = to_json(&schema, &inst);
861        assert!(
862            output.is_object(),
863            "node with extra_fields must emit as an object, not an array; got {output}"
864        );
865        assert_eq!(
866            output["tick"], 480,
867            "extra_fields content must round-trip through to_json"
868        );
869    }
870}