Skip to main content

panproto_protocols/serialization/
msgpack_schema.rs

1//! MessagePack Schema protocol definition.
2//!
3//! MessagePack Schema uses a constrained multigraph + W-type theory (Group A):
4//! `colimit(ThGraph, ThConstraint, ThMulti)` + `ThWType`.
5//!
6//! Vertex kinds: object, field, array, string, integer, boolean, float,
7//!               binary, timestamp, extension, nil, union.
8//! Edge kinds: prop, items, variant.
9
10use std::collections::HashMap;
11use std::hash::BuildHasher;
12
13use panproto_gat::Theory;
14use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
15
16use crate::emit::{children_by_edge, constraint_value, find_roots};
17use crate::error::ProtocolError;
18use crate::theories;
19
20/// Returns the `MessagePack` Schema protocol definition.
21#[must_use]
22pub fn protocol() -> Protocol {
23    Protocol {
24        name: "msgpack-schema".into(),
25        schema_theory: "ThMsgPackSchemaSchema".into(),
26        instance_theory: "ThMsgPackSchemaInstance".into(),
27        edge_rules: edge_rules(),
28        obj_kinds: vec![
29            "object".into(),
30            "field".into(),
31            "array".into(),
32            "string".into(),
33            "integer".into(),
34            "boolean".into(),
35            "float".into(),
36            "binary".into(),
37            "timestamp".into(),
38            "extension".into(),
39            "nil".into(),
40            "union".into(),
41        ],
42        constraint_sorts: vec!["required".into(), "default".into()],
43        has_order: true,
44        has_recursion: true,
45        ..Protocol::default()
46    }
47}
48
49/// Register the component GATs for `MessagePack` Schema with a theory registry.
50///
51/// Uses Group A (constrained multigraph + W-type).
52pub fn register_theories<S: BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
53    theories::register_constrained_multigraph_wtype(
54        registry,
55        "ThMsgPackSchemaSchema",
56        "ThMsgPackSchemaInstance",
57    );
58}
59
60/// Parse a `MessagePack` Schema (JSON) into a [`Schema`].
61///
62/// # Errors
63///
64/// Returns [`ProtocolError`] if the JSON cannot be parsed as valid `MessagePack` Schema.
65pub fn parse_msgpack_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
66    let proto_def = protocol();
67    let mut builder = SchemaBuilder::new(&proto_def);
68    let mut counter: usize = 0;
69
70    parse_schema_node(&mut builder, json, "", &mut counter)?;
71
72    let schema = builder.build()?;
73    Ok(schema)
74}
75
76/// Parse a single schema node recursively.
77fn parse_schema_node(
78    builder: &mut SchemaBuilder,
79    value: &serde_json::Value,
80    prefix: &str,
81    counter: &mut usize,
82) -> Result<String, ProtocolError> {
83    match value {
84        serde_json::Value::Object(obj) => {
85            let type_val = obj.get("type").and_then(|v| v.as_str()).unwrap_or("object");
86
87            let kind = msgpack_type_to_kind(type_val);
88
89            let node_id = if prefix.is_empty() {
90                "root".to_string()
91            } else {
92                format!("{prefix}:{counter}")
93            };
94            *counter += 1;
95
96            let taken = std::mem::replace(builder, SchemaBuilder::new(&protocol()));
97            let mut b = taken.vertex(&node_id, kind, None)?;
98
99            match kind {
100                "object" => {
101                    if let Some(serde_json::Value::Object(props)) = obj.get("properties") {
102                        let required_fields: Vec<String> = obj
103                            .get("required")
104                            .and_then(|v| v.as_array())
105                            .map(|arr| {
106                                arr.iter()
107                                    .filter_map(|v| v.as_str().map(String::from))
108                                    .collect()
109                            })
110                            .unwrap_or_default();
111
112                        for (prop_name, prop_schema) in props {
113                            let field_id = format!("{node_id}.{prop_name}");
114                            b = b.vertex(&field_id, "field", None)?;
115                            b = b.edge(&node_id, &field_id, "prop", Some(prop_name))?;
116
117                            if required_fields.contains(prop_name) {
118                                b = b.constraint(&field_id, "required", "true");
119                            }
120
121                            if let Some(default) = prop_schema.get("default") {
122                                b = b.constraint(&field_id, "default", &default.to_string());
123                            }
124
125                            // Parse nested type.
126                            *builder = b;
127                            let child_id =
128                                parse_schema_node(builder, prop_schema, &field_id, counter)?;
129                            b = std::mem::replace(builder, SchemaBuilder::new(&protocol()));
130
131                            if !child_id.is_empty() {
132                                b = b.edge(&field_id, &child_id, "items", None)?;
133                            }
134                        }
135                    }
136                }
137                "array" => {
138                    if let Some(items_schema) = obj.get("items") {
139                        *builder = b;
140                        let child_id = parse_schema_node(builder, items_schema, &node_id, counter)?;
141                        b = std::mem::replace(builder, SchemaBuilder::new(&protocol()));
142
143                        if !child_id.is_empty() {
144                            b = b.edge(&node_id, &child_id, "items", None)?;
145                        }
146                    }
147                }
148                "union" => {
149                    if let Some(serde_json::Value::Array(variants)) = obj.get("oneOf") {
150                        for variant_schema in variants {
151                            *builder = b;
152                            let child_id =
153                                parse_schema_node(builder, variant_schema, &node_id, counter)?;
154                            b = std::mem::replace(builder, SchemaBuilder::new(&protocol()));
155
156                            if !child_id.is_empty() {
157                                b = b.edge(&node_id, &child_id, "variant", None)?;
158                            }
159                        }
160                    }
161                }
162                _ => {}
163            }
164
165            *builder = b;
166            Ok(node_id)
167        }
168        serde_json::Value::String(s) => {
169            let kind = msgpack_type_to_kind(s);
170            let node_id = if prefix.is_empty() {
171                kind.to_string()
172            } else {
173                format!("{prefix}:{counter}")
174            };
175            *counter += 1;
176
177            let taken = std::mem::replace(builder, SchemaBuilder::new(&protocol()));
178            let b = taken.vertex(&node_id, kind, None)?;
179            *builder = b;
180            Ok(node_id)
181        }
182        _ => Ok(String::new()),
183    }
184}
185
/// Translate a `MessagePack` type name (or one of its short aliases) into the
/// vertex kind used by this protocol.
///
/// Names that are not listed in the alias table, including `"object"` and `"map"`,
/// resolve to `"object"`.
fn msgpack_type_to_kind(type_name: &str) -> &'static str {
    // Each entry pairs the accepted spellings with the vertex kind they denote.
    const ALIASES: &[(&[&str], &str)] = &[
        (&["array"], "array"),
        (&["string", "str"], "string"),
        (&["integer", "int"], "integer"),
        (&["boolean", "bool"], "boolean"),
        (&["float", "number"], "float"),
        (&["binary", "bin"], "binary"),
        (&["timestamp"], "timestamp"),
        (&["extension", "ext"], "extension"),
        (&["nil", "null"], "nil"),
        (&["union"], "union"),
    ];

    ALIASES
        .iter()
        .find(|(spellings, _)| spellings.iter().any(|&spelling| spelling == type_name))
        .map_or("object", |&(_, kind)| kind)
}
203
204/// Emit a `MessagePack` Schema (JSON) from a [`Schema`].
205///
206/// # Errors
207///
208/// Returns [`ProtocolError`] if the schema cannot be serialized.
209pub fn emit_msgpack_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
210    let roots = find_roots(schema, &["prop", "items", "variant"]);
211
212    if roots.len() == 1 {
213        emit_node(schema, &roots[0].id)
214    } else if roots.is_empty() {
215        Err(ProtocolError::Emit("no root vertices found".into()))
216    } else {
217        // Multiple roots: emit first root.
218        emit_node(schema, &roots[0].id)
219    }
220}
221
222/// Emit a single schema node as JSON.
223fn emit_node(schema: &Schema, vertex_id: &str) -> Result<serde_json::Value, ProtocolError> {
224    let vertex = schema
225        .vertices
226        .get(vertex_id)
227        .ok_or_else(|| ProtocolError::Emit(format!("vertex not found: {vertex_id}")))?;
228
229    match vertex.kind.as_str() {
230        "object" => {
231            let mut obj = serde_json::Map::new();
232            obj.insert("type".into(), serde_json::Value::String("object".into()));
233
234            let props = children_by_edge(schema, vertex_id, "prop");
235            if !props.is_empty() {
236                let mut properties = serde_json::Map::new();
237                let mut required_list = Vec::new();
238
239                for (edge, field_vertex) in &props {
240                    let name = edge.name.as_deref().unwrap_or(&field_vertex.id);
241
242                    // Get the items edge to find the type.
243                    let items = children_by_edge(schema, &field_vertex.id, "items");
244                    let field_schema = if let Some((_, type_vertex)) = items.first() {
245                        emit_node(schema, &type_vertex.id)?
246                    } else {
247                        serde_json::Value::Object(serde_json::Map::new())
248                    };
249
250                    properties.insert(name.to_string(), field_schema);
251
252                    if constraint_value(schema, &field_vertex.id, "required").is_some() {
253                        required_list.push(serde_json::Value::String(name.to_string()));
254                    }
255                }
256
257                obj.insert("properties".into(), serde_json::Value::Object(properties));
258
259                if !required_list.is_empty() {
260                    obj.insert("required".into(), serde_json::Value::Array(required_list));
261                }
262            }
263
264            Ok(serde_json::Value::Object(obj))
265        }
266        "array" => {
267            let mut obj = serde_json::Map::new();
268            obj.insert("type".into(), serde_json::Value::String("array".into()));
269
270            let items = children_by_edge(schema, vertex_id, "items");
271            if let Some((_, type_vertex)) = items.first() {
272                obj.insert("items".into(), emit_node(schema, &type_vertex.id)?);
273            }
274
275            Ok(serde_json::Value::Object(obj))
276        }
277        kind => {
278            let mut obj = serde_json::Map::new();
279            obj.insert("type".into(), serde_json::Value::String(kind.into()));
280            Ok(serde_json::Value::Object(obj))
281        }
282    }
283}
284
285/// Well-formedness rules for `MessagePack` Schema edges.
286fn edge_rules() -> Vec<EdgeRule> {
287    let all_types: Vec<String> = vec![
288        "object",
289        "field",
290        "array",
291        "string",
292        "integer",
293        "boolean",
294        "float",
295        "binary",
296        "timestamp",
297        "extension",
298        "nil",
299        "union",
300    ]
301    .into_iter()
302    .map(Into::into)
303    .collect();
304
305    vec![
306        EdgeRule {
307            edge_kind: "prop".into(),
308            src_kinds: vec!["object".into()],
309            tgt_kinds: vec!["field".into()],
310        },
311        EdgeRule {
312            edge_kind: "items".into(),
313            src_kinds: vec!["array".into(), "field".into()],
314            tgt_kinds: all_types.clone(),
315        },
316        EdgeRule {
317            edge_kind: "variant".into(),
318            src_kinds: vec!["union".into()],
319            tgt_kinds: all_types,
320        },
321    ]
322}
323
#[cfg(test)]
// Panicking on failure is the intended behaviour inside tests.
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Object schema with a required string `name` and an optional integer `age`.
    fn person_schema_json() -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "age": {"type": "integer"}
            },
            "required": ["name"]
        })
    }

    /// Object schema with a required string `id` and an optional integer `count`.
    fn record_schema_json() -> serde_json::Value {
        serde_json::json!({
            "type": "object",
            "properties": {
                "id": {"type": "string"},
                "count": {"type": "integer"}
            },
            "required": ["id"]
        })
    }

    #[test]
    fn protocol_creates_valid_definition() {
        let definition = protocol();

        assert_eq!(definition.name, "msgpack-schema");
        assert_eq!(definition.schema_theory, "ThMsgPackSchemaSchema");
        assert_eq!(definition.instance_theory, "ThMsgPackSchemaInstance");
    }

    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);

        for theory in ["ThMsgPackSchemaSchema", "ThMsgPackSchemaInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    #[test]
    fn parse_minimal() {
        let schema = parse_msgpack_schema(&person_schema_json()).expect("should parse");

        for vertex_id in ["root", "root.name", "root.age"] {
            assert!(schema.has_vertex(vertex_id));
        }
    }

    #[test]
    fn emit_minimal() {
        let schema = parse_msgpack_schema(&person_schema_json()).expect("should parse");
        let emitted = emit_msgpack_schema(&schema).expect("should emit");

        let obj = emitted
            .as_object()
            .expect("emitted schema should be a JSON object");
        assert_eq!(obj.get("type").and_then(|v| v.as_str()), Some("object"));
        assert!(obj.contains_key("properties"));
    }

    #[test]
    fn roundtrip() {
        let first = parse_msgpack_schema(&record_schema_json()).expect("parse 1");
        let emitted = emit_msgpack_schema(&first).expect("emit");
        let second = parse_msgpack_schema(&emitted).expect("parse 2");

        assert_eq!(first.vertex_count(), second.vertex_count());
        assert!(second.has_vertex("root"));
    }
}