Skip to main content

panproto_protocols/database/
mongodb.rs

1//! `MongoDB` Schema Validation protocol definition.
2//!
3//! `MongoDB` uses a constrained multigraph schema theory
4//! (`colimit(ThGraph, ThConstraint, ThMulti)`) and a W-type
5//! instance theory (`ThWType`).
6//!
7//! Vertex kinds: collection, field, object, array, string, int, long,
8//! double, decimal, bool, date, timestamp, objectId, binary, regex, null.
9//!
10//! Edge kinds: prop, items, variant.
11
12use std::collections::HashMap;
13
14use panproto_gat::Theory;
15use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
16
17use crate::emit::{children_by_edge, constraint_value, find_roots};
18use crate::error::ProtocolError;
19use crate::theories;
20
21/// Returns the `MongoDB` protocol definition.
22#[must_use]
23pub fn protocol() -> Protocol {
24    Protocol {
25        name: "mongodb".into(),
26        schema_theory: "ThMongoDBSchema".into(),
27        instance_theory: "ThMongoDBInstance".into(),
28        edge_rules: edge_rules(),
29        obj_kinds: vec![
30            "collection".into(),
31            "field".into(),
32            "object".into(),
33            "array".into(),
34            "string".into(),
35            "int".into(),
36            "long".into(),
37            "double".into(),
38            "decimal".into(),
39            "bool".into(),
40            "date".into(),
41            "timestamp".into(),
42            "objectId".into(),
43            "binary".into(),
44            "regex".into(),
45            "null".into(),
46        ],
47        constraint_sorts: vec![
48            "required".into(),
49            "bsonType".into(),
50            "enum".into(),
51            "minimum".into(),
52            "maximum".into(),
53            "minLength".into(),
54            "maxLength".into(),
55            "pattern".into(),
56            "description".into(),
57        ],
58        has_order: true,
59        has_recursion: true,
60        ..Protocol::default()
61    }
62}
63
64/// Register the component GATs for `MongoDB` with a theory registry.
65pub fn register_theories<S: ::std::hash::BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
66    theories::register_constrained_multigraph_wtype(
67        registry,
68        "ThMongoDBSchema",
69        "ThMongoDBInstance",
70    );
71}
72
73/// Parse a `MongoDB` JSON Schema validation document into a [`Schema`].
74///
75/// Expects a JSON object with a `$jsonSchema` key at the top level,
76/// or the schema body directly (bsonType-based).
77///
78/// # Errors
79///
80/// Returns [`ProtocolError`] if parsing or schema construction fails.
81pub fn parse_mongodb_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
82    let proto = protocol();
83    let mut builder = SchemaBuilder::new(&proto);
84
85    // Support both `{$jsonSchema: ...}` wrapper and direct schema body.
86    let schema_body = json
87        .get("$jsonSchema")
88        .or_else(|| json.get("validator").and_then(|v| v.get("$jsonSchema")))
89        .unwrap_or(json);
90
91    let collection_name = json
92        .get("collection")
93        .and_then(serde_json::Value::as_str)
94        .unwrap_or("root");
95
96    let collection_id = format!("collection:{collection_name}");
97    builder = builder.vertex(&collection_id, "collection", None)?;
98
99    if let Some(desc) = schema_body
100        .get("description")
101        .and_then(serde_json::Value::as_str)
102    {
103        builder = builder.constraint(&collection_id, "description", desc);
104    }
105
106    // Walk the schema body.
107    builder = walk_bson_schema(builder, schema_body, &collection_id)?;
108
109    let schema = builder.build()?;
110    Ok(schema)
111}
112
113/// Recursively walk a `MongoDB` JSON Schema validation object.
114fn walk_bson_schema(
115    mut builder: SchemaBuilder,
116    schema: &serde_json::Value,
117    parent_id: &str,
118) -> Result<SchemaBuilder, ProtocolError> {
119    let required_fields: Vec<&str> = schema
120        .get("required")
121        .and_then(serde_json::Value::as_array)
122        .map(|arr| arr.iter().filter_map(serde_json::Value::as_str).collect())
123        .unwrap_or_default();
124
125    if let Some(properties) = schema
126        .get("properties")
127        .and_then(serde_json::Value::as_object)
128    {
129        for (prop_name, prop_schema) in properties {
130            let prop_id = format!("{parent_id}.{prop_name}");
131
132            let bson_type = prop_schema
133                .get("bsonType")
134                .and_then(serde_json::Value::as_str)
135                .unwrap_or("object");
136
137            let kind = bson_type_to_kind(bson_type);
138            builder = builder.vertex(&prop_id, &kind, None)?;
139            builder = builder.edge(parent_id, &prop_id, "prop", Some(prop_name))?;
140
141            if required_fields.contains(&prop_name.as_str()) {
142                builder = builder.constraint(&prop_id, "required", "true");
143            }
144
145            // Add constraints.
146            for field in &["minimum", "maximum", "minLength", "maxLength", "pattern"] {
147                if let Some(val) = prop_schema.get(field) {
148                    let val_str = match val {
149                        serde_json::Value::String(s) => s.clone(),
150                        serde_json::Value::Number(n) => n.to_string(),
151                        _ => val.to_string(),
152                    };
153                    builder = builder.constraint(&prop_id, field, &val_str);
154                }
155            }
156
157            if let Some(desc) = prop_schema
158                .get("description")
159                .and_then(serde_json::Value::as_str)
160            {
161                builder = builder.constraint(&prop_id, "description", desc);
162            }
163
164            if let Some(enum_val) = prop_schema
165                .get("enum")
166                .and_then(serde_json::Value::as_array)
167            {
168                let vals: Vec<String> = enum_val
169                    .iter()
170                    .map(|v| v.as_str().map_or_else(|| v.to_string(), String::from))
171                    .collect();
172                builder = builder.constraint(&prop_id, "enum", &vals.join(","));
173            }
174
175            // Recurse into nested objects.
176            if bson_type == "object" {
177                builder = walk_bson_schema(builder, prop_schema, &prop_id)?;
178            }
179
180            // Handle array items.
181            if bson_type == "array" {
182                if let Some(items) = prop_schema.get("items") {
183                    let items_id = format!("{prop_id}:items");
184                    let items_type = items
185                        .get("bsonType")
186                        .and_then(serde_json::Value::as_str)
187                        .unwrap_or("object");
188                    let items_kind = bson_type_to_kind(items_type);
189                    builder = builder.vertex(&items_id, &items_kind, None)?;
190                    builder = builder.edge(&prop_id, &items_id, "items", None)?;
191
192                    if items_type == "object" {
193                        builder = walk_bson_schema(builder, items, &items_id)?;
194                    }
195                }
196            }
197
198            // Handle bsonType arrays (union types).
199            if let Some(serde_json::Value::Array(types)) = prop_schema.get("bsonType") {
200                // Already created with the first type; add variant edges for the rest.
201                for (i, t) in types.iter().enumerate() {
202                    if let Some(t_str) = t.as_str() {
203                        if i > 0 {
204                            let variant_id = format!("{prop_id}:variant{i}");
205                            let variant_kind = bson_type_to_kind(t_str);
206                            builder = builder.vertex(&variant_id, &variant_kind, None)?;
207                            builder =
208                                builder.edge(&prop_id, &variant_id, "variant", Some(t_str))?;
209                        }
210                    }
211                }
212            }
213        }
214    }
215
216    Ok(builder)
217}
218
/// Translates a BSON type alias into the protocol's vertex kind.
///
/// Aliases that share their name with a vertex kind pass through unchanged,
/// `binData` is folded into `binary`, and every other input (including
/// `object` itself) yields `object`.
fn bson_type_to_kind(bson_type: &str) -> String {
    // Aliases whose vertex kind has exactly the same spelling.
    const PASS_THROUGH: [&str; 13] = [
        "string",
        "int",
        "long",
        "double",
        "decimal",
        "bool",
        "date",
        "timestamp",
        "objectId",
        "binary",
        "regex",
        "null",
        "array",
    ];

    let kind = if bson_type == "binData" {
        "binary"
    } else if PASS_THROUGH.contains(&bson_type) {
        bson_type
    } else {
        "object"
    };

    kind.to_owned()
}
239
240/// Emit a [`Schema`] as a `MongoDB` JSON Schema validation document.
241///
242/// # Errors
243///
244/// Returns [`ProtocolError`] if emission fails.
245pub fn emit_mongodb_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
246    let roots = find_roots(schema, &["prop", "items", "variant"]);
247
248    // Find the collection root.
249    let collection_root = roots
250        .iter()
251        .find(|v| v.kind == "collection")
252        .ok_or_else(|| ProtocolError::Emit("no collection vertex found".into()))?;
253
254    let collection_name = collection_root
255        .id
256        .strip_prefix("collection:")
257        .unwrap_or(&collection_root.id);
258
259    let json_schema = emit_bson_object(schema, &collection_root.id);
260
261    let mut result = serde_json::Map::new();
262    result.insert(
263        "collection".into(),
264        serde_json::Value::String(collection_name.to_string()),
265    );
266    result.insert("$jsonSchema".into(), json_schema);
267
268    Ok(serde_json::Value::Object(result))
269}
270
271/// Emit a BSON schema object from a vertex and its children.
272fn emit_bson_object(schema: &Schema, vertex_id: &str) -> serde_json::Value {
273    let mut obj = serde_json::Map::new();
274    obj.insert(
275        "bsonType".into(),
276        serde_json::Value::String("object".into()),
277    );
278
279    let children = children_by_edge(schema, vertex_id, "prop");
280    if children.is_empty() {
281        return serde_json::Value::Object(obj);
282    }
283
284    let mut properties = serde_json::Map::new();
285    let mut required_list = Vec::new();
286
287    for (edge, child) in &children {
288        let name = edge.name.as_deref().unwrap_or("");
289        let mut prop_obj = serde_json::Map::new();
290
291        let bson_type = match child.kind.as_str() {
292            "string" => "string",
293            "int" => "int",
294            "long" => "long",
295            "double" => "double",
296            "decimal" => "decimal",
297            "bool" => "bool",
298            "date" => "date",
299            "timestamp" => "timestamp",
300            "objectId" => "objectId",
301            "binary" => "binary",
302            "regex" => "regex",
303            "null" => "null",
304            "array" => "array",
305            _ => "object",
306        };
307        prop_obj.insert(
308            "bsonType".into(),
309            serde_json::Value::String(bson_type.into()),
310        );
311
312        if constraint_value(schema, &child.id, "required") == Some("true") {
313            required_list.push(serde_json::Value::String(name.to_string()));
314        }
315
316        for field in &["minimum", "maximum", "minLength", "maxLength", "pattern"] {
317            if let Some(val) = constraint_value(schema, &child.id, field) {
318                if let Ok(n) = val.parse::<f64>() {
319                    prop_obj.insert((*field).into(), serde_json::json!(n));
320                } else {
321                    prop_obj.insert((*field).into(), serde_json::Value::String(val.to_string()));
322                }
323            }
324        }
325
326        if let Some(desc) = constraint_value(schema, &child.id, "description") {
327            prop_obj.insert(
328                "description".into(),
329                serde_json::Value::String(desc.to_string()),
330            );
331        }
332
333        // Nested object.
334        if bson_type == "object" {
335            let nested = emit_bson_object(schema, &child.id);
336            if let Some(nested_obj) = nested.as_object() {
337                if let Some(nested_props) = nested_obj.get("properties") {
338                    prop_obj.insert("properties".into(), nested_props.clone());
339                }
340            }
341        }
342
343        properties.insert(name.to_string(), serde_json::Value::Object(prop_obj));
344    }
345
346    obj.insert("properties".into(), serde_json::Value::Object(properties));
347    if !required_list.is_empty() {
348        obj.insert("required".into(), serde_json::Value::Array(required_list));
349    }
350
351    serde_json::Value::Object(obj)
352}
353
354/// Well-formedness rules for `MongoDB` edges.
355fn edge_rules() -> Vec<EdgeRule> {
356    vec![
357        EdgeRule {
358            edge_kind: "prop".into(),
359            src_kinds: vec!["collection".into(), "object".into()],
360            tgt_kinds: vec![],
361        },
362        EdgeRule {
363            edge_kind: "items".into(),
364            src_kinds: vec!["array".into()],
365            tgt_kinds: vec![],
366        },
367        EdgeRule {
368            edge_kind: "variant".into(),
369            src_kinds: vec![],
370            tgt_kinds: vec![],
371        },
372    ]
373}
374
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Wraps a `$jsonSchema` body together with a collection name.
    fn validator_doc(collection: &str, body: serde_json::Value) -> serde_json::Value {
        serde_json::json!({
            "collection": collection,
            "$jsonSchema": body,
        })
    }

    /// The protocol descriptor carries the expected name and theory names.
    #[test]
    fn protocol_def() {
        let descriptor = protocol();
        assert_eq!(descriptor.name, "mongodb");
        assert_eq!(descriptor.schema_theory, "ThMongoDBSchema");
        assert_eq!(descriptor.instance_theory, "ThMongoDBInstance");
    }

    /// Both theories end up in the registry after registration.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in ["ThMongoDBSchema", "ThMongoDBInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    /// Parsing creates the collection vertex and one vertex per property.
    #[test]
    fn parse_minimal() {
        let body = serde_json::json!({
            "bsonType": "object",
            "required": ["name", "email"],
            "properties": {
                "name": {
                    "bsonType": "string",
                    "description": "User name",
                    "maxLength": 100
                },
                "email": {
                    "bsonType": "string"
                },
                "age": {
                    "bsonType": "int",
                    "minimum": 0,
                    "maximum": 150
                }
            }
        });
        let schema = parse_mongodb_schema(&validator_doc("users", body)).expect("should parse");

        for vertex_id in [
            "collection:users",
            "collection:users.name",
            "collection:users.email",
            "collection:users.age",
        ] {
            assert!(schema.has_vertex(vertex_id));
        }
    }

    /// Emission yields a `$jsonSchema` body and echoes the collection name.
    #[test]
    fn emit_minimal() {
        let body = serde_json::json!({
            "bsonType": "object",
            "properties": {
                "title": {"bsonType": "string"}
            }
        });
        let schema = parse_mongodb_schema(&validator_doc("items", body)).expect("should parse");
        let emitted = emit_mongodb_schema(&schema).expect("should emit");

        assert!(emitted.get("$jsonSchema").is_some());
        assert_eq!(emitted["collection"].as_str(), Some("items"));
    }

    /// Parse -> emit -> parse keeps the number of vertices stable.
    #[test]
    fn roundtrip() {
        let body = serde_json::json!({
            "bsonType": "object",
            "properties": {
                "name": {"bsonType": "string"},
                "price": {"bsonType": "double"}
            }
        });
        let first = parse_mongodb_schema(&validator_doc("products", body)).expect("parse");
        let emitted = emit_mongodb_schema(&first).expect("emit");
        let second = parse_mongodb_schema(&emitted).expect("re-parse");

        assert_eq!(first.vertices.len(), second.vertices.len());
    }
}