1use std::collections::HashMap;
13
14use panproto_gat::Theory;
15use panproto_schema::{EdgeRule, Protocol, Schema, SchemaBuilder};
16
17use crate::emit::{children_by_edge, constraint_value, find_roots};
18use crate::error::ProtocolError;
19use crate::theories;
20
21#[must_use]
23pub fn protocol() -> Protocol {
24 Protocol {
25 name: "mongodb".into(),
26 schema_theory: "ThMongoDBSchema".into(),
27 instance_theory: "ThMongoDBInstance".into(),
28 edge_rules: edge_rules(),
29 obj_kinds: vec![
30 "collection".into(),
31 "field".into(),
32 "object".into(),
33 "array".into(),
34 "string".into(),
35 "int".into(),
36 "long".into(),
37 "double".into(),
38 "decimal".into(),
39 "bool".into(),
40 "date".into(),
41 "timestamp".into(),
42 "objectId".into(),
43 "binary".into(),
44 "regex".into(),
45 "null".into(),
46 ],
47 constraint_sorts: vec![
48 "required".into(),
49 "bsonType".into(),
50 "enum".into(),
51 "minimum".into(),
52 "maximum".into(),
53 "minLength".into(),
54 "maxLength".into(),
55 "pattern".into(),
56 "description".into(),
57 ],
58 has_order: true,
59 has_recursion: true,
60 ..Protocol::default()
61 }
62}
63
64pub fn register_theories<S: ::std::hash::BuildHasher>(registry: &mut HashMap<String, Theory, S>) {
66 theories::register_constrained_multigraph_wtype(
67 registry,
68 "ThMongoDBSchema",
69 "ThMongoDBInstance",
70 );
71}
72
73pub fn parse_mongodb_schema(json: &serde_json::Value) -> Result<Schema, ProtocolError> {
82 let proto = protocol();
83 let mut builder = SchemaBuilder::new(&proto);
84
85 let schema_body = json
87 .get("$jsonSchema")
88 .or_else(|| json.get("validator").and_then(|v| v.get("$jsonSchema")))
89 .unwrap_or(json);
90
91 let collection_name = json
92 .get("collection")
93 .and_then(serde_json::Value::as_str)
94 .unwrap_or("root");
95
96 let collection_id = format!("collection:{collection_name}");
97 builder = builder.vertex(&collection_id, "collection", None)?;
98
99 if let Some(desc) = schema_body
100 .get("description")
101 .and_then(serde_json::Value::as_str)
102 {
103 builder = builder.constraint(&collection_id, "description", desc);
104 }
105
106 builder = walk_bson_schema(builder, schema_body, &collection_id)?;
108
109 let schema = builder.build()?;
110 Ok(schema)
111}
112
113fn walk_bson_schema(
115 mut builder: SchemaBuilder,
116 schema: &serde_json::Value,
117 parent_id: &str,
118) -> Result<SchemaBuilder, ProtocolError> {
119 let required_fields: Vec<&str> = schema
120 .get("required")
121 .and_then(serde_json::Value::as_array)
122 .map(|arr| arr.iter().filter_map(serde_json::Value::as_str).collect())
123 .unwrap_or_default();
124
125 if let Some(properties) = schema
126 .get("properties")
127 .and_then(serde_json::Value::as_object)
128 {
129 for (prop_name, prop_schema) in properties {
130 let prop_id = format!("{parent_id}.{prop_name}");
131
132 let bson_type = prop_schema
133 .get("bsonType")
134 .and_then(serde_json::Value::as_str)
135 .unwrap_or("object");
136
137 let kind = bson_type_to_kind(bson_type);
138 builder = builder.vertex(&prop_id, &kind, None)?;
139 builder = builder.edge(parent_id, &prop_id, "prop", Some(prop_name))?;
140
141 if required_fields.contains(&prop_name.as_str()) {
142 builder = builder.constraint(&prop_id, "required", "true");
143 }
144
145 for field in &["minimum", "maximum", "minLength", "maxLength", "pattern"] {
147 if let Some(val) = prop_schema.get(field) {
148 let val_str = match val {
149 serde_json::Value::String(s) => s.clone(),
150 serde_json::Value::Number(n) => n.to_string(),
151 _ => val.to_string(),
152 };
153 builder = builder.constraint(&prop_id, field, &val_str);
154 }
155 }
156
157 if let Some(desc) = prop_schema
158 .get("description")
159 .and_then(serde_json::Value::as_str)
160 {
161 builder = builder.constraint(&prop_id, "description", desc);
162 }
163
164 if let Some(enum_val) = prop_schema
165 .get("enum")
166 .and_then(serde_json::Value::as_array)
167 {
168 let vals: Vec<String> = enum_val
169 .iter()
170 .map(|v| v.as_str().map_or_else(|| v.to_string(), String::from))
171 .collect();
172 builder = builder.constraint(&prop_id, "enum", &vals.join(","));
173 }
174
175 if bson_type == "object" {
177 builder = walk_bson_schema(builder, prop_schema, &prop_id)?;
178 }
179
180 if bson_type == "array" {
182 if let Some(items) = prop_schema.get("items") {
183 let items_id = format!("{prop_id}:items");
184 let items_type = items
185 .get("bsonType")
186 .and_then(serde_json::Value::as_str)
187 .unwrap_or("object");
188 let items_kind = bson_type_to_kind(items_type);
189 builder = builder.vertex(&items_id, &items_kind, None)?;
190 builder = builder.edge(&prop_id, &items_id, "items", None)?;
191
192 if items_type == "object" {
193 builder = walk_bson_schema(builder, items, &items_id)?;
194 }
195 }
196 }
197
198 if let Some(serde_json::Value::Array(types)) = prop_schema.get("bsonType") {
200 for (i, t) in types.iter().enumerate() {
202 if let Some(t_str) = t.as_str() {
203 if i > 0 {
204 let variant_id = format!("{prop_id}:variant{i}");
205 let variant_kind = bson_type_to_kind(t_str);
206 builder = builder.vertex(&variant_id, &variant_kind, None)?;
207 builder =
208 builder.edge(&prop_id, &variant_id, "variant", Some(t_str))?;
209 }
210 }
211 }
212 }
213 }
214 }
215
216 Ok(builder)
217}
218
/// Maps a BSON type name to the vertex kind used in the schema graph.
///
/// Recognised scalar and `array` type names map to a kind of the same
/// name, `binData` maps to `binary`, and every other name (including
/// `object` itself) maps to `object`.
fn bson_type_to_kind(bson_type: &str) -> String {
    /// Type names whose vertex kind has exactly the same spelling.
    const SAME_NAME_KINDS: &[&str] = &[
        "string",
        "int",
        "long",
        "double",
        "decimal",
        "bool",
        "date",
        "timestamp",
        "objectId",
        "binary",
        "regex",
        "null",
        "array",
    ];

    let kind = if bson_type == "binData" {
        "binary"
    } else if SAME_NAME_KINDS.contains(&bson_type) {
        bson_type
    } else {
        "object"
    };
    kind.to_owned()
}
239
240pub fn emit_mongodb_schema(schema: &Schema) -> Result<serde_json::Value, ProtocolError> {
246 let roots = find_roots(schema, &["prop", "items", "variant"]);
247
248 let collection_root = roots
250 .iter()
251 .find(|v| v.kind == "collection")
252 .ok_or_else(|| ProtocolError::Emit("no collection vertex found".into()))?;
253
254 let collection_name = collection_root
255 .id
256 .strip_prefix("collection:")
257 .unwrap_or(&collection_root.id);
258
259 let json_schema = emit_bson_object(schema, &collection_root.id);
260
261 let mut result = serde_json::Map::new();
262 result.insert(
263 "collection".into(),
264 serde_json::Value::String(collection_name.to_string()),
265 );
266 result.insert("$jsonSchema".into(), json_schema);
267
268 Ok(serde_json::Value::Object(result))
269}
270
271fn emit_bson_object(schema: &Schema, vertex_id: &str) -> serde_json::Value {
273 let mut obj = serde_json::Map::new();
274 obj.insert(
275 "bsonType".into(),
276 serde_json::Value::String("object".into()),
277 );
278
279 let children = children_by_edge(schema, vertex_id, "prop");
280 if children.is_empty() {
281 return serde_json::Value::Object(obj);
282 }
283
284 let mut properties = serde_json::Map::new();
285 let mut required_list = Vec::new();
286
287 for (edge, child) in &children {
288 let name = edge.name.as_deref().unwrap_or("");
289 let mut prop_obj = serde_json::Map::new();
290
291 let bson_type = match child.kind.as_str() {
292 "string" => "string",
293 "int" => "int",
294 "long" => "long",
295 "double" => "double",
296 "decimal" => "decimal",
297 "bool" => "bool",
298 "date" => "date",
299 "timestamp" => "timestamp",
300 "objectId" => "objectId",
301 "binary" => "binary",
302 "regex" => "regex",
303 "null" => "null",
304 "array" => "array",
305 _ => "object",
306 };
307 prop_obj.insert(
308 "bsonType".into(),
309 serde_json::Value::String(bson_type.into()),
310 );
311
312 if constraint_value(schema, &child.id, "required") == Some("true") {
313 required_list.push(serde_json::Value::String(name.to_string()));
314 }
315
316 for field in &["minimum", "maximum", "minLength", "maxLength", "pattern"] {
317 if let Some(val) = constraint_value(schema, &child.id, field) {
318 if let Ok(n) = val.parse::<f64>() {
319 prop_obj.insert((*field).into(), serde_json::json!(n));
320 } else {
321 prop_obj.insert((*field).into(), serde_json::Value::String(val.to_string()));
322 }
323 }
324 }
325
326 if let Some(desc) = constraint_value(schema, &child.id, "description") {
327 prop_obj.insert(
328 "description".into(),
329 serde_json::Value::String(desc.to_string()),
330 );
331 }
332
333 if bson_type == "object" {
335 let nested = emit_bson_object(schema, &child.id);
336 if let Some(nested_obj) = nested.as_object() {
337 if let Some(nested_props) = nested_obj.get("properties") {
338 prop_obj.insert("properties".into(), nested_props.clone());
339 }
340 }
341 }
342
343 properties.insert(name.to_string(), serde_json::Value::Object(prop_obj));
344 }
345
346 obj.insert("properties".into(), serde_json::Value::Object(properties));
347 if !required_list.is_empty() {
348 obj.insert("required".into(), serde_json::Value::Array(required_list));
349 }
350
351 serde_json::Value::Object(obj)
352}
353
354fn edge_rules() -> Vec<EdgeRule> {
356 vec![
357 EdgeRule {
358 edge_kind: "prop".into(),
359 src_kinds: vec!["collection".into(), "object".into()],
360 tgt_kinds: vec![],
361 },
362 EdgeRule {
363 edge_kind: "items".into(),
364 src_kinds: vec!["array".into()],
365 tgt_kinds: vec![],
366 },
367 EdgeRule {
368 edge_kind: "variant".into(),
369 src_kinds: vec![],
370 tgt_kinds: vec![],
371 },
372 ]
373}
374
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;

    /// The protocol descriptor carries the expected name and theory names.
    #[test]
    fn protocol_def() {
        let proto = protocol();
        assert_eq!(proto.name, "mongodb");
        assert_eq!(proto.schema_theory, "ThMongoDBSchema");
        assert_eq!(proto.instance_theory, "ThMongoDBInstance");
    }

    /// Both theories are present in the registry after registration.
    #[test]
    fn register_theories_works() {
        let mut registry = HashMap::new();
        register_theories(&mut registry);
        for theory in ["ThMongoDBSchema", "ThMongoDBInstance"] {
            assert!(registry.contains_key(theory));
        }
    }

    /// A flat schema yields one vertex for the collection and one per property.
    #[test]
    fn parse_minimal() {
        let input = serde_json::json!({
            "collection": "users",
            "$jsonSchema": {
                "bsonType": "object",
                "required": ["name", "email"],
                "properties": {
                    "name": {
                        "bsonType": "string",
                        "description": "User name",
                        "maxLength": 100
                    },
                    "email": {
                        "bsonType": "string"
                    },
                    "age": {
                        "bsonType": "int",
                        "minimum": 0,
                        "maximum": 150
                    }
                }
            }
        });
        let schema = parse_mongodb_schema(&input).expect("should parse");
        for vertex_id in [
            "collection:users",
            "collection:users.name",
            "collection:users.email",
            "collection:users.age",
        ] {
            assert!(schema.has_vertex(vertex_id));
        }
    }

    /// Emitting produces a `$jsonSchema` body and the collection name.
    #[test]
    fn emit_minimal() {
        let input = serde_json::json!({
            "collection": "items",
            "$jsonSchema": {
                "bsonType": "object",
                "properties": {
                    "title": {"bsonType": "string"}
                }
            }
        });
        let schema = parse_mongodb_schema(&input).expect("should parse");
        let output = emit_mongodb_schema(&schema).expect("should emit");
        assert!(output.get("$jsonSchema").is_some());
        assert_eq!(
            output
                .get("collection")
                .and_then(serde_json::Value::as_str),
            Some("items")
        );
    }

    /// Parsing the emitted document again yields the same number of vertices.
    #[test]
    fn roundtrip() {
        let input = serde_json::json!({
            "collection": "products",
            "$jsonSchema": {
                "bsonType": "object",
                "properties": {
                    "name": {"bsonType": "string"},
                    "price": {"bsonType": "double"}
                }
            }
        });
        let first = parse_mongodb_schema(&input).expect("parse");
        let output = emit_mongodb_schema(&first).expect("emit");
        let second = parse_mongodb_schema(&output).expect("re-parse");
        assert_eq!(first.vertices.len(), second.vertices.len());
    }
}