1use std::collections::HashMap;
9
10use panproto_gat::Name;
11use rustc_hash::FxHashSet;
12use smallvec::SmallVec;
13
14use crate::error::SchemaError;
15use crate::protocol::Protocol;
16use crate::schema::{CoercionSpec, Constraint, Edge, HyperEdge, Schema, Vertex};
17use panproto_expr::Expr;
18
19pub struct SchemaBuilder {
31 protocol: Protocol,
32 vertices: HashMap<Name, Vertex>,
33 edges: Vec<Edge>,
34 hyper_edges: HashMap<Name, HyperEdge>,
35 constraints: HashMap<Name, Vec<Constraint>>,
36 required: HashMap<Name, Vec<Edge>>,
37 nsids: HashMap<Name, Name>,
38 edge_set: FxHashSet<(Name, Name, Name, Option<Name>)>,
39 coercions: HashMap<(Name, Name), CoercionSpec>,
40 mergers: HashMap<Name, Expr>,
41 defaults: HashMap<Name, Expr>,
42 policies: HashMap<Name, Expr>,
43}
44
45impl SchemaBuilder {
46 #[must_use]
48 pub fn new(protocol: &Protocol) -> Self {
49 Self {
50 protocol: protocol.clone(),
51 vertices: HashMap::new(),
52 edges: Vec::new(),
53 hyper_edges: HashMap::new(),
54 constraints: HashMap::new(),
55 required: HashMap::new(),
56 nsids: HashMap::new(),
57 edge_set: FxHashSet::default(),
58 coercions: HashMap::new(),
59 mergers: HashMap::new(),
60 defaults: HashMap::new(),
61 policies: HashMap::new(),
62 }
63 }
64
65 pub fn vertex(mut self, id: &str, kind: &str, nsid: Option<&str>) -> Result<Self, SchemaError> {
73 if self.vertices.contains_key(id) {
74 return Err(SchemaError::DuplicateVertex(id.to_owned()));
75 }
76
77 if (!self.protocol.obj_kinds.is_empty() || !self.protocol.edge_rules.is_empty())
81 && !self.protocol.is_known_vertex_kind(kind)
82 {
83 return Err(SchemaError::UnknownVertexKind(kind.to_owned()));
84 }
85
86 let vertex = Vertex {
87 id: Name::from(id),
88 kind: Name::from(kind),
89 nsid: nsid.map(Name::from),
90 };
91
92 if let Some(nsid_val) = nsid {
93 self.nsids.insert(Name::from(id), Name::from(nsid_val));
94 }
95
96 self.vertices.insert(Name::from(id), vertex);
97 Ok(self)
98 }
99
100 pub fn edge(
112 mut self,
113 src: &str,
114 tgt: &str,
115 kind: &str,
116 name: Option<&str>,
117 ) -> Result<Self, SchemaError> {
118 let src_vertex = self
119 .vertices
120 .get(src)
121 .ok_or_else(|| SchemaError::VertexNotFound(src.to_owned()))?;
122 let tgt_vertex = self
123 .vertices
124 .get(tgt)
125 .ok_or_else(|| SchemaError::VertexNotFound(tgt.to_owned()))?;
126
127 if let Some(rule) = self.protocol.find_edge_rule(kind) {
129 if !rule.src_kinds.is_empty()
131 && !rule.src_kinds.iter().any(|k| k == src_vertex.kind.as_ref())
132 {
133 return Err(SchemaError::InvalidEdgeSource {
134 kind: kind.to_owned(),
135 src_kind: src_vertex.kind.to_string(),
136 permitted: rule.src_kinds.join(", "),
137 });
138 }
139 if !rule.tgt_kinds.is_empty()
141 && !rule.tgt_kinds.iter().any(|k| k == tgt_vertex.kind.as_ref())
142 {
143 return Err(SchemaError::InvalidEdgeTarget {
144 kind: kind.to_owned(),
145 tgt_kind: tgt_vertex.kind.to_string(),
146 permitted: rule.tgt_kinds.join(", "),
147 });
148 }
149 } else if !self.protocol.edge_rules.is_empty() {
150 return Err(SchemaError::UnknownEdgeKind(kind.to_owned()));
152 }
153
154 let edge_key = (
155 Name::from(src),
156 Name::from(tgt),
157 Name::from(kind),
158 name.map(Name::from),
159 );
160 if !self.edge_set.insert(edge_key) {
161 return Err(SchemaError::DuplicateEdge {
162 src: src.to_owned(),
163 tgt: tgt.to_owned(),
164 kind: kind.to_owned(),
165 });
166 }
167
168 let edge = Edge {
169 src: Name::from(src),
170 tgt: Name::from(tgt),
171 kind: Name::from(kind),
172 name: name.map(Name::from),
173 };
174 self.edges.push(edge);
175 Ok(self)
176 }
177
178 pub fn hyper_edge(
186 mut self,
187 id: &str,
188 kind: &str,
189 sig: HashMap<String, String>,
190 parent: &str,
191 ) -> Result<Self, SchemaError> {
192 if self.hyper_edges.contains_key(id) {
193 return Err(SchemaError::DuplicateHyperEdge(id.to_owned()));
194 }
195
196 for (label, vertex_id) in &sig {
198 if !self.vertices.contains_key(vertex_id.as_str()) {
199 return Err(SchemaError::VertexNotFound(format!(
200 "{vertex_id} (in hyper-edge {id}, label {label})"
201 )));
202 }
203 }
204
205 let name_sig: HashMap<Name, Name> = sig
206 .into_iter()
207 .map(|(k, v)| (Name::from(k), Name::from(v)))
208 .collect();
209
210 let hyper_edge = HyperEdge {
211 id: Name::from(id),
212 kind: Name::from(kind),
213 signature: name_sig,
214 parent_label: Name::from(parent),
215 };
216 self.hyper_edges.insert(Name::from(id), hyper_edge);
217 Ok(self)
218 }
219
220 #[must_use]
225 pub fn constraint(mut self, vertex: &str, sort: &str, value: &str) -> Self {
226 self.constraints
227 .entry(Name::from(vertex))
228 .or_default()
229 .push(Constraint {
230 sort: Name::from(sort),
231 value: value.to_owned(),
232 });
233 self
234 }
235
236 #[must_use]
238 pub fn required(mut self, vertex: &str, edges: Vec<Edge>) -> Self {
239 self.required
240 .entry(Name::from(vertex))
241 .or_default()
242 .extend(edges);
243 self
244 }
245
246 #[must_use]
248 pub fn coercion(mut self, source_kind: &str, target_kind: &str, spec: CoercionSpec) -> Self {
249 self.coercions
250 .insert((Name::from(source_kind), Name::from(target_kind)), spec);
251 self
252 }
253
254 #[must_use]
256 pub fn merger(mut self, vertex_id: &str, expr: Expr) -> Self {
257 self.mergers.insert(Name::from(vertex_id), expr);
258 self
259 }
260
261 #[must_use]
263 pub fn default_expr(mut self, vertex_id: &str, expr: Expr) -> Self {
264 self.defaults.insert(Name::from(vertex_id), expr);
265 self
266 }
267
268 #[must_use]
270 pub fn policy(mut self, sort_name: &str, expr: Expr) -> Self {
271 self.policies.insert(Name::from(sort_name), expr);
272 self
273 }
274
275 pub fn build(self) -> Result<Schema, SchemaError> {
282 if self.vertices.is_empty() {
283 return Err(SchemaError::EmptySchema);
284 }
285
286 let mut edge_map: HashMap<Edge, Name> = HashMap::with_capacity(self.edges.len());
288 let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
289 let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
290 let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
291
292 for edge in &self.edges {
293 edge_map.insert(edge.clone(), edge.kind.clone());
294
295 outgoing
296 .entry(edge.src.clone())
297 .or_default()
298 .push(edge.clone());
299
300 incoming
301 .entry(edge.tgt.clone())
302 .or_default()
303 .push(edge.clone());
304
305 between
306 .entry((edge.src.clone(), edge.tgt.clone()))
307 .or_default()
308 .push(edge.clone());
309 }
310
311 Ok(Schema {
312 protocol: self.protocol.name.clone(),
313 vertices: self.vertices,
314 edges: edge_map,
315 hyper_edges: self.hyper_edges,
316 constraints: self.constraints,
317 required: self.required,
318 nsids: self.nsids,
319 variants: HashMap::new(),
320 orderings: HashMap::new(),
321 recursion_points: HashMap::new(),
322 spans: HashMap::new(),
323 usage_modes: HashMap::new(),
324 nominal: HashMap::new(),
325 coercions: self.coercions,
326 mergers: self.mergers,
327 defaults: self.defaults,
328 policies: self.policies,
329 outgoing,
330 incoming,
331 between,
332 })
333 }
334}
335
336#[cfg(test)]
337#[allow(clippy::unwrap_used, clippy::expect_used)]
338mod tests {
339 use super::*;
340 use crate::protocol::EdgeRule;
341
342 fn atproto_protocol() -> Protocol {
344 Protocol {
345 name: "atproto".to_owned(),
346 schema_theory: "ThATProtoSchema".to_owned(),
347 instance_theory: "ThWType".to_owned(),
348 edge_rules: vec![
349 EdgeRule {
350 edge_kind: "record-schema".to_owned(),
351 src_kinds: vec!["record".to_owned()],
352 tgt_kinds: vec!["object".to_owned()],
353 },
354 EdgeRule {
355 edge_kind: "prop".to_owned(),
356 src_kinds: vec!["object".to_owned()],
357 tgt_kinds: vec![
358 "string".to_owned(),
359 "integer".to_owned(),
360 "object".to_owned(),
361 "ref".to_owned(),
362 "array".to_owned(),
363 "union".to_owned(),
364 "boolean".to_owned(),
365 ],
366 },
367 ],
368 obj_kinds: vec![
369 "record".to_owned(),
370 "object".to_owned(),
371 "string".to_owned(),
372 "integer".to_owned(),
373 "ref".to_owned(),
374 "array".to_owned(),
375 "union".to_owned(),
376 "boolean".to_owned(),
377 ],
378 constraint_sorts: vec![
379 "maxLength".to_owned(),
380 "minLength".to_owned(),
381 "format".to_owned(),
382 "minimum".to_owned(),
383 "maximum".to_owned(),
384 ],
385 ..Protocol::default()
386 }
387 }
388
/// Happy path: three vertices, two edges and one constraint build into a
/// schema whose counts, indices, NSID map and constraints match the input.
#[test]
fn build_atproto_schema() {
    let protocol = atproto_protocol();

    let with_vertices = SchemaBuilder::new(&protocol)
        .vertex("post", "record", Some("app.bsky.feed.post"))
        .expect("vertex post")
        .vertex("post:body", "object", None)
        .expect("vertex body")
        .vertex("post:body.text", "string", None)
        .expect("vertex text");

    let with_edges = with_vertices
        .edge("post", "post:body", "record-schema", None)
        .expect("edge record-schema")
        .edge("post:body", "post:body.text", "prop", Some("text"))
        .expect("edge prop");

    let schema = with_edges
        .constraint("post:body.text", "maxLength", "3000")
        .build()
        .expect("build");

    assert_eq!(schema.vertex_count(), 3);
    assert_eq!(schema.edge_count(), 2);
    assert_eq!(schema.outgoing_edges("post").len(), 1);
    assert_eq!(schema.incoming_edges("post:body").len(), 1);

    let post_nsid: Option<&str> = schema.nsids.get("post").map(AsRef::as_ref);
    assert_eq!(post_nsid, Some("app.bsky.feed.post"));

    let text_constraint_count = schema
        .constraints
        .get("post:body.text")
        .map(|constraints| constraints.len());
    assert_eq!(text_constraint_count, Some(1));
}
420
/// A `record-schema` edge whose source is a `string` vertex violates the
/// rule's permitted source kinds and must be rejected.
#[test]
fn invalid_edge_rejected() {
    let protocol = atproto_protocol();

    let builder = SchemaBuilder::new(&protocol)
        .vertex("s", "string", None)
        .expect("vertex string")
        .vertex("i", "integer", None)
        .expect("vertex integer");
    let outcome = builder.edge("s", "i", "record-schema", None);

    let rejected_for_source = matches!(outcome, Err(SchemaError::InvalidEdgeSource { .. }));
    assert!(rejected_for_source, "expected InvalidEdgeSource");
}
437
/// Adding a second vertex with an id that is already present must fail.
#[test]
fn duplicate_vertex_rejected() {
    let protocol = atproto_protocol();

    let builder = SchemaBuilder::new(&protocol)
        .vertex("v", "record", None)
        .expect("first vertex");
    let outcome = builder.vertex("v", "record", None);

    let rejected_as_duplicate = matches!(outcome, Err(SchemaError::DuplicateVertex(_)));
    assert!(rejected_as_duplicate, "expected DuplicateVertex");
}
451
/// An edge whose target vertex was never added must be rejected.
#[test]
fn edge_to_missing_vertex_rejected() {
    let protocol = atproto_protocol();

    let builder = SchemaBuilder::new(&protocol)
        .vertex("a", "record", None)
        .expect("vertex a");
    let outcome = builder.edge("a", "missing", "record-schema", None);

    let rejected_as_missing = matches!(outcome, Err(SchemaError::VertexNotFound(_)));
    assert!(rejected_as_missing, "expected VertexNotFound");
}
465
/// Building without adding any vertex must fail with `EmptySchema`.
#[test]
fn empty_schema_rejected() {
    let protocol = atproto_protocol();

    let outcome = SchemaBuilder::new(&protocol).build();

    let rejected_as_empty = matches!(outcome, Err(SchemaError::EmptySchema));
    assert!(rejected_as_empty, "expected EmptySchema");
}
475
/// The `(src, tgt)` index is directional: one edge from `r` to `o` shows up
/// for `("r", "o")` and not for `("o", "r")`.
#[test]
fn between_index_works() {
    let protocol = atproto_protocol();

    let builder = SchemaBuilder::new(&protocol)
        .vertex("r", "record", None)
        .expect("vertex r")
        .vertex("o", "object", None)
        .expect("vertex o")
        .edge("r", "o", "record-schema", None)
        .expect("edge");
    let schema = builder.build().expect("build");

    let forward = schema.edges_between("r", "o");
    let backward = schema.edges_between("o", "r");
    assert_eq!(forward.len(), 1);
    assert_eq!(backward.len(), 0);
}
492}