Skip to main content

panproto_schema/
builder.rs

//! Incremental schema construction with protocol-aware validation.
//!
//! [`SchemaBuilder`] provides a fluent API for constructing a [`Schema`].
//! Each `vertex()` call is validated against the [`Protocol`]'s known vertex
//! kinds, and each `edge()` call against its edge rules, before the element
//! is accepted. The final `build()` call computes adjacency indices and
//! returns the finished schema.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use rustc_hash::FxHashSet;
12use smallvec::SmallVec;
13
14use crate::error::SchemaError;
15use crate::protocol::Protocol;
16use crate::schema::{CoercionSpec, Constraint, Edge, HyperEdge, Schema, Vertex};
17use panproto_expr::Expr;
18
19/// A builder for incrementally constructing a validated [`Schema`].
20///
21/// # Example
22///
23/// ```ignore
24/// let schema = SchemaBuilder::new(&protocol)
25///     .vertex("post", "record", Some("app.bsky.feed.post"))?
26///     .vertex("post:body", "object", None)?
27///     .edge("post", "post:body", "record-schema", None)?
28///     .build()?;
29/// ```
30pub struct SchemaBuilder {
31    protocol: Protocol,
32    vertices: HashMap<Name, Vertex>,
33    edges: Vec<Edge>,
34    hyper_edges: HashMap<Name, HyperEdge>,
35    constraints: HashMap<Name, Vec<Constraint>>,
36    required: HashMap<Name, Vec<Edge>>,
37    nsids: HashMap<Name, Name>,
38    edge_set: FxHashSet<(Name, Name, Name, Option<Name>)>,
39    coercions: HashMap<(Name, Name), CoercionSpec>,
40    mergers: HashMap<Name, Expr>,
41    defaults: HashMap<Name, Expr>,
42    policies: HashMap<Name, Expr>,
43}
44
45impl SchemaBuilder {
46    /// Create a new builder for the given protocol.
47    #[must_use]
48    pub fn new(protocol: &Protocol) -> Self {
49        Self {
50            protocol: protocol.clone(),
51            vertices: HashMap::new(),
52            edges: Vec::new(),
53            hyper_edges: HashMap::new(),
54            constraints: HashMap::new(),
55            required: HashMap::new(),
56            nsids: HashMap::new(),
57            edge_set: FxHashSet::default(),
58            coercions: HashMap::new(),
59            mergers: HashMap::new(),
60            defaults: HashMap::new(),
61            policies: HashMap::new(),
62        }
63    }
64
65    /// Add a vertex to the schema.
66    ///
67    /// # Errors
68    ///
69    /// Returns [`SchemaError::DuplicateVertex`] if a vertex with the same ID
70    /// already exists, or [`SchemaError::UnknownVertexKind`] if the kind is
71    /// not recognized by the protocol.
72    pub fn vertex(mut self, id: &str, kind: &str, nsid: Option<&str>) -> Result<Self, SchemaError> {
73        if self.vertices.contains_key(id) {
74            return Err(SchemaError::DuplicateVertex(id.to_owned()));
75        }
76
77        // Validate vertex kind against the protocol if the protocol
78        // has any known kinds at all. If no kinds are declared,
79        // we allow anything (open protocol).
80        if (!self.protocol.obj_kinds.is_empty() || !self.protocol.edge_rules.is_empty())
81            && !self.protocol.is_known_vertex_kind(kind)
82        {
83            return Err(SchemaError::UnknownVertexKind(kind.to_owned()));
84        }
85
86        let vertex = Vertex {
87            id: Name::from(id),
88            kind: Name::from(kind),
89            nsid: nsid.map(Name::from),
90        };
91
92        if let Some(nsid_val) = nsid {
93            self.nsids.insert(Name::from(id), Name::from(nsid_val));
94        }
95
96        self.vertices.insert(Name::from(id), vertex);
97        Ok(self)
98    }
99
100    /// Add a binary edge to the schema.
101    ///
102    /// Validates that:
103    /// - Both `src` and `tgt` vertices exist
104    /// - The edge kind is recognized by the protocol
105    /// - The source and target vertex kinds satisfy the edge rule
106    ///
107    /// # Errors
108    ///
109    /// Returns [`SchemaError::VertexNotFound`], [`SchemaError::UnknownEdgeKind`],
110    /// [`SchemaError::InvalidEdgeSource`], or [`SchemaError::InvalidEdgeTarget`].
111    pub fn edge(
112        mut self,
113        src: &str,
114        tgt: &str,
115        kind: &str,
116        name: Option<&str>,
117    ) -> Result<Self, SchemaError> {
118        let src_vertex = self
119            .vertices
120            .get(src)
121            .ok_or_else(|| SchemaError::VertexNotFound(src.to_owned()))?;
122        let tgt_vertex = self
123            .vertices
124            .get(tgt)
125            .ok_or_else(|| SchemaError::VertexNotFound(tgt.to_owned()))?;
126
127        // Validate against edge rules (if any rules are defined).
128        if let Some(rule) = self.protocol.find_edge_rule(kind) {
129            // Check source kind constraint.
130            if !rule.src_kinds.is_empty()
131                && !rule.src_kinds.iter().any(|k| k == src_vertex.kind.as_ref())
132            {
133                return Err(SchemaError::InvalidEdgeSource {
134                    kind: kind.to_owned(),
135                    src_kind: src_vertex.kind.to_string(),
136                    permitted: rule.src_kinds.join(", "),
137                });
138            }
139            // Check target kind constraint.
140            if !rule.tgt_kinds.is_empty()
141                && !rule.tgt_kinds.iter().any(|k| k == tgt_vertex.kind.as_ref())
142            {
143                return Err(SchemaError::InvalidEdgeTarget {
144                    kind: kind.to_owned(),
145                    tgt_kind: tgt_vertex.kind.to_string(),
146                    permitted: rule.tgt_kinds.join(", "),
147                });
148            }
149        } else if !self.protocol.edge_rules.is_empty() {
150            // The protocol has rules but none matches this edge kind.
151            return Err(SchemaError::UnknownEdgeKind(kind.to_owned()));
152        }
153
154        let edge_key = (
155            Name::from(src),
156            Name::from(tgt),
157            Name::from(kind),
158            name.map(Name::from),
159        );
160        if !self.edge_set.insert(edge_key) {
161            return Err(SchemaError::DuplicateEdge {
162                src: src.to_owned(),
163                tgt: tgt.to_owned(),
164                kind: kind.to_owned(),
165            });
166        }
167
168        let edge = Edge {
169            src: Name::from(src),
170            tgt: Name::from(tgt),
171            kind: Name::from(kind),
172            name: name.map(Name::from),
173        };
174        self.edges.push(edge);
175        Ok(self)
176    }
177
178    /// Add a hyper-edge to the schema.
179    ///
180    /// # Errors
181    ///
182    /// Returns [`SchemaError::DuplicateHyperEdge`] if a hyper-edge with the
183    /// same ID already exists, or [`SchemaError::VertexNotFound`] if any
184    /// vertex in the signature is missing.
185    pub fn hyper_edge(
186        mut self,
187        id: &str,
188        kind: &str,
189        sig: HashMap<String, String>,
190        parent: &str,
191    ) -> Result<Self, SchemaError> {
192        if self.hyper_edges.contains_key(id) {
193            return Err(SchemaError::DuplicateHyperEdge(id.to_owned()));
194        }
195
196        // Validate all vertices in signature exist.
197        for (label, vertex_id) in &sig {
198            if !self.vertices.contains_key(vertex_id.as_str()) {
199                return Err(SchemaError::VertexNotFound(format!(
200                    "{vertex_id} (in hyper-edge {id}, label {label})"
201                )));
202            }
203        }
204
205        let name_sig: HashMap<Name, Name> = sig
206            .into_iter()
207            .map(|(k, v)| (Name::from(k), Name::from(v)))
208            .collect();
209
210        let hyper_edge = HyperEdge {
211            id: Name::from(id),
212            kind: Name::from(kind),
213            signature: name_sig,
214            parent_label: Name::from(parent),
215        };
216        self.hyper_edges.insert(Name::from(id), hyper_edge);
217        Ok(self)
218    }
219
220    /// Add a constraint to a vertex.
221    ///
222    /// Constraints are not validated during building; use [`validate`](crate::validate)
223    /// to check them against the protocol's constraint sorts.
224    #[must_use]
225    pub fn constraint(mut self, vertex: &str, sort: &str, value: &str) -> Self {
226        self.constraints
227            .entry(Name::from(vertex))
228            .or_default()
229            .push(Constraint {
230                sort: Name::from(sort),
231                value: value.to_owned(),
232            });
233        self
234    }
235
236    /// Declare required edges for a vertex.
237    #[must_use]
238    pub fn required(mut self, vertex: &str, edges: Vec<Edge>) -> Self {
239        self.required
240            .entry(Name::from(vertex))
241            .or_default()
242            .extend(edges);
243        self
244    }
245
246    /// Add a coercion specification for a `(source_kind, target_kind)` pair.
247    #[must_use]
248    pub fn coercion(mut self, source_kind: &str, target_kind: &str, spec: CoercionSpec) -> Self {
249        self.coercions
250            .insert((Name::from(source_kind), Name::from(target_kind)), spec);
251        self
252    }
253
254    /// Add a merger expression for a vertex.
255    #[must_use]
256    pub fn merger(mut self, vertex_id: &str, expr: Expr) -> Self {
257        self.mergers.insert(Name::from(vertex_id), expr);
258        self
259    }
260
261    /// Add a default value expression for a vertex.
262    #[must_use]
263    pub fn default_expr(mut self, vertex_id: &str, expr: Expr) -> Self {
264        self.defaults.insert(Name::from(vertex_id), expr);
265        self
266    }
267
268    /// Add a conflict resolution policy expression for a sort.
269    #[must_use]
270    pub fn policy(mut self, sort_name: &str, expr: Expr) -> Self {
271        self.policies.insert(Name::from(sort_name), expr);
272        self
273    }
274
275    /// Consume the builder and produce a validated [`Schema`] with
276    /// precomputed adjacency indices.
277    ///
278    /// # Errors
279    ///
280    /// Returns [`SchemaError::EmptySchema`] if no vertices were added.
281    pub fn build(self) -> Result<Schema, SchemaError> {
282        if self.vertices.is_empty() {
283            return Err(SchemaError::EmptySchema);
284        }
285
286        // Build edge map.
287        let mut edge_map: HashMap<Edge, Name> = HashMap::with_capacity(self.edges.len());
288        let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
289        let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
290        let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
291
292        for edge in &self.edges {
293            edge_map.insert(edge.clone(), edge.kind.clone());
294
295            outgoing
296                .entry(edge.src.clone())
297                .or_default()
298                .push(edge.clone());
299
300            incoming
301                .entry(edge.tgt.clone())
302                .or_default()
303                .push(edge.clone());
304
305            between
306                .entry((edge.src.clone(), edge.tgt.clone()))
307                .or_default()
308                .push(edge.clone());
309        }
310
311        Ok(Schema {
312            protocol: self.protocol.name.clone(),
313            vertices: self.vertices,
314            edges: edge_map,
315            hyper_edges: self.hyper_edges,
316            constraints: self.constraints,
317            required: self.required,
318            nsids: self.nsids,
319            variants: HashMap::new(),
320            orderings: HashMap::new(),
321            recursion_points: HashMap::new(),
322            spans: HashMap::new(),
323            usage_modes: HashMap::new(),
324            nominal: HashMap::new(),
325            coercions: self.coercions,
326            mergers: self.mergers,
327            defaults: self.defaults,
328            policies: self.policies,
329            outgoing,
330            incoming,
331            between,
332        })
333    }
334}
335
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::protocol::EdgeRule;

    /// Turn a slice of string literals into owned `String`s.
    fn owned(items: &[&str]) -> Vec<String> {
        items.iter().copied().map(str::to_owned).collect()
    }

    /// Minimal ATProto-like protocol fixture shared by the tests below.
    ///
    /// It declares two edge rules (`record-schema`: record -> object, and
    /// `prop`: object -> any value kind), eight vertex kinds, and five
    /// constraint sorts.
    fn atproto_protocol() -> Protocol {
        let record_schema = EdgeRule {
            edge_kind: "record-schema".to_owned(),
            src_kinds: owned(&["record"]),
            tgt_kinds: owned(&["object"]),
        };
        let prop = EdgeRule {
            edge_kind: "prop".to_owned(),
            src_kinds: owned(&["object"]),
            tgt_kinds: owned(&[
                "string", "integer", "object", "ref", "array", "union", "boolean",
            ]),
        };

        Protocol {
            name: "atproto".to_owned(),
            schema_theory: "ThATProtoSchema".to_owned(),
            instance_theory: "ThWType".to_owned(),
            edge_rules: vec![record_schema, prop],
            obj_kinds: owned(&[
                "record", "object", "string", "integer", "ref", "array", "union", "boolean",
            ]),
            constraint_sorts: owned(&["maxLength", "minLength", "format", "minimum", "maximum"]),
            ..Protocol::default()
        }
    }

    /// A full post schema builds and exposes the expected counts and indices.
    #[test]
    fn build_atproto_schema() {
        let proto = atproto_protocol();

        let with_vertices = SchemaBuilder::new(&proto)
            .vertex("post", "record", Some("app.bsky.feed.post"))
            .expect("vertex post")
            .vertex("post:body", "object", None)
            .expect("vertex body")
            .vertex("post:body.text", "string", None)
            .expect("vertex text");

        let with_edges = with_vertices
            .edge("post", "post:body", "record-schema", None)
            .expect("edge record-schema")
            .edge("post:body", "post:body.text", "prop", Some("text"))
            .expect("edge prop");

        let schema = with_edges
            .constraint("post:body.text", "maxLength", "3000")
            .build()
            .expect("build");

        assert_eq!(schema.vertex_count(), 3);
        assert_eq!(schema.edge_count(), 2);
        assert_eq!(schema.outgoing_edges("post").len(), 1);
        assert_eq!(schema.incoming_edges("post:body").len(), 1);

        let post_nsid = schema.nsids.get("post").map(AsRef::as_ref);
        assert_eq!(post_nsid, Some("app.bsky.feed.post"));

        let text_constraints = schema.constraints.get("post:body.text").map(Vec::len);
        assert_eq!(text_constraints, Some(1));
    }

    /// A `record-schema` edge whose source is not a record is refused.
    #[test]
    fn invalid_edge_rejected() {
        let proto = atproto_protocol();
        let builder = SchemaBuilder::new(&proto)
            .vertex("s", "string", None)
            .expect("vertex string")
            .vertex("i", "integer", None)
            .expect("vertex integer");

        // string -> integer violates the record-schema rule on the source side.
        let result = builder.edge("s", "i", "record-schema", None);

        assert!(
            matches!(result, Err(SchemaError::InvalidEdgeSource { .. })),
            "expected InvalidEdgeSource"
        );
    }

    /// Re-adding a vertex ID is refused.
    #[test]
    fn duplicate_vertex_rejected() {
        let proto = atproto_protocol();
        let builder = SchemaBuilder::new(&proto)
            .vertex("v", "record", None)
            .expect("first vertex");

        let result = builder.vertex("v", "record", None);

        assert!(
            matches!(result, Err(SchemaError::DuplicateVertex(_))),
            "expected DuplicateVertex"
        );
    }

    /// An edge pointing at a vertex that was never added is refused.
    #[test]
    fn edge_to_missing_vertex_rejected() {
        let proto = atproto_protocol();
        let builder = SchemaBuilder::new(&proto)
            .vertex("a", "record", None)
            .expect("vertex a");

        let result = builder.edge("a", "missing", "record-schema", None);

        assert!(
            matches!(result, Err(SchemaError::VertexNotFound(_))),
            "expected VertexNotFound"
        );
    }

    /// Building without any vertex is refused.
    #[test]
    fn empty_schema_rejected() {
        let proto = atproto_protocol();
        let builder = SchemaBuilder::new(&proto);

        let result = builder.build();

        assert!(
            matches!(result, Err(SchemaError::EmptySchema)),
            "expected EmptySchema"
        );
    }

    /// The `(src, tgt)` index is directional.
    #[test]
    fn between_index_works() {
        let proto = atproto_protocol();
        let builder = SchemaBuilder::new(&proto)
            .vertex("r", "record", None)
            .expect("vertex r")
            .vertex("o", "object", None)
            .expect("vertex o")
            .edge("r", "o", "record-schema", None)
            .expect("edge");

        let schema = builder.build().expect("build");

        assert_eq!(schema.edges_between("r", "o").len(), 1);
        assert_eq!(schema.edges_between("o", "r").len(), 0);
    }
}