use std::collections::HashMap;
use panproto_gat::Name;
use rustc_hash::FxHashSet;
use smallvec::SmallVec;
use crate::error::SchemaError;
use crate::protocol::Protocol;
use crate::schema::{CoercionSpec, Constraint, Edge, HyperEdge, Schema, Vertex};
use panproto_expr::Expr;
/// Incrementally assembles a [`Schema`] for a given [`Protocol`].
///
/// Each method takes the builder by value and returns it again, so calls can
/// be chained; `build` consumes the builder and produces the finished schema.
pub struct SchemaBuilder {
    /// Copy of the protocol consulted when vertices and edges are added.
    protocol: Protocol,

    // --- graph elements -------------------------------------------------
    /// Vertices added so far, keyed by vertex id.
    vertices: HashMap<Name, Vertex>,
    /// Binary edges in the order they were added.
    edges: Vec<Edge>,
    /// `(src, tgt, kind, name)` keys of every edge in `edges`, used to reject
    /// duplicates without scanning the vector.
    edge_set: FxHashSet<(Name, Name, Name, Option<Name>)>,
    /// Hyper-edges added so far, keyed by hyper-edge id.
    hyper_edges: HashMap<Name, HyperEdge>,

    // --- per-vertex annotations ------------------------------------------
    /// Constraints accumulated per vertex name.
    constraints: HashMap<Name, Vec<Constraint>>,
    /// Edges marked as required, accumulated per vertex name.
    required: HashMap<Name, Vec<Edge>>,
    /// Vertex id to NSID, for vertices that were given one.
    nsids: HashMap<Name, Name>,

    // --- expressions and conversions -------------------------------------
    /// Coercion specs keyed by `(source_kind, target_kind)`.
    coercions: HashMap<(Name, Name), CoercionSpec>,
    /// Merger expressions keyed by vertex id.
    mergers: HashMap<Name, Expr>,
    /// Default-value expressions keyed by vertex id.
    defaults: HashMap<Name, Expr>,
    /// Policy expressions keyed by sort name.
    policies: HashMap<Name, Expr>,
}
impl SchemaBuilder {
/// Creates an empty builder for `protocol`.
///
/// The protocol is cloned into the builder, so the caller keeps its own copy.
#[must_use]
pub fn new(protocol: &Protocol) -> Self {
    let protocol = protocol.clone();
    Self {
        protocol,
        vertices: HashMap::default(),
        edges: Vec::default(),
        hyper_edges: HashMap::default(),
        constraints: HashMap::default(),
        required: HashMap::default(),
        nsids: HashMap::default(),
        edge_set: FxHashSet::default(),
        coercions: HashMap::default(),
        mergers: HashMap::default(),
        defaults: HashMap::default(),
        policies: HashMap::default(),
    }
}
/// Adds a vertex with the given `id` and `kind`, optionally tagged with an NSID.
///
/// When `nsid` is provided it is stored both on the vertex and in the
/// builder's id-to-NSID table.
///
/// # Errors
///
/// * [`SchemaError::DuplicateVertex`] if a vertex with `id` was already added.
/// * [`SchemaError::UnknownVertexKind`] if the protocol declares at least one
///   object kind or edge rule and does not recognise `kind`.
pub fn vertex(mut self, id: &str, kind: &str, nsid: Option<&str>) -> Result<Self, SchemaError> {
    if self.vertices.contains_key(id) {
        return Err(SchemaError::DuplicateVertex(id.to_owned()));
    }

    // A protocol with neither object kinds nor edge rules places no
    // restriction on vertex kinds, so the lookup is skipped entirely.
    let unrestricted =
        self.protocol.obj_kinds.is_empty() && self.protocol.edge_rules.is_empty();
    if !unrestricted && !self.protocol.is_known_vertex_kind(kind) {
        return Err(SchemaError::UnknownVertexKind(kind.to_owned()));
    }

    let vertex_id = Name::from(id);
    let nsid_name = nsid.map(Name::from);
    if let Some(tag) = &nsid_name {
        self.nsids.insert(vertex_id.clone(), tag.clone());
    }

    let entry = Vertex {
        id: vertex_id.clone(),
        kind: Name::from(kind),
        nsid: nsid_name,
    };
    self.vertices.insert(vertex_id, entry);
    Ok(self)
}
/// Adds a binary edge of the given `kind` from `src` to `tgt`, with an
/// optional `name`.
///
/// # Errors
///
/// * [`SchemaError::VertexNotFound`] if `src` or `tgt` has not been added
///   (the source is checked first).
/// * [`SchemaError::InvalidEdgeSource`] / [`SchemaError::InvalidEdgeTarget`]
///   if the protocol has a rule for `kind` whose non-empty list of permitted
///   source / target kinds does not contain the endpoint's kind.
/// * [`SchemaError::UnknownEdgeKind`] if the protocol has edge rules but none
///   for `kind`.
/// * [`SchemaError::DuplicateEdge`] if an edge with the same
///   `(src, tgt, kind, name)` was already added.
pub fn edge(
    mut self,
    src: &str,
    tgt: &str,
    kind: &str,
    name: Option<&str>,
) -> Result<Self, SchemaError> {
    let source = match self.vertices.get(src) {
        Some(found) => found,
        None => return Err(SchemaError::VertexNotFound(src.to_owned())),
    };
    let target = match self.vertices.get(tgt) {
        Some(found) => found,
        None => return Err(SchemaError::VertexNotFound(tgt.to_owned())),
    };

    match self.protocol.find_edge_rule(kind) {
        Some(rule) => {
            // An empty list of permitted kinds means "anything goes".
            let source_ok = rule.src_kinds.is_empty()
                || rule.src_kinds.iter().any(|k| k == source.kind.as_ref());
            if !source_ok {
                return Err(SchemaError::InvalidEdgeSource {
                    kind: kind.to_owned(),
                    src_kind: source.kind.to_string(),
                    permitted: rule.src_kinds.join(", "),
                });
            }

            let target_ok = rule.tgt_kinds.is_empty()
                || rule.tgt_kinds.iter().any(|k| k == target.kind.as_ref());
            if !target_ok {
                return Err(SchemaError::InvalidEdgeTarget {
                    kind: kind.to_owned(),
                    tgt_kind: target.kind.to_string(),
                    permitted: rule.tgt_kinds.join(", "),
                });
            }
        }
        // No rule for this kind: only an error when the protocol defines
        // edge rules at all.
        None => {
            if !self.protocol.edge_rules.is_empty() {
                return Err(SchemaError::UnknownEdgeKind(kind.to_owned()));
            }
        }
    }

    let new_edge = Edge {
        src: Name::from(src),
        tgt: Name::from(tgt),
        kind: Name::from(kind),
        name: name.map(Name::from),
    };

    // `insert` returns false when the key is already present.
    let dedup_key = (
        new_edge.src.clone(),
        new_edge.tgt.clone(),
        new_edge.kind.clone(),
        new_edge.name.clone(),
    );
    let is_new = self.edge_set.insert(dedup_key);
    if !is_new {
        return Err(SchemaError::DuplicateEdge {
            src: src.to_owned(),
            tgt: tgt.to_owned(),
            kind: kind.to_owned(),
        });
    }

    self.edges.push(new_edge);
    Ok(self)
}
/// Adds a hyper-edge `id` of the given `kind`.
///
/// `sig` maps each label to the id of the vertex it refers to. `parent` is
/// stored as the hyper-edge's parent label as-is; this method does not check
/// that it appears in `sig`.
///
/// # Errors
///
/// * [`SchemaError::DuplicateHyperEdge`] if a hyper-edge with `id` already
///   exists.
/// * [`SchemaError::VertexNotFound`] if any vertex id in `sig` has not been
///   added; the message names the vertex, the hyper-edge and the label.
pub fn hyper_edge(
    mut self,
    id: &str,
    kind: &str,
    sig: HashMap<String, String>,
    parent: &str,
) -> Result<Self, SchemaError> {
    if self.hyper_edges.contains_key(id) {
        return Err(SchemaError::DuplicateHyperEdge(id.to_owned()));
    }

    // First signature entry (in map iteration order) that points at a
    // vertex the builder does not know about, if any.
    let dangling = sig
        .iter()
        .find(|(_, vertex_id)| !self.vertices.contains_key(vertex_id.as_str()));
    if let Some((label, vertex_id)) = dangling {
        return Err(SchemaError::VertexNotFound(format!(
            "{vertex_id} (in hyper-edge {id}, label {label})"
        )));
    }

    let mut signature: HashMap<Name, Name> = HashMap::with_capacity(sig.len());
    for (label, vertex_id) in sig {
        signature.insert(Name::from(label), Name::from(vertex_id));
    }

    let edge_id = Name::from(id);
    let record = HyperEdge {
        id: edge_id.clone(),
        kind: Name::from(kind),
        signature,
        parent_label: Name::from(parent),
    };
    self.hyper_edges.insert(edge_id, record);
    Ok(self)
}
/// Appends a `sort`/`value` constraint to the list kept for `vertex`.
///
/// The vertex name is not checked against the vertices added so far.
#[must_use]
pub fn constraint(mut self, vertex: &str, sort: &str, value: &str) -> Self {
    let added = Constraint {
        sort: Name::from(sort),
        value: value.to_owned(),
    };
    let bucket = self.constraints.entry(Name::from(vertex)).or_default();
    bucket.push(added);
    self
}
/// Appends `edges` to the list of required edges kept for `vertex`.
///
/// Repeated calls for the same vertex accumulate rather than replace.
#[must_use]
pub fn required(mut self, vertex: &str, edges: Vec<Edge>) -> Self {
    let bucket = self.required.entry(Name::from(vertex)).or_default();
    bucket.extend(edges);
    self
}
/// Stores `spec` as the coercion from `source_kind` to `target_kind`,
/// replacing any spec previously stored for the same pair.
#[must_use]
pub fn coercion(mut self, source_kind: &str, target_kind: &str, spec: CoercionSpec) -> Self {
    let pair = (Name::from(source_kind), Name::from(target_kind));
    self.coercions.insert(pair, spec);
    self
}
/// Stores `expr` as the merger expression for `vertex_id`, replacing any
/// expression previously stored under that id.
#[must_use]
pub fn merger(mut self, vertex_id: &str, expr: Expr) -> Self {
    let key = Name::from(vertex_id);
    self.mergers.insert(key, expr);
    self
}
/// Stores `expr` as the default expression for `vertex_id`, replacing any
/// expression previously stored under that id.
#[must_use]
pub fn default_expr(mut self, vertex_id: &str, expr: Expr) -> Self {
    let key = Name::from(vertex_id);
    self.defaults.insert(key, expr);
    self
}
/// Stores `expr` in the policy table under `sort_name`, replacing any
/// expression previously stored under that name.
#[must_use]
pub fn policy(mut self, sort_name: &str, expr: Expr) -> Self {
    let key = Name::from(sort_name);
    self.policies.insert(key, expr);
    self
}
pub fn build(self) -> Result<Schema, SchemaError> {
if self.vertices.is_empty() {
return Err(SchemaError::EmptySchema);
}
let mut edge_map: HashMap<Edge, Name> = HashMap::with_capacity(self.edges.len());
let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
for edge in &self.edges {
edge_map.insert(edge.clone(), edge.kind.clone());
outgoing
.entry(edge.src.clone())
.or_default()
.push(edge.clone());
incoming
.entry(edge.tgt.clone())
.or_default()
.push(edge.clone());
between
.entry((edge.src.clone(), edge.tgt.clone()))
.or_default()
.push(edge.clone());
}
Ok(Schema {
protocol: self.protocol.name.clone(),
vertices: self.vertices,
edges: edge_map,
hyper_edges: self.hyper_edges,
constraints: self.constraints,
required: self.required,
nsids: self.nsids,
variants: HashMap::new(),
orderings: HashMap::new(),
recursion_points: HashMap::new(),
spans: HashMap::new(),
usage_modes: HashMap::new(),
nominal: HashMap::new(),
coercions: self.coercions,
mergers: self.mergers,
defaults: self.defaults,
policies: self.policies,
outgoing,
incoming,
between,
})
}
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
use super::*;
use crate::protocol::EdgeRule;
/// Test fixture: a small protocol named "atproto" with two edge rules
/// (`record-schema` and `prop`), eight object kinds and five constraint
/// sorts. All remaining fields come from `Protocol::default()`.
fn atproto_protocol() -> Protocol {
    // Turns a slice of literals into the owned strings the protocol stores.
    let owned = |items: &[&str]| -> Vec<String> {
        items.iter().map(|item| String::from(*item)).collect()
    };

    let record_schema_rule = EdgeRule {
        edge_kind: String::from("record-schema"),
        src_kinds: owned(&["record"]),
        tgt_kinds: owned(&["object"]),
    };
    let prop_rule = EdgeRule {
        edge_kind: String::from("prop"),
        src_kinds: owned(&["object"]),
        tgt_kinds: owned(&[
            "string", "integer", "object", "ref", "array", "union", "boolean",
        ]),
    };

    Protocol {
        name: String::from("atproto"),
        schema_theory: String::from("ThATProtoSchema"),
        instance_theory: String::from("ThWType"),
        edge_rules: vec![record_schema_rule, prop_rule],
        obj_kinds: owned(&[
            "record", "object", "string", "integer", "ref", "array", "union", "boolean",
        ]),
        constraint_sorts: owned(&["maxLength", "minLength", "format", "minimum", "maximum"]),
        ..Protocol::default()
    }
}
/// Happy path: three vertices, two edges, one NSID and one constraint all
/// end up in the built schema and its indices.
#[test]
fn build_atproto_schema() {
    let proto = atproto_protocol();

    let with_vertices = SchemaBuilder::new(&proto)
        .vertex("post", "record", Some("app.bsky.feed.post"))
        .expect("vertex post")
        .vertex("post:body", "object", None)
        .expect("vertex body")
        .vertex("post:body.text", "string", None)
        .expect("vertex text");

    let with_edges = with_vertices
        .edge("post", "post:body", "record-schema", None)
        .expect("edge record-schema")
        .edge("post:body", "post:body.text", "prop", Some("text"))
        .expect("edge prop");

    let schema = with_edges
        .constraint("post:body.text", "maxLength", "3000")
        .build()
        .expect("build");

    // Element counts.
    assert_eq!(schema.vertex_count(), 3);
    assert_eq!(schema.edge_count(), 2);

    // Adjacency indices.
    assert_eq!(schema.outgoing_edges("post").len(), 1);
    assert_eq!(schema.incoming_edges("post:body").len(), 1);

    // Per-vertex annotations.
    let post_nsid: Option<&str> = schema.nsids.get("post").map(AsRef::as_ref);
    assert_eq!(post_nsid, Some("app.bsky.feed.post"));

    let text_constraints = schema.constraints.get("post:body.text").map(Vec::len);
    assert_eq!(text_constraints, Some(1));
}
/// A `record-schema` edge whose source is a `string` vertex violates the
/// protocol's edge rule and must be rejected.
#[test]
fn invalid_edge_rejected() {
    let proto = atproto_protocol();

    let builder = SchemaBuilder::new(&proto)
        .vertex("s", "string", None)
        .expect("vertex string")
        .vertex("i", "integer", None)
        .expect("vertex integer");
    let outcome = builder.edge("s", "i", "record-schema", None);

    let rejected = matches!(outcome, Err(SchemaError::InvalidEdgeSource { .. }));
    assert!(rejected, "expected InvalidEdgeSource");
}
/// Adding the same vertex id twice must fail on the second call.
#[test]
fn duplicate_vertex_rejected() {
    let proto = atproto_protocol();

    let builder = SchemaBuilder::new(&proto)
        .vertex("v", "record", None)
        .expect("first vertex");
    let outcome = builder.vertex("v", "record", None);

    let rejected = matches!(outcome, Err(SchemaError::DuplicateVertex(_)));
    assert!(rejected, "expected DuplicateVertex");
}
/// An edge whose target vertex was never added must be rejected.
#[test]
fn edge_to_missing_vertex_rejected() {
    let proto = atproto_protocol();

    let builder = SchemaBuilder::new(&proto)
        .vertex("a", "record", None)
        .expect("vertex a");
    let outcome = builder.edge("a", "missing", "record-schema", None);

    let rejected = matches!(outcome, Err(SchemaError::VertexNotFound(_)));
    assert!(rejected, "expected VertexNotFound");
}
/// Building without any vertices must fail.
#[test]
fn empty_schema_rejected() {
    let proto = atproto_protocol();

    let builder = SchemaBuilder::new(&proto);
    let outcome = builder.build();

    let rejected = matches!(outcome, Err(SchemaError::EmptySchema));
    assert!(rejected, "expected EmptySchema");
}
/// The `(source, target)` index is directional: an edge from `r` to `o` is
/// found for `("r", "o")` but not for `("o", "r")`.
#[test]
fn between_index_works() {
    let proto = atproto_protocol();

    let with_vertices = SchemaBuilder::new(&proto)
        .vertex("r", "record", None)
        .expect("vertex r")
        .vertex("o", "object", None)
        .expect("vertex o");
    let schema = with_vertices
        .edge("r", "o", "record-schema", None)
        .expect("edge")
        .build()
        .expect("build");

    let forward_count = schema.edges_between("r", "o").len();
    let reverse_count = schema.edges_between("o", "r").len();
    assert_eq!(forward_count, 1);
    assert_eq!(reverse_count, 0);
}
}