panproto_schema/schema.rs
1//! Core schema data structures.
2//!
3//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
4//! vertices, binary edges, hyper-edges, constraints, required-edge
5//! declarations, and NSID mappings. Precomputed adjacency indices
6//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use serde::{Deserialize, Serialize};
12use smallvec::SmallVec;
13
14/// A schema vertex.
15///
16/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
17/// recognized vertex kinds, and an optional NSID (namespace identifier).
18#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
19pub struct Vertex {
20 /// Unique vertex identifier within the schema.
21 pub id: Name,
22 /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
23 pub kind: Name,
24 /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
25 pub nsid: Option<Name>,
26}
27
28/// A binary edge between two vertices.
29///
30/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
31/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
32/// provides an optional label (e.g., the property name).
33#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
34pub struct Edge {
35 /// Source vertex ID.
36 pub src: Name,
37 /// Target vertex ID.
38 pub tgt: Name,
39 /// Edge kind (e.g., `"prop"`, `"record-schema"`).
40 pub kind: Name,
41 /// Optional edge label (e.g., a property name like `"text"`).
42 pub name: Option<Name>,
43}
44
45/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
46///
47/// Hyper-edges connect multiple vertices via a labeled signature.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct HyperEdge {
50 /// Unique hyper-edge identifier.
51 pub id: Name,
52 /// Hyper-edge kind.
53 pub kind: Name,
54 /// Maps label names to vertex IDs.
55 pub signature: HashMap<Name, Name>,
56 /// The label that identifies the parent vertex.
57 pub parent_label: Name,
58}
59
60/// A constraint on a vertex.
61///
62/// Constraints restrict the values a vertex can hold (e.g., maximum
63/// string length, format pattern).
64#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub struct Constraint {
66 /// The constraint sort (e.g., `"maxLength"`, `"format"`).
67 pub sort: Name,
68 /// The constraint value (e.g., `"3000"`, `"at-uri"`).
69 pub value: String,
70}
71
72/// A variant in a coproduct (sum type / union).
73///
74/// Each variant is injected into a parent vertex (the union/coproduct)
75/// with an optional discriminant tag.
76#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct Variant {
78 /// Unique variant identifier.
79 pub id: Name,
80 /// The parent coproduct vertex this variant belongs to.
81 pub parent_vertex: Name,
82 /// Optional discriminant tag.
83 pub tag: Option<Name>,
84}
85
86/// An ordering annotation on an edge.
87///
88/// Records that the children reached via this edge are ordered,
89/// with a specific position index.
90#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
91pub struct Ordering {
92 /// The edge being ordered.
93 pub edge: Edge,
94 /// Position in the ordered collection.
95 pub position: u32,
96}
97
98/// A recursion point (fixpoint marker) in the schema.
99///
100/// Marks a vertex as a recursive reference to another vertex,
101/// satisfying the fold-unfold law: `unfold(fold(v)) = v`.
102#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct RecursionPoint {
104 /// The fixpoint marker vertex ID.
105 pub mu_id: Name,
106 /// The target vertex this unfolds to.
107 pub target_vertex: Name,
108}
109
110/// A span connecting two vertices through a common source.
111///
112/// Spans model correspondences, diffs, and migrations:
113/// `left ← span → right`.
114#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub struct Span {
116 /// Unique span identifier.
117 pub id: Name,
118 /// Left vertex of the span.
119 pub left: Name,
120 /// Right vertex of the span.
121 pub right: Name,
122}
123
124/// Use-counting mode for an edge.
125///
126/// Captures the substructural distinction between edges that can
127/// be used freely (structural), exactly once (linear), or at most
128/// once (affine).
129#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
130pub enum UsageMode {
131 /// Can be used any number of times (default).
132 #[default]
133 Structural,
134 /// Must be used exactly once (e.g., protobuf `oneof`).
135 Linear,
136 /// Can be used at most once.
137 Affine,
138}
139
140/// Specification of a coercion between two value kinds.
141///
142/// Contains the forward coercion expression, an optional inverse for
143/// round-tripping, and the coercion class classifying the round-trip behavior.
144#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
145pub struct CoercionSpec {
146 /// Forward coercion expression (source to target).
147 pub forward: panproto_expr::Expr,
148 /// Inverse coercion expression (target to source) for the `put` direction.
149 pub inverse: Option<panproto_expr::Expr>,
150 /// Round-trip classification.
151 pub class: panproto_gat::CoercionClass,
152}
153
154/// A schema: a model of the protocol's schema theory.
155///
156/// Contains both the raw data (vertices, edges, constraints, etc.) and
157/// precomputed adjacency indices for efficient graph traversal.
158#[derive(Clone, Debug, Serialize, Deserialize)]
159pub struct Schema {
160 /// The protocol this schema belongs to.
161 pub protocol: String,
162 /// Vertices keyed by their ID.
163 pub vertices: HashMap<Name, Vertex>,
164 /// Edges keyed by the edge itself, value is the edge kind.
165 #[serde(with = "crate::serde_helpers::map_as_vec")]
166 pub edges: HashMap<Edge, Name>,
167 /// Hyper-edges keyed by their ID.
168 pub hyper_edges: HashMap<Name, HyperEdge>,
169 /// Constraints per vertex ID.
170 pub constraints: HashMap<Name, Vec<Constraint>>,
171 /// Required edges per vertex ID.
172 pub required: HashMap<Name, Vec<Edge>>,
173 /// NSID mapping: vertex ID to NSID string.
174 pub nsids: HashMap<Name, Name>,
175 /// Declared entry vertices.
176 ///
177 /// Semantically, this is the finite family of basepoints that makes
178 /// the schema a *pointed* schema: `E → Ob(C_S)` selecting the sorts
179 /// at which the W-algebra of instances may be rooted. Parsers set
180 /// this explicitly per their protocol's notion of a top-level
181 /// definition (a record, a top-level type, a path root, etc.);
182 /// consumers that need to choose an instance root consult it via
183 /// [`primary_entry`].
184 ///
185 /// Empty means the parser declined to supply a pointing; consumers
186 /// should then either fall back to a deterministic (but non-
187 /// canonical) selection or report an error. Order is preserved for
188 /// reproducibility; the set carries no duplicates.
189 #[serde(default)]
190 pub entries: Vec<Name>,
191
192 /// Coproduct variants per union vertex ID.
193 #[serde(default)]
194 pub variants: HashMap<Name, Vec<Variant>>,
195 /// Edge ordering positions (edge → position index).
196 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
197 pub orderings: HashMap<Edge, u32>,
198 /// Recursion points (fixpoint markers).
199 #[serde(default)]
200 pub recursion_points: HashMap<Name, RecursionPoint>,
201 /// Spans connecting pairs of vertices.
202 #[serde(default)]
203 pub spans: HashMap<Name, Span>,
204 /// Edge usage modes (default: `Structural` for all).
205 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
206 pub usage_modes: HashMap<Edge, UsageMode>,
207 /// Whether each vertex uses nominal identity (`true`) or
208 /// structural identity (`false`). Absent = structural.
209 #[serde(default)]
210 pub nominal: HashMap<Name, bool>,
211
212 // -- enrichment fields --
213 /// Coercion specifications: `(source_kind, target_kind)` to coercion spec.
214 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
215 pub coercions: HashMap<(Name, Name), CoercionSpec>,
216 /// Merger expressions: `vertex_id` to merger expression.
217 #[serde(default)]
218 pub mergers: HashMap<Name, panproto_expr::Expr>,
219 /// Default value expressions: `vertex_id` to default expression.
220 #[serde(default)]
221 pub defaults: HashMap<Name, panproto_expr::Expr>,
222 /// Conflict resolution policy expressions: `sort_name` to policy expression.
223 #[serde(default)]
224 pub policies: HashMap<Name, panproto_expr::Expr>,
225
226 // -- precomputed indices --
227 /// Outgoing edges per vertex ID.
228 pub outgoing: HashMap<Name, SmallVec<Edge, 4>>,
229 /// Incoming edges per vertex ID.
230 pub incoming: HashMap<Name, SmallVec<Edge, 4>>,
231 /// Edges between a specific `(src, tgt)` pair.
232 #[serde(with = "crate::serde_helpers::map_as_vec")]
233 pub between: HashMap<(Name, Name), SmallVec<Edge, 2>>,
234}
235
236impl Schema {
237 /// Look up a vertex by ID.
238 #[must_use]
239 pub fn vertex(&self, id: &str) -> Option<&Vertex> {
240 self.vertices.get(id)
241 }
242
243 /// Return all outgoing edges from the given vertex.
244 #[must_use]
245 pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
246 self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
247 }
248
249 /// Return all incoming edges to the given vertex.
250 #[must_use]
251 pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
252 self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
253 }
254
255 /// Return edges between a specific `(src, tgt)` pair.
256 #[must_use]
257 #[inline]
258 pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
259 self.between
260 .get(&(Name::from(src), Name::from(tgt)))
261 .map_or(&[], SmallVec::as_slice)
262 }
263
264 /// Returns `true` if the given vertex ID exists in this schema.
265 #[must_use]
266 #[inline]
267 pub fn has_vertex(&self, id: &str) -> bool {
268 self.vertices.contains_key(id)
269 }
270
271 /// Returns the number of vertices in the schema.
272 #[must_use]
273 pub fn vertex_count(&self) -> usize {
274 self.vertices.len()
275 }
276
277 /// Returns the number of edges in the schema.
278 #[must_use]
279 pub fn edge_count(&self) -> usize {
280 self.edges.len()
281 }
282
283 /// Return the declared entry vertices.
284 ///
285 /// See [`Schema::entries`] for semantics. Use [`primary_entry`] for
286 /// callers that need a single root and want a deterministic
287 /// fallback when no entries are declared.
288 #[must_use]
289 pub fn entry_vertices(&self) -> &[Name] {
290 &self.entries
291 }
292}
293
294/// Choose a single entry vertex for a schema.
295///
296/// Returns the first declared entry if any. Otherwise falls back to a
297/// deterministic, protocol-agnostic choice: among vertex ids sorted
298/// lexicographically, the first vertex that is a source of at least
299/// one edge and a target of none (an edgeless "root" in the signature
300/// graph); failing that, the first vertex that has outgoing edges at
301/// all; failing that, the lexicographically first vertex; `None` only
302/// for an empty schema.
303///
304/// The fallback is explicitly *non-canonical*: it exists so that legacy
305/// schemas without declared entries remain usable, but new parsers
306/// should always supply at least one entry via
307/// [`SchemaBuilder::entry`](crate::SchemaBuilder::entry) so that the
308/// pointing is part of the schema's semantics rather than recovered by
309/// heuristic.
310#[must_use]
311pub fn primary_entry(schema: &Schema) -> Option<&Name> {
312 if let Some(first) = schema.entries.first() {
313 return Some(first);
314 }
315
316 let mut ids: Vec<&Name> = schema.vertices.keys().collect();
317 ids.sort();
318
319 let has_outgoing = |id: &Name| -> bool {
320 schema
321 .outgoing
322 .get(id)
323 .is_some_and(|edges| !edges.is_empty())
324 };
325 let has_incoming = |id: &Name| -> bool {
326 schema
327 .incoming
328 .get(id)
329 .is_some_and(|edges| !edges.is_empty())
330 };
331
332 if let Some(id) = ids
333 .iter()
334 .copied()
335 .find(|id| has_outgoing(id) && !has_incoming(id))
336 {
337 return Some(id);
338 }
339 if let Some(id) = ids.iter().copied().find(|id| has_outgoing(id)) {
340 return Some(id);
341 }
342 ids.into_iter().next()
343}