panproto_schema/schema.rs
//! Core schema data structures.
//!
//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
//! vertices, binary edges, hyper-edges, constraints, required-edge
//! declarations, and NSID mappings. Precomputed adjacency indices
//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use serde::{Deserialize, Serialize};
12use smallvec::SmallVec;
13
14/// A schema vertex.
15///
16/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
17/// recognized vertex kinds, and an optional NSID (namespace identifier).
18#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
19pub struct Vertex {
20 /// Unique vertex identifier within the schema.
21 pub id: Name,
22 /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
23 pub kind: Name,
24 /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
25 pub nsid: Option<Name>,
26}
27
28/// A binary edge between two vertices.
29///
30/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
31/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
32/// provides an optional label (e.g., the property name).
33#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
34pub struct Edge {
35 /// Source vertex ID.
36 pub src: Name,
37 /// Target vertex ID.
38 pub tgt: Name,
39 /// Edge kind (e.g., `"prop"`, `"record-schema"`).
40 pub kind: Name,
41 /// Optional edge label (e.g., a property name like `"text"`).
42 pub name: Option<Name>,
43}
44
45/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
46///
47/// Hyper-edges connect multiple vertices via a labeled signature.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct HyperEdge {
50 /// Unique hyper-edge identifier.
51 pub id: Name,
52 /// Hyper-edge kind.
53 pub kind: Name,
54 /// Maps label names to vertex IDs.
55 pub signature: HashMap<Name, Name>,
56 /// The label that identifies the parent vertex.
57 pub parent_label: Name,
58}
59
60/// A constraint on a vertex.
61///
62/// Constraints restrict the values a vertex can hold (e.g., maximum
63/// string length, format pattern).
64#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub struct Constraint {
66 /// The constraint sort (e.g., `"maxLength"`, `"format"`).
67 pub sort: Name,
68 /// The constraint value (e.g., `"3000"`, `"at-uri"`).
69 pub value: String,
70}
71
72/// A variant in a coproduct (sum type / union).
73///
74/// Each variant is injected into a parent vertex (the union/coproduct)
75/// with an optional discriminant tag.
76#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct Variant {
78 /// Unique variant identifier.
79 pub id: Name,
80 /// The parent coproduct vertex this variant belongs to.
81 pub parent_vertex: Name,
82 /// Optional discriminant tag.
83 pub tag: Option<Name>,
84}
85
86/// An ordering annotation on an edge.
87///
88/// Records that the children reached via this edge are ordered,
89/// with a specific position index.
90#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
91pub struct Ordering {
92 /// The edge being ordered.
93 pub edge: Edge,
94 /// Position in the ordered collection.
95 pub position: u32,
96}
97
98/// A recursion point (fixpoint marker) in the schema.
99///
100/// Marks a vertex as a recursive reference to another vertex,
101/// satisfying the fold-unfold law: `unfold(fold(v)) = v`.
102#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct RecursionPoint {
104 /// The fixpoint marker vertex ID.
105 pub mu_id: Name,
106 /// The target vertex this unfolds to.
107 pub target_vertex: Name,
108}
109
110/// A span connecting two vertices through a common source.
111///
112/// Spans model correspondences, diffs, and migrations:
113/// `left ← span → right`.
114#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub struct Span {
116 /// Unique span identifier.
117 pub id: Name,
118 /// Left vertex of the span.
119 pub left: Name,
120 /// Right vertex of the span.
121 pub right: Name,
122}
123
124/// Use-counting mode for an edge.
125///
126/// Captures the substructural distinction between edges that can
127/// be used freely (structural), exactly once (linear), or at most
128/// once (affine).
129#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
130pub enum UsageMode {
131 /// Can be used any number of times (default).
132 #[default]
133 Structural,
134 /// Must be used exactly once (e.g., protobuf `oneof`).
135 Linear,
136 /// Can be used at most once.
137 Affine,
138}
139
140/// Specification of a coercion between two value kinds.
141///
142/// Contains the forward coercion expression, an optional inverse for
143/// round-tripping, and the coercion class classifying the round-trip behavior.
144#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
145pub struct CoercionSpec {
146 /// Forward coercion expression (source to target).
147 pub forward: panproto_expr::Expr,
148 /// Inverse coercion expression (target to source) for the `put` direction.
149 pub inverse: Option<panproto_expr::Expr>,
150 /// Round-trip classification.
151 pub class: panproto_gat::CoercionClass,
152}
153
154/// A schema: a model of the protocol's schema theory.
155///
156/// Contains both the raw data (vertices, edges, constraints, etc.) and
157/// precomputed adjacency indices for efficient graph traversal.
158#[derive(Clone, Debug, Serialize, Deserialize)]
159pub struct Schema {
160 /// The protocol this schema belongs to.
161 pub protocol: String,
162 /// Vertices keyed by their ID.
163 pub vertices: HashMap<Name, Vertex>,
164 /// Edges keyed by the edge itself, value is the edge kind.
165 #[serde(with = "crate::serde_helpers::map_as_vec")]
166 pub edges: HashMap<Edge, Name>,
167 /// Hyper-edges keyed by their ID.
168 pub hyper_edges: HashMap<Name, HyperEdge>,
169 /// Constraints per vertex ID.
170 pub constraints: HashMap<Name, Vec<Constraint>>,
171 /// Required edges per vertex ID.
172 pub required: HashMap<Name, Vec<Edge>>,
173 /// NSID mapping: vertex ID to NSID string.
174 pub nsids: HashMap<Name, Name>,
175
176 /// Coproduct variants per union vertex ID.
177 #[serde(default)]
178 pub variants: HashMap<Name, Vec<Variant>>,
179 /// Edge ordering positions (edge → position index).
180 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
181 pub orderings: HashMap<Edge, u32>,
182 /// Recursion points (fixpoint markers).
183 #[serde(default)]
184 pub recursion_points: HashMap<Name, RecursionPoint>,
185 /// Spans connecting pairs of vertices.
186 #[serde(default)]
187 pub spans: HashMap<Name, Span>,
188 /// Edge usage modes (default: `Structural` for all).
189 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
190 pub usage_modes: HashMap<Edge, UsageMode>,
191 /// Whether each vertex uses nominal identity (`true`) or
192 /// structural identity (`false`). Absent = structural.
193 #[serde(default)]
194 pub nominal: HashMap<Name, bool>,
195
196 // -- enrichment fields --
197 /// Coercion specifications: `(source_kind, target_kind)` to coercion spec.
198 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
199 pub coercions: HashMap<(Name, Name), CoercionSpec>,
200 /// Merger expressions: `vertex_id` to merger expression.
201 #[serde(default)]
202 pub mergers: HashMap<Name, panproto_expr::Expr>,
203 /// Default value expressions: `vertex_id` to default expression.
204 #[serde(default)]
205 pub defaults: HashMap<Name, panproto_expr::Expr>,
206 /// Conflict resolution policy expressions: `sort_name` to policy expression.
207 #[serde(default)]
208 pub policies: HashMap<Name, panproto_expr::Expr>,
209
210 // -- precomputed indices --
211 /// Outgoing edges per vertex ID.
212 pub outgoing: HashMap<Name, SmallVec<Edge, 4>>,
213 /// Incoming edges per vertex ID.
214 pub incoming: HashMap<Name, SmallVec<Edge, 4>>,
215 /// Edges between a specific `(src, tgt)` pair.
216 #[serde(with = "crate::serde_helpers::map_as_vec")]
217 pub between: HashMap<(Name, Name), SmallVec<Edge, 2>>,
218}
219
220impl Schema {
221 /// Look up a vertex by ID.
222 #[must_use]
223 pub fn vertex(&self, id: &str) -> Option<&Vertex> {
224 self.vertices.get(id)
225 }
226
227 /// Return all outgoing edges from the given vertex.
228 #[must_use]
229 pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
230 self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
231 }
232
233 /// Return all incoming edges to the given vertex.
234 #[must_use]
235 pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
236 self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
237 }
238
239 /// Return edges between a specific `(src, tgt)` pair.
240 #[must_use]
241 #[inline]
242 pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
243 self.between
244 .get(&(Name::from(src), Name::from(tgt)))
245 .map_or(&[], SmallVec::as_slice)
246 }
247
248 /// Returns `true` if the given vertex ID exists in this schema.
249 #[must_use]
250 #[inline]
251 pub fn has_vertex(&self, id: &str) -> bool {
252 self.vertices.contains_key(id)
253 }
254
255 /// Returns the number of vertices in the schema.
256 #[must_use]
257 pub fn vertex_count(&self) -> usize {
258 self.vertices.len()
259 }
260
261 /// Returns the number of edges in the schema.
262 #[must_use]
263 pub fn edge_count(&self) -> usize {
264 self.edges.len()
265 }
266}