panproto_schema/schema.rs
1//! Core schema data structures.
2//!
3//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
4//! vertices, binary edges, hyper-edges, constraints, required-edge
5//! declarations, and NSID mappings. Precomputed adjacency indices
6//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use serde::{Deserialize, Serialize};
12use smallvec::SmallVec;
13
14/// A schema vertex.
15///
16/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
17/// recognized vertex kinds, and an optional NSID (namespace identifier).
18#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
19pub struct Vertex {
20 /// Unique vertex identifier within the schema.
21 pub id: Name,
22 /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
23 pub kind: Name,
24 /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
25 pub nsid: Option<Name>,
26}
27
28/// A binary edge between two vertices.
29///
30/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
31/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
32/// provides an optional label (e.g., the property name).
33#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
34pub struct Edge {
35 /// Source vertex ID.
36 pub src: Name,
37 /// Target vertex ID.
38 pub tgt: Name,
39 /// Edge kind (e.g., `"prop"`, `"record-schema"`).
40 pub kind: Name,
41 /// Optional edge label (e.g., a property name like `"text"`).
42 pub name: Option<Name>,
43}
44
45/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
46///
47/// Hyper-edges connect multiple vertices via a labeled signature.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct HyperEdge {
50 /// Unique hyper-edge identifier.
51 pub id: Name,
52 /// Hyper-edge kind.
53 pub kind: Name,
54 /// Maps label names to vertex IDs.
55 pub signature: HashMap<Name, Name>,
56 /// The label that identifies the parent vertex.
57 pub parent_label: Name,
58}
59
60/// A constraint on a vertex.
61///
62/// Constraints restrict the values a vertex can hold (e.g., maximum
63/// string length, format pattern).
64#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub struct Constraint {
66 /// The constraint sort (e.g., `"maxLength"`, `"format"`).
67 pub sort: Name,
68 /// The constraint value (e.g., `"3000"`, `"at-uri"`).
69 pub value: String,
70}
71
72/// A variant in a coproduct (sum type / union).
73///
74/// Each variant is injected into a parent vertex (the union/coproduct)
75/// with an optional discriminant tag.
76#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct Variant {
78 /// Unique variant identifier.
79 pub id: Name,
80 /// The parent coproduct vertex this variant belongs to.
81 pub parent_vertex: Name,
82 /// Optional discriminant tag.
83 pub tag: Option<Name>,
84}
85
86/// An ordering annotation on an edge.
87///
88/// Records that the children reached via this edge are ordered,
89/// with a specific position index.
90#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
91pub struct Ordering {
92 /// The edge being ordered.
93 pub edge: Edge,
94 /// Position in the ordered collection.
95 pub position: u32,
96}
97
98/// A recursion point (fixpoint marker) in the schema.
99///
100/// Marks a vertex as a recursive reference to another vertex,
101/// satisfying the fold-unfold law: `unfold(fold(v)) = v`.
102#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct RecursionPoint {
104 /// The fixpoint marker vertex ID.
105 pub mu_id: Name,
106 /// The target vertex this unfolds to.
107 pub target_vertex: Name,
108}
109
110/// A span connecting two vertices through a common source.
111///
112/// Spans model correspondences, diffs, and migrations:
113/// `left ← span → right`.
114#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub struct Span {
116 /// Unique span identifier.
117 pub id: Name,
118 /// Left vertex of the span.
119 pub left: Name,
120 /// Right vertex of the span.
121 pub right: Name,
122}
123
124/// Use-counting mode for an edge.
125///
126/// Captures the substructural distinction between edges that can
127/// be used freely (structural), exactly once (linear), or at most
128/// once (affine).
129#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
130pub enum UsageMode {
131 /// Can be used any number of times (default).
132 #[default]
133 Structural,
134 /// Must be used exactly once (e.g., protobuf `oneof`).
135 Linear,
136 /// Can be used at most once.
137 Affine,
138}
139
140/// Specification of a coercion between two value kinds.
141///
142/// Contains the forward coercion expression, an optional inverse for
143/// round-tripping, and the coercion class classifying the round-trip behavior.
144#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
145pub struct CoercionSpec {
146 /// Forward coercion expression (source to target).
147 pub forward: panproto_expr::Expr,
148 /// Inverse coercion expression (target to source) for the `put` direction.
149 pub inverse: Option<panproto_expr::Expr>,
150 /// Round-trip classification.
151 pub class: panproto_gat::CoercionClass,
152}
153
154/// A schema: a model of the protocol's schema theory.
155///
156/// Contains both the raw data (vertices, edges, constraints, etc.) and
157/// precomputed adjacency indices for efficient graph traversal.
158#[derive(Clone, Debug, Serialize, Deserialize)]
159pub struct Schema {
160 /// The protocol this schema belongs to.
161 pub protocol: String,
162 /// Vertices keyed by their ID.
163 pub vertices: HashMap<Name, Vertex>,
164 /// Edges keyed by the edge itself, value is the edge kind.
165 #[serde(with = "crate::serde_helpers::map_as_vec")]
166 pub edges: HashMap<Edge, Name>,
167 /// Hyper-edges keyed by their ID.
168 pub hyper_edges: HashMap<Name, HyperEdge>,
169 /// Constraints per vertex ID.
170 pub constraints: HashMap<Name, Vec<Constraint>>,
171 /// Required edges per vertex ID.
172 pub required: HashMap<Name, Vec<Edge>>,
173 /// NSID mapping: vertex ID to NSID string.
174 pub nsids: HashMap<Name, Name>,
175 /// Declared entry vertices.
176 ///
177 /// Semantically, this is the finite family of basepoints that makes
178 /// the schema a *pointed* schema: `E → Ob(C_S)` selecting the sorts
179 /// at which the W-algebra of instances may be rooted. Parsers set
180 /// this explicitly per their protocol's notion of a top-level
181 /// definition (a record, a top-level type, a path root, etc.);
182 /// consumers that need to choose an instance root consult it via
183 /// [`primary_entry`].
184 ///
185 /// Empty means the parser declined to supply a pointing; consumers
186 /// should then either fall back to a deterministic (but non-
187 /// canonical) selection or report an error. Order is preserved for
188 /// reproducibility; the set carries no duplicates.
189 #[serde(default)]
190 pub entries: Vec<Name>,
191
192 /// Coproduct variants per union vertex ID.
193 #[serde(default)]
194 pub variants: HashMap<Name, Vec<Variant>>,
195 /// Edge ordering positions (edge → position index).
196 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
197 pub orderings: HashMap<Edge, u32>,
198 /// Recursion points (fixpoint markers).
199 #[serde(default)]
200 pub recursion_points: HashMap<Name, RecursionPoint>,
201 /// Spans connecting pairs of vertices.
202 #[serde(default)]
203 pub spans: HashMap<Name, Span>,
204 /// Edge usage modes (default: `Structural` for all).
205 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
206 pub usage_modes: HashMap<Edge, UsageMode>,
207 /// Whether each vertex uses nominal identity (`true`) or
208 /// structural identity (`false`). Absent = structural.
209 #[serde(default)]
210 pub nominal: HashMap<Name, bool>,
211
212 // -- enrichment fields --
213 /// Coercion specifications: `(source_kind, target_kind)` to coercion spec.
214 #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
215 pub coercions: HashMap<(Name, Name), CoercionSpec>,
216 /// Merger expressions: `vertex_id` to merger expression.
217 #[serde(default)]
218 pub mergers: HashMap<Name, panproto_expr::Expr>,
219 /// Default value expressions: `vertex_id` to default expression.
220 #[serde(default)]
221 pub defaults: HashMap<Name, panproto_expr::Expr>,
222 /// Conflict resolution policy expressions: `sort_name` to policy expression.
223 #[serde(default)]
224 pub policies: HashMap<Name, panproto_expr::Expr>,
225
226 // -- precomputed indices --
227 /// Outgoing edges per vertex ID.
228 pub outgoing: HashMap<Name, SmallVec<Edge, 4>>,
229 /// Incoming edges per vertex ID.
230 pub incoming: HashMap<Name, SmallVec<Edge, 4>>,
231 /// Edges between a specific `(src, tgt)` pair.
232 #[serde(with = "crate::serde_helpers::map_as_vec")]
233 pub between: HashMap<(Name, Name), SmallVec<Edge, 2>>,
234}
235
236impl Schema {
237 /// Strip every constraint whose sort belongs to the layout
238 /// enrichment fibre (per [`panproto_gat::is_layout_sort`]).
239 ///
240 /// This is the schema-level forgetful U sending a decorated schema
241 /// to its abstract base in the parse/decorate/emit lens. Idempotent.
242 #[must_use]
243 pub fn forget_layout(&self) -> Self {
244 let mut clone = self.clone();
245 clone.forget_layout_in_place();
246 clone
247 }
248
249 /// In-place variant of [`Self::forget_layout`].
250 pub fn forget_layout_in_place(&mut self) {
251 for constraints in self.constraints.values_mut() {
252 constraints.retain(|c| !panproto_gat::is_layout_sort(c.sort.as_ref()));
253 }
254 // Drop now-empty vertex entries so equality is structural.
255 self.constraints.retain(|_, cs| !cs.is_empty());
256 }
257
258 /// Returns `true` when no constraint sort belongs to the layout
259 /// enrichment fibre. This is the well-formedness predicate for an
260 /// [`AbstractSchema`](crate::AbstractSchema).
261 #[must_use]
262 pub fn is_layout_free(&self) -> bool {
263 self.constraints.values().all(|cs| {
264 cs.iter()
265 .all(|c| !panproto_gat::is_layout_sort(c.sort.as_ref()))
266 })
267 }
268
269 /// Look up a vertex by ID.
270 #[must_use]
271 pub fn vertex(&self, id: &str) -> Option<&Vertex> {
272 self.vertices.get(id)
273 }
274
275 /// Return all outgoing edges from the given vertex.
276 #[must_use]
277 pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
278 self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
279 }
280
281 /// Return all incoming edges to the given vertex.
282 #[must_use]
283 pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
284 self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
285 }
286
287 /// Return edges between a specific `(src, tgt)` pair.
288 #[must_use]
289 #[inline]
290 pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
291 self.between
292 .get(&(Name::from(src), Name::from(tgt)))
293 .map_or(&[], SmallVec::as_slice)
294 }
295
296 /// Returns `true` if the given vertex ID exists in this schema.
297 #[must_use]
298 #[inline]
299 pub fn has_vertex(&self, id: &str) -> bool {
300 self.vertices.contains_key(id)
301 }
302
303 /// Returns the number of vertices in the schema.
304 #[must_use]
305 pub fn vertex_count(&self) -> usize {
306 self.vertices.len()
307 }
308
309 /// Returns the number of edges in the schema.
310 #[must_use]
311 pub fn edge_count(&self) -> usize {
312 self.edges.len()
313 }
314
315 /// Return the declared entry vertices.
316 ///
317 /// See [`Schema::entries`] for semantics. Use [`primary_entry`] for
318 /// callers that need a single root and want a deterministic
319 /// fallback when no entries are declared.
320 #[must_use]
321 pub fn entry_vertices(&self) -> &[Name] {
322 &self.entries
323 }
324
325 /// Return every constraint attached to the given vertex.
326 ///
327 /// Tree-sitter-derived schemas attach byte ranges, interstitials,
328 /// formatting, and `field:<name>` entries here.
329 #[must_use]
330 pub fn constraints_for(&self, vertex_id: &str) -> &[Constraint] {
331 self.constraints
332 .get(&Name::from(vertex_id))
333 .map_or(&[], Vec::as_slice)
334 }
335
336 /// Return the text value of a tree-sitter `field('<name>', ...)`
337 /// anonymous-token child on the given vertex, if any.
338 ///
339 /// Tree-sitter rules of the form
340 /// `field('op', choice('+', '-', '*', '/'))` attach a field name to
341 /// an unnamed token alternative. The walker emits the token's text
342 /// as a `field:<name>` constraint on the parent vertex; this is the
343 /// supported accessor for that text.
344 ///
345 /// Returns `None` if no `field:<name>` constraint exists on
346 /// `vertex_id`. Named-node field children continue to surface as
347 /// edges (use [`outgoing_edges`](Self::outgoing_edges) and filter
348 /// by [`Edge::kind`](crate::Edge::kind) for those).
349 #[must_use]
350 pub fn field_text(&self, vertex_id: &str, field_name: &str) -> Option<&str> {
351 let sort = format!("field:{field_name}");
352 self.constraints
353 .get(&Name::from(vertex_id))?
354 .iter()
355 .find(|c| c.sort.as_ref() == sort.as_str())
356 .map(|c| c.value.as_str())
357 }
358}
359
360/// Choose a single entry vertex for a schema.
361///
362/// Returns the first declared entry if any. Otherwise falls back to a
363/// deterministic, protocol-agnostic choice: among vertex ids sorted
364/// lexicographically, the first vertex that is a source of at least
365/// one edge and a target of none (an edgeless "root" in the signature
366/// graph); failing that, the first vertex that has outgoing edges at
367/// all; failing that, the lexicographically first vertex; `None` only
368/// for an empty schema.
369///
370/// The fallback is explicitly *non-canonical*: it exists so that legacy
371/// schemas without declared entries remain usable, but new parsers
372/// should always supply at least one entry via
373/// [`SchemaBuilder::entry`](crate::SchemaBuilder::entry) so that the
374/// pointing is part of the schema's semantics rather than recovered by
375/// heuristic.
376#[must_use]
377pub fn primary_entry(schema: &Schema) -> Option<&Name> {
378 if let Some(first) = schema.entries.first() {
379 return Some(first);
380 }
381
382 let mut ids: Vec<&Name> = schema.vertices.keys().collect();
383 ids.sort();
384
385 let has_outgoing = |id: &Name| -> bool {
386 schema
387 .outgoing
388 .get(id)
389 .is_some_and(|edges| !edges.is_empty())
390 };
391 let has_incoming = |id: &Name| -> bool {
392 schema
393 .incoming
394 .get(id)
395 .is_some_and(|edges| !edges.is_empty())
396 };
397
398 if let Some(id) = ids
399 .iter()
400 .copied()
401 .find(|id| has_outgoing(id) && !has_incoming(id))
402 {
403 return Some(id);
404 }
405 if let Some(id) = ids.iter().copied().find(|id| has_outgoing(id)) {
406 return Some(id);
407 }
408 ids.into_iter().next()
409}