Skip to main content

panproto_schema/
schema.rs

1//! Core schema data structures.
2//!
3//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
4//! vertices, binary edges, hyper-edges, constraints, required-edge
5//! declarations, and NSID mappings. Precomputed adjacency indices
6//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use serde::{Deserialize, Serialize};
12use smallvec::SmallVec;
13
14/// A schema vertex.
15///
16/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
17/// recognized vertex kinds, and an optional NSID (namespace identifier).
18#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
19pub struct Vertex {
20    /// Unique vertex identifier within the schema.
21    pub id: Name,
22    /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
23    pub kind: Name,
24    /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
25    pub nsid: Option<Name>,
26}
27
28/// A binary edge between two vertices.
29///
30/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
31/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
32/// provides an optional label (e.g., the property name).
33#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
34pub struct Edge {
35    /// Source vertex ID.
36    pub src: Name,
37    /// Target vertex ID.
38    pub tgt: Name,
39    /// Edge kind (e.g., `"prop"`, `"record-schema"`).
40    pub kind: Name,
41    /// Optional edge label (e.g., a property name like `"text"`).
42    pub name: Option<Name>,
43}
44
45/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
46///
47/// Hyper-edges connect multiple vertices via a labeled signature.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct HyperEdge {
50    /// Unique hyper-edge identifier.
51    pub id: Name,
52    /// Hyper-edge kind.
53    pub kind: Name,
54    /// Maps label names to vertex IDs.
55    pub signature: HashMap<Name, Name>,
56    /// The label that identifies the parent vertex.
57    pub parent_label: Name,
58}
59
60/// A constraint on a vertex.
61///
62/// Constraints restrict the values a vertex can hold (e.g., maximum
63/// string length, format pattern).
64#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub struct Constraint {
66    /// The constraint sort (e.g., `"maxLength"`, `"format"`).
67    pub sort: Name,
68    /// The constraint value (e.g., `"3000"`, `"at-uri"`).
69    pub value: String,
70}
71
72/// A variant in a coproduct (sum type / union).
73///
74/// Each variant is injected into a parent vertex (the union/coproduct)
75/// with an optional discriminant tag.
76#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct Variant {
78    /// Unique variant identifier.
79    pub id: Name,
80    /// The parent coproduct vertex this variant belongs to.
81    pub parent_vertex: Name,
82    /// Optional discriminant tag.
83    pub tag: Option<Name>,
84}
85
86/// An ordering annotation on an edge.
87///
88/// Records that the children reached via this edge are ordered,
89/// with a specific position index.
90#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
91pub struct Ordering {
92    /// The edge being ordered.
93    pub edge: Edge,
94    /// Position in the ordered collection.
95    pub position: u32,
96}
97
98/// A recursion point (fixpoint marker) in the schema.
99///
100/// Marks a vertex as a recursive reference to another vertex,
101/// satisfying the fold-unfold law: `unfold(fold(v)) = v`.
102#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct RecursionPoint {
104    /// The fixpoint marker vertex ID.
105    pub mu_id: Name,
106    /// The target vertex this unfolds to.
107    pub target_vertex: Name,
108}
109
110/// A span connecting two vertices through a common source.
111///
112/// Spans model correspondences, diffs, and migrations:
113/// `left ← span → right`.
114#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub struct Span {
116    /// Unique span identifier.
117    pub id: Name,
118    /// Left vertex of the span.
119    pub left: Name,
120    /// Right vertex of the span.
121    pub right: Name,
122}
123
124/// Use-counting mode for an edge.
125///
126/// Captures the substructural distinction between edges that can
127/// be used freely (structural), exactly once (linear), or at most
128/// once (affine).
129#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
130pub enum UsageMode {
131    /// Can be used any number of times (default).
132    #[default]
133    Structural,
134    /// Must be used exactly once (e.g., protobuf `oneof`).
135    Linear,
136    /// Can be used at most once.
137    Affine,
138}
139
140/// Specification of a coercion between two value kinds.
141///
142/// Contains the forward coercion expression, an optional inverse for
143/// round-tripping, and the coercion class classifying the round-trip behavior.
144#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
145pub struct CoercionSpec {
146    /// Forward coercion expression (source to target).
147    pub forward: panproto_expr::Expr,
148    /// Inverse coercion expression (target to source) for the `put` direction.
149    pub inverse: Option<panproto_expr::Expr>,
150    /// Round-trip classification.
151    pub class: panproto_gat::CoercionClass,
152}
153
154/// A schema: a model of the protocol's schema theory.
155///
156/// Contains both the raw data (vertices, edges, constraints, etc.) and
157/// precomputed adjacency indices for efficient graph traversal.
158#[derive(Clone, Debug, Serialize, Deserialize)]
159pub struct Schema {
160    /// The protocol this schema belongs to.
161    pub protocol: String,
162    /// Vertices keyed by their ID.
163    pub vertices: HashMap<Name, Vertex>,
164    /// Edges keyed by the edge itself, value is the edge kind.
165    #[serde(with = "crate::serde_helpers::map_as_vec")]
166    pub edges: HashMap<Edge, Name>,
167    /// Hyper-edges keyed by their ID.
168    pub hyper_edges: HashMap<Name, HyperEdge>,
169    /// Constraints per vertex ID.
170    pub constraints: HashMap<Name, Vec<Constraint>>,
171    /// Required edges per vertex ID.
172    pub required: HashMap<Name, Vec<Edge>>,
173    /// NSID mapping: vertex ID to NSID string.
174    pub nsids: HashMap<Name, Name>,
175    /// Declared entry vertices.
176    ///
177    /// Semantically, this is the finite family of basepoints that makes
178    /// the schema a *pointed* schema: `E → Ob(C_S)` selecting the sorts
179    /// at which the W-algebra of instances may be rooted. Parsers set
180    /// this explicitly per their protocol's notion of a top-level
181    /// definition (a record, a top-level type, a path root, etc.);
182    /// consumers that need to choose an instance root consult it via
183    /// [`primary_entry`].
184    ///
185    /// Empty means the parser declined to supply a pointing; consumers
186    /// should then either fall back to a deterministic (but non-
187    /// canonical) selection or report an error. Order is preserved for
188    /// reproducibility; the set carries no duplicates.
189    #[serde(default)]
190    pub entries: Vec<Name>,
191
192    /// Coproduct variants per union vertex ID.
193    #[serde(default)]
194    pub variants: HashMap<Name, Vec<Variant>>,
195    /// Edge ordering positions (edge → position index).
196    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
197    pub orderings: HashMap<Edge, u32>,
198    /// Recursion points (fixpoint markers).
199    #[serde(default)]
200    pub recursion_points: HashMap<Name, RecursionPoint>,
201    /// Spans connecting pairs of vertices.
202    #[serde(default)]
203    pub spans: HashMap<Name, Span>,
204    /// Edge usage modes (default: `Structural` for all).
205    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
206    pub usage_modes: HashMap<Edge, UsageMode>,
207    /// Whether each vertex uses nominal identity (`true`) or
208    /// structural identity (`false`). Absent = structural.
209    #[serde(default)]
210    pub nominal: HashMap<Name, bool>,
211
212    // -- enrichment fields --
213    /// Coercion specifications: `(source_kind, target_kind)` to coercion spec.
214    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
215    pub coercions: HashMap<(Name, Name), CoercionSpec>,
216    /// Merger expressions: `vertex_id` to merger expression.
217    #[serde(default)]
218    pub mergers: HashMap<Name, panproto_expr::Expr>,
219    /// Default value expressions: `vertex_id` to default expression.
220    #[serde(default)]
221    pub defaults: HashMap<Name, panproto_expr::Expr>,
222    /// Conflict resolution policy expressions: `sort_name` to policy expression.
223    #[serde(default)]
224    pub policies: HashMap<Name, panproto_expr::Expr>,
225
226    // -- precomputed indices --
227    /// Outgoing edges per vertex ID.
228    pub outgoing: HashMap<Name, SmallVec<Edge, 4>>,
229    /// Incoming edges per vertex ID.
230    pub incoming: HashMap<Name, SmallVec<Edge, 4>>,
231    /// Edges between a specific `(src, tgt)` pair.
232    #[serde(with = "crate::serde_helpers::map_as_vec")]
233    pub between: HashMap<(Name, Name), SmallVec<Edge, 2>>,
234}
235
236impl Schema {
237    /// Look up a vertex by ID.
238    #[must_use]
239    pub fn vertex(&self, id: &str) -> Option<&Vertex> {
240        self.vertices.get(id)
241    }
242
243    /// Return all outgoing edges from the given vertex.
244    #[must_use]
245    pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
246        self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
247    }
248
249    /// Return all incoming edges to the given vertex.
250    #[must_use]
251    pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
252        self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
253    }
254
255    /// Return edges between a specific `(src, tgt)` pair.
256    #[must_use]
257    #[inline]
258    pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
259        self.between
260            .get(&(Name::from(src), Name::from(tgt)))
261            .map_or(&[], SmallVec::as_slice)
262    }
263
264    /// Returns `true` if the given vertex ID exists in this schema.
265    #[must_use]
266    #[inline]
267    pub fn has_vertex(&self, id: &str) -> bool {
268        self.vertices.contains_key(id)
269    }
270
271    /// Returns the number of vertices in the schema.
272    #[must_use]
273    pub fn vertex_count(&self) -> usize {
274        self.vertices.len()
275    }
276
277    /// Returns the number of edges in the schema.
278    #[must_use]
279    pub fn edge_count(&self) -> usize {
280        self.edges.len()
281    }
282
283    /// Return the declared entry vertices.
284    ///
285    /// See [`Schema::entries`] for semantics. Use [`primary_entry`] for
286    /// callers that need a single root and want a deterministic
287    /// fallback when no entries are declared.
288    #[must_use]
289    pub fn entry_vertices(&self) -> &[Name] {
290        &self.entries
291    }
292}
293
294/// Choose a single entry vertex for a schema.
295///
296/// Returns the first declared entry if any. Otherwise falls back to a
297/// deterministic, protocol-agnostic choice: among vertex ids sorted
298/// lexicographically, the first vertex that is a source of at least
299/// one edge and a target of none (an edgeless "root" in the signature
300/// graph); failing that, the first vertex that has outgoing edges at
301/// all; failing that, the lexicographically first vertex; `None` only
302/// for an empty schema.
303///
304/// The fallback is explicitly *non-canonical*: it exists so that legacy
305/// schemas without declared entries remain usable, but new parsers
306/// should always supply at least one entry via
307/// [`SchemaBuilder::entry`](crate::SchemaBuilder::entry) so that the
308/// pointing is part of the schema's semantics rather than recovered by
309/// heuristic.
310#[must_use]
311pub fn primary_entry(schema: &Schema) -> Option<&Name> {
312    if let Some(first) = schema.entries.first() {
313        return Some(first);
314    }
315
316    let mut ids: Vec<&Name> = schema.vertices.keys().collect();
317    ids.sort();
318
319    let has_outgoing = |id: &Name| -> bool {
320        schema
321            .outgoing
322            .get(id)
323            .is_some_and(|edges| !edges.is_empty())
324    };
325    let has_incoming = |id: &Name| -> bool {
326        schema
327            .incoming
328            .get(id)
329            .is_some_and(|edges| !edges.is_empty())
330    };
331
332    if let Some(id) = ids
333        .iter()
334        .copied()
335        .find(|id| has_outgoing(id) && !has_incoming(id))
336    {
337        return Some(id);
338    }
339    if let Some(id) = ids.iter().copied().find(|id| has_outgoing(id)) {
340        return Some(id);
341    }
342    ids.into_iter().next()
343}