Skip to main content

panproto_schema/
schema.rs

1//! Core schema data structures.
2//!
3//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
4//! vertices, binary edges, hyper-edges, constraints, required-edge
5//! declarations, and NSID mappings. Precomputed adjacency indices
6//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use serde::{Deserialize, Serialize};
12use smallvec::SmallVec;
13
14/// A schema vertex.
15///
16/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
17/// recognized vertex kinds, and an optional NSID (namespace identifier).
18#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
19pub struct Vertex {
20    /// Unique vertex identifier within the schema.
21    pub id: Name,
22    /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
23    pub kind: Name,
24    /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
25    pub nsid: Option<Name>,
26}
27
28/// A binary edge between two vertices.
29///
30/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
31/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
32/// provides an optional label (e.g., the property name).
33#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
34pub struct Edge {
35    /// Source vertex ID.
36    pub src: Name,
37    /// Target vertex ID.
38    pub tgt: Name,
39    /// Edge kind (e.g., `"prop"`, `"record-schema"`).
40    pub kind: Name,
41    /// Optional edge label (e.g., a property name like `"text"`).
42    pub name: Option<Name>,
43}
44
45/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
46///
47/// Hyper-edges connect multiple vertices via a labeled signature.
48#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
49pub struct HyperEdge {
50    /// Unique hyper-edge identifier.
51    pub id: Name,
52    /// Hyper-edge kind.
53    pub kind: Name,
54    /// Maps label names to vertex IDs.
55    pub signature: HashMap<Name, Name>,
56    /// The label that identifies the parent vertex.
57    pub parent_label: Name,
58}
59
60/// A constraint on a vertex.
61///
62/// Constraints restrict the values a vertex can hold (e.g., maximum
63/// string length, format pattern).
64#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
65pub struct Constraint {
66    /// The constraint sort (e.g., `"maxLength"`, `"format"`).
67    pub sort: Name,
68    /// The constraint value (e.g., `"3000"`, `"at-uri"`).
69    pub value: String,
70}
71
72/// A variant in a coproduct (sum type / union).
73///
74/// Each variant is injected into a parent vertex (the union/coproduct)
75/// with an optional discriminant tag.
76#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
77pub struct Variant {
78    /// Unique variant identifier.
79    pub id: Name,
80    /// The parent coproduct vertex this variant belongs to.
81    pub parent_vertex: Name,
82    /// Optional discriminant tag.
83    pub tag: Option<Name>,
84}
85
86/// An ordering annotation on an edge.
87///
88/// Records that the children reached via this edge are ordered,
89/// with a specific position index.
90#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
91pub struct Ordering {
92    /// The edge being ordered.
93    pub edge: Edge,
94    /// Position in the ordered collection.
95    pub position: u32,
96}
97
98/// A recursion point (fixpoint marker) in the schema.
99///
100/// Marks a vertex as a recursive reference to another vertex,
101/// satisfying the fold-unfold law: `unfold(fold(v)) = v`.
102#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
103pub struct RecursionPoint {
104    /// The fixpoint marker vertex ID.
105    pub mu_id: Name,
106    /// The target vertex this unfolds to.
107    pub target_vertex: Name,
108}
109
110/// A span connecting two vertices through a common source.
111///
112/// Spans model correspondences, diffs, and migrations:
113/// `left ← span → right`.
114#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
115pub struct Span {
116    /// Unique span identifier.
117    pub id: Name,
118    /// Left vertex of the span.
119    pub left: Name,
120    /// Right vertex of the span.
121    pub right: Name,
122}
123
124/// Use-counting mode for an edge.
125///
126/// Captures the substructural distinction between edges that can
127/// be used freely (structural), exactly once (linear), or at most
128/// once (affine).
129#[derive(Clone, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
130pub enum UsageMode {
131    /// Can be used any number of times (default).
132    #[default]
133    Structural,
134    /// Must be used exactly once (e.g., protobuf `oneof`).
135    Linear,
136    /// Can be used at most once.
137    Affine,
138}
139
140/// Specification of a coercion between two value kinds.
141///
142/// Contains the forward coercion expression, an optional inverse for
143/// round-tripping, and the coercion class classifying the round-trip behavior.
144#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
145pub struct CoercionSpec {
146    /// Forward coercion expression (source to target).
147    pub forward: panproto_expr::Expr,
148    /// Inverse coercion expression (target to source) for the `put` direction.
149    pub inverse: Option<panproto_expr::Expr>,
150    /// Round-trip classification.
151    pub class: panproto_gat::CoercionClass,
152}
153
154/// A schema: a model of the protocol's schema theory.
155///
156/// Contains both the raw data (vertices, edges, constraints, etc.) and
157/// precomputed adjacency indices for efficient graph traversal.
158#[derive(Clone, Debug, Serialize, Deserialize)]
159pub struct Schema {
160    /// The protocol this schema belongs to.
161    pub protocol: String,
162    /// Vertices keyed by their ID.
163    pub vertices: HashMap<Name, Vertex>,
164    /// Edges keyed by the edge itself, value is the edge kind.
165    #[serde(with = "crate::serde_helpers::map_as_vec")]
166    pub edges: HashMap<Edge, Name>,
167    /// Hyper-edges keyed by their ID.
168    pub hyper_edges: HashMap<Name, HyperEdge>,
169    /// Constraints per vertex ID.
170    pub constraints: HashMap<Name, Vec<Constraint>>,
171    /// Required edges per vertex ID.
172    pub required: HashMap<Name, Vec<Edge>>,
173    /// NSID mapping: vertex ID to NSID string.
174    pub nsids: HashMap<Name, Name>,
175
176    /// Coproduct variants per union vertex ID.
177    #[serde(default)]
178    pub variants: HashMap<Name, Vec<Variant>>,
179    /// Edge ordering positions (edge → position index).
180    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
181    pub orderings: HashMap<Edge, u32>,
182    /// Recursion points (fixpoint markers).
183    #[serde(default)]
184    pub recursion_points: HashMap<Name, RecursionPoint>,
185    /// Spans connecting pairs of vertices.
186    #[serde(default)]
187    pub spans: HashMap<Name, Span>,
188    /// Edge usage modes (default: `Structural` for all).
189    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
190    pub usage_modes: HashMap<Edge, UsageMode>,
191    /// Whether each vertex uses nominal identity (`true`) or
192    /// structural identity (`false`). Absent = structural.
193    #[serde(default)]
194    pub nominal: HashMap<Name, bool>,
195
196    // -- enrichment fields --
197    /// Coercion specifications: `(source_kind, target_kind)` to coercion spec.
198    #[serde(default, with = "crate::serde_helpers::map_as_vec_default")]
199    pub coercions: HashMap<(Name, Name), CoercionSpec>,
200    /// Merger expressions: `vertex_id` to merger expression.
201    #[serde(default)]
202    pub mergers: HashMap<Name, panproto_expr::Expr>,
203    /// Default value expressions: `vertex_id` to default expression.
204    #[serde(default)]
205    pub defaults: HashMap<Name, panproto_expr::Expr>,
206    /// Conflict resolution policy expressions: `sort_name` to policy expression.
207    #[serde(default)]
208    pub policies: HashMap<Name, panproto_expr::Expr>,
209
210    // -- precomputed indices --
211    /// Outgoing edges per vertex ID.
212    pub outgoing: HashMap<Name, SmallVec<Edge, 4>>,
213    /// Incoming edges per vertex ID.
214    pub incoming: HashMap<Name, SmallVec<Edge, 4>>,
215    /// Edges between a specific `(src, tgt)` pair.
216    #[serde(with = "crate::serde_helpers::map_as_vec")]
217    pub between: HashMap<(Name, Name), SmallVec<Edge, 2>>,
218}
219
220impl Schema {
221    /// Look up a vertex by ID.
222    #[must_use]
223    pub fn vertex(&self, id: &str) -> Option<&Vertex> {
224        self.vertices.get(id)
225    }
226
227    /// Return all outgoing edges from the given vertex.
228    #[must_use]
229    pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
230        self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
231    }
232
233    /// Return all incoming edges to the given vertex.
234    #[must_use]
235    pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
236        self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
237    }
238
239    /// Return edges between a specific `(src, tgt)` pair.
240    #[must_use]
241    #[inline]
242    pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
243        self.between
244            .get(&(Name::from(src), Name::from(tgt)))
245            .map_or(&[], SmallVec::as_slice)
246    }
247
248    /// Returns `true` if the given vertex ID exists in this schema.
249    #[must_use]
250    #[inline]
251    pub fn has_vertex(&self, id: &str) -> bool {
252        self.vertices.contains_key(id)
253    }
254
255    /// Returns the number of vertices in the schema.
256    #[must_use]
257    pub fn vertex_count(&self) -> usize {
258        self.vertices.len()
259    }
260
261    /// Returns the number of edges in the schema.
262    #[must_use]
263    pub fn edge_count(&self) -> usize {
264        self.edges.len()
265    }
266}