panproto_schema/schema.rs
1//! Core schema data structures.
2//!
3//! A [`Schema`] is a model of a protocol's schema theory GAT. It stores
4//! vertices, binary edges, hyper-edges, constraints, required-edge
5//! declarations, and NSID mappings. Precomputed adjacency indices
6//! (`outgoing`, `incoming`, `between`) enable fast traversal.
7
8use std::collections::HashMap;
9
10use serde::{Deserialize, Serialize};
11use smallvec::SmallVec;
12
13/// A schema vertex.
14///
15/// Each vertex has a unique `id`, a `kind` drawn from the protocol's
16/// recognized vertex kinds, and an optional NSID (namespace identifier).
17#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
18pub struct Vertex {
19 /// Unique vertex identifier within the schema.
20 pub id: String,
21 /// The vertex kind (e.g., `"record"`, `"object"`, `"string"`).
22 pub kind: String,
23 /// Optional namespace identifier (e.g., `"app.bsky.feed.post"`).
24 pub nsid: Option<String>,
25}
26
27/// A binary edge between two vertices.
28///
29/// Edges are directed: they go from `src` to `tgt`. The `kind` determines
30/// the structural role (e.g., `"prop"`, `"record-schema"`), and `name`
31/// provides an optional label (e.g., the property name).
32#[derive(Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
33pub struct Edge {
34 /// Source vertex ID.
35 pub src: String,
36 /// Target vertex ID.
37 pub tgt: String,
38 /// Edge kind (e.g., `"prop"`, `"record-schema"`).
39 pub kind: String,
40 /// Optional edge label (e.g., a property name like `"text"`).
41 pub name: Option<String>,
42}
43
44/// A hyper-edge (present only when the schema theory includes `ThHypergraph`).
45///
46/// Hyper-edges connect multiple vertices via a labeled signature.
47#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
48pub struct HyperEdge {
49 /// Unique hyper-edge identifier.
50 pub id: String,
51 /// Hyper-edge kind.
52 pub kind: String,
53 /// Maps label names to vertex IDs.
54 pub signature: HashMap<String, String>,
55 /// The label that identifies the parent vertex.
56 pub parent_label: String,
57}
58
59/// A constraint on a vertex.
60///
61/// Constraints restrict the values a vertex can hold (e.g., maximum
62/// string length, format pattern).
63#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
64pub struct Constraint {
65 /// The constraint sort (e.g., `"maxLength"`, `"format"`).
66 pub sort: String,
67 /// The constraint value (e.g., `"3000"`, `"at-uri"`).
68 pub value: String,
69}
70
71/// A schema: a model of the protocol's schema theory.
72///
73/// Contains both the raw data (vertices, edges, constraints, etc.) and
74/// precomputed adjacency indices for efficient graph traversal.
75#[derive(Clone, Debug, Serialize, Deserialize)]
76pub struct Schema {
77 /// The protocol this schema belongs to.
78 pub protocol: String,
79 /// Vertices keyed by their ID.
80 pub vertices: HashMap<String, Vertex>,
81 /// Edges keyed by the edge itself, value is the edge kind.
82 pub edges: HashMap<Edge, String>,
83 /// Hyper-edges keyed by their ID.
84 pub hyper_edges: HashMap<String, HyperEdge>,
85 /// Constraints per vertex ID.
86 pub constraints: HashMap<String, Vec<Constraint>>,
87 /// Required edges per vertex ID.
88 pub required: HashMap<String, Vec<Edge>>,
89 /// NSID mapping: vertex ID to NSID string.
90 pub nsids: HashMap<String, String>,
91
92 // -- precomputed indices --
93 /// Outgoing edges per vertex ID.
94 pub outgoing: HashMap<String, SmallVec<Edge, 4>>,
95 /// Incoming edges per vertex ID.
96 pub incoming: HashMap<String, SmallVec<Edge, 4>>,
97 /// Edges between a specific `(src, tgt)` pair.
98 pub between: HashMap<(String, String), SmallVec<Edge, 2>>,
99}
100
101impl Schema {
102 /// Look up a vertex by ID.
103 #[must_use]
104 pub fn vertex(&self, id: &str) -> Option<&Vertex> {
105 self.vertices.get(id)
106 }
107
108 /// Return all outgoing edges from the given vertex.
109 #[must_use]
110 pub fn outgoing_edges(&self, vertex_id: &str) -> &[Edge] {
111 self.outgoing.get(vertex_id).map_or(&[], SmallVec::as_slice)
112 }
113
114 /// Return all incoming edges to the given vertex.
115 #[must_use]
116 pub fn incoming_edges(&self, vertex_id: &str) -> &[Edge] {
117 self.incoming.get(vertex_id).map_or(&[], SmallVec::as_slice)
118 }
119
120 /// Return edges between a specific `(src, tgt)` pair.
121 #[must_use]
122 pub fn edges_between(&self, src: &str, tgt: &str) -> &[Edge] {
123 self.between
124 .get(&(src.to_owned(), tgt.to_owned()))
125 .map_or(&[], SmallVec::as_slice)
126 }
127
128 /// Returns `true` if the given vertex ID exists in this schema.
129 #[must_use]
130 pub fn has_vertex(&self, id: &str) -> bool {
131 self.vertices.contains_key(id)
132 }
133
134 /// Returns the number of vertices in the schema.
135 #[must_use]
136 pub fn vertex_count(&self) -> usize {
137 self.vertices.len()
138 }
139
140 /// Returns the number of edges in the schema.
141 #[must_use]
142 pub fn edge_count(&self) -> usize {
143 self.edges.len()
144 }
145}