Skip to main content

jacquard_lexicon/
schema.rs

//! # Lexicon Schema Generation
//!
//! This module provides traits and utilities for generating AT Protocol lexicon schemas
//! from Rust types. This is the reverse direction from the usual lexicon→Rust codegen.
//!
//! ## Use Cases
//!
//! - **Rapid prototyping**: Define types in Rust, generate schemas automatically
//! - **Custom lexicons**: Easy creation of third-party lexicons
//! - **Documentation**: Keep types and schemas in sync
//! - **Runtime introspection**: Access schema metadata at runtime
//!
//! ## Usage
//!
//! ### Derive Macro
//!
//! Use `#[derive(LexiconSchema)]` to automatically generate schemas:
//!
//! ```rust,ignore
//! use jacquard_lexicon::schema::LexiconSchema;
//! use jacquard_common::CowStr;
//!
//! #[derive(LexiconSchema)]
//! #[lexicon(nsid = "app.bsky.feed.post", record, key = "tid")]
//! struct Post<'a> {
//!     #[lexicon(max_graphemes = 300, max_length = 3000)]
//!     text: CowStr<'a>,
//!     created_at: Datetime,
//! }
//! ```
//!
//! #### Constraint Attributes
//!
//! - **Field constraints**: `max_length`, `max_graphemes`, `min_length`, `min_graphemes`
//! - **Array constraints**: `max_items`, `min_items` (for the array itself)
//! - **Item constraints**: `item_max_length`, `item_max_graphemes`, etc. (for array items)
//! - **Integer constraints**: `minimum`, `maximum`
//! - **Refs**: `ref = "nsid"` to explicitly reference another type
//! - **Unions**: `union` to mark a field as a union type
//!
//! #### Fragments
//!
//! Multiple types can share the same NSID using fragments:
//!
//! ```rust,ignore
//! #[derive(LexiconSchema)]
//! #[lexicon(nsid = "app.bsky.feed.post", fragment = "textSlice")]
//! struct TextSlice {
//!     start: i64,
//!     end: i64,
//! }
//! ```
//!
//! ### Runtime Registry
//!
//! Access complete schemas (with all fragments merged) via the global registry:
//!
//! ```rust,ignore
//! let registry = jacquard_lexicon::schema::global_registry();
//! let post_doc = registry.get("app.bsky.feed.post").expect("schema exists");
//!
//! // The doc contains all defs: main, textSlice, entity, replyRef, etc.
//! for (def_name, def) in &post_doc.defs {
//!     println!("Def: {}", def_name);
//! }
//! ```
//!
//! ## Design Pattern
//!
//! - **Trait-based**: Types implement the `LexiconSchema` trait
//! - **Inventory-based discovery**: Runtime schema registry via the `inventory` crate
//! - **Fragment merging**: Multiple types with the same NSID have their defs merged
//! - **Const literals**: Generated code emits the schema as const data
//! - **Validation**: Runtime constraint checking via the `validate()` method
75
76pub mod builder;
77#[cfg(feature = "codegen")]
78pub mod from_ast;
79#[cfg(feature = "codegen")]
80pub mod type_mapping;
81
82use crate::lexicon::LexiconDoc;
83
84/// Trait for types that can generate lexicon schemas
85pub trait LexiconSchema {
86    /// The NSID for this type's primary definition
87    ///
88    /// For fragments, this is the base NSID (without `#fragment`).
89    fn nsid() -> &'static str;
90
91    /// The definition name within the lexicon document
92    ///
93    /// Returns "main" for the primary definition, or the fragment name for other defs.
94    /// For example, in a lexicon with multiple defs like `pub.leaflet.poll.definition`,
95    /// the main type returns "main" while the `Option` type returns "option".
96    fn def_name() -> &'static str {
97        "main"
98    }
99
100    /// The schema ID for this type
101    ///
102    /// Defaults to NSID. Override for fragments to include `#fragment` suffix.
103    fn schema_id() -> jacquard_common::CowStr<'static> {
104        jacquard_common::CowStr::new_static(Self::nsid())
105    }
106
107    /// Whether this type should be inlined vs referenced
108    ///
109    /// - `false` (default): Type becomes a def, references use `{"type": "ref", "ref": "nsid"}`
110    /// - `true`: Type's schema is inlined directly into parent
111    ///
112    /// Recursive types MUST return `false` to avoid infinite expansion.
113    fn inline_schema() -> bool {
114        false
115    }
116
117    /// Generate the lexicon document for this type
118    ///
119    /// Returns the complete lexicon schema for this type. Nested refs are resolved
120    /// at runtime via the inventory-based registry.
121    fn lexicon_doc() -> LexiconDoc<'static>;
122
123    /// Validate an instance against lexicon constraints
124    ///
125    /// Checks runtime constraints like `max_length`, `max_graphemes`, `minimum`, etc.
126    /// Returns `Ok(())` if valid, `Err` with details if invalid.
127    fn validate(&self) -> Result<(), crate::validation::ConstraintError> {
128        // Default impl: no constraints to check
129        Ok(())
130    }
131}
132
133/// Registry entry for schema discovery via inventory
134///
135/// Generated automatically by `#[derive(LexiconSchema)]` to enable runtime schema discovery.
136pub struct LexiconSchemaRef {
137    /// The NSID for this schema
138    pub nsid: &'static str,
139    /// The def name within the lexicon (e.g., "main", "textSlice")
140    pub def_name: &'static str,
141    /// Function that generates the lexicon document
142    pub provider: fn() -> crate::lexicon::LexiconDoc<'static>,
143}
144
145inventory::collect!(LexiconSchemaRef);
146
147/// Registry of lexicon schemas
148///
149/// Collects schemas from inventory at construction and supports runtime insertion.
150#[derive(Debug, Clone)]
151pub struct SchemaRegistry {
152    /// Schema documents indexed by NSID (concurrent access safe)
153    schemas: dashmap::DashMap<
154        jacquard_common::deps::smol_str::SmolStr,
155        crate::lexicon::LexiconDoc<'static>,
156    >,
157}
158
159impl SchemaRegistry {
160    /// Build registry from inventory-collected schemas
161    pub fn from_inventory() -> Self {
162        use jacquard_common::deps::smol_str::ToSmolStr;
163        let schemas = dashmap::DashMap::new();
164
165        for entry in inventory::iter::<LexiconSchemaRef> {
166            let doc = (entry.provider)();
167
168            // Get existing doc or create new one
169            let mut doc_entry = schemas.entry(entry.nsid.to_smolstr()).or_insert_with(|| {
170                crate::lexicon::LexiconDoc {
171                    lexicon: crate::lexicon::Lexicon::Lexicon1,
172                    id: jacquard_common::CowStr::new_static(entry.nsid),
173                    revision: None,
174                    description: None,
175                    defs: Default::default(),
176                }
177            });
178
179            // Merge the defs from this schema
180            // Each type's lexicon_doc() now returns a doc with the def under its proper name
181            for (def_name, def) in doc.defs {
182                doc_entry.defs.insert(def_name, def);
183            }
184        }
185
186        Self { schemas }
187    }
188
189    /// Create an empty registry
190    pub fn new() -> Self {
191        Self {
192            schemas: dashmap::DashMap::new(),
193        }
194    }
195
196    /// Get schema by NSID
197    ///
198    /// IMPORTANT: Clone the returned schema immediately to avoid holding DashMap ref
199    pub fn get(&self, nsid: &str) -> Option<crate::lexicon::LexiconDoc<'static>> {
200        self.schemas.get(nsid).map(|doc| doc.clone())
201    }
202
203    /// Insert or update a schema (for runtime schema loading)
204    pub fn insert(
205        &self,
206        nsid: jacquard_common::deps::smol_str::SmolStr,
207        doc: crate::lexicon::LexiconDoc<'static>,
208    ) {
209        self.schemas.insert(nsid, doc);
210    }
211
212    /// Get specific def from a schema
213    ///
214    /// IMPORTANT: Returns cloned def to avoid holding DashMap ref
215    pub fn get_def(
216        &self,
217        nsid: &str,
218        def_name: &str,
219    ) -> Option<crate::lexicon::LexUserType<'static>> {
220        // Clone immediately to release DashMap ref before returning
221        self.schemas
222            .get(nsid)
223            .and_then(|doc| doc.defs.get(def_name).cloned())
224    }
225}
226
227impl Default for SchemaRegistry {
228    fn default() -> Self {
229        Self::from_inventory()
230    }
231}
232
233/// Global schema registry built from inventory
234pub fn global_registry() -> &'static SchemaRegistry {
235    static REGISTRY: std::sync::LazyLock<SchemaRegistry> =
236        std::sync::LazyLock::new(SchemaRegistry::from_inventory);
237    &REGISTRY
238}
239
#[cfg(test)]
mod tests {
    use crate::validation::{ConstraintError, ValidationPath};

    /// The rendered `MaxLength` error names the violated constraint.
    #[test]
    fn test_validation_max_length() {
        let path = ValidationPath::from_field("text");
        let violation = ConstraintError::MaxLength {
            path,
            max: 100,
            actual: 150,
        };

        let message = violation.to_string();
        assert!(message.contains("exceeds max length"));
    }

    /// The rendered `MaxGraphemes` error names the violated constraint.
    #[test]
    fn test_validation_max_graphemes() {
        let path = ValidationPath::from_field("text");
        let violation = ConstraintError::MaxGraphemes {
            path,
            max: 50,
            actual: 75,
        };

        let message = violation.to_string();
        assert!(message.contains("exceeds max graphemes"));
    }
}
263}