obj_derive/lib.rs
1//! `obj-derive` — procedural macros for `obj`.
2//!
3//! # ⚠️ UNSTABLE — consume via `obj-db`, not directly
4//!
5//! `obj-derive` is an implementation detail of `obj-db` (re-exported as
6//! `obj::Document`). It is published only so `obj-db` can depend on it and
7//! carries **no `SemVer` guarantee** as a standalone crate — depend on
8//! `obj-db` and write `#[derive(obj::Document)]`. Only `obj-db`'s public
9//! surface is frozen at 1.0; `obj-derive` is excluded from the public-api
10//! freeze gate (see `docs/public-api.md`).
11//!
12//! This crate provides `#[derive(obj::Document)]`, which emits the
13//! [`obj_core::Document`](https://docs.rs/obj/latest/obj/trait.Document.html)
14//! implementation for a user struct. The derive is intentionally
15//! small — it fills in the trait's associated constants
16//! (`COLLECTION`, `VERSION`) from optional `#[obj(...)]` attributes
17//! and emits an `indexes()` override whenever any field carries an
18//! `#[obj(index ...)]` attribute.
19//!
20//! # Supported attributes
21//!
22//! Struct-level (`#[obj(...)]` directly above the `struct` keyword):
23//!
24//! - `version = N` (integer ≥ 0) — sets `Document::VERSION`.
25//! - `collection = "name"` (non-empty string literal) — sets
26//! `Document::COLLECTION`.
27//!
28//! Multiple `#[obj(...)]` attributes compose; the same scalar key
29//! (`version`, `collection`) declared twice is a compile error.
30//!
31//! Struct-level composite (one or more occurrences compose, each
32//! adding one `Composite` `IndexSpec`):
33//!
34//! - `index_composite(fields = ("a", "b"), name = "by_a_b")` — emit a
35//! `Composite` `IndexSpec` spanning the listed fields. `name`
36//! defaults to the field names joined with `__`. The referenced
37//! fields must exist on the struct; fewer than two is a compile
38//! error.
39//! - `index = ("a", "b")` — short form, equivalent to
40//! `index_composite(fields = ("a", "b"))`. Same downstream
41//! validation (≥ 2 fields, each declared on the struct). The
42//! default index name is the fields joined with `__`; there is no
43//! `name = "..."` slot on the short form — use `index_composite`
44//! when a custom name is required. Both syntaxes coexist; the
45//! short form mirrors `design.md` § Indexes verbatim.
46//!
47//! Field-level (`#[obj(...)]` on a struct field):
48//!
49//! - `index` — emit a `Standard` `IndexSpec` for this field.
50//! - `index = unique` — emit a `Unique` `IndexSpec` for this field.
51//! - `index = each` — emit an `Each` `IndexSpec` for this field. The
52//! field type must syntactically be `Vec<...>` — otherwise the
53//! derive errors at compile time.
54//! - `name = "..."` — alongside any `index = ...`, overrides the
55//! default index name (which is the field name).
56//!
57//! Struct-level historical schema registry (M10 #82):
58//!
59//! - `history(v1 = OldType1, v2 = OldType2)` — emit a
60//! `Document::historical_schemas()` override that lifts each
61//! version into a `(version, DynamicSchema)` pair via
62//! `<OldType as ::obj::Schema>::schema()`. The keys (`v1`, `v2`,
63//! …) must be `vN` for integer `N` ≥ 1; the values are arbitrary
64//! type paths. Each named type must implement `::obj::Schema`
65//! (hand-impls are accepted; the derive auto-implements `Schema`
66//! for types that opt in via `#[obj(history(...))]` or
67//! `#[obj(schema)]`). Entries are emitted in ascending version
68//! order.
69//! - `schema` — explicitly opt the current type into a derived
70//! `Schema` impl WITHOUT declaring any history. Useful when the
71//! type is referenced from a future version's `history(...)`.
72//!
73//! When either `history(...)` or `schema` is declared, the derive
74//! emits a companion `impl ::obj::Schema` block whose `schema()`
75//! body maps each field to a `DynamicSchema` variant. Scalar
76//! primitives (bool, u\*, i\*, f\*, String) map directly; `Vec<T>`
77//! maps to `DynamicSchema::seq(<T as Schema>::schema())`; anything
78//! else delegates via `<T as ::obj::Schema>::schema()`, which fails
79//! to compile if `T` lacks a `Schema` impl.
80//!
81//! # Serde requirements
82//!
83//! The derive does **NOT** emit `serde::Serialize` or
84//! `serde::Deserialize` for you. Users still write
85//! `#[derive(serde::Serialize, serde::Deserialize)]` on the struct
86//! alongside `#[derive(obj::Document)]`.
87//!
88//! # Power-of-ten posture
89//!
90//! - **Rule 4** — every function in this crate is ≤ 60 lines.
91//! - **Rule 7** — every fallible path returns `syn::Result<...>`;
92//! `unwrap`/`expect` appear only on infallible primitives.
93//! - **Rule 9** — generated code is minimal and inspectable. Every
94//! emitted item is prefixed by a `// auto-generated by
95//! #[derive(Document)]` marker so `cargo expand` output is easy
96//! to spot.
97
98#![forbid(unsafe_code)]
99#![deny(missing_docs)]
100#![deny(rustdoc::broken_intra_doc_links)]
101
102use proc_macro::TokenStream;
103use quote::quote;
104use syn::spanned::Spanned;
105use syn::{
106 parse_macro_input, Attribute, Data, DataStruct, DeriveInput, Field, Fields, LitInt, LitStr,
107 Type, TypePath,
108};
109
110/// Derive macro for `obj::Document`.
111///
112/// Emits `impl ::obj::Document for <Ident> { ... }` with sensible
113/// defaults:
114///
115/// - `COLLECTION` defaults to the unqualified type name as a string;
116/// `#[obj(collection = "explicit_name")]` overrides.
117/// - `VERSION` defaults to `1`; `#[obj(version = N)]` overrides.
118/// - `indexes()` is omitted (the trait default `Vec::new()` is used)
119/// when the struct carries no index-related attributes; otherwise
120/// the derive emits a `Vec<::obj::IndexSpec>` in field-declaration
121/// order.
122///
123/// All emitted paths are absolute (`::obj::Document`,
124/// `::obj::IndexSpec`) so the derive is hygienic against local items
125/// that shadow these names.
126#[proc_macro_derive(Document, attributes(obj))]
127pub fn derive_document(input: TokenStream) -> TokenStream {
128 let input = parse_macro_input!(input as DeriveInput);
129 match emit_impl(&input) {
130 Ok(ts) => ts.into(),
131 Err(err) => err.to_compile_error().into(),
132 }
133}
134
135/// Build the `impl ::obj::Document` block for `input`. Also emits
136/// the companion `impl ::obj::Schema` and a `historical_schemas()`
137/// override when the user supplied `#[obj(history(...))]`.
138///
139/// The `Schema` impl is emitted ONLY when at least one of these is
140/// true:
141///
142/// - the struct carries `#[obj(history(...))]` (the user is
143/// opting into schema-evolution at this site, and the current
144/// type needs a self-describing schema so that *future* versions
145/// can reference it via `history(vN = ThisType)`);
146/// - the struct carries `#[obj(schema)]` (explicit opt-in).
147///
148/// Bare-derive sites do NOT emit `Schema` — a `Document` with no
149/// historical versions has no need for one, and emitting the impl
150/// would require every nested field type to also be `Schema`
151/// (which is too aggressive an ask for types that never participate
152/// in migration).
153fn emit_impl(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
154 let attrs = parse_struct_attrs(input)?;
155 // Enums get a `Schema`-only emission. A `Document` impl on an
156 // enum is rejected: collections store records, not bare variants.
157 // The user opts in to the Schema impl via `#[obj(schema)]` or
158 // `#[obj(history(...))]`; bare `#[derive(Document)]` on an enum
159 // is a compile error so the diagnostic is loud.
160 if matches!(input.data, Data::Enum(_)) {
161 if !attrs.emit_schema {
162 return Err(syn::Error::new(
163 input.span(),
164 "#[derive(obj::Document)] on an enum requires `#[obj(schema)]` \
165 (or `#[obj(history(...))]`); an enum is never a Document itself",
166 ));
167 }
168 return emit_schema_impl(input);
169 }
170 emit_struct_impl(input, &attrs)
171}
172
173/// Build the `impl ::obj::Document` block for a struct + emit the
174/// companion `impl ::obj::Schema` when `attrs.emit_schema` is set.
175fn emit_struct_impl(
176 input: &DeriveInput,
177 attrs: &StructAttrs,
178) -> syn::Result<proc_macro2::TokenStream> {
179 let ident = &input.ident;
180 let collection = attrs
181 .collection
182 .clone()
183 .unwrap_or_else(|| ident.to_string());
184 let version: u32 = attrs.version.unwrap_or(1);
185 let mut index_specs = collect_field_indexes(input)?;
186 let composite_specs = validate_and_lift_composites(input, &attrs.composites)?;
187 index_specs.extend(composite_specs);
188 let indexes_body = emit_indexes_body(&index_specs);
189 let schema_impl = if attrs.emit_schema {
190 emit_schema_impl(input)?
191 } else {
192 proc_macro2::TokenStream::new()
193 };
194 let history_body = emit_history_body(&attrs.history);
195 let out = quote! {
196 // auto-generated by #[derive(Document)]
197 #[automatically_derived]
198 impl ::obj::Document for #ident {
199 const COLLECTION: &'static str = #collection;
200 const VERSION: u32 = #version;
201 #indexes_body
202 #history_body
203 }
204 #schema_impl
205 };
206 Ok(out)
207}
208
209/// Emit an `impl ::obj::Schema for <Ident>` block whose `schema()`
210/// returns the `DynamicSchema::Map(...)` corresponding to the
211/// struct's declared fields.
212///
213/// The mapping from Rust field type to `DynamicSchema` is the
214/// syntactic table documented in `obj_core::codec::schema`:
215/// scalar primitives map directly; `Vec<T>` maps to
216/// `DynamicSchema::seq(<T as Schema>::schema())`; anything else is
217/// treated as a `Schema`-implementing path and delegates via
218/// `<T as ::obj::Schema>::schema()`.
219fn emit_schema_impl(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
220 let ident = &input.ident;
221 let body = match &input.data {
222 Data::Struct(_) => emit_schema_body_struct(input)?,
223 Data::Enum(data) => emit_schema_body_enum(data)?,
224 Data::Union(_) => {
225 return Err(syn::Error::new(
226 input.span(),
227 "#[derive(obj::Document)] does not support unions",
228 ));
229 }
230 };
231 Ok(quote! {
232 // auto-generated by #[derive(Document)]
233 #[automatically_derived]
234 impl ::obj::Schema for #ident {
235 fn schema() -> ::obj::DynamicSchema {
236 #body
237 }
238 }
239 })
240}
241
242/// Build the `Schema::schema()` body for a struct: a
243/// `DynamicSchema::Map` over each named field's syntactic type.
244fn emit_schema_body_struct(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
245 let fields = named_fields(input)?;
246 let entries = fields
247 .iter()
248 .map(|f| {
249 let name = named_field_name(f)?;
250 let ty_schema = field_type_to_schema(&f.ty);
251 Ok(quote! { (::std::string::String::from(#name), #ty_schema) })
252 })
253 .collect::<syn::Result<Vec<_>>>()?;
254 Ok(quote! {
255 ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
256 })
257}
258
259/// Pull the string name out of a named struct/variant field, returning
260/// a `syn::Error` (never a panic) if the field has no `ident`.
261///
262/// `syn` only constructs `Field` values with `ident == None` inside
263/// `Fields::Unnamed`. Every call site here is already guarded by a
264/// `Fields::Named(_)` pattern, so the `None` branch is structurally
265/// unreachable — but Power-of-Ten Rule 7 forbids panicking unwraps
266/// in production paths regardless. A surfaced `syn::Error` is the
267/// safe fallback if a future refactor breaks the invariant.
268fn named_field_name(field: &Field) -> syn::Result<String> {
269 field
270 .ident
271 .as_ref()
272 .map(ToString::to_string)
273 .ok_or_else(|| syn::Error::new(field.span(), "expected named field"))
274}
275
276/// Build the `Schema::schema()` body for an enum: a
277/// `DynamicSchema::Enum` over each variant in declaration order
278/// (postcard assigns discriminants by declaration order; the derive
279/// matches that). Unit variants get `Null` payloads; newtype
280/// variants get the inner type's schema; tuple variants get a
281/// synthetic `Map` keyed by `"0"`, `"1"`, …; struct variants get a
282/// `Map` keyed by the field names.
283fn emit_schema_body_enum(data: &syn::DataEnum) -> syn::Result<proc_macro2::TokenStream> {
284 let entries = data
285 .variants
286 .iter()
287 .enumerate()
288 .map(|(idx, v)| {
289 let discriminant = u32::try_from(idx).unwrap_or(u32::MAX);
290 let name = v.ident.to_string();
291 let payload = variant_payload_schema(&v.fields)?;
292 Ok(quote! {
293 ::obj::EnumVariantSchema::new(
294 #discriminant,
295 #name,
296 #payload,
297 )
298 })
299 })
300 .collect::<syn::Result<Vec<_>>>()?;
301 Ok(quote! {
302 ::obj::DynamicSchema::Enum(::std::vec![ #( #entries ),* ])
303 })
304}
305
306/// Map an enum variant's `Fields` shape to the token stream that
307/// constructs its payload [`DynamicSchema`] at runtime.
308fn variant_payload_schema(fields: &Fields) -> syn::Result<proc_macro2::TokenStream> {
309 match fields {
310 Fields::Unit => Ok(quote! { ::obj::DynamicSchema::Null }),
311 Fields::Unnamed(unnamed) => {
312 // Newtype variant `V(T)` → use `T`'s schema directly. Tuple
313 // variants `V(T, U, ...)` → synthesise a Map keyed by
314 // `"0"`, `"1"`, …; postcard writes the inner fields
315 // positionally, same wire shape as a struct's bytes.
316 let count = unnamed.unnamed.len();
317 if count == 1 {
318 let ty = &unnamed.unnamed[0].ty;
319 Ok(field_type_to_schema(ty))
320 } else {
321 let entries = unnamed.unnamed.iter().enumerate().map(|(i, f)| {
322 let key = i.to_string();
323 let ty_schema = field_type_to_schema(&f.ty);
324 quote! { (::std::string::String::from(#key), #ty_schema) }
325 });
326 Ok(quote! {
327 ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
328 })
329 }
330 }
331 Fields::Named(named) => {
332 let entries = named
333 .named
334 .iter()
335 .map(|f| {
336 let name = named_field_name(f)?;
337 let ty_schema = field_type_to_schema(&f.ty);
338 Ok(quote! { (::std::string::String::from(#name), #ty_schema) })
339 })
340 .collect::<syn::Result<Vec<_>>>()?;
341 Ok(quote! {
342 ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
343 })
344 }
345 }
346}
347
348/// Emit either a `Document::historical_schemas()` override or an
349/// empty token stream (which leaves the trait default in place).
350fn emit_history_body(entries: &[HistoryAttr]) -> proc_macro2::TokenStream {
351 if entries.is_empty() {
352 return proc_macro2::TokenStream::new();
353 }
354 // Pre-sort entries by version so the emitted vector is sorted
355 // ascending — the codec's `decode` dispatch binary-searches and
356 // debug-asserts on order.
357 let mut sorted: Vec<&HistoryAttr> = entries.iter().collect();
358 sorted.sort_by_key(|h| h.version);
359 let items = sorted.iter().map(|h| {
360 let version = h.version;
361 let path = &h.ty_path;
362 quote! { (#version, <#path as ::obj::Schema>::schema()) }
363 });
364 quote! {
365 fn historical_schemas() -> ::std::vec::Vec<(u32, ::obj::DynamicSchema)> {
366 // auto-generated by #[derive(Document)]
367 ::std::vec![ #( #items ),* ]
368 }
369 }
370}
371
372/// Map a struct field's syntactic Rust type to a token-stream that
373/// constructs a [`DynamicSchema`] value at runtime.
374fn field_type_to_schema(ty: &Type) -> proc_macro2::TokenStream {
375 if let Some(name) = scalar_schema_for(ty) {
376 let ident = quote::format_ident!("{name}");
377 return quote! { ::obj::DynamicSchema::#ident };
378 }
379 if let Some(inner) = vec_inner_type(ty) {
380 let inner_schema = field_type_to_schema(inner);
381 return quote! { ::obj::DynamicSchema::seq(#inner_schema) };
382 }
383 // Fallback: treat the type as `Schema`-implementing. This is
384 // the path used for nested user structs and for any type the
385 // syntactic scan does not recognise — the resulting expansion
386 // fails to compile if the type lacks a `Schema` impl, which is
387 // the diagnostic we want.
388 quote! { <#ty as ::obj::Schema>::schema() }
389}
390
391/// Return the [`DynamicSchema`] variant name for `ty` if `ty` is one
392/// of the built-in scalars; `None` otherwise. The result is used by
393/// [`field_type_to_schema`] to construct the leaf token stream.
394fn scalar_schema_for(ty: &Type) -> Option<&'static str> {
395 let Type::Path(TypePath { qself: None, path }) = ty else {
396 return None;
397 };
398 let segment = path.segments.last()?;
399 if !segment.arguments.is_none() {
400 return None;
401 }
402 let s = segment.ident.to_string();
403 match s.as_str() {
404 "bool" => Some("Bool"),
405 "u8" | "u16" | "u32" | "u64" | "usize" => Some("U64"),
406 "i8" | "i16" | "i32" | "i64" | "isize" => Some("I64"),
407 "f32" | "f64" => Some("F64"),
408 "String" => Some("String"),
409 _ => None,
410 }
411}
412
413/// If `ty` is `Vec<T>`, return `&T`; otherwise `None`.
414fn vec_inner_type(ty: &Type) -> Option<&Type> {
415 let Type::Path(TypePath { qself: None, path }) = ty else {
416 return None;
417 };
418 let seg = path.segments.last()?;
419 if seg.ident != "Vec" {
420 return None;
421 }
422 let syn::PathArguments::AngleBracketed(args) = &seg.arguments else {
423 return None;
424 };
425 args.args.iter().find_map(|a| match a {
426 syn::GenericArgument::Type(t) => Some(t),
427 _ => None,
428 })
429}
430
431/// Validate every composite declaration against the struct's named
432/// fields and lift each one into an `IndexSpecEmit`. Errors on:
433///
434/// - composite with fewer than 2 fields,
435/// - a referenced field name that is not declared on the struct.
436fn validate_and_lift_composites(
437 input: &DeriveInput,
438 composites: &[CompositeAttr],
439) -> syn::Result<Vec<IndexSpecEmit>> {
440 if composites.is_empty() {
441 return Ok(Vec::new());
442 }
443 let fields = named_fields(input)?;
444 let known: std::collections::HashSet<String> = fields
445 .iter()
446 .filter_map(|f| f.ident.as_ref().map(ToString::to_string))
447 .collect();
448 let mut out: Vec<IndexSpecEmit> = Vec::with_capacity(composites.len());
449 for c in composites {
450 if c.fields.len() < 2 {
451 return Err(syn::Error::new(c.span, "composite needs ≥ 2 fields"));
452 }
453 for field in &c.fields {
454 if !known.contains(field) {
455 return Err(syn::Error::new(
456 c.span,
457 format!("field '{field}' not declared on struct"),
458 ));
459 }
460 }
461 let index_name = c.custom_name.clone().unwrap_or_else(|| c.fields.join("__"));
462 out.push(IndexSpecEmit {
463 kind: IndexKind::Composite(c.fields.clone()),
464 field_name: String::new(),
465 index_name,
466 });
467 }
468 Ok(out)
469}
470
471/// Emit either an `indexes()` override or an empty token stream
472/// (which leaves the trait default in place).
473fn emit_indexes_body(specs: &[IndexSpecEmit]) -> proc_macro2::TokenStream {
474 if specs.is_empty() {
475 return proc_macro2::TokenStream::new();
476 }
477 let entries = specs.iter().map(IndexSpecEmit::emit);
478 quote! {
479 fn indexes() -> ::std::vec::Vec<::obj::IndexSpec> {
480 // auto-generated by #[derive(Document)]
481 //
482 // Each entry is an `IndexSpec::{standard,unique,each,composite}`
483 // call returning `Result`. Inputs were validated at derive
484 // expansion time, so the error arm is statically unreachable;
485 // we still handle it explicitly (push only `Ok`) so the
486 // generated code is panic-free (Power-of-Ten Rule 7).
487 let mut out: ::std::vec::Vec<::obj::IndexSpec> = ::std::vec::Vec::new();
488 #(
489 if let ::std::result::Result::Ok(spec) = #entries {
490 out.push(spec);
491 }
492 )*
493 out
494 }
495 }
496}
497
/// One parsed struct-level composite-index declaration.
///
/// Produced both by the long form
/// `#[obj(index_composite(fields = (...), name = "..."))]` and by
/// the short form `#[obj(index = ("a", "b"))]`. Neither parser
/// checks the field list; the "≥ 2 fields" and "field exists on the
/// struct" rules are enforced later by
/// `validate_and_lift_composites`.
#[derive(Debug)]
struct CompositeAttr {
    /// User-provided field names, in the order they were written.
    /// Each MUST exist on the struct (checked during validation, not
    /// at parse time).
    fields: Vec<String>,
    /// Optional `name = "..."` override; when `None` the index name
    /// defaults to the fields joined with `__`. Always `None` for
    /// the short form, which has no `name` slot.
    custom_name: Option<String>,
    /// Span of the attribute key (`index_composite` / `index`), used
    /// for validation diagnostics such as "field 'x' not declared on
    /// struct".
    span: proc_macro2::Span,
}
509
/// Accumulated item-level `#[obj(...)]` attributes.
///
/// Built by `parse_struct_attrs`, which starts from
/// `StructAttrs::default()` and folds every `#[obj(...)]` attribute
/// on the item into it.
#[derive(Default, Debug)]
struct StructAttrs {
    /// `#[obj(version = N)]` override; `None` means the emitter
    /// falls back to `1`.
    version: Option<u32>,
    /// `#[obj(collection = "name")]` override (never empty when
    /// `Some`); `None` means the emitter falls back to the type
    /// name.
    collection: Option<String>,
    /// Zero or more composite-index declarations — from
    /// `#[obj(index_composite(...))]` or the short
    /// `#[obj(index = (...))]` form — preserved in declaration order
    /// so the emitted `indexes()` is deterministic.
    composites: Vec<CompositeAttr>,
    /// `#[obj(history(v1 = Type1, v2 = Type2))]` entries — one per
    /// historical version. Parsed in declaration order; the emitter
    /// re-sorts by `version` before emitting.
    history: Vec<HistoryAttr>,
    /// `true` iff the user opted into emitting a companion
    /// `impl ::obj::Schema` block. Set implicitly when
    /// `#[obj(history(...))]` is present (the current type needs a
    /// `Schema` impl so future versions can reference it from their
    /// own `history(...)`), or explicitly via `#[obj(schema)]`.
    emit_schema: bool,
}
532
/// One `vN = Type` pair from a `#[obj(history(...))]` declaration.
///
/// Each pair becomes one `(version, <Type as ::obj::Schema>::schema())`
/// element of the generated `historical_schemas()` vector.
#[derive(Debug)]
struct HistoryAttr {
    /// Version number parsed from the `vN` key (the `N` part).
    version: u32,
    /// The Rust type path naming the historical schema producer;
    /// the generated code requires it to implement `::obj::Schema`.
    ty_path: syn::Path,
}
541
542/// Walk every `#[obj(...)]` on the struct and merge them into a
543/// single `StructAttrs`. Duplicates (within one `#[obj(...)]` OR
544/// across two) error.
545fn parse_struct_attrs(input: &DeriveInput) -> syn::Result<StructAttrs> {
546 let mut acc = StructAttrs::default();
547 for attr in &input.attrs {
548 if !attr.path().is_ident("obj") {
549 continue;
550 }
551 parse_one_struct_attr(attr, &mut acc)?;
552 }
553 Ok(acc)
554}
555
556/// Parse a single `#[obj(...)]` attribute into `acc`. Duplicate
557/// scalar keys (within this attribute OR already present in `acc`)
558/// error; `index_composite(...)` / `history(...)` are non-scalar
559/// and append new entries.
560fn parse_one_struct_attr(attr: &Attribute, acc: &mut StructAttrs) -> syn::Result<()> {
561 attr.parse_nested_meta(|meta| {
562 if meta.path.is_ident("version") {
563 return parse_struct_version(&meta, acc);
564 }
565 if meta.path.is_ident("collection") {
566 return parse_struct_collection(&meta, acc);
567 }
568 if meta.path.is_ident("index_composite") {
569 let composite = parse_index_composite(&meta)?;
570 acc.composites.push(composite);
571 return Ok(());
572 }
573 if meta.path.is_ident("index") {
574 let composite = parse_struct_index_short(&meta)?;
575 acc.composites.push(composite);
576 return Ok(());
577 }
578 if meta.path.is_ident("history") {
579 parse_history(&meta, acc)?;
580 // Opting into history implies opting into Schema —
581 // future versions will reference this type from their
582 // own history(...) and need a `Schema` impl to lift it
583 // into a `DynamicSchema`.
584 acc.emit_schema = true;
585 return Ok(());
586 }
587 if meta.path.is_ident("schema") {
588 if acc.emit_schema {
589 // Redundant with an earlier `history` /
590 // `schema` declaration; surface anyway so the user
591 // notices the duplication.
592 return Err(meta.error("`schema` declared twice or already implied by `history`"));
593 }
594 acc.emit_schema = true;
595 return Ok(());
596 }
597 Err(meta.error(
598 "unknown obj attribute (expected `version`, `collection`, `index`, `index_composite`, `history`, or `schema`)",
599 ))
600 })
601}
602
603/// Parse the short composite-index form `#[obj(index = ("a", "b"))]`
604/// at struct level. The only valid RHS is a parenthesised tuple of
605/// string literals — `unique` / `each` / a bare path are field-level
606/// shapes and yield a struct-level diagnostic that points back at
607/// `index_composite` / field-level placement.
608///
609/// The returned [`CompositeAttr`] is validated downstream by
610/// [`validate_and_lift_composites`], which already enforces the
611/// `≥ 2 fields` and "field declared on struct" invariants — both the
612/// long and short forms share the same downstream gate.
613fn parse_struct_index_short(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<CompositeAttr> {
614 let span = meta.path.span();
615 let kind = parse_index_kind(meta)?;
616 match kind {
617 IndexKind::Composite(fields) => Ok(CompositeAttr {
618 fields,
619 custom_name: None,
620 span,
621 }),
622 _ => Err(syn::Error::new(
623 span,
624 "struct-level `index = ...` only accepts a tuple of field-name string literals \
625 (e.g. `index = (\"a\", \"b\")`); place `index`, `index = unique`, or `index = each` \
626 on a field instead",
627 )),
628 }
629}
630
631/// Parse `history(v1 = Type1, v2 = Type2, ...)`. Each key is of the
632/// form `vN` for a `u32` `N`; the value is a Rust path naming a
633/// `Schema`-implementing type. Pushes one `HistoryAttr` per pair
634/// into `acc.history` (preserving declaration order; the emitter
635/// re-sorts before emitting).
636fn parse_history(meta: &syn::meta::ParseNestedMeta<'_>, acc: &mut StructAttrs) -> syn::Result<()> {
637 meta.parse_nested_meta(|inner| {
638 let ident = inner
639 .path
640 .get_ident()
641 .ok_or_else(|| inner.error("expected `vN = Type` key"))?;
642 let key = ident.to_string();
643 let version = parse_history_key(&key).ok_or_else(|| {
644 syn::Error::new(
645 ident.span(),
646 "history keys must be of the form `vN` (e.g. `v1`, `v2`, ...)",
647 )
648 })?;
649 if acc.history.iter().any(|h| h.version == version) {
650 return Err(syn::Error::new(
651 ident.span(),
652 format!("history key `v{version}` declared twice"),
653 ));
654 }
655 let value = inner.value()?;
656 let ty_path: syn::Path = value.parse()?;
657 acc.history.push(HistoryAttr { version, ty_path });
658 Ok(())
659 })
660}
661
/// Decode a `vN` history key into its numeric version.
///
/// Returns `Some(N)` only when `key` is the letter `v` followed by
/// one or more ASCII digits whose value fits in a `u32`. Every other
/// shape — missing prefix, no digits, a sign, non-digit characters,
/// or overflow — yields `None`.
///
/// The digits are checked explicitly because `str::parse::<u32>`
/// also accepts a leading `+`, which would let `"v+7"` through even
/// though it is not of the `vN` form.
///
/// NOTE(review): `v0` is accepted here (yields `Some(0)`), while the
/// crate-level docs describe history keys as `N` ≥ 1 — confirm which
/// of the two is intended.
fn parse_history_key(key: &str) -> Option<u32> {
    let digits = key.strip_prefix('v')?;
    if digits.is_empty() || !digits.bytes().all(|byte| byte.is_ascii_digit()) {
        return None;
    }
    digits.parse::<u32>().ok()
}
668
669/// Parse `version = N`.
670fn parse_struct_version(
671 meta: &syn::meta::ParseNestedMeta<'_>,
672 acc: &mut StructAttrs,
673) -> syn::Result<()> {
674 if acc.version.is_some() {
675 return Err(meta.error("`version` declared twice"));
676 }
677 let value = meta.value()?;
678 let lit: LitInt = value.parse()?;
679 let n: u32 = lit
680 .base10_parse()
681 .map_err(|_| syn::Error::new(lit.span(), "expected unsigned integer for `version`"))?;
682 acc.version = Some(n);
683 Ok(())
684}
685
686/// Parse `collection = "name"`.
687fn parse_struct_collection(
688 meta: &syn::meta::ParseNestedMeta<'_>,
689 acc: &mut StructAttrs,
690) -> syn::Result<()> {
691 if acc.collection.is_some() {
692 return Err(meta.error("`collection` declared twice"));
693 }
694 let value = meta.value()?;
695 let lit: LitStr = value.parse()?;
696 let s = lit.value();
697 if s.is_empty() {
698 return Err(syn::Error::new(
699 lit.span(),
700 "collection name must not be empty",
701 ));
702 }
703 acc.collection = Some(s);
704 Ok(())
705}
706
707/// Parse `index_composite(fields = ("a", "b"), name = "by_a_b")`.
708///
709/// `fields` is required. `name` is optional and defaults to the
710/// fields joined with `__`. Field-existence validation runs after
711/// the struct's named fields are known — see
712/// `validate_and_emit_composites`.
713fn parse_index_composite(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<CompositeAttr> {
714 let span = meta.path.span();
715 let mut fields: Option<Vec<String>> = None;
716 let mut custom_name: Option<String> = None;
717 meta.parse_nested_meta(|inner| {
718 if inner.path.is_ident("fields") {
719 if fields.is_some() {
720 return Err(inner.error("`fields` declared twice"));
721 }
722 fields = Some(parse_composite_fields(&inner)?);
723 return Ok(());
724 }
725 if inner.path.is_ident("name") {
726 if custom_name.is_some() {
727 return Err(inner.error("`name` declared twice"));
728 }
729 let value = inner.value()?;
730 let lit: LitStr = value.parse()?;
731 let s = lit.value();
732 if s.is_empty() {
733 return Err(syn::Error::new(
734 lit.span(),
735 "composite index name must not be empty",
736 ));
737 }
738 custom_name = Some(s);
739 return Ok(());
740 }
741 Err(inner.error("expected `fields = (...)` or `name = \"...\"`"))
742 })?;
743 let fields = fields.ok_or_else(|| {
744 syn::Error::new(
745 span,
746 "index_composite requires `fields = (\"a\", \"b\", ...)`",
747 )
748 })?;
749 Ok(CompositeAttr {
750 fields,
751 custom_name,
752 span,
753 })
754}
755
756/// Parse the `fields = ("a", "b", ...)` parenthesised tuple of
757/// string literals. Returns the literal values verbatim.
758///
759/// Delegates to [`parse_composite_paren_paths`] so the long-form
760/// (`index_composite(fields = (...))`) and short-form
761/// (`index = (...)`) syntaxes go through one shared parser.
762fn parse_composite_fields(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<Vec<String>> {
763 let value = meta.value()?;
764 parse_composite_paren_paths(value)
765}
766
/// Index-kind discriminator parsed from `#[obj(index = ...)]` or
/// `#[obj(index_composite(...))]`.
///
/// Selects which `::obj::IndexSpec` constructor the derive emits
/// (see `IndexSpecEmit::emit`).
#[derive(Debug, Clone)]
enum IndexKind {
    /// Field-level `index` — emitted as `IndexSpec::standard(...)`.
    Standard,
    /// Field-level `index = unique` — emitted as
    /// `IndexSpec::unique(...)`.
    Unique,
    /// Field-level `index = each` — emitted as `IndexSpec::each(...)`.
    Each,
    /// Composite over the listed field paths (always ≥ 2 once the
    /// declaration has passed `validate_and_lift_composites`).
    Composite(Vec<String>),
}
777
/// One index emitted by the derive — carries the kind discriminator
/// and the (key path, index name) pair to render.
///
/// Rendered into an `::obj::IndexSpec` constructor call by
/// `IndexSpecEmit::emit`.
#[derive(Debug)]
struct IndexSpecEmit {
    /// Which `IndexSpec` constructor to call.
    kind: IndexKind,
    /// The single struct field this index reads from (Standard /
    /// Unique / Each). Unused for `Composite` — paths live inside
    /// `IndexKind::Composite(...)` — and left as an empty string in
    /// that case.
    field_name: String,
    /// User override via `#[obj(index, name = "...")]` or
    /// `index_composite(name = "...")`, or the default name if none
    /// was provided.
    index_name: String,
}
792
793impl IndexSpecEmit {
794 /// Emit the constructor call for this spec.
795 ///
796 /// We route through the kind-specific `IndexSpec` constructors
797 /// (`IndexSpec::standard` / `::unique` / `::each` / `::composite`)
798 /// rather than a struct literal: `IndexSpec` is `#[non_exhaustive]`
799 /// and so cannot be struct-literal-constructed from a downstream
800 /// user crate. The constructors return `Result`, but the derive
801 /// has already validated their inputs at proc-macro time (empty
802 /// struct field names are syntactically impossible, empty
803 /// `name = "..."` is rejected at parse time, and composites are
804 /// checked for ≥ 2 fields). The emitted code therefore handles the
805 /// (statically-unreachable) error arm by skipping rather than
806 /// panicking — keeping the generated `indexes()` panic-free.
807 fn emit(&self) -> proc_macro2::TokenStream {
808 let name = &self.index_name;
809 match &self.kind {
810 IndexKind::Standard => self.emit_scalar(name, "e! { standard }),
811 IndexKind::Unique => self.emit_scalar(name, "e! { unique }),
812 IndexKind::Each => self.emit_scalar(name, "e! { each }),
813 IndexKind::Composite(paths) => Self::emit_composite(name, paths),
814 }
815 }
816
817 fn emit_scalar(&self, name: &str, ctor: &proc_macro2::TokenStream) -> proc_macro2::TokenStream {
818 let path = &self.field_name;
819 quote! {
820 ::obj::IndexSpec::#ctor(
821 ::std::string::String::from(#name),
822 ::std::string::String::from(#path),
823 )
824 }
825 }
826
827 fn emit_composite(name: &str, paths: &[String]) -> proc_macro2::TokenStream {
828 let path_tokens = paths.iter().map(|p| quote! { #p });
829 quote! {
830 ::obj::IndexSpec::composite(
831 ::std::string::String::from(#name),
832 &[ #( #path_tokens ),* ],
833 )
834 }
835 }
836}
837
838/// Iterate the struct's fields and collect every field-level
839/// `#[obj(index ...)]` declaration in declaration order.
840fn collect_field_indexes(input: &DeriveInput) -> syn::Result<Vec<IndexSpecEmit>> {
841 let fields = named_fields(input)?;
842 let mut out: Vec<IndexSpecEmit> = Vec::new();
843 for field in fields {
844 for spec in parse_field_attrs(field)? {
845 out.push(spec);
846 }
847 }
848 Ok(out)
849}
850
851/// Extract `&FieldsNamed` from the `DeriveInput`. The derive is
852/// defined only for braced structs; anything else is a compile
853/// error at the struct's span.
854fn named_fields(
855 input: &DeriveInput,
856) -> syn::Result<&syn::punctuated::Punctuated<Field, syn::Token![,]>> {
857 match &input.data {
858 Data::Struct(DataStruct {
859 fields: Fields::Named(named),
860 ..
861 }) => Ok(&named.named),
862 _ => Err(syn::Error::new(
863 input.span(),
864 "#[derive(obj::Document)] only supports structs with named fields",
865 )),
866 }
867}
868
869/// Parse all `#[obj(...)]` attributes on a single field. Returns the
870/// list of `IndexSpecEmit`s contributed by this field (typically 0 or
871/// 1, but multiple `#[obj(index ...)]` attributes compose).
872fn parse_field_attrs(field: &Field) -> syn::Result<Vec<IndexSpecEmit>> {
873 let mut specs: Vec<IndexSpecEmit> = Vec::new();
874 let field_name = field
875 .ident
876 .as_ref()
877 .ok_or_else(|| syn::Error::new(field.span(), "expected named field"))?
878 .to_string();
879 for attr in &field.attrs {
880 if !attr.path().is_ident("obj") {
881 continue;
882 }
883 parse_one_field_attr(attr, field, &field_name, &mut specs)?;
884 }
885 Ok(specs)
886}
887
888/// Parse a single `#[obj(...)]` field attribute, contributing any
889/// `IndexSpecEmit` it declares into `specs`.
890fn parse_one_field_attr(
891 attr: &Attribute,
892 field: &Field,
893 field_name: &str,
894 specs: &mut Vec<IndexSpecEmit>,
895) -> syn::Result<()> {
896 let mut kind: Option<IndexKind> = None;
897 let mut custom_name: Option<String> = None;
898 attr.parse_nested_meta(|meta| {
899 if meta.path.is_ident("index") {
900 if kind.is_some() {
901 return Err(meta.error("`index` declared twice on the same field"));
902 }
903 let parsed = parse_index_kind(&meta)?;
904 if matches!(parsed, IndexKind::Composite(_)) {
905 return Err(syn::Error::new(
906 meta.path.span(),
907 "tuple-form `index = (\"a\", \"b\")` is struct-level only; \
908 place it directly above the struct, not on a field",
909 ));
910 }
911 kind = Some(parsed);
912 return Ok(());
913 }
914 if meta.path.is_ident("name") {
915 if custom_name.is_some() {
916 return Err(meta.error("`name` declared twice on the same field"));
917 }
918 let value = meta.value()?;
919 let lit: LitStr = value.parse()?;
920 let s = lit.value();
921 if s.is_empty() {
922 return Err(syn::Error::new(lit.span(), "index name must not be empty"));
923 }
924 custom_name = Some(s);
925 return Ok(());
926 }
927 Err(meta.error("unknown obj field attribute (expected `index`, `name`)"))
928 })?;
929 finalize_field_index(field, field_name, kind, custom_name, specs)
930}
931
932/// Decode the right-hand side of `index = ...` into an `IndexKind`.
933///
934/// Three syntactic shapes are accepted:
935///
936/// - `#[obj(index)]` (no `= ...`) → [`IndexKind::Standard`].
937/// - `#[obj(index = unique)]` / `#[obj(index = each)]` → the keyword
938/// variants. Field-level only; the struct-level caller rejects
939/// them with a more specific diagnostic.
940/// - `#[obj(index = ("a", "b", ...))]` → [`IndexKind::Composite`]
941/// over the listed field-name string literals. Struct-level only;
942/// the field-level caller rejects it for the same reason.
943///
944/// Single-element tuples (`("a",)`) are accepted and degenerate to
945/// `IndexKind::Composite(vec!["a".into()])`. The struct-level caller
946/// then runs the same field-existence + ≥-2 validation it runs for
947/// the long form, so a one-element short tuple produces the existing
948/// "composite needs ≥ 2 fields" diagnostic without bespoke handling.
949fn parse_index_kind(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<IndexKind> {
950 if !meta.input.peek(syn::Token![=]) {
951 return Ok(IndexKind::Standard);
952 }
953 let value = meta.value()?;
954 if value.peek(syn::token::Paren) {
955 let paths = parse_composite_paren_paths(value)?;
956 return Ok(IndexKind::Composite(paths));
957 }
958 let id: syn::Ident = value.parse().map_err(|_| {
959 syn::Error::new(
960 value.span(),
961 "expected one of: unique, each, or a tuple of field-name string literals like (\"a\", \"b\")",
962 )
963 })?;
964 if id == "unique" {
965 return Ok(IndexKind::Unique);
966 }
967 if id == "each" {
968 return Ok(IndexKind::Each);
969 }
970 Err(syn::Error::new(
971 id.span(),
972 "expected one of: unique, each (or omit `= ...` for a standard index)",
973 ))
974}
975
976/// Parse a parenthesised tuple of string literals into a `Vec<String>`.
977///
978/// Shared by the short composite form `index = ("a", "b")`; the long
979/// form `index_composite(fields = ("a", "b"))` runs through
980/// [`parse_composite_fields`] which wraps an outer
981/// `meta.value()` call before delegating here.
982///
983/// Non-`LitStr` entries (`(1, 2)`, `(foo, bar)`, …) produce a
984/// `syn::Error` pointing at the offending token with the message
985/// `expected a tuple of field-name string literals, e.g. ("a", "b")`,
986/// rather than the bare `expected string literal` diagnostic that
987/// `LitStr::parse` would otherwise emit.
988fn parse_composite_paren_paths(value: syn::parse::ParseStream<'_>) -> syn::Result<Vec<String>> {
989 const MAX_FIELDS: usize = 64;
990 let content;
991 syn::parenthesized!(content in value);
992 if content.is_empty() {
993 return Err(syn::Error::new(
994 content.span(),
995 "expected a tuple of field-name string literals, e.g. (\"a\", \"b\")",
996 ));
997 }
998 let mut out: Vec<String> = Vec::new();
999 // Bounded loop (Power-of-Ten Rule 2): a single derive attribute
1000 // can never reasonably span more than `MAX_FIELDS` columns; any
1001 // input exceeding that is malformed.
1002 while !content.is_empty() {
1003 if out.len() >= MAX_FIELDS {
1004 return Err(syn::Error::new(
1005 content.span(),
1006 "too many composite-index fields (limit 64)",
1007 ));
1008 }
1009 let lit: LitStr = content.parse().map_err(|e| {
1010 syn::Error::new(
1011 e.span(),
1012 "expected a tuple of field-name string literals, e.g. (\"a\", \"b\")",
1013 )
1014 })?;
1015 let s = lit.value();
1016 if s.is_empty() {
1017 return Err(syn::Error::new(
1018 lit.span(),
1019 "composite field name must not be empty",
1020 ));
1021 }
1022 out.push(s);
1023 if content.is_empty() {
1024 break;
1025 }
1026 content.parse::<syn::Token![,]>()?;
1027 }
1028 Ok(out)
1029}
1030
1031/// Combine the parsed `kind` + `custom_name` into an `IndexSpecEmit`.
1032/// Enforces the `each` ⇒ `Vec<_>` invariant.
1033fn finalize_field_index(
1034 field: &Field,
1035 field_name: &str,
1036 kind: Option<IndexKind>,
1037 custom_name: Option<String>,
1038 specs: &mut Vec<IndexSpecEmit>,
1039) -> syn::Result<()> {
1040 let Some(kind) = kind else {
1041 if custom_name.is_some() {
1042 return Err(syn::Error::new(
1043 field.span(),
1044 "`#[obj(name = \"...\")]` requires an `index` declaration on the same field",
1045 ));
1046 }
1047 return Ok(());
1048 };
1049 if matches!(kind, IndexKind::Each) && !type_is_vec(&field.ty) {
1050 return Err(syn::Error::new(
1051 field.ty.span(),
1052 "#[obj(index = each)] requires Vec<T>",
1053 ));
1054 }
1055 specs.push(IndexSpecEmit {
1056 kind,
1057 field_name: field_name.to_owned(),
1058 index_name: custom_name.unwrap_or_else(|| field_name.to_owned()),
1059 });
1060 Ok(())
1061}
1062
1063/// Cheap syntactic check: is `ty` a `Vec<...>`?
1064///
1065/// We accept any path whose last segment ident is `Vec`. That covers
1066/// `Vec`, `::std::vec::Vec`, `alloc::vec::Vec` etc. Anything else
1067/// (including `Option<Vec<T>>` or a typedef) is rejected — the user
1068/// can use `#[obj(index)]` instead.
1069fn type_is_vec(ty: &Type) -> bool {
1070 let Type::Path(TypePath { qself: None, path }) = ty else {
1071 return false;
1072 };
1073 match path.segments.last() {
1074 Some(seg) => seg.ident == "Vec",
1075 None => false,
1076 }
1077}
1078
#[cfg(test)]
mod tests {
    //! In-crate tests for the derive's emit pipeline. They call
    //! `emit_impl` directly on parsed source text, so they need no
    //! dependency on the `obj` crate (which itself depends on
    //! `obj-derive` and would form a cycle).

    use super::*;
    use syn::parse_str;

    /// M9 #80 exit gate: the expansion for a typical struct must stay
    /// within a ~200-line budget.
    ///
    /// The fixture has five fields, three field-level indexes
    /// (standard, unique, and `each` over a `Vec`) and one
    /// struct-level composite spanning two of the scalar fields.
    #[test]
    fn typical_struct_expansion_is_under_200_lines() {
        let source = r#"
            #[obj(version = 2, collection = "orders")]
            #[obj(index_composite(fields = ("customer_id", "placed_at")))]
            struct Order {
                #[obj(index)]
                customer_id: u64,
                #[obj(index = unique)]
                order_no: String,
                #[obj(index = each)]
                tags: Vec<String>,
                placed_at: u64,
                total_cents: u64,
            }
        "#;
        let input: DeriveInput = parse_str(source).expect("parse typical struct");
        let expanded = emit_impl(&input).expect("emit").to_string();
        // `TokenStream::to_string()` output is not pretty-printed, so
        // its newline count says little about how long the expansion
        // would be once formatted; it is reported only as extra
        // context in the failure message.
        let line_count = expanded.lines().count();
        // Dependency-free proxy for a formatted line count: tally the
        // `;`, `{` and `}` characters, on the assumption that each
        // ends or opens roughly one formatted line.
        let approx_lines: usize = [';', '{', '}']
            .iter()
            .map(|&delim| expanded.matches(delim).count())
            .sum();
        assert!(
            approx_lines <= 200,
            "expanded `#[derive(Document)]` exceeds 200-line budget: \
             approx_lines = {approx_lines}; line_count = {line_count}; \
             expansion = {expanded}",
        );
    }

    /// Bare derive (no `#[obj(...)]` attributes): the expansion must
    /// mention `COLLECTION` and `VERSION` and must not contain an
    /// `indexes()` override. Guards #75's "minimal output" promise.
    #[test]
    fn bare_derive_expansion_is_small() {
        let input: DeriveInput = parse_str("struct Bare { x: u32 }").expect("parse");
        let expanded = emit_impl(&input).expect("emit").to_string();
        // With no index attributes there is nothing to override.
        let has_indexes_override = expanded.contains("fn indexes");
        assert!(
            !has_indexes_override,
            "bare derive must NOT emit an indexes() override (expanded: {expanded})",
        );
        // Both associated constants are always emitted.
        assert!(expanded.contains("COLLECTION"));
        assert!(expanded.contains("VERSION"));
    }
}