Skip to main content

obj_derive/
lib.rs

1//! `obj-derive` — procedural macros for `obj`.
2//!
3//! # ⚠️ UNSTABLE — consume via `obj-db`, not directly
4//!
5//! `obj-derive` is an implementation detail of `obj-db` (re-exported as
6//! `obj::Document`). It is published only so `obj-db` can depend on it and
7//! carries **no `SemVer` guarantee** as a standalone crate — depend on
8//! `obj-db` and write `#[derive(obj::Document)]`. Only `obj-db`'s public
9//! surface is frozen at 1.0; `obj-derive` is excluded from the public-api
10//! freeze gate (see `docs/public-api.md`).
11//!
12//! This crate provides `#[derive(obj::Document)]`, which emits the
13//! [`obj_core::Document`](https://docs.rs/obj/latest/obj/trait.Document.html)
14//! implementation for a user struct. The derive is intentionally
15//! small — it fills in the trait's associated constants
16//! (`COLLECTION`, `VERSION`) from optional `#[obj(...)]` attributes
17//! and emits an `indexes()` override whenever any field carries an
18//! `#[obj(index ...)]` attribute.
19//!
20//! # Supported attributes
21//!
22//! Struct-level (`#[obj(...)]` directly above the `struct` keyword):
23//!
24//! - `version = N` (integer ≥ 0) — sets `Document::VERSION`.
25//! - `collection = "name"` (non-empty string literal) — sets
26//!   `Document::COLLECTION`.
27//!
28//! Multiple `#[obj(...)]` attributes compose; the same scalar key
29//! (`version`, `collection`) declared twice is a compile error.
30//!
31//! Struct-level composite (one or more occurrences compose, each
32//! adding one `Composite` `IndexSpec`):
33//!
34//! - `index_composite(fields = ("a", "b"), name = "by_a_b")` — emit a
35//!   `Composite` `IndexSpec` spanning the listed fields. `name`
36//!   defaults to the field names joined with `__`. The referenced
37//!   fields must exist on the struct; fewer than two is a compile
38//!   error.
39//! - `index = ("a", "b")` — short form, equivalent to
40//!   `index_composite(fields = ("a", "b"))`. Same downstream
41//!   validation (≥ 2 fields, each declared on the struct). The
42//!   default index name is the fields joined with `__`; there is no
43//!   `name = "..."` slot on the short form — use `index_composite`
44//!   when a custom name is required. Both syntaxes coexist; the
45//!   short form mirrors `design.md` § Indexes verbatim.
46//!
47//! Field-level (`#[obj(...)]` on a struct field):
48//!
49//! - `index` — emit a `Standard` `IndexSpec` for this field.
50//! - `index = unique` — emit a `Unique` `IndexSpec` for this field.
51//! - `index = each` — emit an `Each` `IndexSpec` for this field. The
52//!   field type must syntactically be `Vec<...>` — otherwise the
53//!   derive errors at compile time.
54//! - `name = "..."` — alongside any `index = ...`, overrides the
55//!   default index name (which is the field name).
56//!
57//! Struct-level historical schema registry (M10 #82):
58//!
59//! - `history(v1 = OldType1, v2 = OldType2)` — emit a
60//!   `Document::historical_schemas()` override that lifts each
61//!   version into a `(version, DynamicSchema)` pair via
62//!   `<OldType as ::obj::Schema>::schema()`. The keys (`v1`, `v2`,
63//!   …) must be `vN` for integer `N` ≥ 1; the values are arbitrary
64//!   type paths. Each named type must implement `::obj::Schema`
65//!   (hand-impls are accepted; the derive auto-implements `Schema`
66//!   for types that opt in via `#[obj(history(...))]` or
67//!   `#[obj(schema)]`). Entries are emitted in ascending version
68//!   order.
69//! - `schema` — explicitly opt the current type into a derived
70//!   `Schema` impl WITHOUT declaring any history. Useful when the
71//!   type is referenced from a future version's `history(...)`.
72//!
73//! When either `history(...)` or `schema` is declared, the derive
74//! emits a companion `impl ::obj::Schema` block whose `schema()`
75//! body maps each field to a `DynamicSchema` variant. Scalar
76//! primitives (bool, u\*, i\*, f\*, String) map directly; `Vec<T>`
77//! maps to `DynamicSchema::seq(<T as Schema>::schema())`; anything
78//! else delegates via `<T as ::obj::Schema>::schema()`, which fails
79//! to compile if `T` lacks a `Schema` impl.
80//!
81//! # Serde requirements
82//!
83//! The derive does **NOT** emit `serde::Serialize` or
84//! `serde::Deserialize` for you. Users still write
85//! `#[derive(serde::Serialize, serde::Deserialize)]` on the struct
86//! alongside `#[derive(obj::Document)]`.
87//!
88//! # Power-of-ten posture
89//!
90//! - **Rule 4** — every function in this crate is ≤ 60 lines.
91//! - **Rule 7** — every fallible path returns `syn::Result<...>`;
92//!   `unwrap`/`expect` appear only on infallible primitives.
93//! - **Rule 9** — generated code is minimal and inspectable. Every
94//!   emitted `impl` carries `#[automatically_derived]`; the `//
95//!   auto-generated by #[derive(Document)]` markers in the `quote!`
96//!   templates are source-only (plain comments are not tokens, so they never reach `cargo expand` output).
97
98#![forbid(unsafe_code)]
99#![deny(missing_docs)]
100#![deny(rustdoc::broken_intra_doc_links)]
101
102use proc_macro::TokenStream;
103use quote::quote;
104use syn::spanned::Spanned;
105use syn::{
106    parse_macro_input, Attribute, Data, DataStruct, DeriveInput, Field, Fields, LitInt, LitStr,
107    Type, TypePath,
108};
109
110/// Derive macro for `obj::Document`.
111///
112/// Emits `impl ::obj::Document for <Ident> { ... }` with sensible
113/// defaults:
114///
115/// - `COLLECTION` defaults to the unqualified type name as a string;
116///   `#[obj(collection = "explicit_name")]` overrides.
117/// - `VERSION` defaults to `1`; `#[obj(version = N)]` overrides.
118/// - `indexes()` is omitted (the trait default `Vec::new()` is used)
119///   when the struct carries no index-related attributes; otherwise
120///   the derive emits a `Vec<::obj::IndexSpec>` in field-declaration
121///   order.
122///
123/// All emitted paths are absolute (`::obj::Document`,
124/// `::obj::IndexSpec`) so the derive is hygienic against local items
125/// that shadow these names.
126#[proc_macro_derive(Document, attributes(obj))]
127pub fn derive_document(input: TokenStream) -> TokenStream {
128    let input = parse_macro_input!(input as DeriveInput);
129    match emit_impl(&input) {
130        Ok(ts) => ts.into(),
131        Err(err) => err.to_compile_error().into(),
132    }
133}
134
135/// Build the `impl ::obj::Document` block for `input`. Also emits
136/// the companion `impl ::obj::Schema` and a `historical_schemas()`
137/// override when the user supplied `#[obj(history(...))]`.
138///
139/// The `Schema` impl is emitted ONLY when at least one of these is
140/// true:
141///
142/// - the struct carries `#[obj(history(...))]` (the user is
143///   opting into schema-evolution at this site, and the current
144///   type needs a self-describing schema so that *future* versions
145///   can reference it via `history(vN = ThisType)`);
146/// - the struct carries `#[obj(schema)]` (explicit opt-in).
147///
148/// Bare-derive sites do NOT emit `Schema` — a `Document` with no
149/// historical versions has no need for one, and emitting the impl
150/// would require every nested field type to also be `Schema`
151/// (which is too aggressive an ask for types that never participate
152/// in migration).
153fn emit_impl(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
154    let attrs = parse_struct_attrs(input)?;
155    // Enums get a `Schema`-only emission. A `Document` impl on an
156    // enum is rejected: collections store records, not bare variants.
157    // The user opts in to the Schema impl via `#[obj(schema)]` or
158    // `#[obj(history(...))]`; bare `#[derive(Document)]` on an enum
159    // is a compile error so the diagnostic is loud.
160    if matches!(input.data, Data::Enum(_)) {
161        if !attrs.emit_schema {
162            return Err(syn::Error::new(
163                input.span(),
164                "#[derive(obj::Document)] on an enum requires `#[obj(schema)]` \
165                 (or `#[obj(history(...))]`); an enum is never a Document itself",
166            ));
167        }
168        return emit_schema_impl(input);
169    }
170    emit_struct_impl(input, &attrs)
171}
172
173/// Build the `impl ::obj::Document` block for a struct + emit the
174/// companion `impl ::obj::Schema` when `attrs.emit_schema` is set.
175fn emit_struct_impl(
176    input: &DeriveInput,
177    attrs: &StructAttrs,
178) -> syn::Result<proc_macro2::TokenStream> {
179    let ident = &input.ident;
180    let collection = attrs
181        .collection
182        .clone()
183        .unwrap_or_else(|| ident.to_string());
184    let version: u32 = attrs.version.unwrap_or(1);
185    let mut index_specs = collect_field_indexes(input)?;
186    let composite_specs = validate_and_lift_composites(input, &attrs.composites)?;
187    index_specs.extend(composite_specs);
188    let indexes_body = emit_indexes_body(&index_specs);
189    let schema_impl = if attrs.emit_schema {
190        emit_schema_impl(input)?
191    } else {
192        proc_macro2::TokenStream::new()
193    };
194    let history_body = emit_history_body(&attrs.history);
195    let out = quote! {
196        // auto-generated by #[derive(Document)]
197        #[automatically_derived]
198        impl ::obj::Document for #ident {
199            const COLLECTION: &'static str = #collection;
200            const VERSION: u32 = #version;
201            #indexes_body
202            #history_body
203        }
204        #schema_impl
205    };
206    Ok(out)
207}
208
209/// Emit an `impl ::obj::Schema for <Ident>` block whose `schema()`
210/// returns the `DynamicSchema::Map(...)` corresponding to the
211/// struct's declared fields.
212///
213/// The mapping from Rust field type to `DynamicSchema` is the
214/// syntactic table documented in `obj_core::codec::schema`:
215/// scalar primitives map directly; `Vec<T>` maps to
216/// `DynamicSchema::seq(<T as Schema>::schema())`; anything else is
217/// treated as a `Schema`-implementing path and delegates via
218/// `<T as ::obj::Schema>::schema()`.
219fn emit_schema_impl(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
220    let ident = &input.ident;
221    let body = match &input.data {
222        Data::Struct(_) => emit_schema_body_struct(input)?,
223        Data::Enum(data) => emit_schema_body_enum(data)?,
224        Data::Union(_) => {
225            return Err(syn::Error::new(
226                input.span(),
227                "#[derive(obj::Document)] does not support unions",
228            ));
229        }
230    };
231    Ok(quote! {
232        // auto-generated by #[derive(Document)]
233        #[automatically_derived]
234        impl ::obj::Schema for #ident {
235            fn schema() -> ::obj::DynamicSchema {
236                #body
237            }
238        }
239    })
240}
241
242/// Build the `Schema::schema()` body for a struct: a
243/// `DynamicSchema::Map` over each named field's syntactic type.
244fn emit_schema_body_struct(input: &DeriveInput) -> syn::Result<proc_macro2::TokenStream> {
245    let fields = named_fields(input)?;
246    let entries = fields
247        .iter()
248        .map(|f| {
249            let name = named_field_name(f)?;
250            let ty_schema = field_type_to_schema(&f.ty);
251            Ok(quote! { (::std::string::String::from(#name), #ty_schema) })
252        })
253        .collect::<syn::Result<Vec<_>>>()?;
254    Ok(quote! {
255        ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
256    })
257}
258
259/// Pull the string name out of a named struct/variant field, returning
260/// a `syn::Error` (never a panic) if the field has no `ident`.
261///
262/// `syn` only constructs `Field` values with `ident == None` inside
263/// `Fields::Unnamed`. Every call site here is already guarded by a
264/// `Fields::Named(_)` pattern, so the `None` branch is structurally
265/// unreachable — but Power-of-Ten Rule 7 forbids panicking unwraps
266/// in production paths regardless. A surfaced `syn::Error` is the
267/// safe fallback if a future refactor breaks the invariant.
268fn named_field_name(field: &Field) -> syn::Result<String> {
269    field
270        .ident
271        .as_ref()
272        .map(ToString::to_string)
273        .ok_or_else(|| syn::Error::new(field.span(), "expected named field"))
274}
275
276/// Build the `Schema::schema()` body for an enum: a
277/// `DynamicSchema::Enum` over each variant in declaration order
278/// (postcard assigns discriminants by declaration order; the derive
279/// matches that). Unit variants get `Null` payloads; newtype
280/// variants get the inner type's schema; tuple variants get a
281/// synthetic `Map` keyed by `"0"`, `"1"`, …; struct variants get a
282/// `Map` keyed by the field names.
283fn emit_schema_body_enum(data: &syn::DataEnum) -> syn::Result<proc_macro2::TokenStream> {
284    let entries = data
285        .variants
286        .iter()
287        .enumerate()
288        .map(|(idx, v)| {
289            let discriminant = u32::try_from(idx).unwrap_or(u32::MAX);
290            let name = v.ident.to_string();
291            let payload = variant_payload_schema(&v.fields)?;
292            Ok(quote! {
293                ::obj::EnumVariantSchema::new(
294                    #discriminant,
295                    #name,
296                    #payload,
297                )
298            })
299        })
300        .collect::<syn::Result<Vec<_>>>()?;
301    Ok(quote! {
302        ::obj::DynamicSchema::Enum(::std::vec![ #( #entries ),* ])
303    })
304}
305
306/// Map an enum variant's `Fields` shape to the token stream that
307/// constructs its payload [`DynamicSchema`] at runtime.
308fn variant_payload_schema(fields: &Fields) -> syn::Result<proc_macro2::TokenStream> {
309    match fields {
310        Fields::Unit => Ok(quote! { ::obj::DynamicSchema::Null }),
311        Fields::Unnamed(unnamed) => {
312            // Newtype variant `V(T)` → use `T`'s schema directly. Tuple
313            // variants `V(T, U, ...)` → synthesise a Map keyed by
314            // `"0"`, `"1"`, …; postcard writes the inner fields
315            // positionally, same wire shape as a struct's bytes.
316            let count = unnamed.unnamed.len();
317            if count == 1 {
318                let ty = &unnamed.unnamed[0].ty;
319                Ok(field_type_to_schema(ty))
320            } else {
321                let entries = unnamed.unnamed.iter().enumerate().map(|(i, f)| {
322                    let key = i.to_string();
323                    let ty_schema = field_type_to_schema(&f.ty);
324                    quote! { (::std::string::String::from(#key), #ty_schema) }
325                });
326                Ok(quote! {
327                    ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
328                })
329            }
330        }
331        Fields::Named(named) => {
332            let entries = named
333                .named
334                .iter()
335                .map(|f| {
336                    let name = named_field_name(f)?;
337                    let ty_schema = field_type_to_schema(&f.ty);
338                    Ok(quote! { (::std::string::String::from(#name), #ty_schema) })
339                })
340                .collect::<syn::Result<Vec<_>>>()?;
341            Ok(quote! {
342                ::obj::DynamicSchema::Map(::std::vec![ #( #entries ),* ])
343            })
344        }
345    }
346}
347
348/// Emit either a `Document::historical_schemas()` override or an
349/// empty token stream (which leaves the trait default in place).
350fn emit_history_body(entries: &[HistoryAttr]) -> proc_macro2::TokenStream {
351    if entries.is_empty() {
352        return proc_macro2::TokenStream::new();
353    }
354    // Pre-sort entries by version so the emitted vector is sorted
355    // ascending — the codec's `decode` dispatch binary-searches and
356    // debug-asserts on order.
357    let mut sorted: Vec<&HistoryAttr> = entries.iter().collect();
358    sorted.sort_by_key(|h| h.version);
359    let items = sorted.iter().map(|h| {
360        let version = h.version;
361        let path = &h.ty_path;
362        quote! { (#version, <#path as ::obj::Schema>::schema()) }
363    });
364    quote! {
365        fn historical_schemas() -> ::std::vec::Vec<(u32, ::obj::DynamicSchema)> {
366            // auto-generated by #[derive(Document)]
367            ::std::vec![ #( #items ),* ]
368        }
369    }
370}
371
372/// Map a struct field's syntactic Rust type to a token-stream that
373/// constructs a [`DynamicSchema`] value at runtime.
374fn field_type_to_schema(ty: &Type) -> proc_macro2::TokenStream {
375    if let Some(name) = scalar_schema_for(ty) {
376        let ident = quote::format_ident!("{name}");
377        return quote! { ::obj::DynamicSchema::#ident };
378    }
379    if let Some(inner) = vec_inner_type(ty) {
380        let inner_schema = field_type_to_schema(inner);
381        return quote! { ::obj::DynamicSchema::seq(#inner_schema) };
382    }
383    // Fallback: treat the type as `Schema`-implementing. This is
384    // the path used for nested user structs and for any type the
385    // syntactic scan does not recognise — the resulting expansion
386    // fails to compile if the type lacks a `Schema` impl, which is
387    // the diagnostic we want.
388    quote! { <#ty as ::obj::Schema>::schema() }
389}
390
391/// Return the [`DynamicSchema`] variant name for `ty` if `ty` is one
392/// of the built-in scalars; `None` otherwise. The result is used by
393/// [`field_type_to_schema`] to construct the leaf token stream.
394fn scalar_schema_for(ty: &Type) -> Option<&'static str> {
395    let Type::Path(TypePath { qself: None, path }) = ty else {
396        return None;
397    };
398    let segment = path.segments.last()?;
399    if !segment.arguments.is_none() {
400        return None;
401    }
402    let s = segment.ident.to_string();
403    match s.as_str() {
404        "bool" => Some("Bool"),
405        "u8" | "u16" | "u32" | "u64" | "usize" => Some("U64"),
406        "i8" | "i16" | "i32" | "i64" | "isize" => Some("I64"),
407        "f32" | "f64" => Some("F64"),
408        "String" => Some("String"),
409        _ => None,
410    }
411}
412
413/// If `ty` is `Vec<T>`, return `&T`; otherwise `None`.
414fn vec_inner_type(ty: &Type) -> Option<&Type> {
415    let Type::Path(TypePath { qself: None, path }) = ty else {
416        return None;
417    };
418    let seg = path.segments.last()?;
419    if seg.ident != "Vec" {
420        return None;
421    }
422    let syn::PathArguments::AngleBracketed(args) = &seg.arguments else {
423        return None;
424    };
425    args.args.iter().find_map(|a| match a {
426        syn::GenericArgument::Type(t) => Some(t),
427        _ => None,
428    })
429}
430
431/// Validate every composite declaration against the struct's named
432/// fields and lift each one into an `IndexSpecEmit`. Errors on:
433///
434/// - composite with fewer than 2 fields,
435/// - a referenced field name that is not declared on the struct.
436fn validate_and_lift_composites(
437    input: &DeriveInput,
438    composites: &[CompositeAttr],
439) -> syn::Result<Vec<IndexSpecEmit>> {
440    if composites.is_empty() {
441        return Ok(Vec::new());
442    }
443    let fields = named_fields(input)?;
444    let known: std::collections::HashSet<String> = fields
445        .iter()
446        .filter_map(|f| f.ident.as_ref().map(ToString::to_string))
447        .collect();
448    let mut out: Vec<IndexSpecEmit> = Vec::with_capacity(composites.len());
449    for c in composites {
450        if c.fields.len() < 2 {
451            return Err(syn::Error::new(c.span, "composite needs ≥ 2 fields"));
452        }
453        for field in &c.fields {
454            if !known.contains(field) {
455                return Err(syn::Error::new(
456                    c.span,
457                    format!("field '{field}' not declared on struct"),
458                ));
459            }
460        }
461        let index_name = c.custom_name.clone().unwrap_or_else(|| c.fields.join("__"));
462        out.push(IndexSpecEmit {
463            kind: IndexKind::Composite(c.fields.clone()),
464            field_name: String::new(),
465            index_name,
466        });
467    }
468    Ok(out)
469}
470
471/// Emit either an `indexes()` override or an empty token stream
472/// (which leaves the trait default in place).
473fn emit_indexes_body(specs: &[IndexSpecEmit]) -> proc_macro2::TokenStream {
474    if specs.is_empty() {
475        return proc_macro2::TokenStream::new();
476    }
477    let entries = specs.iter().map(IndexSpecEmit::emit);
478    quote! {
479        fn indexes() -> ::std::vec::Vec<::obj::IndexSpec> {
480            // auto-generated by #[derive(Document)]
481            //
482            // Each entry is an `IndexSpec::{standard,unique,each,composite}`
483            // call returning `Result`. Inputs were validated at derive
484            // expansion time, so the error arm is statically unreachable;
485            // we still handle it explicitly (push only `Ok`) so the
486            // generated code is panic-free (Power-of-Ten Rule 7).
487            let mut out: ::std::vec::Vec<::obj::IndexSpec> = ::std::vec::Vec::new();
488            #(
489                if let ::std::result::Result::Ok(spec) = #entries {
490                    out.push(spec);
491                }
492            )*
493            out
494        }
495    }
496}
497
498/// One parsed `#[obj(index_composite(...))]` declaration.
499#[derive(Debug)]
500struct CompositeAttr {
501    /// User-provided field names. Each MUST exist on the struct.
502    fields: Vec<String>,
503    /// Optional `name = "..."` override; default is the fields joined
504    /// with `__`.
505    custom_name: Option<String>,
506    /// Span used for "field 'x' not declared on struct" diagnostics.
507    span: proc_macro2::Span,
508}
509
510/// Parsed struct-level attributes.
511#[derive(Default, Debug)]
512struct StructAttrs {
513    /// `#[obj(version = N)]` override.
514    version: Option<u32>,
515    /// `#[obj(collection = "name")]` override.
516    collection: Option<String>,
517    /// Zero or more `#[obj(index_composite(...))]` declarations,
518    /// preserved in declaration order so the emitted `indexes()` is
519    /// deterministic.
520    composites: Vec<CompositeAttr>,
521    /// `#[obj(history(v1 = Type1, v2 = Type2))]` entries — one per
522    /// historical version. Parsed in declaration order; the emitter
523    /// re-sorts by `version` before emitting.
524    history: Vec<HistoryAttr>,
525    /// `true` iff the user opted into emitting a companion
526    /// `impl ::obj::Schema` block. Set implicitly when
527    /// `#[obj(history(...))]` is present (the current type needs a
528    /// `Schema` impl so future versions can reference it from their
529    /// own `history(...)`), or explicitly via `#[obj(schema)]`.
530    emit_schema: bool,
531}
532
533/// One `vN = Type` pair from a `#[obj(history(...))]` declaration.
534#[derive(Debug)]
535struct HistoryAttr {
536    /// Version number parsed from the `vN` key.
537    version: u32,
538    /// The Rust type path naming the historical schema producer.
539    ty_path: syn::Path,
540}
541
542/// Walk every `#[obj(...)]` on the struct and merge them into a
543/// single `StructAttrs`. Duplicates (within one `#[obj(...)]` OR
544/// across two) error.
545fn parse_struct_attrs(input: &DeriveInput) -> syn::Result<StructAttrs> {
546    let mut acc = StructAttrs::default();
547    for attr in &input.attrs {
548        if !attr.path().is_ident("obj") {
549            continue;
550        }
551        parse_one_struct_attr(attr, &mut acc)?;
552    }
553    Ok(acc)
554}
555
556/// Parse a single `#[obj(...)]` attribute into `acc`. Duplicate
557/// scalar keys (within this attribute OR already present in `acc`)
558/// error; `index_composite(...)` / `history(...)` are non-scalar
559/// and append new entries.
560fn parse_one_struct_attr(attr: &Attribute, acc: &mut StructAttrs) -> syn::Result<()> {
561    attr.parse_nested_meta(|meta| {
562        if meta.path.is_ident("version") {
563            return parse_struct_version(&meta, acc);
564        }
565        if meta.path.is_ident("collection") {
566            return parse_struct_collection(&meta, acc);
567        }
568        if meta.path.is_ident("index_composite") {
569            let composite = parse_index_composite(&meta)?;
570            acc.composites.push(composite);
571            return Ok(());
572        }
573        if meta.path.is_ident("index") {
574            let composite = parse_struct_index_short(&meta)?;
575            acc.composites.push(composite);
576            return Ok(());
577        }
578        if meta.path.is_ident("history") {
579            parse_history(&meta, acc)?;
580            // Opting into history implies opting into Schema —
581            // future versions will reference this type from their
582            // own history(...) and need a `Schema` impl to lift it
583            // into a `DynamicSchema`.
584            acc.emit_schema = true;
585            return Ok(());
586        }
587        if meta.path.is_ident("schema") {
588            if acc.emit_schema {
589                // Redundant with an earlier `history` /
590                // `schema` declaration; surface anyway so the user
591                // notices the duplication.
592                return Err(meta.error("`schema` declared twice or already implied by `history`"));
593            }
594            acc.emit_schema = true;
595            return Ok(());
596        }
597        Err(meta.error(
598            "unknown obj attribute (expected `version`, `collection`, `index`, `index_composite`, `history`, or `schema`)",
599        ))
600    })
601}
602
603/// Parse the short composite-index form `#[obj(index = ("a", "b"))]`
604/// at struct level. The only valid RHS is a parenthesised tuple of
605/// string literals — `unique` / `each` / a bare path are field-level
606/// shapes and yield a struct-level diagnostic that points back at
607/// `index_composite` / field-level placement.
608///
609/// The returned [`CompositeAttr`] is validated downstream by
610/// [`validate_and_lift_composites`], which already enforces the
611/// `≥ 2 fields` and "field declared on struct" invariants — both the
612/// long and short forms share the same downstream gate.
613fn parse_struct_index_short(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<CompositeAttr> {
614    let span = meta.path.span();
615    let kind = parse_index_kind(meta)?;
616    match kind {
617        IndexKind::Composite(fields) => Ok(CompositeAttr {
618            fields,
619            custom_name: None,
620            span,
621        }),
622        _ => Err(syn::Error::new(
623            span,
624            "struct-level `index = ...` only accepts a tuple of field-name string literals \
625             (e.g. `index = (\"a\", \"b\")`); place `index`, `index = unique`, or `index = each` \
626             on a field instead",
627        )),
628    }
629}
630
631/// Parse `history(v1 = Type1, v2 = Type2, ...)`. Each key is of the
632/// form `vN` for a `u32` `N`; the value is a Rust path naming a
633/// `Schema`-implementing type. Pushes one `HistoryAttr` per pair
634/// into `acc.history` (preserving declaration order; the emitter
635/// re-sorts before emitting).
636fn parse_history(meta: &syn::meta::ParseNestedMeta<'_>, acc: &mut StructAttrs) -> syn::Result<()> {
637    meta.parse_nested_meta(|inner| {
638        let ident = inner
639            .path
640            .get_ident()
641            .ok_or_else(|| inner.error("expected `vN = Type` key"))?;
642        let key = ident.to_string();
643        let version = parse_history_key(&key).ok_or_else(|| {
644            syn::Error::new(
645                ident.span(),
646                "history keys must be of the form `vN` (e.g. `v1`, `v2`, ...)",
647            )
648        })?;
649        if acc.history.iter().any(|h| h.version == version) {
650            return Err(syn::Error::new(
651                ident.span(),
652                format!("history key `v{version}` declared twice"),
653            ));
654        }
655        let value = inner.value()?;
656        let ty_path: syn::Path = value.parse()?;
657        acc.history.push(HistoryAttr { version, ty_path });
658        Ok(())
659    })
660}
661
/// Decode a `vN` history key into its numeric version.
///
/// Returns `Some(N)` only when `key` is a lowercase `v` followed by
/// one or more ASCII digits whose value fits in a `u32`; every other
/// shape yields `None`.
///
/// The digits-only check is deliberate: `str::parse::<u32>` on its
/// own also accepts a leading `+`, which would let `"v+1"` decode as
/// version 1 even though it is not of the `vN` form.
///
/// NOTE(review): `v0` is accepted here, while the crate-level docs
/// describe history keys as `vN` with `N ≥ 1` — confirm which is
/// intended.
fn parse_history_key(key: &str) -> Option<u32> {
    let digits = key.strip_prefix('v')?;
    if digits.is_empty() || !digits.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    // Still fallible: the digits may overflow `u32`.
    digits.parse::<u32>().ok()
}
668
669/// Parse `version = N`.
670fn parse_struct_version(
671    meta: &syn::meta::ParseNestedMeta<'_>,
672    acc: &mut StructAttrs,
673) -> syn::Result<()> {
674    if acc.version.is_some() {
675        return Err(meta.error("`version` declared twice"));
676    }
677    let value = meta.value()?;
678    let lit: LitInt = value.parse()?;
679    let n: u32 = lit
680        .base10_parse()
681        .map_err(|_| syn::Error::new(lit.span(), "expected unsigned integer for `version`"))?;
682    acc.version = Some(n);
683    Ok(())
684}
685
686/// Parse `collection = "name"`.
687fn parse_struct_collection(
688    meta: &syn::meta::ParseNestedMeta<'_>,
689    acc: &mut StructAttrs,
690) -> syn::Result<()> {
691    if acc.collection.is_some() {
692        return Err(meta.error("`collection` declared twice"));
693    }
694    let value = meta.value()?;
695    let lit: LitStr = value.parse()?;
696    let s = lit.value();
697    if s.is_empty() {
698        return Err(syn::Error::new(
699            lit.span(),
700            "collection name must not be empty",
701        ));
702    }
703    acc.collection = Some(s);
704    Ok(())
705}
706
707/// Parse `index_composite(fields = ("a", "b"), name = "by_a_b")`.
708///
709/// `fields` is required. `name` is optional and defaults to the
710/// fields joined with `__`. Field-existence validation runs after
711/// the struct's named fields are known — see
712/// `validate_and_emit_composites`.
713fn parse_index_composite(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<CompositeAttr> {
714    let span = meta.path.span();
715    let mut fields: Option<Vec<String>> = None;
716    let mut custom_name: Option<String> = None;
717    meta.parse_nested_meta(|inner| {
718        if inner.path.is_ident("fields") {
719            if fields.is_some() {
720                return Err(inner.error("`fields` declared twice"));
721            }
722            fields = Some(parse_composite_fields(&inner)?);
723            return Ok(());
724        }
725        if inner.path.is_ident("name") {
726            if custom_name.is_some() {
727                return Err(inner.error("`name` declared twice"));
728            }
729            let value = inner.value()?;
730            let lit: LitStr = value.parse()?;
731            let s = lit.value();
732            if s.is_empty() {
733                return Err(syn::Error::new(
734                    lit.span(),
735                    "composite index name must not be empty",
736                ));
737            }
738            custom_name = Some(s);
739            return Ok(());
740        }
741        Err(inner.error("expected `fields = (...)` or `name = \"...\"`"))
742    })?;
743    let fields = fields.ok_or_else(|| {
744        syn::Error::new(
745            span,
746            "index_composite requires `fields = (\"a\", \"b\", ...)`",
747        )
748    })?;
749    Ok(CompositeAttr {
750        fields,
751        custom_name,
752        span,
753    })
754}
755
756/// Parse the `fields = ("a", "b", ...)` parenthesised tuple of
757/// string literals. Returns the literal values verbatim.
758///
759/// Delegates to [`parse_composite_paren_paths`] so the long-form
760/// (`index_composite(fields = (...))`) and short-form
761/// (`index = (...)`) syntaxes go through one shared parser.
762fn parse_composite_fields(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<Vec<String>> {
763    let value = meta.value()?;
764    parse_composite_paren_paths(value)
765}
766
/// The kind of index requested by an `#[obj(index ...)]` or
/// `#[obj(index_composite(...))]` attribute.
#[derive(Clone, Debug)]
enum IndexKind {
    /// Bare `#[obj(index)]` with no `= ...` value.
    Standard,
    /// `#[obj(index = unique)]`.
    Unique,
    /// `#[obj(index = each)]`.
    Each,
    /// Composite index over the listed field paths. Composites are
    /// expected to span at least two fields; a freshly parsed
    /// short-form tuple may hold fewer until the struct-level
    /// validation has run.
    Composite(Vec<String>),
}
777
778/// One index emitted by the derive — carries the kind discriminator
779/// and the (key path, index name) pair to render.
780#[derive(Debug)]
781struct IndexSpecEmit {
782    kind: IndexKind,
783    /// The single struct field this index reads from (Standard /
784    /// Unique / Each). Unused for `Composite` — paths live inside
785    /// `IndexKind::Composite(...)`.
786    field_name: String,
787    /// User override via `#[obj(index, name = "...")]` or
788    /// `index_composite(name = "...")`, or the default name if none
789    /// was provided.
790    index_name: String,
791}
792
793impl IndexSpecEmit {
794    /// Emit the constructor call for this spec.
795    ///
796    /// We route through the kind-specific `IndexSpec` constructors
797    /// (`IndexSpec::standard` / `::unique` / `::each` / `::composite`)
798    /// rather than a struct literal: `IndexSpec` is `#[non_exhaustive]`
799    /// and so cannot be struct-literal-constructed from a downstream
800    /// user crate. The constructors return `Result`, but the derive
801    /// has already validated their inputs at proc-macro time (empty
802    /// struct field names are syntactically impossible, empty
803    /// `name = "..."` is rejected at parse time, and composites are
804    /// checked for ≥ 2 fields). The emitted code therefore handles the
805    /// (statically-unreachable) error arm by skipping rather than
806    /// panicking — keeping the generated `indexes()` panic-free.
807    fn emit(&self) -> proc_macro2::TokenStream {
808        let name = &self.index_name;
809        match &self.kind {
810            IndexKind::Standard => self.emit_scalar(name, &quote! { standard }),
811            IndexKind::Unique => self.emit_scalar(name, &quote! { unique }),
812            IndexKind::Each => self.emit_scalar(name, &quote! { each }),
813            IndexKind::Composite(paths) => Self::emit_composite(name, paths),
814        }
815    }
816
817    fn emit_scalar(&self, name: &str, ctor: &proc_macro2::TokenStream) -> proc_macro2::TokenStream {
818        let path = &self.field_name;
819        quote! {
820            ::obj::IndexSpec::#ctor(
821                ::std::string::String::from(#name),
822                ::std::string::String::from(#path),
823            )
824        }
825    }
826
827    fn emit_composite(name: &str, paths: &[String]) -> proc_macro2::TokenStream {
828        let path_tokens = paths.iter().map(|p| quote! { #p });
829        quote! {
830            ::obj::IndexSpec::composite(
831                ::std::string::String::from(#name),
832                &[ #( #path_tokens ),* ],
833            )
834        }
835    }
836}
837
838/// Iterate the struct's fields and collect every field-level
839/// `#[obj(index ...)]` declaration in declaration order.
840fn collect_field_indexes(input: &DeriveInput) -> syn::Result<Vec<IndexSpecEmit>> {
841    let fields = named_fields(input)?;
842    let mut out: Vec<IndexSpecEmit> = Vec::new();
843    for field in fields {
844        for spec in parse_field_attrs(field)? {
845            out.push(spec);
846        }
847    }
848    Ok(out)
849}
850
851/// Extract `&FieldsNamed` from the `DeriveInput`. The derive is
852/// defined only for braced structs; anything else is a compile
853/// error at the struct's span.
854fn named_fields(
855    input: &DeriveInput,
856) -> syn::Result<&syn::punctuated::Punctuated<Field, syn::Token![,]>> {
857    match &input.data {
858        Data::Struct(DataStruct {
859            fields: Fields::Named(named),
860            ..
861        }) => Ok(&named.named),
862        _ => Err(syn::Error::new(
863            input.span(),
864            "#[derive(obj::Document)] only supports structs with named fields",
865        )),
866    }
867}
868
869/// Parse all `#[obj(...)]` attributes on a single field. Returns the
870/// list of `IndexSpecEmit`s contributed by this field (typically 0 or
871/// 1, but multiple `#[obj(index ...)]` attributes compose).
872fn parse_field_attrs(field: &Field) -> syn::Result<Vec<IndexSpecEmit>> {
873    let mut specs: Vec<IndexSpecEmit> = Vec::new();
874    let field_name = field
875        .ident
876        .as_ref()
877        .ok_or_else(|| syn::Error::new(field.span(), "expected named field"))?
878        .to_string();
879    for attr in &field.attrs {
880        if !attr.path().is_ident("obj") {
881            continue;
882        }
883        parse_one_field_attr(attr, field, &field_name, &mut specs)?;
884    }
885    Ok(specs)
886}
887
888/// Parse a single `#[obj(...)]` field attribute, contributing any
889/// `IndexSpecEmit` it declares into `specs`.
890fn parse_one_field_attr(
891    attr: &Attribute,
892    field: &Field,
893    field_name: &str,
894    specs: &mut Vec<IndexSpecEmit>,
895) -> syn::Result<()> {
896    let mut kind: Option<IndexKind> = None;
897    let mut custom_name: Option<String> = None;
898    attr.parse_nested_meta(|meta| {
899        if meta.path.is_ident("index") {
900            if kind.is_some() {
901                return Err(meta.error("`index` declared twice on the same field"));
902            }
903            let parsed = parse_index_kind(&meta)?;
904            if matches!(parsed, IndexKind::Composite(_)) {
905                return Err(syn::Error::new(
906                    meta.path.span(),
907                    "tuple-form `index = (\"a\", \"b\")` is struct-level only; \
908                     place it directly above the struct, not on a field",
909                ));
910            }
911            kind = Some(parsed);
912            return Ok(());
913        }
914        if meta.path.is_ident("name") {
915            if custom_name.is_some() {
916                return Err(meta.error("`name` declared twice on the same field"));
917            }
918            let value = meta.value()?;
919            let lit: LitStr = value.parse()?;
920            let s = lit.value();
921            if s.is_empty() {
922                return Err(syn::Error::new(lit.span(), "index name must not be empty"));
923            }
924            custom_name = Some(s);
925            return Ok(());
926        }
927        Err(meta.error("unknown obj field attribute (expected `index`, `name`)"))
928    })?;
929    finalize_field_index(field, field_name, kind, custom_name, specs)
930}
931
932/// Decode the right-hand side of `index = ...` into an `IndexKind`.
933///
934/// Three syntactic shapes are accepted:
935///
936/// - `#[obj(index)]` (no `= ...`) → [`IndexKind::Standard`].
937/// - `#[obj(index = unique)]` / `#[obj(index = each)]` → the keyword
938///   variants. Field-level only; the struct-level caller rejects
939///   them with a more specific diagnostic.
940/// - `#[obj(index = ("a", "b", ...))]` → [`IndexKind::Composite`]
941///   over the listed field-name string literals. Struct-level only;
942///   the field-level caller rejects it for the same reason.
943///
944/// Single-element tuples (`("a",)`) are accepted and degenerate to
945/// `IndexKind::Composite(vec!["a".into()])`. The struct-level caller
946/// then runs the same field-existence + ≥-2 validation it runs for
947/// the long form, so a one-element short tuple produces the existing
948/// "composite needs ≥ 2 fields" diagnostic without bespoke handling.
949fn parse_index_kind(meta: &syn::meta::ParseNestedMeta<'_>) -> syn::Result<IndexKind> {
950    if !meta.input.peek(syn::Token![=]) {
951        return Ok(IndexKind::Standard);
952    }
953    let value = meta.value()?;
954    if value.peek(syn::token::Paren) {
955        let paths = parse_composite_paren_paths(value)?;
956        return Ok(IndexKind::Composite(paths));
957    }
958    let id: syn::Ident = value.parse().map_err(|_| {
959        syn::Error::new(
960            value.span(),
961            "expected one of: unique, each, or a tuple of field-name string literals like (\"a\", \"b\")",
962        )
963    })?;
964    if id == "unique" {
965        return Ok(IndexKind::Unique);
966    }
967    if id == "each" {
968        return Ok(IndexKind::Each);
969    }
970    Err(syn::Error::new(
971        id.span(),
972        "expected one of: unique, each (or omit `= ...` for a standard index)",
973    ))
974}
975
976/// Parse a parenthesised tuple of string literals into a `Vec<String>`.
977///
978/// Shared by the short composite form `index = ("a", "b")`; the long
979/// form `index_composite(fields = ("a", "b"))` runs through
980/// [`parse_composite_fields`] which wraps an outer
981/// `meta.value()` call before delegating here.
982///
983/// Non-`LitStr` entries (`(1, 2)`, `(foo, bar)`, …) produce a
984/// `syn::Error` pointing at the offending token with the message
985/// `expected a tuple of field-name string literals, e.g. ("a", "b")`,
986/// rather than the bare `expected string literal` diagnostic that
987/// `LitStr::parse` would otherwise emit.
988fn parse_composite_paren_paths(value: syn::parse::ParseStream<'_>) -> syn::Result<Vec<String>> {
989    const MAX_FIELDS: usize = 64;
990    let content;
991    syn::parenthesized!(content in value);
992    if content.is_empty() {
993        return Err(syn::Error::new(
994            content.span(),
995            "expected a tuple of field-name string literals, e.g. (\"a\", \"b\")",
996        ));
997    }
998    let mut out: Vec<String> = Vec::new();
999    // Bounded loop (Power-of-Ten Rule 2): a single derive attribute
1000    // can never reasonably span more than `MAX_FIELDS` columns; any
1001    // input exceeding that is malformed.
1002    while !content.is_empty() {
1003        if out.len() >= MAX_FIELDS {
1004            return Err(syn::Error::new(
1005                content.span(),
1006                "too many composite-index fields (limit 64)",
1007            ));
1008        }
1009        let lit: LitStr = content.parse().map_err(|e| {
1010            syn::Error::new(
1011                e.span(),
1012                "expected a tuple of field-name string literals, e.g. (\"a\", \"b\")",
1013            )
1014        })?;
1015        let s = lit.value();
1016        if s.is_empty() {
1017            return Err(syn::Error::new(
1018                lit.span(),
1019                "composite field name must not be empty",
1020            ));
1021        }
1022        out.push(s);
1023        if content.is_empty() {
1024            break;
1025        }
1026        content.parse::<syn::Token![,]>()?;
1027    }
1028    Ok(out)
1029}
1030
1031/// Combine the parsed `kind` + `custom_name` into an `IndexSpecEmit`.
1032/// Enforces the `each` ⇒ `Vec<_>` invariant.
1033fn finalize_field_index(
1034    field: &Field,
1035    field_name: &str,
1036    kind: Option<IndexKind>,
1037    custom_name: Option<String>,
1038    specs: &mut Vec<IndexSpecEmit>,
1039) -> syn::Result<()> {
1040    let Some(kind) = kind else {
1041        if custom_name.is_some() {
1042            return Err(syn::Error::new(
1043                field.span(),
1044                "`#[obj(name = \"...\")]` requires an `index` declaration on the same field",
1045            ));
1046        }
1047        return Ok(());
1048    };
1049    if matches!(kind, IndexKind::Each) && !type_is_vec(&field.ty) {
1050        return Err(syn::Error::new(
1051            field.ty.span(),
1052            "#[obj(index = each)] requires Vec<T>",
1053        ));
1054    }
1055    specs.push(IndexSpecEmit {
1056        kind,
1057        field_name: field_name.to_owned(),
1058        index_name: custom_name.unwrap_or_else(|| field_name.to_owned()),
1059    });
1060    Ok(())
1061}
1062
1063/// Cheap syntactic check: is `ty` a `Vec<...>`?
1064///
1065/// We accept any path whose last segment ident is `Vec`. That covers
1066/// `Vec`, `::std::vec::Vec`, `alloc::vec::Vec` etc. Anything else
1067/// (including `Option<Vec<T>>` or a typedef) is rejected — the user
1068/// can use `#[obj(index)]` instead.
1069fn type_is_vec(ty: &Type) -> bool {
1070    let Type::Path(TypePath { qself: None, path }) = ty else {
1071        return false;
1072    };
1073    match path.segments.last() {
1074        Some(seg) => seg.ident == "Vec",
1075        None => false,
1076    }
1077}
1078
#[cfg(test)]
mod tests {
    //! Internal proc-macro tests. They sit beside the derive's helpers
    //! so the emit pipeline can be exercised without depending on the
    //! `obj` crate (a cycle, since `obj` depends on `obj-derive`).

    use super::*;
    use syn::parse_str;

    /// M9 #80 exit gate: the expansion for a typical struct must stay
    /// under ~200 lines.
    ///
    /// The fixture is a five-field document with a standard index, a
    /// unique index, an each-index over a `Vec`, and a struct-level
    /// composite spanning two of the scalar fields.
    #[test]
    fn typical_struct_expansion_is_under_200_lines() {
        let source = r#"
            #[obj(version = 2, collection = "orders")]
            #[obj(index_composite(fields = ("customer_id", "placed_at")))]
            struct Order {
                #[obj(index)]
                customer_id: u64,
                #[obj(index = unique)]
                order_no: String,
                #[obj(index = each)]
                tags: Vec<String>,
                placed_at: u64,
                total_cents: u64,
            }
            "#;
        let input: DeriveInput = parse_str(source).expect("parse typical struct");
        let expanded = emit_impl(&input).expect("emit").to_string();

        // The token stream's string form is not pretty-printed, so
        // `lines()` does not reflect what `cargo expand` would show;
        // it is reported in the failure message only. To avoid pulling
        // in a formatter dependency, the budget is checked against the
        // number of `;`, `{` and `}` characters, used as an upper-bound
        // approximation of the formatted line count.
        let line_count = expanded.lines().count();
        let approx_lines: usize = [';', '{', '}']
            .iter()
            .map(|&separator| expanded.matches(separator).count())
            .sum();

        assert!(
            approx_lines <= 200,
            "expanded `#[derive(Document)]` exceeds 200-line budget: \
             approx_lines = {approx_lines}; line_count = {line_count}; \
             expansion = {expanded}",
        );
    }

    /// Bare-derive shape: with no `#[obj(...)]` attributes the emitted
    /// impl mentions `COLLECTION` and `VERSION` and carries no
    /// `indexes()` override (#75's "minimal output" promise).
    #[test]
    fn bare_derive_expansion_is_small() {
        let input: DeriveInput = parse_str("struct Bare { x: u32 }").expect("parse");
        let expanded = emit_impl(&input).expect("emit").to_string();

        // The two associated constants are always emitted.
        assert!(expanded.contains("COLLECTION"));
        assert!(expanded.contains("VERSION"));
        // Without index attributes there is nothing to override.
        assert!(
            !expanded.contains("fn indexes"),
            "bare derive must NOT emit an indexes() override (expanded: {expanded})",
        );
    }
}