buffa_codegen/lib.rs
1//! Shared code generation logic for buffa.
2//!
3//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
4//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
5//! that uses the `buffa` runtime.
6//!
7//! It is used by:
8//! - `protoc-gen-buffa` (protoc plugin)
9//! - `buffa-build` (build.rs integration)
10//!
11//! # Architecture
12//!
13//! The code generator is intentionally decoupled from how descriptors are
14//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
15//! Rust source strings. This means:
16//!
17//! - It doesn't parse `.proto` files.
18//! - It doesn't invoke `protoc`.
19//! - It doesn't do import resolution or name linking.
20//!
21//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod feature_gates;
29pub(crate) mod features;
30#[doc(hidden)]
31pub use buffa_descriptor::generated;
32pub mod idents;
33pub(crate) mod impl_message;
34pub(crate) mod impl_text;
35pub(crate) mod imports;
36pub(crate) mod message;
37pub(crate) mod oneof;
38pub(crate) mod owned_view;
39pub(crate) mod reflect;
40pub(crate) mod reflect_owned;
41pub(crate) mod reflect_view;
42pub(crate) mod view;
43
44use crate::generated::descriptor::FileDescriptorProto;
45use proc_macro2::TokenStream;
46use quote::{format_ident, quote};
47
/// Lint names that generated code is allowed to trip, applied at module
/// boundaries.
///
/// This one list feeds [`generate_module_tree`], the per-package `.mod.rs`
/// stitcher, and the `_include.rs` writer in `buffa-build`, so all three
/// emit the same suppression set.
pub const ALLOW_LINTS: &[&str] = &[
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    // References between protos of one package go through the canonical
    // `super::super::__buffa::view::…` path although the target sits in
    // the same generated module. The bare name would resolve too, but the
    // canonical path stays stable if a sibling proto defines a
    // natural-path re-export with the same name.
    "unused_qualifications",
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // `message View { message Inner }` in a user proto yields
    // `__buffa::view::view::InnerView`, which is harmless but triggers
    // this lint.
    "clippy::module_inception",
];
71
72/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
73pub fn allow_lints_attr() -> TokenStream {
74 let lints: Vec<TokenStream> = ALLOW_LINTS
75 .iter()
76 .map(|l| syn::parse_str(l).expect("lint name parses as path"))
77 .collect();
78 quote! { #[allow( #(#lints),* )] }
79}
80
81/// One generated output file.
82///
83/// Each `.proto` produces up to five **content files** (`<stem>.rs`,
84/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
85/// `<stem>.__ext.rs`) and each proto package produces one
86/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
87/// and authors the `pub mod __buffa { … }` ancillary tree.
88/// Ancillary kinds with no content for that input file (e.g. a message
89/// with no oneofs and no extensions) are omitted, and the stitcher's
90/// `include!` set is filtered to match. The `__buffa` wrapper (and each
91/// `view` / `oneof` / `ext` submodule inside it) is itself omitted when
92/// it would be empty, so packages with only owned messages emit no
93/// `__buffa` block at all.
94/// See `DESIGN.md` → "Generated code layout".
95///
96/// Consumers normally only need to wire up the
97/// [`GeneratedFileKind::PackageMod`] entries (one per package); the
98/// per-proto content kinds are reached transitively via `include!` from
99/// the stitcher. Write all files to disk; build a module tree from only
100/// the `PackageMod` ones.
101///
102/// With [`CodeGenConfig::file_per_package`] set, the per-proto content
103/// kinds are not emitted at all — the single `<dotted.pkg>.rs` (still
104/// kind `PackageMod`) inlines what the stitcher would `include!`.
105#[derive(Debug)]
106pub struct GeneratedFile {
107 /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
108 pub name: String,
109 /// The proto package this file belongs to.
110 pub package: String,
111 /// What this file contains. Build integrations only need to wire up
112 /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
113 /// via `include!` from there.
114 pub kind: GeneratedFileKind,
115 /// The generated Rust source code.
116 pub content: String,
117}
118
/// The role a [`GeneratedFile`] plays in the generated output.
///
/// For each input `.proto` file, [`generate`] emits up to five per-proto
/// content kinds: [`Owned`](Self::Owned), [`View`](Self::View),
/// [`Oneof`](Self::Oneof), [`ViewOneof`](Self::ViewOneof), and
/// [`Ext`](Self::Ext), at most one of each. On top of that it emits one
/// [`PackageMod`](Self::PackageMod) stitcher per package. A kind that
/// would have no content for the input is left out: a proto with no oneofs
/// gets neither [`Oneof`](Self::Oneof) nor [`ViewOneof`](Self::ViewOneof),
/// a proto with no extensions gets no [`Ext`](Self::Ext), and so on.
///
/// Build integrations have to wire up the `PackageMod` entries only. The
/// per-proto content kinds are pulled in by the stitcher through
/// `include!`, so it is enough to write them to disk next to it. With
/// [`CodeGenConfig::file_per_package`] enabled, `PackageMod` is the only
/// kind emitted.
///
/// [`Companion`](Self::Companion) is the single kind that [`generate`]
/// does *not* produce. Downstream code generators build `Companion` files
/// on their own and merge them into buffa's output with
/// [`apply_companions`].
///
/// The enum is `#[non_exhaustive]`: always include a wildcard arm when
/// matching, so that further kinds can be introduced without a major
/// version bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GeneratedFileKind {
    /// `<stem>.rs`: owned message structs and enums.
    Owned,
    /// `<stem>.__view.rs`: view structs.
    View,
    /// `<stem>.__oneof.rs`: owned oneof enums.
    Oneof,
    /// `<stem>.__view_oneof.rs`: view oneof enums.
    ViewOneof,
    /// `<stem>.__ext.rs`: file-level proto-extension consts, i.e. the
    /// `pub const` `ExtensionDescriptor` items generated from `extend`
    /// blocks. This is a different thing from
    /// [`Companion`](Self::Companion), which holds unrelated content
    /// supplied by downstream generators.
    Ext,
    /// `<dotted.pkg>.mod.rs`: the per-package stitcher, and the only file
    /// a build system has to wire up directly.
    PackageMod,
    /// Additional per-proto content from a downstream code generator
    /// (service stubs, extra trait impls, etc.) that ships together with
    /// buffa's output.
    ///
    /// [`generate`] never produces this kind. Build such files in your own
    /// generator and hand them to [`apply_companions`]. For each one it
    /// appends an `include!` at file scope of the matching package's
    /// [`PackageMod`](Self::PackageMod): after buffa's own output and at
    /// the package root next to the owned message types (**not** inside
    /// the `__buffa::` sentinel module). A `pub` item declared in a
    /// companion file is therefore visible at `crate::<pkg>::*`.
    ///
    /// This is a different thing from [`Ext`](Self::Ext), the
    /// buffa-generated file that holds protobuf `extend` consts.
    Companion,
}
174
/// Which Rust type represents a proto `string` field in generated owned
/// structs.
///
/// [`String`](StringRepr::String) is the default. Every other variant is a
/// small-string-optimized type that avoids the growable buffer of `String`,
/// which suits read-mostly schemas. Each of them sits behind the `buffa`
/// Cargo feature of the same name (`smol_str`, `ecow`, `compact_str`); the
/// downstream crate has to enable that feature, otherwise the re-exported
/// type path (`::buffa::smol_str::SmolStr`, etc.) does not resolve.
///
/// Pick a representation with the `string_type` / `string_type_in` builder
/// methods of `buffa_build`. The choice never affects the wire format; it
/// only changes the in-memory owned type. View types continue to borrow
/// `&str`, and keys and values of `map<_, string>` / `map<string, _>`
/// always remain `String`.
///
/// The sizes quoted on the variants apply to 64-bit targets. The buffa
/// README compares the small-string crates in more detail.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum StringRepr {
    /// `::buffa::alloc::string::String`, the default: a 24-byte struct
    /// that is growable and mutable.
    #[default]
    String,
    /// `smol_str::SmolStr`: a 24-byte struct that inlines up to 23 bytes
    /// and clones long strings in `O(1)` through `Arc<str>`.
    /// **Immutable** (mutate by assigning a new value). Needs the
    /// `buffa/smol_str` feature.
    SmolStr,
    /// `ecow::EcoString`: a 16-byte struct that inlines up to 15 bytes and
    /// is clone-on-write with an `O(1)` clone. **Immutable** (mutate by
    /// assigning a new value). Needs the `buffa/ecow` feature.
    EcoString,
    /// `compact_str::CompactString`: a 24-byte struct that inlines up to
    /// 24 bytes and is mutable, making it a drop-in replacement for
    /// `String`. Needs the `buffa/compact_str` feature.
    CompactString,
}
212
213impl StringRepr {
214 /// The owned Rust type path emitted for a `string` field with this
215 /// representation.
216 pub(crate) fn type_path(self, resolver: &imports::ImportResolver) -> proc_macro2::TokenStream {
217 use quote::quote;
218 match self {
219 StringRepr::String => resolver.string(),
220 StringRepr::SmolStr => quote! { ::buffa::smol_str::SmolStr },
221 StringRepr::EcoString => quote! { ::buffa::ecow::EcoString },
222 StringRepr::CompactString => quote! { ::buffa::compact_str::CompactString },
223 }
224 }
225
226 /// Whether this is the default `String` representation, which keeps the
227 /// `String`-specialized fast paths (in-place `merge_string`, `clear()`,
228 /// native `Arbitrary`) instead of the generic `ProtoString` ones.
229 pub(crate) fn is_default(self) -> bool {
230 matches!(self, StringRepr::String)
231 }
232}
233
/// Level of reflection support emitted for generated types.
///
/// Chosen with the `reflect_mode` builder method of `buffa_build`, or with
/// the `reflect_mode=` option of `protoc-gen-buffa`. Both modes that emit
/// reflection code ([`Bridge`](Self::Bridge) and [`VTable`](Self::VTable))
/// need the consuming crate to depend on `buffa-descriptor` with its
/// `reflect` feature and on `std`. The call site is
/// `foo.reflect().get(fd)` in either of them.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum ReflectMode {
    /// Emit no reflection impls.
    #[default]
    Off,
    /// `Reflectable::reflect()` takes the message on a round-trip through
    /// a `DynamicMessage` (encode → decode → boxed handle). The generated
    /// code is smaller, at the price of an allocation and a re-encode on
    /// every `reflect()` call.
    Bridge,
    /// `impl ReflectMessage` is emitted directly on the owned and view
    /// types, and `Reflectable::reflect()` borrows `self` without any
    /// round-trip. The generated code is larger, but reflective access is
    /// near-free. View generation is not required: with views off, only
    /// the owned impls are emitted.
    VTable,
}
256
257impl ReflectMode {
258 /// Apply this mode to a [`CodeGenConfig`] (sets `generate_reflection` /
259 /// `generate_reflection_vtable`). Used by the `buffa-build` and
260 /// `protoc-gen-buffa` front-ends.
261 pub fn apply(self, config: &mut CodeGenConfig) {
262 let (reflection, vtable) = match self {
263 ReflectMode::Off => (false, false),
264 ReflectMode::Bridge => (true, false),
265 ReflectMode::VTable => (true, true),
266 };
267 config.generate_reflection = reflection;
268 config.generate_reflection_vtable = vtable;
269 }
270}
271
272/// Configuration for code generation.
273#[derive(Debug, Clone)]
274#[non_exhaustive]
275pub struct CodeGenConfig {
276 /// Whether to generate borrowed view types (`MyMessageView<'a>`) in
277 /// addition to owned types.
278 pub generate_views: bool,
279 /// Whether to preserve unknown fields (default: true).
280 pub preserve_unknown_fields: bool,
281 /// Whether to derive `serde::Serialize` / `serde::Deserialize` on
282 /// generated message structs and enum types, and emit `#[serde(with = "...")]`
283 /// attributes for proto3 JSON's special scalar encodings (int64 as quoted
284 /// string, bytes as base64, etc.).
285 ///
286 /// When this is `true`, the downstream crate must depend on `serde` and
287 /// must enable the `buffa/json` feature for the runtime helpers.
288 ///
289 /// Oneof fields use `#[serde(flatten)]` with custom `Serialize` /
290 /// `Deserialize` impls so that each variant appears as a top-level
291 /// JSON field (proto3 JSON inline oneof encoding).
292 pub generate_json: bool,
293 /// Whether to emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
294 /// on generated message structs and enum types.
295 ///
296 /// When this is `true`, the downstream crate must add `arbitrary` as an
297 /// optional dependency and enable the `buffa/arbitrary` feature. The
298 /// downstream crate's Cargo feature that gates `arbitrary` must be named
299 /// exactly `"arbitrary"` — the generated `cfg_attr` uses that literal
300 /// string and cannot be customized. This applies to both the struct-level
301 /// `derive(Arbitrary)` and the per-field `#[arbitrary(with = ...)]`
302 /// attributes emitted for `bytes_fields`-typed fields.
303 ///
304 /// For `bytes_fields`-typed fields, codegen emits `#[arbitrary(with = ...)]`
305 /// using helpers in `::buffa::__private` since `bytes::Bytes` has no
306 /// `Arbitrary` impl. Singular, optional, and repeated bytes fields are all
307 /// covered. Map values are always `Vec<u8>` regardless of `bytes_fields`
308 /// and require no special handling.
309 pub generate_arbitrary: bool,
310 /// External type path mappings.
311 ///
312 /// Each entry maps either a fully-qualified protobuf package prefix
313 /// (e.g., `".my.common"`) to a Rust module path (e.g.,
314 /// `"::common_protos"`), or a single type FQN (e.g.,
315 /// `".my.common.Shared"`) to a full Rust type path (e.g.,
316 /// `"::shared_types::Shared"`). Matched types reference the extern Rust
317 /// path instead of being generated, allowing shared proto packages to be
318 /// compiled once in a dedicated crate and referenced from others. An
319 /// exact type-FQN entry wins over a covering package prefix; otherwise
320 /// the longest matching prefix wins.
321 ///
322 /// Well-known types (`google.protobuf.*`) are automatically mapped to
323 /// `::buffa_types::google::protobuf::*` without needing an explicit
324 /// entry here. To override with a custom implementation, add an
325 /// `extern_path` for `.google.protobuf` pointing to your crate.
326 pub extern_paths: Vec<(String, String)>,
327 /// Fully-qualified proto field paths whose `bytes` fields should use
328 /// `bytes::Bytes` instead of `Vec<u8>`.
329 ///
330 /// Each entry is a proto path prefix (e.g., `".my.pkg.MyMessage.data"` for
331 /// a specific field, or `"."` for all bytes fields). The path is matched
332 /// as a prefix, so `"."` applies to every bytes field in every message.
333 pub bytes_fields: Vec<String>,
334 /// Ordered (proto-path-prefix, [`StringRepr`]) rules selecting the Rust type
335 /// for `string` fields. Later rules win, so a broad rule (e.g. `"."` →
336 /// `SmolStr`) can be refined by a more specific one
337 /// (`".my.pkg.Msg.field"` → `CompactString`). Fields matching no rule use
338 /// `String`. The path is matched with the same proto-segment-aware prefix
339 /// logic as [`bytes_fields`](Self::bytes_fields).
340 ///
341 /// Applies to singular, optional, and repeated `string` fields and oneof
342 /// `string` variants. Map keys and values always stay `String`, mirroring
343 /// the bytes path (where map values always stay `Vec<u8>`).
344 pub string_fields: Vec<(String, StringRepr)>,
345 /// Fully-qualified proto paths whose message-typed oneof variants should
346 /// **not** be wrapped in `Box<T>`. By default every message/group oneof
347 /// variant is boxed (so recursive types compile); entries here opt matching
348 /// variants out, storing the message inline in the enum.
349 ///
350 /// Each entry is a proto path prefix matched with the same
351 /// proto-segment-aware logic as [`bytes_fields`](Self::bytes_fields)
352 /// (`"."` matches every variant). Recursive variants cannot be stored
353 /// inline (the type would be unsized): an entry naming one *exactly* is
354 /// rejected at codegen time, while a broader prefix entry silently keeps
355 /// recursive variants boxed and inlines the rest.
356 pub unboxed_oneof_fields: Vec<String>,
357 /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
358 /// for such string fields instead of `String` / `&str`.
359 ///
360 /// When `false` (the default), buffa emits `String` for all string fields
361 /// and **validates UTF-8 on decode** — stricter than proto2 requires, but
362 /// ergonomic and safe.
363 ///
364 /// When `true`, string fields with `utf8_validation = NONE` (all proto2
365 /// strings by default, and editions fields that opt into `NONE`) become
366 /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller decides at the
367 /// call site whether to `std::str::from_utf8` (checked) or
368 /// `from_utf8_unchecked` (trusted-input fast path). This is the only
369 /// sound Rust mapping when strings may actually contain non-UTF-8 bytes.
370 ///
371 /// **This is a breaking change for proto2** — enable only for new code or
372 /// when profiling identifies UTF-8 validation as a bottleneck.
373 pub strict_utf8_mapping: bool,
374 /// Permit `option message_set_wire_format = true` on input messages.
375 ///
376 /// MessageSet is a legacy Google-internal wire format that wraps each
377 /// extension in a group structure instead of using regular field tags.
378 /// When `false` (the default), encountering such a message is a codegen
379 /// error — the flag exists to make MessageSet use explicit, since the
380 /// format is obsolete outside of interop with very old Google protos.
381 pub allow_message_set: bool,
382 /// Whether to emit `impl buffa::text::TextFormat` on generated message
383 /// structs for textproto (human-readable text format) encoding/decoding.
384 ///
385 /// When this is `true`, the downstream crate must enable the `buffa/text`
386 /// feature for the runtime encoder/decoder.
387 pub generate_text: bool,
388 /// Whether the per-package `.mod.rs` stitcher emits
389 /// `__buffa::register_types(&mut TypeRegistry)`.
390 ///
391 /// Default `true`. The fn aggregates `Any` type entries and extension
392 /// entries for every message in the package. Set to `false` for
393 /// crates that don't use extensions/`Any`, or that hand-roll
394 /// registration (e.g. `buffa-types`' `register_wkt_types`, which
395 /// knows the JSON-Any `is_wkt` special-casing the generic fn does
396 /// not). The per-message `__*_JSON_ANY` / `__*_TEXT_ANY` consts are
397 /// still emitted; only the aggregating fn is suppressed.
398 pub emit_register_fn: bool,
399 /// Emit one `<dotted.package>.rs` per proto package instead of the
400 /// per-proto-file content set plus `<pkg>.mod.rs` stitcher.
401 ///
402 /// The single file inlines what the stitcher would otherwise `include!`,
403 /// producing the same `__buffa::{view,oneof,ext,...}` module structure.
404 /// Intended for Buf Schema Registry generated SDKs, whose `lib.rs`
405 /// synthesis builds the module tree from `<dotted.package>.rs` filenames.
406 ///
407 /// Under `strategy: directory` this only sees one directory's files per
408 /// invocation, so the input module must be `PACKAGE_DIRECTORY_MATCH`-clean
409 /// (one package per directory) for the output to be complete. BSR-hosted
410 /// modules satisfy this by lint default. If a package spans multiple
411 /// directories, separate invocations each emit their own `<pkg>.rs` and
412 /// the last write wins — silent partial output, not a codegen error.
413 pub file_per_package: bool,
414 /// Custom attributes to inject on generated types (messages and enums).
415 ///
416 /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
417 /// as a prefix against the fully-qualified proto name: `"."` applies to
418 /// all types, `".my.pkg"` to types in that package, `".my.pkg.MyMessage"`
419 /// to a specific type. The `attribute` is a raw Rust attribute string
420 /// (e.g., `"#[derive(serde::Serialize)]"`).
421 pub type_attributes: Vec<(String, String)>,
422 /// Custom attributes to inject on generated struct fields.
423 ///
424 /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
425 /// as a prefix against the fully-qualified field path (e.g.,
426 /// `".my.pkg.MyMessage.my_field"`). `"."` applies to all fields.
427 pub field_attributes: Vec<(String, String)>,
428 /// Custom attributes to inject on generated message structs only (not enums).
429 ///
430 /// Same path-matching semantics as `type_attributes`, but only applied to
431 /// message structs, not enum types. Useful for struct-only attributes like
432 /// `#[serde(default)]`.
433 pub message_attributes: Vec<(String, String)>,
434 /// Custom attributes to inject on generated enum types only (not messages).
435 ///
436 /// Same path-matching semantics as `type_attributes`, but only applied to
437 /// enum types. Useful for enum-only attributes like
438 /// `#[derive(strum::EnumIter)]` when the user does not want to apply the
439 /// same attribute to every message in the matched scope.
440 pub enum_attributes: Vec<(String, String)>,
441 /// Wrap generated `impl`s in `#[cfg(feature = "...")]` instead of
442 /// emitting them unconditionally.
443 ///
444 /// When `true`, the impls controlled by [`generate_json`],
445 /// [`generate_views`], and [`generate_text`] are emitted wrapped in
446 /// `#[cfg(feature = "json" | "views" | "text")]` (or
447 /// `#[cfg_attr(feature = ..., ...)]` for derives and field attributes)
448 /// rather than unconditionally. The consuming crate must define matching
449 /// Cargo features that enable the corresponding runtime support, e.g.:
450 ///
451 /// ```toml
452 /// [features]
453 /// json = ["buffa/json", "dep:serde", "dep:serde_json"]
454 /// views = []
455 /// text = ["buffa/text"]
456 /// ```
457 ///
458 /// The [`generate_*`] flags still control *whether* an impl kind is
459 /// emitted at all — this flag only controls whether it is `cfg`-gated.
460 /// `generate_arbitrary` is always `cfg_attr`-gated on
461 /// `feature = "arbitrary"` regardless of this flag, because `arbitrary`
462 /// is an optional dependency by design.
463 ///
464 /// When [`generate_reflection`](Self::generate_reflection) is also on, the
465 /// reflection impls are gated on `feature = "reflect"` alongside
466 /// json/views/text. To gate *only* reflection without gating json/views/text,
467 /// use [`gate_reflect_on_crate_feature`](Self::gate_reflect_on_crate_feature)
468 /// instead.
469 ///
470 /// This is the mechanism that lets `buffa-descriptor` and `buffa-types`
471 /// ship every impl while keeping the codegen toolchain
472 /// (`buffa-codegen`/`buffa-build`/`protoc-gen-buffa`) lean: those crates
473 /// depend on `buffa-descriptor` with `default-features = false` and so
474 /// don't pull `serde`/`serde_json`/`base64`. Most consumers don't need
475 /// this — they decide at build-script time whether to generate JSON, and
476 /// if they say yes, they want `impl Serialize` to just exist.
477 ///
478 /// [`generate_json`]: Self::generate_json
479 /// [`generate_views`]: Self::generate_views
480 /// [`generate_text`]: Self::generate_text
481 /// [`generate_*`]: Self::generate_json
482 pub gate_impls_on_crate_features: bool,
483 /// Generate `with_*` builder-style setter methods for explicit-presence fields.
484 ///
485 /// Each explicit-presence scalar, bytes, or enum field gets a
486 /// `pub fn with_<name>(mut self, value: T) -> Self` method that wraps the
487 /// value in `Some` and returns `self`, enabling chained construction:
488 ///
489 /// ```ignore
490 /// let req = MyRequest::default()
491 /// .with_name("alice")
492 /// .with_timeout_ms(30_000);
493 /// ```
494 ///
495 /// **Fields that receive a setter:** proto3 `optional`, proto2 `optional`,
496 /// and editions fields with `field_presence = EXPLICIT`.
497 ///
498 /// **Fields that do not receive a setter:** message fields
499 /// (`MessageField<T>`), repeated fields, map fields, oneof variant fields,
500 /// proto2 `required` fields, and any implicit-presence field.
501 ///
502 /// There is no `clear_<name>` companion — to clear a field, assign `None`
503 /// directly: `msg.name = None;`.
504 ///
505 /// Defaults to `true`.
506 pub generate_with_setters: bool,
507 /// Generate `impl Reflectable` for owned message types (bridge mode).
508 ///
509 /// When enabled, each generated message gets an
510 /// `impl ::buffa_descriptor::reflect::Reflectable` whose `reflect()`
511 /// round-trips through `DynamicMessage` (encode → decode → reflective
512 /// handle), and the package's `__buffa::reflect` submodule embeds the
513 /// `FileDescriptorSet` bytes plus a lazily-built `DescriptorPool`.
514 ///
515 /// **Runtime requirements** — the consuming crate must depend on:
516 /// - `buffa-descriptor` with the `reflect` feature.
517 /// - `std` (the lazy pool accessor uses `std::sync::OnceLock`).
518 ///
519 /// When [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features)
520 /// is on, the impls are wrapped in `#[cfg(feature = "reflect")]` so the
521 /// consuming crate can opt out per build.
522 ///
523 /// **Performance** — `reflect()` is one full encode/decode round-trip
524 /// plus a heap allocation. The first call also pays a one-time pool
525 /// build cost (linking the embedded `FileDescriptorSet`). For zero-copy
526 /// reflective access over view types without the round-trip, additionally
527 /// enable [`generate_reflection_vtable`](Self::generate_reflection_vtable).
528 ///
529 /// **Binary size** — each package embeds its own copy of the full
530 /// `FileDescriptorSet` (transitive closure). For a multi-package
531 /// codegen run this duplicates the FDS bytes per package. Acceptable
532 /// for the bridge prototype; deduplication via a crate-root module is
533 /// a planned follow-up.
534 ///
535 /// Defaults to `false`.
536 pub generate_reflection: bool,
537 /// Emit vtable-mode reflection: `impl ReflectMessage` / `impl
538 /// ReflectElement` on the owned message structs and (when views are
539 /// generated) the view types, and switch the owned
540 /// `Reflectable::reflect()` body to borrow `self`
541 /// (`ReflectCow::Borrowed(self)`) instead of the bridge round-trip.
542 ///
543 /// Reflective access then reads struct fields in place — no encode/decode
544 /// round-trip and no per-field allocation — for both a decoded view and an
545 /// in-memory owned message.
546 ///
547 /// Requires [`generate_reflection`](Self::generate_reflection) (the impls
548 /// resolve against the same embedded `DescriptorPool`) but not
549 /// [`generate_views`](Self::generate_views) — with views off, only the
550 /// owned impls are emitted. Set via [`ReflectMode::VTable`]
551 /// — front-ends expose it as `buffa_build::Config::reflect_mode` /
552 /// `protoc-gen-buffa`'s `reflect_mode=vtable`.
553 ///
554 /// Defaults to `false`.
555 pub generate_reflection_vtable: bool,
556 /// Gate the reflection impls behind a `reflect` crate feature, *without*
557 /// gating json/views/text (unlike
558 /// [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features),
559 /// which gates them all together).
560 ///
561 /// Used by crates that ship view/text impls unconditionally but want the
562 /// reflection surface — which pulls a `buffa-descriptor` dependency and
563 /// `std` — to be opt-in. `buffa-types` is the motivating case: its WKT
564 /// views are always available, but `impl ReflectMessage` for them is gated
565 /// behind `buffa-types`'s `reflect` feature.
566 ///
567 /// When [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features)
568 /// is already on, reflection is gated regardless and this flag is ignored.
569 ///
570 /// A low-level knob for crates whose generated code is a public interface
571 /// (`buffa-types`, the conformance harness). Set directly by `gen_wkt_types`
572 /// and exposed through `buffa_build::Config::gate_reflect_on_crate_feature`
573 /// (currently `#[doc(hidden)]`, paired with the experimental vtable flag).
574 ///
575 /// Defaults to `false`.
576 pub gate_reflect_on_crate_feature: bool,
577 /// Emit idiomatic `UpperCamelCase` constant aliases alongside each enum
578 /// variant.
579 ///
580 /// Protobuf style names enum values in `SHOUTY_SNAKE_CASE`, conventionally
581 /// prefixed with the enum name (`RULE_LEVEL_HIGH`). Those names remain the
582 /// definitive Rust variants — they are guaranteed unique and valid by
583 /// protobuf, and existing references (including `Debug` output) are
584 /// unchanged. When this is enabled, codegen additionally emits associated
585 /// `const`s with the prefix stripped and the name converted to
586 /// `UpperCamelCase` (`RULE_LEVEL_HIGH` → `High`), so downstream code can
587 /// write `RuleLevel::High`.
588 ///
589 /// The conversion is lossy, so two values can collide (`FOO_BAR` and
590 /// `FOO__BAR` both map to `FooBar`). The rule is all-or-nothing per enum:
591 /// if any two values would collide after conversion, or a value would yield
592 /// an invalid identifier, **no** aliases are emitted for that enum (a
593 /// [`CodeGenWarning`] and an enum doc note explain why). This keeps every
594 /// match either fully `SHOUTY_SNAKE_CASE` or fully idiomatic, never a forced
595 /// mix.
596 ///
597 /// The aliases are associated `const`s, which work in pattern position too:
598 /// a `match` written entirely against aliases is still exhaustiveness-checked
599 /// (the "non-exhaustive" error names the underlying `SHOUTY_SNAKE_CASE`
600 /// variant, since that is the canonical name).
601 ///
602 /// Defaults to `true`: the aliases are purely additive (the proto names
603 /// remain the variants, and `Debug` is unchanged), so enabling by default is
604 /// backward-compatible, and the all-or-nothing rule guarantees correctness on
605 /// any enum.
606 pub idiomatic_enum_aliases: bool,
607}
608
609impl Default for CodeGenConfig {
610 fn default() -> Self {
611 Self {
612 generate_views: true,
613 preserve_unknown_fields: true,
614 generate_json: false,
615 generate_arbitrary: false,
616 extern_paths: Vec::new(),
617 bytes_fields: Vec::new(),
618 string_fields: Vec::new(),
619 unboxed_oneof_fields: Vec::new(),
620 strict_utf8_mapping: false,
621 allow_message_set: false,
622 generate_text: false,
623 emit_register_fn: true,
624 file_per_package: false,
625 type_attributes: Vec::new(),
626 field_attributes: Vec::new(),
627 message_attributes: Vec::new(),
628 enum_attributes: Vec::new(),
629 gate_impls_on_crate_features: false,
630 generate_with_setters: true,
631 generate_reflection: false,
632 generate_reflection_vtable: false,
633 gate_reflect_on_crate_feature: false,
634 idiomatic_enum_aliases: true,
635 }
636 }
637}
638
639impl CodeGenConfig {
640 /// Active [`feature_gates::FeatureGates`] for this config.
641 ///
642 /// Recomputed on each call (cheap — three boolean ANDs); call once at
643 /// the top of a generation function and thread through, or call inline
644 /// at each use site, whichever reads better.
645 pub(crate) fn feature_gates(&self) -> feature_gates::FeatureGates {
646 feature_gates::FeatureGates::for_config(self)
647 }
648}
649
650/// Compute the effective extern path list by starting with user-provided
651/// mappings and adding the default WKT mapping if appropriate.
652///
653/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
654/// is added unless:
655/// - The user already provided an extern_path covering `.google.protobuf`
656/// - Any of the files being generated are in the `google.protobuf` package
657/// (i.e., we're building `buffa-types` itself)
658pub(crate) fn effective_extern_paths(
659 file_descriptors: &[FileDescriptorProto],
660 files_to_generate: &[String],
661 config: &CodeGenConfig,
662) -> Vec<(String, String)> {
663 let mut paths = config.extern_paths.clone();
664
665 // Only an EXACT .google.protobuf mapping suppresses auto-injection.
666 // A sub-package mapping like .google.protobuf.compiler does NOT cover
667 // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
668 // lets both coexist, so we still inject the parent mapping.
669 let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
670
671 if !has_wkt_mapping {
672 // Check if we're generating google.protobuf files ourselves
673 // (e.g., building buffa-types). If so, don't auto-map.
674 let generating_wkts = file_descriptors
675 .iter()
676 .filter(|fd| {
677 fd.name
678 .as_deref()
679 .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
680 })
681 .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
682
683 if !generating_wkts {
684 paths.push((
685 ".google.protobuf".to_string(),
686 "::buffa_types::google::protobuf".to_string(),
687 ));
688 }
689 }
690
691 paths
692}
693
694/// Compute the effective file-level extern path list.
695///
696/// File-level mappings route a specific `.proto` file to a Rust module root,
697/// taking priority over the package-level mappings from
698/// [`effective_extern_paths`]. They exist to resolve a structural problem:
699/// `descriptor.proto` is in the same `google.protobuf` package as the
700/// JSON-mappable WKTs (`Timestamp`, `Any`, …), but its types live in
701/// `buffa-descriptor`, not `buffa-types`. A single package-keyed
702/// `.google.protobuf` extern_path can route the package to one crate or the
703/// other; it can't split it. The file-level mapping splits it.
704///
705/// Auto-injected mappings (when not suppressed):
706///
707/// | Proto file | Rust module |
708/// |---|---|
709/// | `google/protobuf/descriptor.proto` | `::buffa_descriptor::generated::descriptor` |
710/// | `google/protobuf/compiler/plugin.proto` | `::buffa_descriptor::generated::compiler` |
711///
712/// Suppression conditions, evaluated **per file**:
713///
714/// - **A user-provided `extern_path` covers the file's package.** That
715/// override has covered the file's types since the package mapping was
716/// introduced; auto-injecting a higher-priority file-level mapping would
717/// silently redirect them away from the user's crate. Matching is via
718/// the same longest-prefix logic the package resolver uses, so both an
719/// exact `.google.protobuf` mapping and a sub-package
720/// `.google.protobuf.compiler` mapping suppress the entries they cover —
721/// `.google.protobuf` suppresses both, `.google.protobuf.compiler`
722/// suppresses only `plugin.proto`.
723/// - **The proto file itself is in `files_to_generate`.** When building
724/// `buffa-descriptor` (or any local copy of `descriptor.proto`), its types
725/// must resolve to the local module, not externally.
726///
727/// Currently internal-only — there is no `CodeGenConfig` field for
728/// user-provided *file-level* mappings. The user-facing `extern_path` API is
729/// keyed by proto package *or* type FQN (per-type overrides, issue #111);
730/// per-file overrides may be added later as a public feature if a concrete
731/// need arises.
732pub(crate) fn effective_file_extern_paths(
733 files_to_generate: &[String],
734 config: &CodeGenConfig,
735) -> Vec<(String, String)> {
736 // (proto file path, proto package, Rust module root). The package is
737 // recorded alongside the file so the user-override suppression check
738 // is per-file: a `.google.protobuf.compiler` extern_path covers only
739 // `plugin.proto`, while `.google.protobuf` covers both.
740 const DESCRIPTOR_FILES: [(&str, &str, &str); 2] = [
741 (
742 "google/protobuf/descriptor.proto",
743 "google.protobuf",
744 "::buffa_descriptor::generated::descriptor",
745 ),
746 (
747 "google/protobuf/compiler/plugin.proto",
748 "google.protobuf.compiler",
749 "::buffa_descriptor::generated::compiler",
750 ),
751 ];
752
753 DESCRIPTOR_FILES
754 .into_iter()
755 .filter(|(proto_file, package, _)| {
756 // Yield to a user package-level extern_path that already covers
757 // this file's package: anyone who wrote
758 // `extern_path(".google.protobuf", "::my_crate")` (or a
759 // sub-package mapping) today routes these types to their crate;
760 // the auto-injected file-level mapping must not silently
761 // outrank it.
762 if context::resolve_extern_prefix(package, &config.extern_paths).is_some() {
763 return false;
764 }
765 // Don't externalize a file we're generating locally.
766 !files_to_generate.iter().any(|f| f == proto_file)
767 })
768 .map(|(proto_file, _, rust_module)| (proto_file.to_string(), rust_module.to_string()))
769 .collect()
770}
771
/// A single CamelCase collision: one target identifier together with every
/// proto value name that would convert onto it.
///
/// Carried by [`CodeGenWarning::IdiomaticAliasesSuppressed`].
#[derive(Clone, Debug, Eq, PartialEq)]
#[non_exhaustive]
pub struct AliasConflict {
    /// `UpperCamelCase` identifier that the colliding values all map to.
    pub camel_target: String,
    /// Proto value names converting onto `camel_target`. A literal variant
    /// name is listed too when an alias would shadow it.
    pub proto_values: Vec<String>,
}
785
786/// A non-fatal diagnostic produced during code generation.
787///
788/// Returned by [`generate_with_diagnostics`]. Render the human-readable form via
789/// the [`Display`](core::fmt::Display) impl (e.g. `cargo:warning={warning}`), or
790/// match on the variant for programmatic handling. The enum and its variants are
791/// `#[non_exhaustive]` so new diagnostic kinds and fields can be added without a
792/// breaking change.
793#[derive(Debug, Clone, PartialEq, Eq)]
794#[non_exhaustive]
795pub enum CodeGenWarning {
796 /// Idiomatic CamelCase aliases were suppressed for an enum because two or
797 /// more proto values collide after conversion, or a value would convert to
798 /// an invalid identifier. The enum's `SHOUTY_SNAKE_CASE` variants are
799 /// unaffected.
800 #[non_exhaustive]
801 IdiomaticAliasesSuppressed {
802 /// The Rust name of the affected enum.
803 enum_name: String,
804 /// Each collision, by target identifier. Empty if the only problem was
805 /// invalid identifiers.
806 conflicts: Vec<AliasConflict>,
807 /// Proto values that would convert to an invalid Rust identifier.
808 invalid: Vec<String>,
809 },
810 /// A field or oneof accessor on a generated `FooOwnedView` wrapper was
811 /// suppressed because the proto name collides with one of the wrapper's
812 /// reserved method names (`decode`, `view`, `bytes`, …). The field stays
813 /// fully accessible through `view()` on the wrapper (or
814 /// `OwnedView::reborrow`).
815 #[non_exhaustive]
816 OwnedViewAccessorSuppressed {
817 /// The Rust name of the wrapper type (e.g. `FooOwnedView`).
818 wrapper_name: String,
819 /// The proto field or oneof name whose accessor was suppressed.
820 field_name: String,
821 },
822}
823
824impl core::fmt::Display for CodeGenWarning {
825 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
826 match self {
827 Self::IdiomaticAliasesSuppressed {
828 enum_name,
829 conflicts,
830 invalid,
831 } => {
832 // Name the cause accurately: a collision, an invalid identifier,
833 // or both.
834 let cause = match (conflicts.is_empty(), invalid.is_empty()) {
835 (false, true) => "naming conflict",
836 (true, false) => "invalid identifier",
837 _ => "naming conflict / invalid identifier",
838 };
839 write!(
840 f,
841 "enum `{enum_name}`: idiomatic CamelCase aliases suppressed ({cause})"
842 )?;
843 let mut parts: Vec<String> = conflicts
844 .iter()
845 .map(|c| format!("{} → {}", c.proto_values.join(", "), c.camel_target))
846 .collect();
847 parts.extend(invalid.iter().map(|n| format!("{n} → invalid identifier")));
848 if !parts.is_empty() {
849 write!(f, ": {}", parts.join("; "))?;
850 }
851 Ok(())
852 }
853 Self::OwnedViewAccessorSuppressed {
854 wrapper_name,
855 field_name,
856 } => {
857 write!(
858 f,
859 "`{wrapper_name}`: accessor for field `{field_name}` suppressed \
860 (collides with a reserved wrapper method); use `.view().{field_name}` instead"
861 )
862 }
863 }
864 }
865}
866
867/// Generate Rust source files from a set of file descriptors.
868///
869/// `files_to_generate` is the set of file names that were explicitly requested
870/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
871/// dependencies may be present in `file_descriptors` but won't produce output
872/// files unless they appear in `files_to_generate`.
873///
874/// Each `.proto` emits up to five content files (kinds with no content
875/// are omitted); each distinct package emits one `<pkg>.mod.rs`
876/// stitcher. Packages are processed in sorted order for deterministic
877/// output.
878///
879/// # Diagnostics
880///
881/// Non-fatal diagnostics produced during generation (e.g. an enum whose
882/// idiomatic CamelCase aliases were suppressed by a naming conflict) are
883/// **discarded** here. Use [`generate_with_diagnostics`] to receive them and
884/// surface them as build warnings.
885pub fn generate(
886 file_descriptors: &[FileDescriptorProto],
887 files_to_generate: &[String],
888 config: &CodeGenConfig,
889) -> Result<Vec<GeneratedFile>, CodeGenError> {
890 Ok(generate_with_diagnostics(file_descriptors, files_to_generate, config)?.0)
891}
892
893/// Like [`generate`], but also returns the non-fatal [`CodeGenWarning`]s
894/// collected during generation (e.g. enums whose idiomatic CamelCase aliases
895/// were suppressed by a naming conflict).
896///
897/// Surface each warning via its [`Display`](core::fmt::Display) impl — e.g. as a
898/// `cargo:warning=...` from a `build.rs`, or on stderr from a standalone
899/// generator — or match on it for programmatic handling. [`generate`] discards
900/// them, so existing callers are unaffected.
901///
902/// Warnings are returned only on success. On error, any warnings already
903/// collected are dropped along with the partial output — the [`CodeGenError`]
904/// is the actionable signal.
905///
906/// # Errors
907///
908/// Returns [`CodeGenError::FileNotFound`] if a name in `files_to_generate` has
909/// no matching descriptor, [`CodeGenError::Other`] if `generate_reflection_vtable`
910/// is set without `generate_reflection`, and other [`CodeGenError`] variants for
911/// malformed descriptors (e.g. a missing required field) encountered while
912/// generating.
913pub fn generate_with_diagnostics(
914 file_descriptors: &[FileDescriptorProto],
915 files_to_generate: &[String],
916 config: &CodeGenConfig,
917) -> Result<(Vec<GeneratedFile>, Vec<CodeGenWarning>), CodeGenError> {
918 // Vtable reflection resolves against the per-package descriptor pool, which
919 // is emitted by bridge-mode reflection — so it requires `generate_reflection`.
920 // It does NOT require views: the owned `impl ReflectMessage` is self-contained,
921 // so with views off, vtable mode still emits owned-message reflection (the
922 // view impls are simply skipped along with the views).
923 if config.generate_reflection_vtable && !config.generate_reflection {
924 return Err(CodeGenError::Other(
925 "generate_reflection_vtable requires generate_reflection to be enabled \
926 (it provides the descriptor pool the reflect impls resolve against)"
927 .into(),
928 ));
929 }
930
931 let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
932
933 // Group requested files by package. BTreeMap → deterministic output order.
934 let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
935 std::collections::BTreeMap::new();
936 for file_name in files_to_generate {
937 let file_desc = file_descriptors
938 .iter()
939 .find(|f| f.name.as_deref() == Some(file_name.as_str()))
940 .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
941 let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
942 by_package.entry(pkg).or_default().push(file_desc);
943 }
944
945 // Reflection: serialize the FileDescriptorSet once, regardless of how
946 // many packages are in the request. Each package embeds its own copy of
947 // the bytes (binary-size dedup is a follow-up), but the build-time
948 // re-encoding cost shouldn't scale with the package count.
949 let fds_bytes = if config.generate_reflection {
950 reflect::encode_fds_once(file_descriptors)
951 } else {
952 Vec::new()
953 };
954
955 let mut output = Vec::new();
956 for (package, files) in by_package {
957 generate_package(&ctx, &package, &files, &fds_bytes, &mut output)?;
958 }
959
960 Ok((output, ctx.take_warnings()))
961}
962
963/// Generate a module tree that assembles per-package `.mod.rs` files into
964/// nested `pub mod` blocks matching the protobuf package hierarchy.
965///
966/// Each entry is a `(mod_file_name, package)` pair where `package` is the
967/// dot-separated protobuf package name (e.g., `"google.api"`) and
968/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
969/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
970/// content files are reached via `include!` from the stitcher).
971///
972/// `include_mode` controls how `include!` paths are emitted.
973///
974/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
975/// valid when the output is used directly as a module file (`mod.rs`),
976/// invalid when consumed via `include!`.
977pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
978 entries: &[(F, P)],
979 include_mode: IncludeMode<'_>,
980 emit_inner_allow: bool,
981) -> String {
982 use std::collections::BTreeMap;
983 use std::fmt::Write;
984
985 use crate::idents::escape_mod_ident;
986
987 #[derive(Default)]
988 struct ModNode {
989 files: Vec<String>,
990 children: BTreeMap<String, Self>,
991 }
992
993 let mut root = ModNode::default();
994
995 for (file_name, package) in entries {
996 let package = package.as_ref();
997 let pkg_parts: Vec<&str> = if package.is_empty() {
998 vec![]
999 } else {
1000 package.split('.').collect()
1001 };
1002
1003 let mut node = &mut root;
1004 for seg in &pkg_parts {
1005 node = node.children.entry(seg.to_string()).or_default();
1006 }
1007 node.files.push(file_name.as_ref().to_string());
1008 }
1009
1010 let lints = ALLOW_LINTS.join(", ");
1011 let mut out = String::new();
1012 let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
1013 if emit_inner_allow {
1014 let _ = writeln!(out, "#![allow({lints})]");
1015 }
1016 let _ = writeln!(out);
1017
1018 fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
1019 let indent = " ".repeat(depth);
1020
1021 for file in &node.files {
1022 match mode {
1023 IncludeMode::Relative(prefix) => {
1024 let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
1025 }
1026 IncludeMode::OutDir => {
1027 let _ = writeln!(
1028 out,
1029 r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
1030 );
1031 }
1032 }
1033 }
1034
1035 for (name, child) in &node.children {
1036 let escaped = escape_mod_ident(name);
1037 let _ = writeln!(out, "{indent}#[allow({lints})]");
1038 let _ = writeln!(out, "{indent}pub mod {escaped} {{");
1039 let _ = writeln!(out, "{indent} use super::*;");
1040 emit(out, child, depth + 1, mode, lints);
1041 let _ = writeln!(out, "{indent}}}");
1042 }
1043 }
1044
1045 emit(&mut out, &root, 0, include_mode, &lints);
1046 out
1047}
1048
/// Selects the form of the `include!` paths written by
/// [`generate_module_tree`].
#[derive(Clone, Copy, Debug)]
pub enum IncludeMode<'a> {
    /// Emit `include!("<prefix><file>")`, a path relative to the including
    /// file. The prefix is typically `""` or `"gen/"`.
    Relative(&'a str),
    /// Emit `include!(concat!(env!("OUT_DIR"), "/<file>"))`, for output
    /// written by a `build.rs`.
    OutDir,
}
1058
1059/// Validate one input descriptor before generating code for it.
1060///
1061/// Checks, in one walk of the message tree:
1062///
1063/// - **Reserved field names**: no field starts with `__buffa_` (would clash
1064/// with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
1065/// - **Module-name conflicts**: no two sibling messages snake_case to the
1066/// same module name (e.g. `HTTPRequest` vs `HttpRequest`).
1067/// - **Reserved sentinel**: no package segment, message-module name, or
1068/// file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
1069/// Ancillary types live under `pkg::__buffa::…`; a proto element
1070/// emitting an item named `__buffa` at package root would produce
1071/// E0428 against `pub mod __buffa`. This is the only name buffa
1072/// reserves in user namespace.
1073fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
1074 use std::collections::HashMap;
1075
1076 let sentinel = context::SENTINEL_MOD;
1077 let package = file.package.as_deref().unwrap_or("");
1078 if package.split('.').any(|seg| seg == sentinel) {
1079 return Err(CodeGenError::ReservedModuleName {
1080 name: sentinel.to_string(),
1081 location: format!("package '{package}'"),
1082 });
1083 }
1084 // File-level enums emit `pub enum <name>` at package root with the
1085 // proto name preserved verbatim (no PascalCase normalization), so a
1086 // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
1087 // enums live inside their owner message's module and cannot collide
1088 // with the package-root sentinel, so only file-level is checked.
1089 for enum_type in &file.enum_type {
1090 let name = enum_type.name.as_deref().unwrap_or("");
1091 if name == sentinel {
1092 return Err(CodeGenError::ReservedModuleName {
1093 name: sentinel.to_string(),
1094 location: format!("enum '{package}.{name}'"),
1095 });
1096 }
1097 }
1098
1099 fn walk(
1100 messages: &[crate::generated::descriptor::DescriptorProto],
1101 scope: &str,
1102 sentinel: &str,
1103 ) -> Result<(), CodeGenError> {
1104 // snake_case module name → original proto name (for conflict diag).
1105 let mut seen: HashMap<String, &str> = HashMap::new();
1106
1107 for msg in messages {
1108 let name = msg.name.as_deref().unwrap_or("");
1109 let fqn = if scope.is_empty() {
1110 name.to_string()
1111 } else {
1112 format!("{scope}.{name}")
1113 };
1114
1115 for field in &msg.field {
1116 if let Some(fname) = &field.name {
1117 if fname.starts_with("__buffa_") {
1118 return Err(CodeGenError::ReservedFieldName {
1119 message_name: fqn,
1120 field_name: fname.clone(),
1121 });
1122 }
1123 }
1124 }
1125
1126 let module_name = crate::oneof::to_snake_case(name);
1127 if module_name == sentinel {
1128 return Err(CodeGenError::ReservedModuleName {
1129 name: sentinel.to_string(),
1130 location: format!("message '{fqn}'"),
1131 });
1132 }
1133 if let Some(existing) = seen.get(&module_name) {
1134 return Err(CodeGenError::ModuleNameConflict {
1135 scope: scope.to_string(),
1136 name_a: existing.to_string(),
1137 name_b: name.to_string(),
1138 module_name,
1139 });
1140 }
1141 seen.insert(module_name, name);
1142
1143 walk(&msg.nested_type, &fqn, sentinel)?;
1144 }
1145 Ok(())
1146 }
1147
1148 walk(&file.message_type, package, sentinel)
1149}
1150
/// Everything generated for one `.proto` file, before formatting: the file
/// stem, one token stream per output kind, and the package-root re-export
/// candidates. Produced by [`generate_proto_content`].
struct ProtoContent {
    /// File stem derived from the proto file name (via
    /// `proto_path_to_stem`); per-file output names are built as
    /// `<stem><suffix>.rs`.
    stem: String,
    /// Items emitted at package root: file-level enums, top-level message
    /// items and the per-message nested-items modules.
    owned: TokenStream,
    /// View items collected from every top-level message of the file.
    view: TokenStream,
    /// Oneof items collected from every top-level message of the file.
    oneof: TokenStream,
    /// View-oneof items collected from every top-level message of the file.
    view_oneof: TokenStream,
    /// Items generated for the file-level `extend` declarations.
    ext: TokenStream,
    /// Candidate `pub use` re-exports targeting the package root (top-level
    /// view structs, file-level extension consts). Filtered against the
    /// package-wide root namespace in [`generate_package_mod`] — the package
    /// can span multiple `.proto` files, so collisions are only knowable at
    /// the stitcher level.
    root_reexports: Vec<message::ReexportCandidate>,
}
1166
1167/// Generate the per-`.proto` content token streams for one input file.
1168/// Each ancillary kind that has no content yields an empty stream and
1169/// is dropped at the file-emission stage.
1170fn generate_proto_content(
1171 ctx: &context::CodeGenContext,
1172 current_package: &str,
1173 file: &FileDescriptorProto,
1174 reg: &mut message::RegistryPaths,
1175) -> Result<ProtoContent, CodeGenError> {
1176 use crate::idents::make_field_ident;
1177 use crate::message::MessageOutput;
1178
1179 validate_file(file)?;
1180
1181 let resolver = imports::ImportResolver::new();
1182 let features = crate::features::for_file(file);
1183
1184 let mut owned = TokenStream::new();
1185 let mut view = TokenStream::new();
1186 let mut oneof = TokenStream::new();
1187 let mut view_oneof = TokenStream::new();
1188 let mut ext = TokenStream::new();
1189 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
1190 let sentinel = make_field_ident(context::SENTINEL_MOD);
1191
1192 for enum_type in &file.enum_type {
1193 let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
1194 let enum_fqn = if current_package.is_empty() {
1195 enum_rust_name.to_string()
1196 } else {
1197 format!("{}.{}", current_package, enum_rust_name)
1198 };
1199 owned.extend(enumeration::generate_enum(
1200 ctx,
1201 enum_type,
1202 enum_rust_name,
1203 &enum_fqn,
1204 &features,
1205 &resolver,
1206 )?);
1207 }
1208
1209 for message_type in &file.message_type {
1210 let top_level_name = message_type.name.as_deref().unwrap_or("");
1211 let proto_fqn = if current_package.is_empty() {
1212 top_level_name.to_string()
1213 } else {
1214 format!("{}.{}", current_package, top_level_name)
1215 };
1216 let MessageOutput {
1217 owned_top,
1218 owned_mod,
1219 oneof_tree: msg_oneof,
1220 view_tree: msg_view,
1221 view_oneof_tree: msg_view_oneof,
1222 reg: msg_reg,
1223 } = message::generate_message(
1224 ctx,
1225 message_type,
1226 current_package,
1227 top_level_name,
1228 &proto_fqn,
1229 &features,
1230 &resolver,
1231 )?;
1232 owned.extend(owned_top);
1233 let mod_name = ctx.nested_module_name(current_package, top_level_name);
1234 let mod_ident = make_field_ident(&mod_name);
1235 // When the nested-types module was deconflicted from a sub-package
1236 // (issue #135), document why the name carries a trailing `_`.
1237 let mod_doc = if mod_name == crate::oneof::to_snake_case(top_level_name) {
1238 quote! {}
1239 } else {
1240 let doc = format!(
1241 "Nested items of `{top_level_name}`. The module name carries a \
1242 trailing `_` to avoid a collision with another module in this \
1243 scope (a sub-package or sibling message of the same name). See \
1244 buffa#135."
1245 );
1246 quote! { #[doc = #doc] }
1247 };
1248 for p in msg_reg.json_ext {
1249 reg.json_ext.push(quote! { #mod_ident :: #p });
1250 }
1251 for p in msg_reg.text_ext {
1252 reg.text_ext.push(quote! { #mod_ident :: #p });
1253 }
1254 reg.json_any.extend(msg_reg.json_any);
1255 reg.text_any.extend(msg_reg.text_any);
1256
1257 if !owned_mod.is_empty() {
1258 owned.extend(quote! {
1259 #mod_doc
1260 pub mod #mod_ident {
1261 #[allow(unused_imports)]
1262 use super::*;
1263 #owned_mod
1264 }
1265 });
1266 }
1267 oneof.extend(msg_oneof);
1268 view.extend(msg_view);
1269 view_oneof.extend(msg_view_oneof);
1270
1271 // Top-level message view → re-export at package root. The leading
1272 // `self::` is load-bearing: when consumers nest packages with
1273 // `pub mod a { use super::*; pub mod a_b { use super::*; … } }`
1274 // (`buffa-build`'s `_include.rs` does this), a parent package's
1275 // `__buffa` is in scope via the glob, and Rust's import-resolution
1276 // pass treats a glob-imported name as ambiguous against a
1277 // **macro-expanded** local one (the `pub mod __buffa` block arrives
1278 // via `include!()`), even though a non-macro local definition would
1279 // shadow the glob — see rustc E0659. `self::` resolves it
1280 // deterministically. `#[doc(inline)]` makes rustdoc render the type's
1281 // full page at the natural path instead of a "Re-export of …" stub.
1282 if ctx.config.generate_views {
1283 let view_ident = format_ident!("{top_level_name}View");
1284 root_reexports.push(message::ReexportCandidate {
1285 name: view_ident.to_string(),
1286 tokens: feature_gates::cfg_block(
1287 quote! {
1288 #[doc(inline)]
1289 pub use self :: #sentinel :: view :: #view_ident;
1290 },
1291 ctx.config.feature_gates().views,
1292 ),
1293 });
1294 // The owned-view wrapper gets the same natural-path treatment as
1295 // the view struct, so `pkg::FooOwnedView` works out of the box.
1296 let owned_view_ident = format_ident!("{top_level_name}OwnedView");
1297 root_reexports.push(message::ReexportCandidate {
1298 name: owned_view_ident.to_string(),
1299 tokens: feature_gates::cfg_block(
1300 quote! {
1301 #[doc(inline)]
1302 pub use self :: #sentinel :: view :: #owned_view_ident;
1303 },
1304 ctx.config.feature_gates().views,
1305 ),
1306 });
1307 }
1308 }
1309
1310 // File-level `extend` declarations → `__buffa::ext::` (depth 2).
1311 let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
1312 ctx,
1313 &file.extension,
1314 current_package,
1315 2,
1316 &features,
1317 current_package,
1318 )?;
1319 ext.extend(file_ext_tokens);
1320 for id in file_ext_json {
1321 reg.json_ext.push(quote! { #sentinel :: ext :: #id });
1322 }
1323 for id in file_ext_text {
1324 reg.text_ext.push(quote! { #sentinel :: ext :: #id });
1325 }
1326 // File-level extension consts → re-export at package root. `self::` and
1327 // `#[doc(inline)]` for the same reasons as the view re-exports above.
1328 for ext_field in &file.extension {
1329 let const_ident = extension::extension_const_ident(ext_field.name.as_deref().unwrap_or(""));
1330 root_reexports.push(message::ReexportCandidate {
1331 name: const_ident.to_string(),
1332 tokens: quote! {
1333 #[doc(inline)]
1334 pub use self :: #sentinel :: ext :: #const_ident;
1335 },
1336 });
1337 }
1338
1339 Ok(ProtoContent {
1340 stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
1341 owned,
1342 view,
1343 oneof,
1344 view_oneof,
1345 ext,
1346 root_reexports,
1347 })
1348}
1349
/// Per-section token streams for one package, ready for the stitcher.
///
/// Each section is a list with one entry per contributing `.proto` file. In
/// per-file mode an entry is an `include!("<stem>...rs")` call; in
/// `file_per_package` mode it is the generated items themselves. A section
/// that stays empty means the package has no content of that kind.
#[derive(Default)]
struct PackageSections {
    /// Owned items, emitted at package root.
    owned: Vec<TokenStream>,
    /// View items.
    view: Vec<TokenStream>,
    /// Oneof items.
    oneof: Vec<TokenStream>,
    /// View-oneof items.
    view_oneof: Vec<TokenStream>,
    /// Items for file-level extensions.
    ext: Vec<TokenStream>,
}
1362
1363impl PackageSections {
1364 /// Append one proto file's generated items in-line.
1365 ///
1366 /// Empty streams are skipped so each section's emptiness reflects
1367 /// "the package has no content of this kind" — symmetric with the
1368 /// per-file branch that filters at file-emission time.
1369 fn push_inline(&mut self, pc: ProtoContent) {
1370 let push_if_nonempty = |dst: &mut Vec<TokenStream>, ts: TokenStream| {
1371 if !ts.is_empty() {
1372 dst.push(ts);
1373 }
1374 };
1375 push_if_nonempty(&mut self.owned, pc.owned);
1376 push_if_nonempty(&mut self.view, pc.view);
1377 push_if_nonempty(&mut self.oneof, pc.oneof);
1378 push_if_nonempty(&mut self.view_oneof, pc.view_oneof);
1379 push_if_nonempty(&mut self.ext, pc.ext);
1380 }
1381}
1382
1383/// Generate all output files for one proto package: up to five content
1384/// files per `.proto` (empty ancillary kinds are skipped) plus one
1385/// `<pkg>.mod.rs` stitcher, or a single `<pkg>.rs` when
1386/// [`CodeGenConfig::file_per_package`] is set.
1387fn generate_package(
1388 ctx: &context::CodeGenContext,
1389 current_package: &str,
1390 files: &[&FileDescriptorProto],
1391 fds_bytes: &[u8],
1392 out: &mut Vec<GeneratedFile>,
1393) -> Result<(), CodeGenError> {
1394 // Registry paths are package-root-relative; `register_types` lives at
1395 // `__buffa::register_types` (one level deep), so each path gets a
1396 // single `super::` prefix when emitted into the fn body.
1397 let mut reg = message::RegistryPaths::default();
1398 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
1399
1400 let sections = if ctx.config.file_per_package {
1401 let mut sections = PackageSections::default();
1402 for file in files {
1403 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
1404 root_reexports.append(&mut pc.root_reexports);
1405 sections.push_inline(pc);
1406 }
1407 sections
1408 } else {
1409 let mut sections = PackageSections::default();
1410 for file in files {
1411 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
1412 root_reexports.append(&mut pc.root_reexports);
1413 let source = file.name.as_deref().unwrap_or("");
1414 let stem = pc.stem;
1415
1416 // Empty ancillary token streams are skipped — neither the
1417 // content file nor the stitcher's `include!` is emitted.
1418 let emit = |suffix: &str,
1419 kind: GeneratedFileKind,
1420 tokens: TokenStream,
1421 section: &mut Vec<TokenStream>,
1422 out: &mut Vec<GeneratedFile>|
1423 -> Result<(), CodeGenError> {
1424 if tokens.is_empty() {
1425 return Ok(());
1426 }
1427 let name = format!("{stem}{suffix}.rs");
1428 section.push(quote! { include!(#name); });
1429 out.push(GeneratedFile {
1430 name,
1431 package: current_package.to_string(),
1432 kind,
1433 content: format_tokens(tokens, source)?,
1434 });
1435 Ok(())
1436 };
1437 emit(
1438 "",
1439 GeneratedFileKind::Owned,
1440 pc.owned,
1441 &mut sections.owned,
1442 out,
1443 )?;
1444 emit(
1445 ".__view",
1446 GeneratedFileKind::View,
1447 pc.view,
1448 &mut sections.view,
1449 out,
1450 )?;
1451 emit(
1452 ".__oneof",
1453 GeneratedFileKind::Oneof,
1454 pc.oneof,
1455 &mut sections.oneof,
1456 out,
1457 )?;
1458 emit(
1459 ".__view_oneof",
1460 GeneratedFileKind::ViewOneof,
1461 pc.view_oneof,
1462 &mut sections.view_oneof,
1463 out,
1464 )?;
1465 emit(
1466 ".__ext",
1467 GeneratedFileKind::Ext,
1468 pc.ext,
1469 &mut sections.ext,
1470 out,
1471 )?;
1472 }
1473 sections
1474 };
1475
1476 let reexport_block = surviving_root_reexports(ctx, files, ®, root_reexports);
1477
1478 out.push(GeneratedFile {
1479 name: if ctx.config.file_per_package {
1480 package_to_filename(current_package)
1481 } else {
1482 package_to_mod_filename(current_package)
1483 },
1484 package: current_package.to_string(),
1485 kind: GeneratedFileKind::PackageMod,
1486 content: generate_package_mod(ctx, §ions, ®, &reexport_block, fds_bytes)?,
1487 });
1488
1489 Ok(())
1490}
1491
1492/// Filter the candidate package-root re-exports against the package's
1493/// existing root namespace and against each other, returning the surviving
1494/// `pub use` lines.
1495///
1496/// The package root is shared across every `.proto` file in the package, so
1497/// the occupied-name set must be built from *all* of them — a top-level
1498/// message named `FooView` declared in `a.proto` would shadow `Foo`'s view
1499/// re-export from `b.proto`.
1500fn surviving_root_reexports(
1501 ctx: &context::CodeGenContext,
1502 files: &[&FileDescriptorProto],
1503 reg: &message::RegistryPaths,
1504 mut candidates: Vec<message::ReexportCandidate>,
1505) -> TokenStream {
1506 use crate::idents::make_field_ident;
1507 use std::collections::BTreeSet;
1508
1509 // Names already occupied at package root by real items: top-level
1510 // messages, enums, message nested-types modules (deconflicted name, #135),
1511 // and the `__buffa` sentinel itself. File-level extension consts live in
1512 // `__buffa::ext::`, not at the root, so they are *candidates* (added
1513 // by `generate_proto_content`) rather than occupants.
1514 let mut occupied: BTreeSet<String> = BTreeSet::new();
1515 occupied.insert(context::SENTINEL_MOD.to_string());
1516 for file in files {
1517 let package = file.package.as_deref().unwrap_or("");
1518 for m in &file.message_type {
1519 let name = m.name.as_deref().unwrap_or("");
1520 occupied.insert(name.to_string());
1521 // The actual module name (deconflicted from sub-packages, #135).
1522 occupied.insert(ctx.nested_module_name(package, name));
1523 }
1524 for e in &file.enum_type {
1525 occupied.insert(e.name.as_deref().unwrap_or("").to_string());
1526 }
1527 }
1528
1529 // `register_types`, when emitted, lives at `__buffa::register_types`.
1530 // `self::` and `#[doc(inline)]` for the same reasons as the view
1531 // re-exports above. Same `any(json, text)` gate as the fn itself.
1532 if ctx.config.emit_register_fn && !reg.is_empty() {
1533 let sentinel = make_field_ident(context::SENTINEL_MOD);
1534 let json_or_text = ctx.config.feature_gates().json_or_text();
1535 candidates.push(message::ReexportCandidate {
1536 name: "register_types".to_string(),
1537 tokens: feature_gates::cfg_block_any(
1538 quote! {
1539 #[doc(inline)]
1540 pub use self :: #sentinel :: register_types;
1541 },
1542 &json_or_text,
1543 ),
1544 });
1545 }
1546
1547 message::emit_surviving_reexports(candidates, &occupied)
1548}
1549
1550/// Render the per-package stitcher: owned items at root plus the
1551/// `__buffa::{view,oneof,ext,...}` module wrappers, followed by the
1552/// surviving package-root `pub use` re-exports.
1553fn generate_package_mod(
1554 ctx: &context::CodeGenContext,
1555 sections: &PackageSections,
1556 reg: &message::RegistryPaths,
1557 root_reexports: &TokenStream,
1558 fds_bytes: &[u8],
1559) -> Result<String, CodeGenError> {
1560 use crate::idents::make_field_ident;
1561
1562 let owned = §ions.owned;
1563 let view = §ions.view;
1564 let view_oneof = §ions.view_oneof;
1565 let oneof = §ions.oneof;
1566 let ext = §ions.ext;
1567
1568 // Each ancillary module is emitted only when its section has
1569 // content. The natural-path re-exports outside `__buffa` target
1570 // these modules — they are emitted only when their target items
1571 // exist, so the conditions align and re-exports never reference
1572 // a missing module.
1573 let view_oneof_mod = if !view_oneof.is_empty() {
1574 quote! {
1575 pub mod oneof {
1576 #[allow(unused_imports)]
1577 use super::*;
1578 #(#view_oneof)*
1579 }
1580 }
1581 } else {
1582 TokenStream::new()
1583 };
1584
1585 // `view_oneof` is only populated for messages that have oneofs, and
1586 // every message also contributes to `view`, so `!view.is_empty()` is
1587 // sufficient — `view_oneof` non-empty implies `view` non-empty.
1588 debug_assert!(view_oneof.is_empty() || !view.is_empty());
1589 let view_mod = if ctx.config.generate_views && !view.is_empty() {
1590 feature_gates::cfg_block(
1591 quote! {
1592 pub mod view {
1593 #[allow(unused_imports)]
1594 use super::*;
1595 #(#view)*
1596 #view_oneof_mod
1597 }
1598 },
1599 ctx.config.feature_gates().views,
1600 )
1601 } else {
1602 TokenStream::new()
1603 };
1604
1605 let oneof_mod = if !oneof.is_empty() {
1606 quote! {
1607 pub mod oneof {
1608 #[allow(unused_imports)]
1609 use super::*;
1610 #(#oneof)*
1611 }
1612 }
1613 } else {
1614 TokenStream::new()
1615 };
1616
1617 let ext_mod = if !ext.is_empty() {
1618 quote! {
1619 pub mod ext {
1620 #[allow(unused_imports)]
1621 use super::*;
1622 #(#ext)*
1623 }
1624 }
1625 } else {
1626 TokenStream::new()
1627 };
1628
1629 let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
1630 let gates = ctx.config.feature_gates();
1631 // When the gated consts (`__*_JSON_ANY` / `__*_TEXT_ANY`) are
1632 // `#[cfg(feature = "...")]`, each registration statement that
1633 // references them gets the same gate. `#[cfg]` on a statement is
1634 // allowed; the call disappears with the const.
1635 let json_regs = reg
1636 .json_any
1637 .iter()
1638 .map(|p| {
1639 feature_gates::cfg_block(quote! { reg.register_json_any(super::#p); }, gates.json)
1640 })
1641 .chain(reg.json_ext.iter().map(|p| {
1642 feature_gates::cfg_block(quote! { reg.register_json_ext(super::#p); }, gates.json)
1643 }));
1644 let text_regs = reg
1645 .text_any
1646 .iter()
1647 .map(|p| {
1648 feature_gates::cfg_block(quote! { reg.register_text_any(super::#p); }, gates.text)
1649 })
1650 .chain(reg.text_ext.iter().map(|p| {
1651 feature_gates::cfg_block(quote! { reg.register_text_ext(super::#p); }, gates.text)
1652 }));
1653 // When gating, a feature subset may leave one bucket of statements
1654 // cfg'd out while the other survives — `reg` is still used. But if
1655 // `register_types` itself is gated on `any(json, text)` (below),
1656 // the only reachable bodies have at least one statement, so `reg`
1657 // can't be unused. Keep `#[allow(unused_variables)]` defensively
1658 // anyway: it's harmless, and the alternative — proving the
1659 // invariant holds across future statement-shape changes — is
1660 // brittle.
1661 let allow_unused = if ctx.config.gate_impls_on_crate_features {
1662 quote! { #[allow(unused_variables)] }
1663 } else {
1664 quote! {}
1665 };
1666 // The fn is useless without at least one of the gated modes that
1667 // populate it — and `::buffa::type_registry::TypeRegistry` may
1668 // become feature-gated in the runtime in a future release. Gate the
1669 // fn on `any(...)` of whichever modes are active so it disappears
1670 // alongside the last entry.
1671 feature_gates::cfg_block_any(
1672 quote! {
1673 /// Register this package's `Any` type entries and extension entries.
1674 #allow_unused
1675 pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
1676 #(#json_regs)*
1677 #(#text_regs)*
1678 }
1679 },
1680 &gates.json_or_text(),
1681 )
1682 } else {
1683 TokenStream::new()
1684 };
1685
1686 // Reflection: embed the FileDescriptorSet bytes and a lazy pool
1687 // accessor so per-message `Reflectable` impls have a descriptor pool to
1688 // resolve against. Lives inside `__buffa` so the impls can reach it via
1689 // a relative `__buffa::reflect::descriptor_pool()` path. A package-root
1690 // `pub use` re-exports `descriptor_pool` so consumers don't have to
1691 // route through the reserved `__buffa` sentinel.
1692 let (reflect_mod, reflect_reexport) = if ctx.config.generate_reflection {
1693 let gate = ctx.config.feature_gates().reflect;
1694 (
1695 feature_gates::cfg_block(reflect::reflect_pool_module(fds_bytes), gate),
1696 feature_gates::cfg_block(reflect::pool_accessor_reexport("e! { __buffa }), gate),
1697 )
1698 } else {
1699 (TokenStream::new(), TokenStream::new())
1700 };
1701
1702 let sentinel = make_field_ident(context::SENTINEL_MOD);
1703 // The whole `pub mod __buffa { ... }` wrapper is itself omitted
1704 // when none of its inner modules or `register_types` exist.
1705 let buffa_mod = if view_mod.is_empty()
1706 && oneof_mod.is_empty()
1707 && ext_mod.is_empty()
1708 && register_fn.is_empty()
1709 && reflect_mod.is_empty()
1710 {
1711 TokenStream::new()
1712 } else {
1713 let allow = allow_lints_attr();
1714 quote! {
1715 #allow
1716 pub mod #sentinel {
1717 #[allow(unused_imports)]
1718 use super::*;
1719 #view_mod
1720 #oneof_mod
1721 #ext_mod
1722 #register_fn
1723 #reflect_mod
1724 }
1725 }
1726 };
1727
1728 let tokens = quote! {
1729 #(#owned)*
1730 #buffa_mod
1731 #reflect_reexport
1732 #root_reexports
1733 };
1734
1735 format_tokens(tokens, "")
1736}
1737
1738/// Format a token stream into a generated-file string with the standard
1739/// header comment.
1740fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
1741 let syntax_tree =
1742 syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
1743 let formatted = prettyplease::unparse(&syntax_tree);
1744 let source_line = if source.is_empty() {
1745 String::new()
1746 } else {
1747 format!("// source: {source}\n")
1748 };
1749 Ok(format!(
1750 "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
1751 ))
1752}
1753
1754/// Convert a proto package name to its `.mod.rs` stitcher filename.
1755///
1756/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
1757/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
1758/// filename stem — `package __buffa;` is already rejected by
1759/// `validate_file`, so the unnamed-package stitcher cannot
1760/// collide with any real package's.
1761pub fn package_to_mod_filename(package: &str) -> String {
1762 if package.is_empty() {
1763 format!("{}.mod.rs", context::SENTINEL_MOD)
1764 } else {
1765 format!("{package}.mod.rs")
1766 }
1767}
1768
1769/// Convert a proto package name to its [`file_per_package`] output filename.
1770///
1771/// e.g., `"google.protobuf"` → `"google.protobuf.rs"`. The unnamed
1772/// package uses [`SENTINEL_MOD`](context::SENTINEL_MOD) — same
1773/// collision-avoidance as [`package_to_mod_filename`].
1774///
1775/// [`file_per_package`]: CodeGenConfig::file_per_package
1776pub fn package_to_filename(package: &str) -> String {
1777 if package.is_empty() {
1778 format!("{}.rs", context::SENTINEL_MOD)
1779 } else {
1780 format!("{package}.rs")
1781 }
1782}
1783
/// Derive the content-file stem for a `.proto` file path.
///
/// A trailing `.proto` extension is removed (at most once) and every `/`
/// becomes `.`, e.g. `"google/protobuf/timestamp.proto"` →
/// `"google.protobuf.timestamp"`. A path without the extension is only
/// dot-joined.
///
/// Content files append `""`, `".__view"`, `".__oneof"`,
/// `".__view_oneof"`, or `".__ext"` plus `".rs"` to this stem — emitted
/// only for kinds with non-empty content.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let trimmed = match proto_path.strip_suffix(".proto") {
        Some(base) => base,
        None => proto_path,
    };
    trimmed
        .chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect()
}
1794
1795/// Merge downstream [`Companion`](GeneratedFileKind::Companion) files into
1796/// the per-package stitcher produced by [`generate`].
1797///
1798/// For each companion file this function locates the
1799/// [`PackageMod`](GeneratedFileKind::PackageMod) entry in `files` with a
1800/// matching package and appends `include!("<name>");` at file scope after
1801/// buffa's own output — at package root, alongside the owned message types,
1802/// not under `__buffa::`. The companion files themselves are appended to
1803/// `files` so that build integrations can write everything to disk in one
1804/// pass.
1805///
1806/// **Call this once per build**; it does not deduplicate, so a second call
1807/// with the same companions emits a second `include!` for each, which fails
1808/// to compile downstream with a duplicate-definition error.
1809///
1810/// `name` must be a bare-sibling filename — the same convention buffa uses
1811/// for its own `include!` calls, so it resolves relative to the stitcher
1812/// without any `OUT_DIR` prefix. Names must not contain `"`, `\`, `/`, or
1813/// newlines (the function `debug_assert!`s this in debug builds), and must
1814/// not collide with any of buffa's own generated filenames for the same
1815/// package (`<stem>.rs`, `<stem>.__view.rs`, etc.) — pick an unused suffix
1816/// such as `<stem>.__myplugin.rs`.
1817///
1818/// Companion files with no matching `PackageMod` (e.g. for a package buffa
1819/// did not generate any output for) are still appended to `files` but no
1820/// `include!` is emitted; the caller is responsible for wiring them up. If
1821/// you don't expect orphans, check that every companion's `package` appears
1822/// in `files` as a `PackageMod` after calling.
1823pub fn apply_companions(files: &mut Vec<GeneratedFile>, companions: Vec<GeneratedFile>) {
1824 for comp in &companions {
1825 debug_assert!(
1826 !comp.name.contains(['"', '\\', '/', '\n']),
1827 "companion file name {:?} contains a character that would break \
1828 the generated include!() literal or its bare-sibling resolution",
1829 comp.name
1830 );
1831 if let Some(pkg_mod) = files
1832 .iter_mut()
1833 .find(|f| f.kind == GeneratedFileKind::PackageMod && f.package == comp.package)
1834 {
1835 pkg_mod
1836 .content
1837 .push_str(&format!("include!(\"{}\");\n", comp.name));
1838 }
1839 }
1840 files.extend(companions);
1841}
1842
1843/// Code generation error.
1844#[derive(Debug, Clone, thiserror::Error)]
1845#[non_exhaustive]
1846pub enum CodeGenError {
1847 /// A required field was absent in a descriptor.
1848 ///
1849 /// The `&'static str` names the missing field for diagnostics.
1850 #[error("missing required descriptor field: {0}")]
1851 MissingField(&'static str),
1852 /// A resolved type path string could not be parsed as a Rust type.
1853 #[error("invalid Rust type path: '{0}'")]
1854 InvalidTypePath(String),
1855 /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
1856 #[error("generated code failed to parse as Rust: {0}")]
1857 InvalidSyntax(String),
1858 /// A requested file was not present in the descriptor set.
1859 #[error("file_to_generate '{0}' not found in descriptor set")]
1860 FileNotFound(String),
1861 /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
1862 /// resolved to a known descriptor field).
1863 #[error("codegen error: {0}")]
1864 Other(String),
1865 /// A proto field name uses the `__buffa_` reserved prefix, which would
1866 /// conflict with buffa's internal generated fields.
1867 #[error(
1868 "reserved field name '{field_name}' in message '{message_name}': \
1869 proto field names starting with '__buffa_' conflict with buffa's \
1870 internal fields"
1871 )]
1872 ReservedFieldName {
1873 message_name: String,
1874 field_name: String,
1875 },
1876 /// Two sibling messages produce the same Rust module name after
1877 /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
1878 /// become `pub mod http_request`).
1879 #[error(
1880 "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
1881 both produce module '{module_name}'"
1882 )]
1883 ModuleNameConflict {
1884 scope: String,
1885 name_a: String,
1886 name_b: String,
1887 module_name: String,
1888 },
1889 /// A proto package segment, message name, or file-level enum name
1890 /// would emit a Rust item matching the reserved sentinel `__buffa`.
1891 ///
1892 /// This is the only name buffa reserves in user namespace. Resolve by
1893 /// renaming the proto element.
1894 #[error(
1895 "reserved name '{name}' at {location}: this name is reserved for \
1896 buffa's generated ancillary types (views, oneof enums, \
1897 extensions). Rename the proto element."
1898 )]
1899 ReservedModuleName { name: String, location: String },
1900 /// The input contains a message with `option message_set_wire_format = true`
1901 /// but [`CodeGenConfig::allow_message_set`] was not set.
1902 #[error(
1903 "message '{message_name}' uses `option message_set_wire_format = true` \
1904 but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
1905 wire format — set allow_message_set(true) if this is intentional"
1906 )]
1907 MessageSetNotSupported { message_name: String },
1908 /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
1909 /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
1910 /// could not be parsed as a Rust attribute.
1911 #[error(
1912 "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
1913 Rust attribute ({detail})"
1914 )]
1915 InvalidCustomAttribute {
1916 path: String,
1917 attribute: String,
1918 detail: String,
1919 },
1920}
1921
1922#[cfg(test)]
1923mod tests;