buffa_codegen/lib.rs
1//! Shared code generation logic for buffa.
2//!
3//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
4//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
5//! that uses the `buffa` runtime.
6//!
7//! It is used by:
8//! - `protoc-gen-buffa` (protoc plugin)
9//! - `buffa-build` (build.rs integration)
10//!
11//! # Architecture
12//!
13//! The code generator is intentionally decoupled from how descriptors are
14//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
15//! Rust source strings. This means:
16//!
17//! - It doesn't parse `.proto` files.
18//! - It doesn't invoke `protoc`.
19//! - It doesn't do import resolution or name linking.
20//!
21//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod feature_gates;
29pub(crate) mod features;
30#[doc(hidden)]
31pub use buffa_descriptor::generated;
32pub mod idents;
33pub(crate) mod impl_message;
34pub(crate) mod impl_text;
35pub(crate) mod imports;
36pub(crate) mod message;
37pub(crate) mod oneof;
38pub(crate) mod owned_view;
39pub(crate) mod reflect;
40pub(crate) mod reflect_owned;
41pub(crate) mod reflect_view;
42pub(crate) mod view;
43
44use crate::generated::descriptor::FileDescriptorProto;
45use proc_macro2::TokenStream;
46use quote::{format_ident, quote};
47
/// Lints suppressed on generated code at module boundaries.
///
/// Consumed by [`generate_module_tree`], the per-package `.mod.rs`
/// stitcher, and `buffa-build`'s `_include.rs` writer. One list keeps
/// them in sync.
///
/// Every entry must be valid Rust token syntax: [`allow_lints_attr`]
/// feeds each name through `syn::parse_str` and panics if one fails to
/// parse. Tool lints are spelled with their `clippy::` prefix.
pub const ALLOW_LINTS: &[&str] = &[
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    // Cross-proto refs within the same package are emitted through the
    // canonical `super::super::__buffa::view::…` path even though the
    // target lives in the same generated module — using the bare name
    // would resolve, but the canonical path is stable when a sibling
    // proto defines a same-named natural-path re-export.
    "unused_qualifications",
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // A user `message View { message Inner }` produces
    // `__buffa::view::view::InnerView`; harmless but trips this lint.
    "clippy::module_inception",
];
71
72/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
73pub fn allow_lints_attr() -> TokenStream {
74 let lints: Vec<TokenStream> = ALLOW_LINTS
75 .iter()
76 .map(|l| syn::parse_str(l).expect("lint name parses as path"))
77 .collect();
78 quote! { #[allow( #(#lints),* )] }
79}
80
/// One generated output file.
///
/// Each `.proto` produces up to five **content files** (`<stem>.rs`,
/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
/// `<stem>.__ext.rs`) and each proto package produces one
/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
/// and authors the `pub mod __buffa { … }` ancillary tree.
/// Ancillary kinds with no content for that input file (e.g. a message
/// with no oneofs and no extensions) are omitted, and the stitcher's
/// `include!` set is filtered to match. The `__buffa` wrapper (and each
/// `view` / `oneof` / `ext` submodule inside it) is itself omitted when
/// it would be empty, so packages with only owned messages emit no
/// `__buffa` block at all.
/// See `DESIGN.md` → "Generated code layout".
///
/// Consumers normally only need to wire up the
/// [`GeneratedFileKind::PackageMod`] entries (one per package); the
/// per-proto content kinds are reached transitively via `include!` from
/// the stitcher. Write all files to disk; build a module tree from only
/// the `PackageMod` ones.
///
/// With [`CodeGenConfig::file_per_package`] set, the per-proto content
/// kinds are not emitted at all — the single `<dotted.pkg>.rs` (still
/// kind `PackageMod`) inlines what the stitcher would `include!`.
///
/// This is a plain data carrier: all four fields are public and only
/// `Debug` is derived, so values are moved or borrowed rather than cloned.
#[derive(Debug)]
pub struct GeneratedFile {
    /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
    pub name: String,
    /// The proto package this file belongs to.
    ///
    /// NOTE(review): presumably the dotted proto package (the same
    /// `<dotted.pkg>` used in stitcher file names) — confirm against the
    /// code that fills this in.
    pub package: String,
    /// What this file contains. Build integrations only need to wire up
    /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
    /// via `include!` from there.
    pub kind: GeneratedFileKind,
    /// The generated Rust source code, held in memory as one string.
    pub content: String,
}
118
/// Kind of [`GeneratedFile`].
///
/// [`generate`] produces up to five per-proto content kinds — one each
/// of [`Owned`](Self::Owned), [`View`](Self::View), [`Oneof`](Self::Oneof),
/// [`ViewOneof`](Self::ViewOneof), and [`Ext`](Self::Ext) per input
/// `.proto` file — plus one [`PackageMod`](Self::PackageMod) stitcher per
/// package. Kinds with no content for the input (a proto with no oneofs
/// emits no [`Oneof`](Self::Oneof) / [`ViewOneof`](Self::ViewOneof);
/// no extensions, no [`Ext`](Self::Ext); etc.) are omitted. Build
/// integrations only need to wire up `PackageMod` entries; the per-proto
/// content kinds are reached via `include!` from the stitcher and need
/// only be written to disk alongside it. Under
/// [`CodeGenConfig::file_per_package`] only `PackageMod` is emitted.
///
/// [`Companion`](Self::Companion) is the one kind *not* produced by
/// [`generate`]: downstream code generators construct `Companion` files
/// themselves and merge them into buffa's output via
/// [`apply_companions`].
///
/// This enum is `#[non_exhaustive]` — match with a wildcard arm so new
/// kinds can be added without a major version bump.
///
/// The enum is `Copy` and `Eq`, so kinds can be passed by value and
/// compared with `==` (e.g. to pick out the `PackageMod` entries).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GeneratedFileKind {
    /// Owned message structs and enums (`<stem>.rs`).
    Owned,
    /// View structs (`<stem>.__view.rs`).
    View,
    /// Owned oneof enums (`<stem>.__oneof.rs`).
    Oneof,
    /// View oneof enums (`<stem>.__view_oneof.rs`).
    ViewOneof,
    /// File-level proto-extension consts (`<stem>.__ext.rs`) — the
    /// `pub const` `ExtensionDescriptor` items generated from `extend`
    /// blocks. Not to be confused with [`Companion`](Self::Companion),
    /// which is unrelated downstream-supplied content.
    Ext,
    /// Per-package stitcher (`<dotted.pkg>.mod.rs`). The only file build
    /// systems need to wire up directly.
    PackageMod,
    /// Extra per-proto content from a downstream code generator (service
    /// stubs, extra trait impls, etc.) that travels with buffa's output.
    ///
    /// Not produced by [`generate`]. Construct these in your own generator
    /// and pass them to [`apply_companions`], which appends an `include!`
    /// for each one at file scope in the matching package's
    /// [`PackageMod`](Self::PackageMod) — after buffa's own output, at
    /// package root alongside the owned message types (**not** under the
    /// `__buffa::` sentinel module). Items declared `pub` in a companion
    /// file are visible at `crate::<pkg>::*`.
    ///
    /// Not to be confused with [`Ext`](Self::Ext), which is the buffa-
    /// generated file holding protobuf `extend` consts.
    Companion,
}
174
/// The Rust type a proto `string` field maps to in generated owned structs.
///
/// The default is [`String`](StringRepr::String). The other variants are
/// small-string-optimized types that avoid `String`'s growable buffer for
/// read-mostly schemas; each is gated behind the matching `buffa` Cargo feature
/// (`smol_str`, `ecow`, `compact_str`), and the downstream crate must enable
/// that feature so the re-exported type path (`::buffa::smol_str::SmolStr`,
/// etc.) resolves.
///
/// Select a representation through `buffa_build`'s `string_type` /
/// `string_type_in` builder methods. The wire format is identical regardless of
/// representation — only the in-memory owned type changes; view types keep
/// borrowing `&str`, and `map<_, string>` / `map<string, _>` keys and values
/// always stay `String`.
///
/// Sizes below are for 64-bit targets. See the buffa README for a fuller
/// comparison of the small-string crates.
///
/// This enum is `#[non_exhaustive]` — downstream code matching on it needs
/// a wildcard arm, which lets further representations be added without a
/// major version bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum StringRepr {
    /// `::buffa::alloc::string::String` — 24-byte struct, growable and mutable
    /// (the default).
    #[default]
    String,
    /// `smol_str::SmolStr` — 24-byte struct, inlines up to 23 bytes, `O(1)`
    /// clone of long strings via `Arc<str>`. **Immutable** (assign a new value
    /// to mutate). Requires the `buffa/smol_str` feature.
    SmolStr,
    /// `ecow::EcoString` — 16-byte struct, inlines up to 15 bytes, clone-on-write
    /// with `O(1)` clone. **Immutable** (assign a new value to mutate).
    /// Requires the `buffa/ecow` feature.
    EcoString,
    /// `compact_str::CompactString` — 24-byte struct, inlines up to 24 bytes,
    /// mutable (a drop-in `String` replacement). Requires the
    /// `buffa/compact_str` feature.
    CompactString,
}
212
213impl StringRepr {
214 /// The owned Rust type path emitted for a `string` field with this
215 /// representation.
216 pub(crate) fn type_path(self, resolver: &imports::ImportResolver) -> proc_macro2::TokenStream {
217 use quote::quote;
218 match self {
219 StringRepr::String => resolver.string(),
220 StringRepr::SmolStr => quote! { ::buffa::smol_str::SmolStr },
221 StringRepr::EcoString => quote! { ::buffa::ecow::EcoString },
222 StringRepr::CompactString => quote! { ::buffa::compact_str::CompactString },
223 }
224 }
225
226 /// Whether this is the default `String` representation, which keeps the
227 /// `String`-specialized fast paths (in-place `merge_string`, `clear()`,
228 /// native `Arbitrary`) instead of the generic `ProtoString` ones.
229 pub(crate) fn is_default(self) -> bool {
230 matches!(self, StringRepr::String)
231 }
232}
233
/// How much reflection support generated types get.
///
/// Selected through `buffa_build`'s `reflect_mode` builder method (or the
/// `protoc-gen-buffa` `reflect_mode=` option). Both reflective modes
/// ([`Bridge`](Self::Bridge) and [`VTable`](Self::VTable)) need the consuming
/// crate to depend on `buffa-descriptor` with its `reflect` feature and on
/// `std`; the call site is `foo.reflect().get(fd)` in either of them.
/// [`Off`](Self::Off) emits no reflection impls.
///
/// [`apply`](Self::apply) translates a mode into the
/// `generate_reflection` / `generate_reflection_vtable` flags of
/// [`CodeGenConfig`].
///
/// This enum is `#[non_exhaustive]` — downstream code matching on it needs
/// a wildcard arm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
#[non_exhaustive]
pub enum ReflectMode {
    /// No reflection impls.
    #[default]
    Off,
    /// `Reflectable::reflect()` round-trips the message through a
    /// `DynamicMessage` (encode → decode → boxed handle). Smaller generated
    /// code; pays an allocation and a re-encode per `reflect()` call.
    Bridge,
    /// `impl ReflectMessage` directly on the owned and view types, and
    /// `Reflectable::reflect()` borrows `self` with no round-trip. Larger
    /// generated code; near-free reflective access. Does not require view
    /// generation — with views off, only the owned impls are emitted.
    VTable,
}
256
257impl ReflectMode {
258 /// Apply this mode to a [`CodeGenConfig`] (sets `generate_reflection` /
259 /// `generate_reflection_vtable`). Used by the `buffa-build` and
260 /// `protoc-gen-buffa` front-ends.
261 pub fn apply(self, config: &mut CodeGenConfig) {
262 let (reflection, vtable) = match self {
263 ReflectMode::Off => (false, false),
264 ReflectMode::Bridge => (true, false),
265 ReflectMode::VTable => (true, true),
266 };
267 config.generate_reflection = reflection;
268 config.generate_reflection_vtable = vtable;
269 }
270}
271
/// Configuration for code generation.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal. Start from [`CodeGenConfig::default`]
/// and assign the public fields you want to change:
///
/// ```ignore
/// let mut config = CodeGenConfig::default();
/// config.generate_json = true;
/// ```
///
/// Each field's documentation states its default; those values are the
/// ones set by the `Default` impl.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Whether to generate borrowed view types (`MyMessageView<'a>`) in
    /// addition to owned types.
    ///
    /// Defaults to `true`.
    pub generate_views: bool,
    /// Whether to preserve unknown fields (default: true).
    pub preserve_unknown_fields: bool,
    /// Whether to derive `serde::Serialize` / `serde::Deserialize` on
    /// generated message structs and enum types, and emit `#[serde(with = "...")]`
    /// attributes for proto3 JSON's special scalar encodings (int64 as quoted
    /// string, bytes as base64, etc.).
    ///
    /// When this is `true`, the downstream crate must depend on `serde` and
    /// must enable the `buffa/json` feature for the runtime helpers.
    ///
    /// Oneof fields use `#[serde(flatten)]` with custom `Serialize` /
    /// `Deserialize` impls so that each variant appears as a top-level
    /// JSON field (proto3 JSON inline oneof encoding).
    ///
    /// Defaults to `false`.
    pub generate_json: bool,
    /// Whether to emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types.
    ///
    /// When this is `true`, the downstream crate must add `arbitrary` as an
    /// optional dependency and enable the `buffa/arbitrary` feature. The
    /// downstream crate's Cargo feature that gates `arbitrary` must be named
    /// exactly `"arbitrary"` — the generated `cfg_attr` uses that literal
    /// string and cannot be customized. This applies to both the struct-level
    /// `derive(Arbitrary)` and the per-field `#[arbitrary(with = ...)]`
    /// attributes emitted for `bytes_fields`-typed fields.
    ///
    /// For `bytes_fields`-typed fields, codegen emits `#[arbitrary(with = ...)]`
    /// using helpers in `::buffa::__private` since `bytes::Bytes` has no
    /// `Arbitrary` impl. Singular, optional, and repeated bytes fields are all
    /// covered. Map values are always `Vec<u8>` regardless of `bytes_fields`
    /// and require no special handling.
    ///
    /// Defaults to `false`.
    pub generate_arbitrary: bool,
    /// External type path mappings.
    ///
    /// Each entry maps either a fully-qualified protobuf package prefix
    /// (e.g., `".my.common"`) to a Rust module path (e.g.,
    /// `"::common_protos"`), or a single type FQN (e.g.,
    /// `".my.common.Shared"`) to a full Rust type path (e.g.,
    /// `"::shared_types::Shared"`). Matched types reference the extern Rust
    /// path instead of being generated, allowing shared proto packages to be
    /// compiled once in a dedicated crate and referenced from others. An
    /// exact type-FQN entry wins over a covering package prefix; otherwise
    /// the longest matching prefix wins.
    ///
    /// Well-known types (`google.protobuf.*`) are automatically mapped to
    /// `::buffa_types::google::protobuf::*` without needing an explicit
    /// entry here. To override with a custom implementation, add an
    /// `extern_path` for `.google.protobuf` pointing to your crate.
    ///
    /// Defaults to empty.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields should use
    /// `bytes::Bytes` instead of `Vec<u8>`.
    ///
    /// Each entry is a proto path prefix (e.g., `".my.pkg.MyMessage.data"` for
    /// a specific field, or `"."` for all bytes fields). The path is matched
    /// as a prefix, so `"."` applies to every bytes field in every message.
    ///
    /// Defaults to empty, i.e. every bytes field stays `Vec<u8>`.
    pub bytes_fields: Vec<String>,
    /// Ordered (proto-path-prefix, [`StringRepr`]) rules selecting the Rust type
    /// for `string` fields. Later rules win, so a broad rule (e.g. `"."` →
    /// `SmolStr`) can be refined by a more specific one
    /// (`".my.pkg.Msg.field"` → `CompactString`). Fields matching no rule use
    /// `String`. The path is matched with the same proto-segment-aware prefix
    /// logic as [`bytes_fields`](Self::bytes_fields).
    ///
    /// Applies to singular, optional, and repeated `string` fields and oneof
    /// `string` variants. Map keys and values always stay `String`, mirroring
    /// the bytes path (where map values always stay `Vec<u8>`).
    ///
    /// Defaults to empty.
    pub string_fields: Vec<(String, StringRepr)>,
    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
    /// for such string fields instead of `String` / `&str`.
    ///
    /// When `false` (the default), buffa emits `String` for all string fields
    /// and **validates UTF-8 on decode** — stricter than proto2 requires, but
    /// ergonomic and safe.
    ///
    /// When `true`, string fields with `utf8_validation = NONE` (all proto2
    /// strings by default, and editions fields that opt into `NONE`) become
    /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller decides at the
    /// call site whether to `std::str::from_utf8` (checked) or
    /// `from_utf8_unchecked` (trusted-input fast path). This is the only
    /// sound Rust mapping when strings may actually contain non-UTF-8 bytes.
    ///
    /// **This is a breaking change for proto2** — enable only for new code or
    /// when profiling identifies UTF-8 validation as a bottleneck.
    pub strict_utf8_mapping: bool,
    /// Permit `option message_set_wire_format = true` on input messages.
    ///
    /// MessageSet is a legacy Google-internal wire format that wraps each
    /// extension in a group structure instead of using regular field tags.
    /// When `false` (the default), encountering such a message is a codegen
    /// error — the flag exists to make MessageSet use explicit, since the
    /// format is obsolete outside of interop with very old Google protos.
    pub allow_message_set: bool,
    /// Whether to emit `impl buffa::text::TextFormat` on generated message
    /// structs for textproto (human-readable text format) encoding/decoding.
    ///
    /// When this is `true`, the downstream crate must enable the `buffa/text`
    /// feature for the runtime encoder/decoder.
    ///
    /// Defaults to `false`.
    pub generate_text: bool,
    /// Whether the per-package `.mod.rs` stitcher emits
    /// `__buffa::register_types(&mut TypeRegistry)`.
    ///
    /// Default `true`. The fn aggregates `Any` type entries and extension
    /// entries for every message in the package. Set to `false` for
    /// crates that don't use extensions/`Any`, or that hand-roll
    /// registration (e.g. `buffa-types`' `register_wkt_types`, which
    /// knows the JSON-Any `is_wkt` special-casing the generic fn does
    /// not). The per-message `__*_JSON_ANY` / `__*_TEXT_ANY` consts are
    /// still emitted; only the aggregating fn is suppressed.
    pub emit_register_fn: bool,
    /// Emit one `<dotted.package>.rs` per proto package instead of the
    /// per-proto-file content set plus `<pkg>.mod.rs` stitcher.
    ///
    /// The single file inlines what the stitcher would otherwise `include!`,
    /// producing the same `__buffa::{view,oneof,ext,...}` module structure.
    /// Intended for Buf Schema Registry generated SDKs, whose `lib.rs`
    /// synthesis builds the module tree from `<dotted.package>.rs` filenames.
    ///
    /// Under `strategy: directory` this only sees one directory's files per
    /// invocation, so the input module must be `PACKAGE_DIRECTORY_MATCH`-clean
    /// (one package per directory) for the output to be complete. BSR-hosted
    /// modules satisfy this by lint default. If a package spans multiple
    /// directories, separate invocations each emit their own `<pkg>.rs` and
    /// the last write wins — silent partial output, not a codegen error.
    ///
    /// Defaults to `false`.
    pub file_per_package: bool,
    /// Custom attributes to inject on generated types (messages and enums).
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified proto name: `"."` applies to
    /// all types, `".my.pkg"` to types in that package, `".my.pkg.MyMessage"`
    /// to a specific type. The `attribute` is a raw Rust attribute string
    /// (e.g., `"#[derive(serde::Serialize)]"`).
    ///
    /// Defaults to empty.
    pub type_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated struct fields.
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified field path (e.g.,
    /// `".my.pkg.MyMessage.my_field"`). `"."` applies to all fields.
    ///
    /// Defaults to empty.
    pub field_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated message structs only (not enums).
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// message structs, not enum types. Useful for struct-only attributes like
    /// `#[serde(default)]`.
    ///
    /// Defaults to empty.
    pub message_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated enum types only (not messages).
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// enum types. Useful for enum-only attributes like
    /// `#[derive(strum::EnumIter)]` when the user does not want to apply the
    /// same attribute to every message in the matched scope.
    ///
    /// Defaults to empty.
    pub enum_attributes: Vec<(String, String)>,
    /// Wrap generated `impl`s in `#[cfg(feature = "...")]` instead of
    /// emitting them unconditionally.
    ///
    /// When `true`, the impls controlled by [`generate_json`],
    /// [`generate_views`], and [`generate_text`] are emitted wrapped in
    /// `#[cfg(feature = "json" | "views" | "text")]` (or
    /// `#[cfg_attr(feature = ..., ...)]` for derives and field attributes)
    /// rather than unconditionally. The consuming crate must define matching
    /// Cargo features that enable the corresponding runtime support, e.g.:
    ///
    /// ```toml
    /// [features]
    /// json = ["buffa/json", "dep:serde", "dep:serde_json"]
    /// views = []
    /// text = ["buffa/text"]
    /// ```
    ///
    /// The [`generate_*`] flags still control *whether* an impl kind is
    /// emitted at all — this flag only controls whether it is `cfg`-gated.
    /// `generate_arbitrary` is always `cfg_attr`-gated on
    /// `feature = "arbitrary"` regardless of this flag, because `arbitrary`
    /// is an optional dependency by design.
    ///
    /// When [`generate_reflection`](Self::generate_reflection) is also on, the
    /// reflection impls are gated on `feature = "reflect"` alongside
    /// json/views/text. To gate *only* reflection without gating json/views/text,
    /// use [`gate_reflect_on_crate_feature`](Self::gate_reflect_on_crate_feature)
    /// instead.
    ///
    /// This is the mechanism that lets `buffa-descriptor` and `buffa-types`
    /// ship every impl while keeping the codegen toolchain
    /// (`buffa-codegen`/`buffa-build`/`protoc-gen-buffa`) lean: those crates
    /// depend on `buffa-descriptor` with `default-features = false` and so
    /// don't pull `serde`/`serde_json`/`base64`. Most consumers don't need
    /// this — they decide at build-script time whether to generate JSON, and
    /// if they say yes, they want `impl Serialize` to just exist.
    ///
    /// Defaults to `false`.
    ///
    /// [`generate_json`]: Self::generate_json
    /// [`generate_views`]: Self::generate_views
    /// [`generate_text`]: Self::generate_text
    /// [`generate_*`]: Self::generate_json
    pub gate_impls_on_crate_features: bool,
    /// Generate `with_*` builder-style setter methods for explicit-presence fields.
    ///
    /// Each explicit-presence scalar, bytes, or enum field gets a
    /// `pub fn with_<name>(mut self, value: T) -> Self` method that wraps the
    /// value in `Some` and returns `self`, enabling chained construction:
    ///
    /// ```ignore
    /// let req = MyRequest::default()
    ///     .with_name("alice")
    ///     .with_timeout_ms(30_000);
    /// ```
    ///
    /// **Fields that receive a setter:** proto3 `optional`, proto2 `optional`,
    /// and editions fields with `field_presence = EXPLICIT`.
    ///
    /// **Fields that do not receive a setter:** message fields
    /// (`MessageField<T>`), repeated fields, map fields, oneof variant fields,
    /// proto2 `required` fields, and any implicit-presence field.
    ///
    /// There is no `clear_<name>` companion — to clear a field, assign `None`
    /// directly: `msg.name = None;`.
    ///
    /// Defaults to `true`.
    pub generate_with_setters: bool,
    /// Generate `impl Reflectable` for owned message types (bridge mode).
    ///
    /// When enabled, each generated message gets an
    /// `impl ::buffa_descriptor::reflect::Reflectable` whose `reflect()`
    /// round-trips through `DynamicMessage` (encode → decode → reflective
    /// handle), and the package's `__buffa::reflect` submodule embeds the
    /// `FileDescriptorSet` bytes plus a lazily-built `DescriptorPool`.
    ///
    /// **Runtime requirements** — the consuming crate must depend on:
    /// - `buffa-descriptor` with the `reflect` feature.
    /// - `std` (the lazy pool accessor uses `std::sync::OnceLock`).
    ///
    /// When [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features)
    /// is on, the impls are wrapped in `#[cfg(feature = "reflect")]` so the
    /// consuming crate can opt out per build.
    ///
    /// **Performance** — `reflect()` is one full encode/decode round-trip
    /// plus a heap allocation. The first call also pays a one-time pool
    /// build cost (linking the embedded `FileDescriptorSet`). For zero-copy
    /// reflective access over view types without the round-trip, additionally
    /// enable [`generate_reflection_vtable`](Self::generate_reflection_vtable).
    ///
    /// **Binary size** — each package embeds its own copy of the full
    /// `FileDescriptorSet` (transitive closure). For a multi-package
    /// codegen run this duplicates the FDS bytes per package. Acceptable
    /// for the bridge prototype; deduplication via a crate-root module is
    /// a planned follow-up.
    ///
    /// Defaults to `false`.
    pub generate_reflection: bool,
    /// Emit vtable-mode reflection: `impl ReflectMessage` / `impl
    /// ReflectElement` on the owned message structs and (when views are
    /// generated) the view types, and switch the owned
    /// `Reflectable::reflect()` body to borrow `self`
    /// (`ReflectCow::Borrowed(self)`) instead of the bridge round-trip.
    ///
    /// Reflective access then reads struct fields in place — no encode/decode
    /// round-trip and no per-field allocation — for both a decoded view and an
    /// in-memory owned message.
    ///
    /// Requires [`generate_reflection`](Self::generate_reflection) (the impls
    /// resolve against the same embedded `DescriptorPool`) but not
    /// [`generate_views`](Self::generate_views) — with views off, only the
    /// owned impls are emitted. Set via [`ReflectMode::VTable`]
    /// — front-ends expose it as `buffa_build::Config::reflect_mode` /
    /// `protoc-gen-buffa`'s `reflect_mode=vtable`.
    ///
    /// Defaults to `false`.
    pub generate_reflection_vtable: bool,
    /// Gate the reflection impls behind a `reflect` crate feature, *without*
    /// gating json/views/text (unlike
    /// [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features),
    /// which gates them all together).
    ///
    /// Used by crates that ship view/text impls unconditionally but want the
    /// reflection surface — which pulls a `buffa-descriptor` dependency and
    /// `std` — to be opt-in. `buffa-types` is the motivating case: its WKT
    /// views are always available, but `impl ReflectMessage` for them is gated
    /// behind `buffa-types`'s `reflect` feature.
    ///
    /// When [`gate_impls_on_crate_features`](Self::gate_impls_on_crate_features)
    /// is already on, reflection is gated regardless and this flag is ignored.
    ///
    /// A low-level knob for crates whose generated code is a public interface
    /// (`buffa-types`, the conformance harness). Set directly by `gen_wkt_types`
    /// and exposed through `buffa_build::Config::gate_reflect_on_crate_feature`
    /// (currently `#[doc(hidden)]`, paired with the experimental vtable flag).
    ///
    /// Defaults to `false`.
    pub gate_reflect_on_crate_feature: bool,
    /// Emit idiomatic `UpperCamelCase` constant aliases alongside each enum
    /// variant.
    ///
    /// Protobuf style names enum values in `SHOUTY_SNAKE_CASE`, conventionally
    /// prefixed with the enum name (`RULE_LEVEL_HIGH`). Those names remain the
    /// definitive Rust variants — they are guaranteed unique and valid by
    /// protobuf, and existing references (including `Debug` output) are
    /// unchanged. When this is enabled, codegen additionally emits associated
    /// `const`s with the prefix stripped and the name converted to
    /// `UpperCamelCase` (`RULE_LEVEL_HIGH` → `High`), so downstream code can
    /// write `RuleLevel::High`.
    ///
    /// The conversion is lossy, so two values can collide (`FOO_BAR` and
    /// `FOO__BAR` both map to `FooBar`). The rule is all-or-nothing per enum:
    /// if any two values would collide after conversion, or a value would yield
    /// an invalid identifier, **no** aliases are emitted for that enum (a
    /// [`CodeGenWarning`] and an enum doc note explain why). This keeps every
    /// match either fully `SHOUTY_SNAKE_CASE` or fully idiomatic, never a forced
    /// mix.
    ///
    /// The aliases are associated `const`s, which work in pattern position too:
    /// a `match` written entirely against aliases is still exhaustiveness-checked
    /// (the "non-exhaustive" error names the underlying `SHOUTY_SNAKE_CASE`
    /// variant, since that is the canonical name).
    ///
    /// Defaults to `true`: the aliases are purely additive (the proto names
    /// remain the variants, and `Debug` is unchanged), so enabling by default is
    /// backward-compatible, and the all-or-nothing rule guarantees correctness on
    /// any enum.
    pub idiomatic_enum_aliases: bool,
}
596
597impl Default for CodeGenConfig {
598 fn default() -> Self {
599 Self {
600 generate_views: true,
601 preserve_unknown_fields: true,
602 generate_json: false,
603 generate_arbitrary: false,
604 extern_paths: Vec::new(),
605 bytes_fields: Vec::new(),
606 string_fields: Vec::new(),
607 strict_utf8_mapping: false,
608 allow_message_set: false,
609 generate_text: false,
610 emit_register_fn: true,
611 file_per_package: false,
612 type_attributes: Vec::new(),
613 field_attributes: Vec::new(),
614 message_attributes: Vec::new(),
615 enum_attributes: Vec::new(),
616 gate_impls_on_crate_features: false,
617 generate_with_setters: true,
618 generate_reflection: false,
619 generate_reflection_vtable: false,
620 gate_reflect_on_crate_feature: false,
621 idiomatic_enum_aliases: true,
622 }
623 }
624}
625
626impl CodeGenConfig {
627 /// Active [`feature_gates::FeatureGates`] for this config.
628 ///
629 /// Recomputed on each call (cheap — three boolean ANDs); call once at
630 /// the top of a generation function and thread through, or call inline
631 /// at each use site, whichever reads better.
632 pub(crate) fn feature_gates(&self) -> feature_gates::FeatureGates {
633 feature_gates::FeatureGates::for_config(self)
634 }
635}
636
637/// Compute the effective extern path list by starting with user-provided
638/// mappings and adding the default WKT mapping if appropriate.
639///
640/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
641/// is added unless:
642/// - The user already provided an extern_path covering `.google.protobuf`
643/// - Any of the files being generated are in the `google.protobuf` package
644/// (i.e., we're building `buffa-types` itself)
645pub(crate) fn effective_extern_paths(
646 file_descriptors: &[FileDescriptorProto],
647 files_to_generate: &[String],
648 config: &CodeGenConfig,
649) -> Vec<(String, String)> {
650 let mut paths = config.extern_paths.clone();
651
652 // Only an EXACT .google.protobuf mapping suppresses auto-injection.
653 // A sub-package mapping like .google.protobuf.compiler does NOT cover
654 // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
655 // lets both coexist, so we still inject the parent mapping.
656 let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
657
658 if !has_wkt_mapping {
659 // Check if we're generating google.protobuf files ourselves
660 // (e.g., building buffa-types). If so, don't auto-map.
661 let generating_wkts = file_descriptors
662 .iter()
663 .filter(|fd| {
664 fd.name
665 .as_deref()
666 .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
667 })
668 .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
669
670 if !generating_wkts {
671 paths.push((
672 ".google.protobuf".to_string(),
673 "::buffa_types::google::protobuf".to_string(),
674 ));
675 }
676 }
677
678 paths
679}
680
681/// Compute the effective file-level extern path list.
682///
683/// File-level mappings route a specific `.proto` file to a Rust module root,
684/// taking priority over the package-level mappings from
685/// [`effective_extern_paths`]. They exist to resolve a structural problem:
686/// `descriptor.proto` is in the same `google.protobuf` package as the
687/// JSON-mappable WKTs (`Timestamp`, `Any`, …), but its types live in
688/// `buffa-descriptor`, not `buffa-types`. A single package-keyed
689/// `.google.protobuf` extern_path can route the package to one crate or the
690/// other; it can't split it. The file-level mapping splits it.
691///
692/// Auto-injected mappings (when not suppressed):
693///
694/// | Proto file | Rust module |
695/// |---|---|
696/// | `google/protobuf/descriptor.proto` | `::buffa_descriptor::generated::descriptor` |
697/// | `google/protobuf/compiler/plugin.proto` | `::buffa_descriptor::generated::compiler` |
698///
699/// Suppression conditions, evaluated **per file**:
700///
701/// - **A user-provided `extern_path` covers the file's package.** That
702/// override has covered the file's types since the package mapping was
703/// introduced; auto-injecting a higher-priority file-level mapping would
704/// silently redirect them away from the user's crate. Matching is via
705/// the same longest-prefix logic the package resolver uses, so both an
706/// exact `.google.protobuf` mapping and a sub-package
707/// `.google.protobuf.compiler` mapping suppress the entries they cover —
708/// `.google.protobuf` suppresses both, `.google.protobuf.compiler`
709/// suppresses only `plugin.proto`.
710/// - **The proto file itself is in `files_to_generate`.** When building
711/// `buffa-descriptor` (or any local copy of `descriptor.proto`), its types
712/// must resolve to the local module, not externally.
713///
714/// Currently internal-only — there is no `CodeGenConfig` field for
715/// user-provided *file-level* mappings. The user-facing `extern_path` API is
716/// keyed by proto package *or* type FQN (per-type overrides, issue #111);
717/// per-file overrides may be added later as a public feature if a concrete
718/// need arises.
719pub(crate) fn effective_file_extern_paths(
720 files_to_generate: &[String],
721 config: &CodeGenConfig,
722) -> Vec<(String, String)> {
723 // (proto file path, proto package, Rust module root). The package is
724 // recorded alongside the file so the user-override suppression check
725 // is per-file: a `.google.protobuf.compiler` extern_path covers only
726 // `plugin.proto`, while `.google.protobuf` covers both.
727 const DESCRIPTOR_FILES: [(&str, &str, &str); 2] = [
728 (
729 "google/protobuf/descriptor.proto",
730 "google.protobuf",
731 "::buffa_descriptor::generated::descriptor",
732 ),
733 (
734 "google/protobuf/compiler/plugin.proto",
735 "google.protobuf.compiler",
736 "::buffa_descriptor::generated::compiler",
737 ),
738 ];
739
740 DESCRIPTOR_FILES
741 .into_iter()
742 .filter(|(proto_file, package, _)| {
743 // Yield to a user package-level extern_path that already covers
744 // this file's package: anyone who wrote
745 // `extern_path(".google.protobuf", "::my_crate")` (or a
746 // sub-package mapping) today routes these types to their crate;
747 // the auto-injected file-level mapping must not silently
748 // outrank it.
749 if context::resolve_extern_prefix(package, &config.extern_paths).is_some() {
750 return false;
751 }
752 // Don't externalize a file we're generating locally.
753 !files_to_generate.iter().any(|f| f == proto_file)
754 })
755 .map(|(proto_file, _, rust_module)| (proto_file.to_string(), rust_module.to_string()))
756 .collect()
757}
758
/// One CamelCase collision: a target identifier and the proto value names that
/// would all convert onto it.
///
/// Part of [`CodeGenWarning::IdiomaticAliasesSuppressed`]. When that warning
/// is rendered through `Display`, each conflict appears as
/// `<proto values, comma-separated> → <camel_target>`.
///
/// Marked `#[non_exhaustive]` so fields can be added without a breaking
/// change.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct AliasConflict {
    /// The `UpperCamelCase` identifier the colliding values map to.
    pub camel_target: String,
    /// The proto value names that convert onto `camel_target` (includes a
    /// literal variant name when an alias would shadow it).
    pub proto_values: Vec<String>,
}
772
/// A non-fatal diagnostic produced during code generation.
///
/// Returned by [`generate_with_diagnostics`]. Render the human-readable form via
/// the [`Display`](core::fmt::Display) impl (e.g. `cargo:warning={warning}`), or
/// match on the variant for programmatic handling. The enum and its variants are
/// `#[non_exhaustive]` so new diagnostic kinds and fields can be added without a
/// breaking change.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum CodeGenWarning {
    /// Idiomatic CamelCase aliases were suppressed for an enum because two or
    /// more proto values collide after conversion, or a value would convert to
    /// an invalid identifier. The enum's `SHOUTY_SNAKE_CASE` variants are
    /// unaffected.
    #[non_exhaustive]
    IdiomaticAliasesSuppressed {
        /// The Rust name of the affected enum.
        enum_name: String,
        /// Each collision, by target identifier. Empty if the only problem was
        /// invalid identifiers.
        conflicts: Vec<AliasConflict>,
        /// Proto values that would convert to an invalid Rust identifier.
        /// Empty if the only problem was a naming collision.
        invalid: Vec<String>,
    },
    /// A field or oneof accessor on a generated `FooOwnedView` wrapper was
    /// suppressed because the proto name collides with one of the wrapper's
    /// reserved method names (`decode`, `view`, `bytes`, …). The field stays
    /// fully accessible through `view()` on the wrapper (or
    /// `OwnedView::reborrow`).
    #[non_exhaustive]
    OwnedViewAccessorSuppressed {
        /// The Rust name of the wrapper type (e.g. `FooOwnedView`).
        wrapper_name: String,
        /// The proto field or oneof name whose accessor was suppressed.
        field_name: String,
    },
}
810
811impl core::fmt::Display for CodeGenWarning {
812 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
813 match self {
814 Self::IdiomaticAliasesSuppressed {
815 enum_name,
816 conflicts,
817 invalid,
818 } => {
819 // Name the cause accurately: a collision, an invalid identifier,
820 // or both.
821 let cause = match (conflicts.is_empty(), invalid.is_empty()) {
822 (false, true) => "naming conflict",
823 (true, false) => "invalid identifier",
824 _ => "naming conflict / invalid identifier",
825 };
826 write!(
827 f,
828 "enum `{enum_name}`: idiomatic CamelCase aliases suppressed ({cause})"
829 )?;
830 let mut parts: Vec<String> = conflicts
831 .iter()
832 .map(|c| format!("{} → {}", c.proto_values.join(", "), c.camel_target))
833 .collect();
834 parts.extend(invalid.iter().map(|n| format!("{n} → invalid identifier")));
835 if !parts.is_empty() {
836 write!(f, ": {}", parts.join("; "))?;
837 }
838 Ok(())
839 }
840 Self::OwnedViewAccessorSuppressed {
841 wrapper_name,
842 field_name,
843 } => {
844 write!(
845 f,
846 "`{wrapper_name}`: accessor for field `{field_name}` suppressed \
847 (collides with a reserved wrapper method); use `.view().{field_name}` instead"
848 )
849 }
850 }
851 }
852}
853
854/// Generate Rust source files from a set of file descriptors.
855///
856/// `files_to_generate` is the set of file names that were explicitly requested
857/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
858/// dependencies may be present in `file_descriptors` but won't produce output
859/// files unless they appear in `files_to_generate`.
860///
861/// Each `.proto` emits up to five content files (kinds with no content
862/// are omitted); each distinct package emits one `<pkg>.mod.rs`
863/// stitcher. Packages are processed in sorted order for deterministic
864/// output.
865///
866/// # Diagnostics
867///
868/// Non-fatal diagnostics produced during generation (e.g. an enum whose
869/// idiomatic CamelCase aliases were suppressed by a naming conflict) are
870/// **discarded** here. Use [`generate_with_diagnostics`] to receive them and
871/// surface them as build warnings.
872pub fn generate(
873 file_descriptors: &[FileDescriptorProto],
874 files_to_generate: &[String],
875 config: &CodeGenConfig,
876) -> Result<Vec<GeneratedFile>, CodeGenError> {
877 Ok(generate_with_diagnostics(file_descriptors, files_to_generate, config)?.0)
878}
879
880/// Like [`generate`], but also returns the non-fatal [`CodeGenWarning`]s
881/// collected during generation (e.g. enums whose idiomatic CamelCase aliases
882/// were suppressed by a naming conflict).
883///
884/// Surface each warning via its [`Display`](core::fmt::Display) impl — e.g. as a
885/// `cargo:warning=...` from a `build.rs`, or on stderr from a standalone
886/// generator — or match on it for programmatic handling. [`generate`] discards
887/// them, so existing callers are unaffected.
888///
889/// Warnings are returned only on success. On error, any warnings already
890/// collected are dropped along with the partial output — the [`CodeGenError`]
891/// is the actionable signal.
892///
893/// # Errors
894///
895/// Returns [`CodeGenError::FileNotFound`] if a name in `files_to_generate` has
896/// no matching descriptor, [`CodeGenError::Other`] if `generate_reflection_vtable`
897/// is set without `generate_reflection`, and other [`CodeGenError`] variants for
898/// malformed descriptors (e.g. a missing required field) encountered while
899/// generating.
900pub fn generate_with_diagnostics(
901 file_descriptors: &[FileDescriptorProto],
902 files_to_generate: &[String],
903 config: &CodeGenConfig,
904) -> Result<(Vec<GeneratedFile>, Vec<CodeGenWarning>), CodeGenError> {
905 // Vtable reflection resolves against the per-package descriptor pool, which
906 // is emitted by bridge-mode reflection — so it requires `generate_reflection`.
907 // It does NOT require views: the owned `impl ReflectMessage` is self-contained,
908 // so with views off, vtable mode still emits owned-message reflection (the
909 // view impls are simply skipped along with the views).
910 if config.generate_reflection_vtable && !config.generate_reflection {
911 return Err(CodeGenError::Other(
912 "generate_reflection_vtable requires generate_reflection to be enabled \
913 (it provides the descriptor pool the reflect impls resolve against)"
914 .into(),
915 ));
916 }
917
918 let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
919
920 // Group requested files by package. BTreeMap → deterministic output order.
921 let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
922 std::collections::BTreeMap::new();
923 for file_name in files_to_generate {
924 let file_desc = file_descriptors
925 .iter()
926 .find(|f| f.name.as_deref() == Some(file_name.as_str()))
927 .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
928 let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
929 by_package.entry(pkg).or_default().push(file_desc);
930 }
931
932 // Reflection: serialize the FileDescriptorSet once, regardless of how
933 // many packages are in the request. Each package embeds its own copy of
934 // the bytes (binary-size dedup is a follow-up), but the build-time
935 // re-encoding cost shouldn't scale with the package count.
936 let fds_bytes = if config.generate_reflection {
937 reflect::encode_fds_once(file_descriptors)
938 } else {
939 Vec::new()
940 };
941
942 let mut output = Vec::new();
943 for (package, files) in by_package {
944 generate_package(&ctx, &package, &files, &fds_bytes, &mut output)?;
945 }
946
947 Ok((output, ctx.take_warnings()))
948}
949
950/// Generate a module tree that assembles per-package `.mod.rs` files into
951/// nested `pub mod` blocks matching the protobuf package hierarchy.
952///
953/// Each entry is a `(mod_file_name, package)` pair where `package` is the
954/// dot-separated protobuf package name (e.g., `"google.api"`) and
955/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
956/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
957/// content files are reached via `include!` from the stitcher).
958///
959/// `include_mode` controls how `include!` paths are emitted.
960///
961/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
962/// valid when the output is used directly as a module file (`mod.rs`),
963/// invalid when consumed via `include!`.
964pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
965 entries: &[(F, P)],
966 include_mode: IncludeMode<'_>,
967 emit_inner_allow: bool,
968) -> String {
969 use std::collections::BTreeMap;
970 use std::fmt::Write;
971
972 use crate::idents::escape_mod_ident;
973
974 #[derive(Default)]
975 struct ModNode {
976 files: Vec<String>,
977 children: BTreeMap<String, Self>,
978 }
979
980 let mut root = ModNode::default();
981
982 for (file_name, package) in entries {
983 let package = package.as_ref();
984 let pkg_parts: Vec<&str> = if package.is_empty() {
985 vec![]
986 } else {
987 package.split('.').collect()
988 };
989
990 let mut node = &mut root;
991 for seg in &pkg_parts {
992 node = node.children.entry(seg.to_string()).or_default();
993 }
994 node.files.push(file_name.as_ref().to_string());
995 }
996
997 let lints = ALLOW_LINTS.join(", ");
998 let mut out = String::new();
999 let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
1000 if emit_inner_allow {
1001 let _ = writeln!(out, "#![allow({lints})]");
1002 }
1003 let _ = writeln!(out);
1004
1005 fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
1006 let indent = " ".repeat(depth);
1007
1008 for file in &node.files {
1009 match mode {
1010 IncludeMode::Relative(prefix) => {
1011 let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
1012 }
1013 IncludeMode::OutDir => {
1014 let _ = writeln!(
1015 out,
1016 r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
1017 );
1018 }
1019 }
1020 }
1021
1022 for (name, child) in &node.children {
1023 let escaped = escape_mod_ident(name);
1024 let _ = writeln!(out, "{indent}#[allow({lints})]");
1025 let _ = writeln!(out, "{indent}pub mod {escaped} {{");
1026 let _ = writeln!(out, "{indent} use super::*;");
1027 emit(out, child, depth + 1, mode, lints);
1028 let _ = writeln!(out, "{indent}}}");
1029 }
1030 }
1031
1032 emit(&mut out, &root, 0, include_mode, &lints);
1033 out
1034}
1035
/// How [`generate_module_tree`] emits `include!` paths.
///
/// The type is `Copy`, so it can be passed by value through the recursive
/// tree renderer.
#[derive(Debug, Clone, Copy)]
pub enum IncludeMode<'a> {
    /// `include!("<prefix><file>")` — relative to the including file.
    /// Prefix is typically `""` or `"gen/"`. The prefix is concatenated with
    /// the file name verbatim, so it must carry its own trailing separator.
    Relative(&'a str),
    /// `include!(concat!(env!("OUT_DIR"), "/<file>"))` — for build.rs output.
    OutDir,
}
1045
1046/// Validate one input descriptor before generating code for it.
1047///
1048/// Checks, in one walk of the message tree:
1049///
1050/// - **Reserved field names**: no field starts with `__buffa_` (would clash
1051/// with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
1052/// - **Module-name conflicts**: no two sibling messages snake_case to the
1053/// same module name (e.g. `HTTPRequest` vs `HttpRequest`).
1054/// - **Reserved sentinel**: no package segment, message-module name, or
1055/// file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
1056/// Ancillary types live under `pkg::__buffa::…`; a proto element
1057/// emitting an item named `__buffa` at package root would produce
1058/// E0428 against `pub mod __buffa`. This is the only name buffa
1059/// reserves in user namespace.
1060fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
1061 use std::collections::HashMap;
1062
1063 let sentinel = context::SENTINEL_MOD;
1064 let package = file.package.as_deref().unwrap_or("");
1065 if package.split('.').any(|seg| seg == sentinel) {
1066 return Err(CodeGenError::ReservedModuleName {
1067 name: sentinel.to_string(),
1068 location: format!("package '{package}'"),
1069 });
1070 }
1071 // File-level enums emit `pub enum <name>` at package root with the
1072 // proto name preserved verbatim (no PascalCase normalization), so a
1073 // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
1074 // enums live inside their owner message's module and cannot collide
1075 // with the package-root sentinel, so only file-level is checked.
1076 for enum_type in &file.enum_type {
1077 let name = enum_type.name.as_deref().unwrap_or("");
1078 if name == sentinel {
1079 return Err(CodeGenError::ReservedModuleName {
1080 name: sentinel.to_string(),
1081 location: format!("enum '{package}.{name}'"),
1082 });
1083 }
1084 }
1085
1086 fn walk(
1087 messages: &[crate::generated::descriptor::DescriptorProto],
1088 scope: &str,
1089 sentinel: &str,
1090 ) -> Result<(), CodeGenError> {
1091 // snake_case module name → original proto name (for conflict diag).
1092 let mut seen: HashMap<String, &str> = HashMap::new();
1093
1094 for msg in messages {
1095 let name = msg.name.as_deref().unwrap_or("");
1096 let fqn = if scope.is_empty() {
1097 name.to_string()
1098 } else {
1099 format!("{scope}.{name}")
1100 };
1101
1102 for field in &msg.field {
1103 if let Some(fname) = &field.name {
1104 if fname.starts_with("__buffa_") {
1105 return Err(CodeGenError::ReservedFieldName {
1106 message_name: fqn,
1107 field_name: fname.clone(),
1108 });
1109 }
1110 }
1111 }
1112
1113 let module_name = crate::oneof::to_snake_case(name);
1114 if module_name == sentinel {
1115 return Err(CodeGenError::ReservedModuleName {
1116 name: sentinel.to_string(),
1117 location: format!("message '{fqn}'"),
1118 });
1119 }
1120 if let Some(existing) = seen.get(&module_name) {
1121 return Err(CodeGenError::ModuleNameConflict {
1122 scope: scope.to_string(),
1123 name_a: existing.to_string(),
1124 name_b: name.to_string(),
1125 module_name,
1126 });
1127 }
1128 seen.insert(module_name, name);
1129
1130 walk(&msg.nested_type, &fqn, sentinel)?;
1131 }
1132 Ok(())
1133 }
1134
1135 walk(&file.message_type, package, sentinel)
1136}
1137
/// Per-proto content streams plus the file stem, ready to be formatted.
///
/// Produced by `generate_proto_content` for a single `.proto` file. A stream
/// that is empty means the file has no content of that kind.
struct ProtoContent {
    /// File stem derived from the proto file name; used as the base of the
    /// per-file output names (`<stem><suffix>.rs`).
    stem: String,
    /// Owned items emitted at package root: file-level enums, top-level
    /// message items, and the per-message nested-types modules.
    owned: TokenStream,
    /// View-type items.
    view: TokenStream,
    /// Oneof items for owned messages.
    oneof: TokenStream,
    /// Oneof items for view types.
    view_oneof: TokenStream,
    /// Items generated from file-level `extend` declarations.
    ext: TokenStream,
    /// Candidate `pub use` re-exports targeting the package root (top-level
    /// view structs, file-level extension consts). Filtered against the
    /// package-wide root namespace in [`generate_package_mod`] — the package
    /// can span multiple `.proto` files, so collisions are only knowable at
    /// the stitcher level.
    root_reexports: Vec<message::ReexportCandidate>,
}
1153
1154/// Generate the per-`.proto` content token streams for one input file.
1155/// Each ancillary kind that has no content yields an empty stream and
1156/// is dropped at the file-emission stage.
1157fn generate_proto_content(
1158 ctx: &context::CodeGenContext,
1159 current_package: &str,
1160 file: &FileDescriptorProto,
1161 reg: &mut message::RegistryPaths,
1162) -> Result<ProtoContent, CodeGenError> {
1163 use crate::idents::make_field_ident;
1164 use crate::message::MessageOutput;
1165
1166 validate_file(file)?;
1167
1168 let resolver = imports::ImportResolver::new();
1169 let features = crate::features::for_file(file);
1170
1171 let mut owned = TokenStream::new();
1172 let mut view = TokenStream::new();
1173 let mut oneof = TokenStream::new();
1174 let mut view_oneof = TokenStream::new();
1175 let mut ext = TokenStream::new();
1176 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
1177 let sentinel = make_field_ident(context::SENTINEL_MOD);
1178
1179 for enum_type in &file.enum_type {
1180 let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
1181 let enum_fqn = if current_package.is_empty() {
1182 enum_rust_name.to_string()
1183 } else {
1184 format!("{}.{}", current_package, enum_rust_name)
1185 };
1186 owned.extend(enumeration::generate_enum(
1187 ctx,
1188 enum_type,
1189 enum_rust_name,
1190 &enum_fqn,
1191 &features,
1192 &resolver,
1193 )?);
1194 }
1195
1196 for message_type in &file.message_type {
1197 let top_level_name = message_type.name.as_deref().unwrap_or("");
1198 let proto_fqn = if current_package.is_empty() {
1199 top_level_name.to_string()
1200 } else {
1201 format!("{}.{}", current_package, top_level_name)
1202 };
1203 let MessageOutput {
1204 owned_top,
1205 owned_mod,
1206 oneof_tree: msg_oneof,
1207 view_tree: msg_view,
1208 view_oneof_tree: msg_view_oneof,
1209 reg: msg_reg,
1210 } = message::generate_message(
1211 ctx,
1212 message_type,
1213 current_package,
1214 top_level_name,
1215 &proto_fqn,
1216 &features,
1217 &resolver,
1218 )?;
1219 owned.extend(owned_top);
1220 let mod_name = ctx.nested_module_name(current_package, top_level_name);
1221 let mod_ident = make_field_ident(&mod_name);
1222 // When the nested-types module was deconflicted from a sub-package
1223 // (issue #135), document why the name carries a trailing `_`.
1224 let mod_doc = if mod_name == crate::oneof::to_snake_case(top_level_name) {
1225 quote! {}
1226 } else {
1227 let doc = format!(
1228 "Nested items of `{top_level_name}`. The module name carries a \
1229 trailing `_` to avoid a collision with another module in this \
1230 scope (a sub-package or sibling message of the same name). See \
1231 buffa#135."
1232 );
1233 quote! { #[doc = #doc] }
1234 };
1235 for p in msg_reg.json_ext {
1236 reg.json_ext.push(quote! { #mod_ident :: #p });
1237 }
1238 for p in msg_reg.text_ext {
1239 reg.text_ext.push(quote! { #mod_ident :: #p });
1240 }
1241 reg.json_any.extend(msg_reg.json_any);
1242 reg.text_any.extend(msg_reg.text_any);
1243
1244 if !owned_mod.is_empty() {
1245 owned.extend(quote! {
1246 #mod_doc
1247 pub mod #mod_ident {
1248 #[allow(unused_imports)]
1249 use super::*;
1250 #owned_mod
1251 }
1252 });
1253 }
1254 oneof.extend(msg_oneof);
1255 view.extend(msg_view);
1256 view_oneof.extend(msg_view_oneof);
1257
1258 // Top-level message view → re-export at package root. The leading
1259 // `self::` is load-bearing: when consumers nest packages with
1260 // `pub mod a { use super::*; pub mod a_b { use super::*; … } }`
1261 // (`buffa-build`'s `_include.rs` does this), a parent package's
1262 // `__buffa` is in scope via the glob, and Rust's import-resolution
1263 // pass treats a glob-imported name as ambiguous against a
1264 // **macro-expanded** local one (the `pub mod __buffa` block arrives
1265 // via `include!()`), even though a non-macro local definition would
1266 // shadow the glob — see rustc E0659. `self::` resolves it
1267 // deterministically. `#[doc(inline)]` makes rustdoc render the type's
1268 // full page at the natural path instead of a "Re-export of …" stub.
1269 if ctx.config.generate_views {
1270 let view_ident = format_ident!("{top_level_name}View");
1271 root_reexports.push(message::ReexportCandidate {
1272 name: view_ident.to_string(),
1273 tokens: feature_gates::cfg_block(
1274 quote! {
1275 #[doc(inline)]
1276 pub use self :: #sentinel :: view :: #view_ident;
1277 },
1278 ctx.config.feature_gates().views,
1279 ),
1280 });
1281 // The owned-view wrapper gets the same natural-path treatment as
1282 // the view struct, so `pkg::FooOwnedView` works out of the box.
1283 let owned_view_ident = format_ident!("{top_level_name}OwnedView");
1284 root_reexports.push(message::ReexportCandidate {
1285 name: owned_view_ident.to_string(),
1286 tokens: feature_gates::cfg_block(
1287 quote! {
1288 #[doc(inline)]
1289 pub use self :: #sentinel :: view :: #owned_view_ident;
1290 },
1291 ctx.config.feature_gates().views,
1292 ),
1293 });
1294 }
1295 }
1296
1297 // File-level `extend` declarations → `__buffa::ext::` (depth 2).
1298 let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
1299 ctx,
1300 &file.extension,
1301 current_package,
1302 2,
1303 &features,
1304 current_package,
1305 )?;
1306 ext.extend(file_ext_tokens);
1307 for id in file_ext_json {
1308 reg.json_ext.push(quote! { #sentinel :: ext :: #id });
1309 }
1310 for id in file_ext_text {
1311 reg.text_ext.push(quote! { #sentinel :: ext :: #id });
1312 }
1313 // File-level extension consts → re-export at package root. `self::` and
1314 // `#[doc(inline)]` for the same reasons as the view re-exports above.
1315 for ext_field in &file.extension {
1316 let const_ident = extension::extension_const_ident(ext_field.name.as_deref().unwrap_or(""));
1317 root_reexports.push(message::ReexportCandidate {
1318 name: const_ident.to_string(),
1319 tokens: quote! {
1320 #[doc(inline)]
1321 pub use self :: #sentinel :: ext :: #const_ident;
1322 },
1323 });
1324 }
1325
1326 Ok(ProtoContent {
1327 stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
1328 owned,
1329 view,
1330 oneof,
1331 view_oneof,
1332 ext,
1333 root_reexports,
1334 })
1335}
1336
/// Per-section token streams for one package, ready for the stitcher.
///
/// In per-file mode each section holds `include!("<stem>...rs")` calls; in
/// `file_per_package` mode each holds the actual generated items.
///
/// Each `Vec` has at most one entry per `.proto` file in the package; files
/// with no content of a given kind contribute nothing, so an empty section
/// means the whole package has no content of that kind.
#[derive(Default)]
struct PackageSections {
    /// Owned items, emitted at package root.
    owned: Vec<TokenStream>,
    /// View-type items.
    view: Vec<TokenStream>,
    /// Oneof items for owned messages.
    oneof: Vec<TokenStream>,
    /// Oneof items for view types.
    view_oneof: Vec<TokenStream>,
    /// Items from file-level `extend` declarations.
    ext: Vec<TokenStream>,
}
1349
1350impl PackageSections {
1351 /// Append one proto file's generated items in-line.
1352 ///
1353 /// Empty streams are skipped so each section's emptiness reflects
1354 /// "the package has no content of this kind" — symmetric with the
1355 /// per-file branch that filters at file-emission time.
1356 fn push_inline(&mut self, pc: ProtoContent) {
1357 let push_if_nonempty = |dst: &mut Vec<TokenStream>, ts: TokenStream| {
1358 if !ts.is_empty() {
1359 dst.push(ts);
1360 }
1361 };
1362 push_if_nonempty(&mut self.owned, pc.owned);
1363 push_if_nonempty(&mut self.view, pc.view);
1364 push_if_nonempty(&mut self.oneof, pc.oneof);
1365 push_if_nonempty(&mut self.view_oneof, pc.view_oneof);
1366 push_if_nonempty(&mut self.ext, pc.ext);
1367 }
1368}
1369
1370/// Generate all output files for one proto package: up to five content
1371/// files per `.proto` (empty ancillary kinds are skipped) plus one
1372/// `<pkg>.mod.rs` stitcher, or a single `<pkg>.rs` when
1373/// [`CodeGenConfig::file_per_package`] is set.
1374fn generate_package(
1375 ctx: &context::CodeGenContext,
1376 current_package: &str,
1377 files: &[&FileDescriptorProto],
1378 fds_bytes: &[u8],
1379 out: &mut Vec<GeneratedFile>,
1380) -> Result<(), CodeGenError> {
1381 // Registry paths are package-root-relative; `register_types` lives at
1382 // `__buffa::register_types` (one level deep), so each path gets a
1383 // single `super::` prefix when emitted into the fn body.
1384 let mut reg = message::RegistryPaths::default();
1385 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
1386
1387 let sections = if ctx.config.file_per_package {
1388 let mut sections = PackageSections::default();
1389 for file in files {
1390 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
1391 root_reexports.append(&mut pc.root_reexports);
1392 sections.push_inline(pc);
1393 }
1394 sections
1395 } else {
1396 let mut sections = PackageSections::default();
1397 for file in files {
1398 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
1399 root_reexports.append(&mut pc.root_reexports);
1400 let source = file.name.as_deref().unwrap_or("");
1401 let stem = pc.stem;
1402
1403 // Empty ancillary token streams are skipped — neither the
1404 // content file nor the stitcher's `include!` is emitted.
1405 let emit = |suffix: &str,
1406 kind: GeneratedFileKind,
1407 tokens: TokenStream,
1408 section: &mut Vec<TokenStream>,
1409 out: &mut Vec<GeneratedFile>|
1410 -> Result<(), CodeGenError> {
1411 if tokens.is_empty() {
1412 return Ok(());
1413 }
1414 let name = format!("{stem}{suffix}.rs");
1415 section.push(quote! { include!(#name); });
1416 out.push(GeneratedFile {
1417 name,
1418 package: current_package.to_string(),
1419 kind,
1420 content: format_tokens(tokens, source)?,
1421 });
1422 Ok(())
1423 };
1424 emit(
1425 "",
1426 GeneratedFileKind::Owned,
1427 pc.owned,
1428 &mut sections.owned,
1429 out,
1430 )?;
1431 emit(
1432 ".__view",
1433 GeneratedFileKind::View,
1434 pc.view,
1435 &mut sections.view,
1436 out,
1437 )?;
1438 emit(
1439 ".__oneof",
1440 GeneratedFileKind::Oneof,
1441 pc.oneof,
1442 &mut sections.oneof,
1443 out,
1444 )?;
1445 emit(
1446 ".__view_oneof",
1447 GeneratedFileKind::ViewOneof,
1448 pc.view_oneof,
1449 &mut sections.view_oneof,
1450 out,
1451 )?;
1452 emit(
1453 ".__ext",
1454 GeneratedFileKind::Ext,
1455 pc.ext,
1456 &mut sections.ext,
1457 out,
1458 )?;
1459 }
1460 sections
1461 };
1462
1463 let reexport_block = surviving_root_reexports(ctx, files, ®, root_reexports);
1464
1465 out.push(GeneratedFile {
1466 name: if ctx.config.file_per_package {
1467 package_to_filename(current_package)
1468 } else {
1469 package_to_mod_filename(current_package)
1470 },
1471 package: current_package.to_string(),
1472 kind: GeneratedFileKind::PackageMod,
1473 content: generate_package_mod(ctx, §ions, ®, &reexport_block, fds_bytes)?,
1474 });
1475
1476 Ok(())
1477}
1478
1479/// Filter the candidate package-root re-exports against the package's
1480/// existing root namespace and against each other, returning the surviving
1481/// `pub use` lines.
1482///
1483/// The package root is shared across every `.proto` file in the package, so
1484/// the occupied-name set must be built from *all* of them — a top-level
1485/// message named `FooView` declared in `a.proto` would shadow `Foo`'s view
1486/// re-export from `b.proto`.
1487fn surviving_root_reexports(
1488 ctx: &context::CodeGenContext,
1489 files: &[&FileDescriptorProto],
1490 reg: &message::RegistryPaths,
1491 mut candidates: Vec<message::ReexportCandidate>,
1492) -> TokenStream {
1493 use crate::idents::make_field_ident;
1494 use std::collections::BTreeSet;
1495
1496 // Names already occupied at package root by real items: top-level
1497 // messages, enums, message nested-types modules (deconflicted name, #135),
1498 // and the `__buffa` sentinel itself. File-level extension consts live in
1499 // `__buffa::ext::`, not at the root, so they are *candidates* (added
1500 // by `generate_proto_content`) rather than occupants.
1501 let mut occupied: BTreeSet<String> = BTreeSet::new();
1502 occupied.insert(context::SENTINEL_MOD.to_string());
1503 for file in files {
1504 let package = file.package.as_deref().unwrap_or("");
1505 for m in &file.message_type {
1506 let name = m.name.as_deref().unwrap_or("");
1507 occupied.insert(name.to_string());
1508 // The actual module name (deconflicted from sub-packages, #135).
1509 occupied.insert(ctx.nested_module_name(package, name));
1510 }
1511 for e in &file.enum_type {
1512 occupied.insert(e.name.as_deref().unwrap_or("").to_string());
1513 }
1514 }
1515
1516 // `register_types`, when emitted, lives at `__buffa::register_types`.
1517 // `self::` and `#[doc(inline)]` for the same reasons as the view
1518 // re-exports above. Same `any(json, text)` gate as the fn itself.
1519 if ctx.config.emit_register_fn && !reg.is_empty() {
1520 let sentinel = make_field_ident(context::SENTINEL_MOD);
1521 let json_or_text = ctx.config.feature_gates().json_or_text();
1522 candidates.push(message::ReexportCandidate {
1523 name: "register_types".to_string(),
1524 tokens: feature_gates::cfg_block_any(
1525 quote! {
1526 #[doc(inline)]
1527 pub use self :: #sentinel :: register_types;
1528 },
1529 &json_or_text,
1530 ),
1531 });
1532 }
1533
1534 message::emit_surviving_reexports(candidates, &occupied)
1535}
1536
1537/// Render the per-package stitcher: owned items at root plus the
1538/// `__buffa::{view,oneof,ext,...}` module wrappers, followed by the
1539/// surviving package-root `pub use` re-exports.
1540fn generate_package_mod(
1541 ctx: &context::CodeGenContext,
1542 sections: &PackageSections,
1543 reg: &message::RegistryPaths,
1544 root_reexports: &TokenStream,
1545 fds_bytes: &[u8],
1546) -> Result<String, CodeGenError> {
1547 use crate::idents::make_field_ident;
1548
1549 let owned = §ions.owned;
1550 let view = §ions.view;
1551 let view_oneof = §ions.view_oneof;
1552 let oneof = §ions.oneof;
1553 let ext = §ions.ext;
1554
1555 // Each ancillary module is emitted only when its section has
1556 // content. The natural-path re-exports outside `__buffa` target
1557 // these modules — they are emitted only when their target items
1558 // exist, so the conditions align and re-exports never reference
1559 // a missing module.
1560 let view_oneof_mod = if !view_oneof.is_empty() {
1561 quote! {
1562 pub mod oneof {
1563 #[allow(unused_imports)]
1564 use super::*;
1565 #(#view_oneof)*
1566 }
1567 }
1568 } else {
1569 TokenStream::new()
1570 };
1571
1572 // `view_oneof` is only populated for messages that have oneofs, and
1573 // every message also contributes to `view`, so `!view.is_empty()` is
1574 // sufficient — `view_oneof` non-empty implies `view` non-empty.
1575 debug_assert!(view_oneof.is_empty() || !view.is_empty());
1576 let view_mod = if ctx.config.generate_views && !view.is_empty() {
1577 feature_gates::cfg_block(
1578 quote! {
1579 pub mod view {
1580 #[allow(unused_imports)]
1581 use super::*;
1582 #(#view)*
1583 #view_oneof_mod
1584 }
1585 },
1586 ctx.config.feature_gates().views,
1587 )
1588 } else {
1589 TokenStream::new()
1590 };
1591
1592 let oneof_mod = if !oneof.is_empty() {
1593 quote! {
1594 pub mod oneof {
1595 #[allow(unused_imports)]
1596 use super::*;
1597 #(#oneof)*
1598 }
1599 }
1600 } else {
1601 TokenStream::new()
1602 };
1603
1604 let ext_mod = if !ext.is_empty() {
1605 quote! {
1606 pub mod ext {
1607 #[allow(unused_imports)]
1608 use super::*;
1609 #(#ext)*
1610 }
1611 }
1612 } else {
1613 TokenStream::new()
1614 };
1615
1616 let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
1617 let gates = ctx.config.feature_gates();
1618 // When the gated consts (`__*_JSON_ANY` / `__*_TEXT_ANY`) are
1619 // `#[cfg(feature = "...")]`, each registration statement that
1620 // references them gets the same gate. `#[cfg]` on a statement is
1621 // allowed; the call disappears with the const.
1622 let json_regs = reg
1623 .json_any
1624 .iter()
1625 .map(|p| {
1626 feature_gates::cfg_block(quote! { reg.register_json_any(super::#p); }, gates.json)
1627 })
1628 .chain(reg.json_ext.iter().map(|p| {
1629 feature_gates::cfg_block(quote! { reg.register_json_ext(super::#p); }, gates.json)
1630 }));
1631 let text_regs = reg
1632 .text_any
1633 .iter()
1634 .map(|p| {
1635 feature_gates::cfg_block(quote! { reg.register_text_any(super::#p); }, gates.text)
1636 })
1637 .chain(reg.text_ext.iter().map(|p| {
1638 feature_gates::cfg_block(quote! { reg.register_text_ext(super::#p); }, gates.text)
1639 }));
1640 // When gating, a feature subset may leave one bucket of statements
1641 // cfg'd out while the other survives — `reg` is still used. But if
1642 // `register_types` itself is gated on `any(json, text)` (below),
1643 // the only reachable bodies have at least one statement, so `reg`
1644 // can't be unused. Keep `#[allow(unused_variables)]` defensively
1645 // anyway: it's harmless, and the alternative — proving the
1646 // invariant holds across future statement-shape changes — is
1647 // brittle.
1648 let allow_unused = if ctx.config.gate_impls_on_crate_features {
1649 quote! { #[allow(unused_variables)] }
1650 } else {
1651 quote! {}
1652 };
1653 // The fn is useless without at least one of the gated modes that
1654 // populate it — and `::buffa::type_registry::TypeRegistry` may
1655 // become feature-gated in the runtime in a future release. Gate the
1656 // fn on `any(...)` of whichever modes are active so it disappears
1657 // alongside the last entry.
1658 feature_gates::cfg_block_any(
1659 quote! {
1660 /// Register this package's `Any` type entries and extension entries.
1661 #allow_unused
1662 pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
1663 #(#json_regs)*
1664 #(#text_regs)*
1665 }
1666 },
1667 &gates.json_or_text(),
1668 )
1669 } else {
1670 TokenStream::new()
1671 };
1672
1673 // Reflection: embed the FileDescriptorSet bytes and a lazy pool
1674 // accessor so per-message `Reflectable` impls have a descriptor pool to
1675 // resolve against. Lives inside `__buffa` so the impls can reach it via
1676 // a relative `__buffa::reflect::descriptor_pool()` path. A package-root
1677 // `pub use` re-exports `descriptor_pool` so consumers don't have to
1678 // route through the reserved `__buffa` sentinel.
1679 let (reflect_mod, reflect_reexport) = if ctx.config.generate_reflection {
1680 let gate = ctx.config.feature_gates().reflect;
1681 (
1682 feature_gates::cfg_block(reflect::reflect_pool_module(fds_bytes), gate),
1683 feature_gates::cfg_block(reflect::pool_accessor_reexport("e! { __buffa }), gate),
1684 )
1685 } else {
1686 (TokenStream::new(), TokenStream::new())
1687 };
1688
1689 let sentinel = make_field_ident(context::SENTINEL_MOD);
1690 // The whole `pub mod __buffa { ... }` wrapper is itself omitted
1691 // when none of its inner modules or `register_types` exist.
1692 let buffa_mod = if view_mod.is_empty()
1693 && oneof_mod.is_empty()
1694 && ext_mod.is_empty()
1695 && register_fn.is_empty()
1696 && reflect_mod.is_empty()
1697 {
1698 TokenStream::new()
1699 } else {
1700 let allow = allow_lints_attr();
1701 quote! {
1702 #allow
1703 pub mod #sentinel {
1704 #[allow(unused_imports)]
1705 use super::*;
1706 #view_mod
1707 #oneof_mod
1708 #ext_mod
1709 #register_fn
1710 #reflect_mod
1711 }
1712 }
1713 };
1714
1715 let tokens = quote! {
1716 #(#owned)*
1717 #buffa_mod
1718 #reflect_reexport
1719 #root_reexports
1720 };
1721
1722 format_tokens(tokens, "")
1723}
1724
1725/// Format a token stream into a generated-file string with the standard
1726/// header comment.
1727fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
1728 let syntax_tree =
1729 syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
1730 let formatted = prettyplease::unparse(&syntax_tree);
1731 let source_line = if source.is_empty() {
1732 String::new()
1733 } else {
1734 format!("// source: {source}\n")
1735 };
1736 Ok(format!(
1737 "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
1738 ))
1739}
1740
1741/// Convert a proto package name to its `.mod.rs` stitcher filename.
1742///
1743/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
1744/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
1745/// filename stem — `package __buffa;` is already rejected by
1746/// `validate_file`, so the unnamed-package stitcher cannot
1747/// collide with any real package's.
1748pub fn package_to_mod_filename(package: &str) -> String {
1749 if package.is_empty() {
1750 format!("{}.mod.rs", context::SENTINEL_MOD)
1751 } else {
1752 format!("{package}.mod.rs")
1753 }
1754}
1755
1756/// Convert a proto package name to its [`file_per_package`] output filename.
1757///
1758/// e.g., `"google.protobuf"` → `"google.protobuf.rs"`. The unnamed
1759/// package uses [`SENTINEL_MOD`](context::SENTINEL_MOD) — same
1760/// collision-avoidance as [`package_to_mod_filename`].
1761///
1762/// [`file_per_package`]: CodeGenConfig::file_per_package
1763pub fn package_to_filename(package: &str) -> String {
1764 if package.is_empty() {
1765 format!("{}.rs", context::SENTINEL_MOD)
1766 } else {
1767 format!("{package}.rs")
1768 }
1769}
1770
/// Derive the content-file stem for a `.proto` file path.
///
/// A trailing `.proto` is dropped (once) and every `/` becomes `.`, e.g.
/// `"google/protobuf/timestamp.proto"` → `"google.protobuf.timestamp"`.
/// Content files are named by appending `""`, `".__view"`, `".__oneof"`,
/// `".__view_oneof"`, or `".__ext"` and then `".rs"` to this stem; a kind
/// with no content gets no file.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let trimmed = match proto_path.strip_suffix(".proto") {
        Some(rest) => rest,
        None => proto_path,
    };
    trimmed
        .chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect()
}
1781
1782/// Merge downstream [`Companion`](GeneratedFileKind::Companion) files into
1783/// the per-package stitcher produced by [`generate`].
1784///
1785/// For each companion file this function locates the
1786/// [`PackageMod`](GeneratedFileKind::PackageMod) entry in `files` with a
1787/// matching package and appends `include!("<name>");` at file scope after
1788/// buffa's own output — at package root, alongside the owned message types,
1789/// not under `__buffa::`. The companion files themselves are appended to
1790/// `files` so that build integrations can write everything to disk in one
1791/// pass.
1792///
1793/// **Call this once per build**; it does not deduplicate, so a second call
1794/// with the same companions emits a second `include!` for each, which fails
1795/// to compile downstream with a duplicate-definition error.
1796///
1797/// `name` must be a bare-sibling filename — the same convention buffa uses
1798/// for its own `include!` calls, so it resolves relative to the stitcher
1799/// without any `OUT_DIR` prefix. Names must not contain `"`, `\`, `/`, or
1800/// newlines (the function `debug_assert!`s this in debug builds), and must
1801/// not collide with any of buffa's own generated filenames for the same
1802/// package (`<stem>.rs`, `<stem>.__view.rs`, etc.) — pick an unused suffix
1803/// such as `<stem>.__myplugin.rs`.
1804///
1805/// Companion files with no matching `PackageMod` (e.g. for a package buffa
1806/// did not generate any output for) are still appended to `files` but no
1807/// `include!` is emitted; the caller is responsible for wiring them up. If
1808/// you don't expect orphans, check that every companion's `package` appears
1809/// in `files` as a `PackageMod` after calling.
1810pub fn apply_companions(files: &mut Vec<GeneratedFile>, companions: Vec<GeneratedFile>) {
1811 for comp in &companions {
1812 debug_assert!(
1813 !comp.name.contains(['"', '\\', '/', '\n']),
1814 "companion file name {:?} contains a character that would break \
1815 the generated include!() literal or its bare-sibling resolution",
1816 comp.name
1817 );
1818 if let Some(pkg_mod) = files
1819 .iter_mut()
1820 .find(|f| f.kind == GeneratedFileKind::PackageMod && f.package == comp.package)
1821 {
1822 pkg_mod
1823 .content
1824 .push_str(&format!("include!(\"{}\");\n", comp.name));
1825 }
1826 }
1827 files.extend(companions);
1828}
1829
/// Code generation error.
///
/// Each variant's `#[error(...)]` attribute supplies its human-readable
/// message. The enum is `#[non_exhaustive]`, so downstream `match`es need
/// a wildcard arm.
#[derive(Debug, Clone, thiserror::Error)]
#[non_exhaustive]
pub enum CodeGenError {
    /// A required field was absent in a descriptor.
    ///
    /// The `&'static str` names the missing field for diagnostics.
    #[error("missing required descriptor field: {0}")]
    MissingField(&'static str),
    /// A resolved type path string could not be parsed as a Rust type.
    ///
    /// The payload is the offending path string, echoed in the message.
    #[error("invalid Rust type path: '{0}'")]
    InvalidTypePath(String),
    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
    ///
    /// The payload is the parser's error text.
    #[error("generated code failed to parse as Rust: {0}")]
    InvalidSyntax(String),
    /// A requested file was not present in the descriptor set.
    ///
    /// The payload is the requested `file_to_generate` name.
    #[error("file_to_generate '{0}' not found in descriptor set")]
    FileNotFound(String),
    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
    /// resolved to a known descriptor field).
    ///
    /// The payload is a free-form description.
    #[error("codegen error: {0}")]
    Other(String),
    /// A proto field name uses the `__buffa_` reserved prefix, which would
    /// conflict with buffa's internal generated fields.
    #[error(
        "reserved field name '{field_name}' in message '{message_name}': \
         proto field names starting with '__buffa_' conflict with buffa's \
         internal fields"
    )]
    ReservedFieldName {
        /// Message that declares the offending field.
        message_name: String,
        /// The field name carrying the reserved prefix.
        field_name: String,
    },
    /// Two sibling messages produce the same Rust module name after
    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
    /// become `pub mod http_request`).
    #[error(
        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
         both produce module '{module_name}'"
    )]
    ModuleNameConflict {
        /// Scope in which the two messages are siblings.
        scope: String,
        /// One of the two conflicting message names.
        name_a: String,
        /// The other conflicting message name.
        name_b: String,
        /// The module name both messages map to.
        module_name: String,
    },
    /// A proto package segment, message name, or file-level enum name
    /// would emit a Rust item matching the reserved sentinel `__buffa`.
    ///
    /// This is the only name buffa reserves in user namespace. Resolve by
    /// renaming the proto element.
    #[error(
        "reserved name '{name}' at {location}: this name is reserved for \
         buffa's generated ancillary types (views, oneof enums, \
         extensions). Rename the proto element."
    )]
    ReservedModuleName { name: String, location: String },
    /// The input contains a message with `option message_set_wire_format = true`
    /// but [`CodeGenConfig::allow_message_set`] was not set.
    #[error(
        "message '{message_name}' uses `option message_set_wire_format = true` \
         but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
         wire format — set allow_message_set(true) if this is intentional"
    )]
    MessageSetNotSupported {
        /// The message that sets `message_set_wire_format`.
        message_name: String,
    },
    /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
    /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
    /// could not be parsed as a Rust attribute.
    #[error(
        "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
         Rust attribute ({detail})"
    )]
    InvalidCustomAttribute {
        /// Path the attribute was configured for.
        path: String,
        /// The attribute text that failed to parse.
        attribute: String,
        /// Extra detail about the failure, shown in parentheses in the message.
        detail: String,
    },
}
1908
1909#[cfg(test)]
1910mod tests;