Skip to main content

buffa_codegen/
lib.rs

1//! Shared code generation logic for buffa.
2//!
3//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
4//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
5//! that uses the `buffa` runtime.
6//!
7//! It is used by:
8//! - `protoc-gen-buffa` (protoc plugin)
9//! - `buffa-build` (build.rs integration)
10//!
11//! # Architecture
12//!
13//! The code generator is intentionally decoupled from how descriptors are
14//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
15//! Rust source strings. This means:
16//!
17//! - It doesn't parse `.proto` files.
18//! - It doesn't invoke `protoc`.
19//! - It doesn't do import resolution or name linking.
20//!
21//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod feature_gates;
29pub(crate) mod features;
30#[doc(hidden)]
31pub use buffa_descriptor::generated;
32pub mod idents;
33pub(crate) mod impl_message;
34pub(crate) mod impl_text;
35pub(crate) mod imports;
36pub(crate) mod message;
37pub(crate) mod oneof;
38pub(crate) mod view;
39
40use crate::generated::descriptor::FileDescriptorProto;
41use proc_macro2::TokenStream;
42use quote::{format_ident, quote};
43
/// Lint names silenced on generated code wherever a module boundary is
/// emitted.
///
/// This is the one list shared by [`generate_module_tree`], the
/// per-package `.mod.rs` stitcher, and the `_include.rs` writer in
/// `buffa-build`, so the three cannot drift apart. Each entry is spelled
/// the way it appears inside `#[allow(…)]`.
pub const ALLOW_LINTS: &[&str] = &[
    // Built-in rustc lints.
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    // References between protos of one package always go through the
    // canonical `super::super::__buffa::view::…` path, even when the
    // target sits in the same generated module. The bare name would
    // resolve too, but the canonical path stays stable if a sibling
    // proto defines a same-named natural-path re-export.
    "unused_qualifications",
    // Clippy lints.
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // A user `message View { message Inner }` yields
    // `__buffa::view::view::InnerView`, which is harmless but trips
    // this lint.
    "clippy::module_inception",
];
67
68/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
69pub fn allow_lints_attr() -> TokenStream {
70    let lints: Vec<TokenStream> = ALLOW_LINTS
71        .iter()
72        .map(|l| syn::parse_str(l).expect("lint name parses as path"))
73        .collect();
74    quote! { #[allow( #(#lints),* )] }
75}
76
77/// One generated output file.
78///
79/// Each `.proto` produces up to five **content files** (`<stem>.rs`,
80/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
81/// `<stem>.__ext.rs`) and each proto package produces one
82/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
83/// and authors the `pub mod __buffa { … }` ancillary tree.
84/// Ancillary kinds with no content for that input file (e.g. a message
85/// with no oneofs and no extensions) are omitted, and the stitcher's
86/// `include!` set is filtered to match. The `__buffa` wrapper (and each
87/// `view` / `oneof` / `ext` submodule inside it) is itself omitted when
88/// it would be empty, so packages with only owned messages emit no
89/// `__buffa` block at all.
90/// See `DESIGN.md` → "Generated code layout".
91///
92/// Consumers normally only need to wire up the
93/// [`GeneratedFileKind::PackageMod`] entries (one per package); the
94/// per-proto content kinds are reached transitively via `include!` from
95/// the stitcher. Write all files to disk; build a module tree from only
96/// the `PackageMod` ones.
97///
98/// With [`CodeGenConfig::file_per_package`] set, the per-proto content
99/// kinds are not emitted at all — the single `<dotted.pkg>.rs` (still
100/// kind `PackageMod`) inlines what the stitcher would `include!`.
101#[derive(Debug)]
102pub struct GeneratedFile {
103    /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
104    pub name: String,
105    /// The proto package this file belongs to.
106    pub package: String,
107    /// What this file contains. Build integrations only need to wire up
108    /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
109    /// via `include!` from there.
110    pub kind: GeneratedFileKind,
111    /// The generated Rust source code.
112    pub content: String,
113}
114
115/// Kind of [`GeneratedFile`].
116///
117/// [`generate`] produces up to five per-proto content kinds — one each
118/// of [`Owned`](Self::Owned), [`View`](Self::View), [`Oneof`](Self::Oneof),
119/// [`ViewOneof`](Self::ViewOneof), and [`Ext`](Self::Ext) per input
120/// `.proto` file — plus one [`PackageMod`](Self::PackageMod) stitcher per
121/// package. Kinds with no content for the input (a proto with no oneofs
122/// emits no [`Oneof`](Self::Oneof) / [`ViewOneof`](Self::ViewOneof);
123/// no extensions, no [`Ext`](Self::Ext); etc.) are omitted. Build
124/// integrations only need to wire up `PackageMod` entries; the per-proto
125/// content kinds are reached via `include!` from the stitcher and need
126/// only be written to disk alongside it. Under
127/// [`CodeGenConfig::file_per_package`] only `PackageMod` is emitted.
128///
129/// [`Companion`](Self::Companion) is the one kind *not* produced by
130/// [`generate`]: downstream code generators construct `Companion` files
131/// themselves and merge them into buffa's output via
132/// [`apply_companions`].
133///
134/// This enum is `#[non_exhaustive]` — match with a wildcard arm so new
135/// kinds can be added without a major version bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GeneratedFileKind {
    /// `<stem>.rs` — owned message structs and enums.
    Owned,
    /// `<stem>.__view.rs` — view structs.
    View,
    /// `<stem>.__oneof.rs` — owned oneof enums.
    Oneof,
    /// `<stem>.__view_oneof.rs` — view oneof enums.
    ViewOneof,
    /// `<stem>.__ext.rs` — file-level proto-extension consts, i.e. the
    /// `pub const` `ExtensionDescriptor` items generated from `extend`
    /// blocks. Unrelated to [`Companion`](Self::Companion), which carries
    /// content supplied by a downstream generator.
    Ext,
    /// `<dotted.pkg>.mod.rs` — the per-package stitcher, and the only
    /// file a build system has to wire up directly.
    PackageMod,
    /// Additional per-proto content contributed by a downstream code
    /// generator (service stubs, extra trait impls, etc.) that ships
    /// alongside buffa's own output.
    ///
    /// [`generate`] never produces this kind. A downstream generator
    /// constructs the files itself and hands them to
    /// [`apply_companions`], which appends one `include!` per companion
    /// at file scope in the matching package's
    /// [`PackageMod`](Self::PackageMod). The include lands after buffa's
    /// own output, at package root next to the owned message types —
    /// **not** under the `__buffa::` sentinel module — so items declared
    /// `pub` in a companion file are visible at `crate::<pkg>::*`.
    ///
    /// Unrelated to [`Ext`](Self::Ext), the buffa-generated file holding
    /// protobuf `extend` consts.
    Companion,
}
170
/// Options that control code generation.
///
/// Build one from [`CodeGenConfig::default`] and override the fields you
/// need. The default of every field is stated in its documentation.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Emit borrowed view types (`MyMessageView<'a>`) alongside the owned
    /// types. Defaults to `true`.
    pub generate_views: bool,
    /// Preserve unknown fields. Defaults to `true`.
    pub preserve_unknown_fields: bool,
    /// Derive `serde::Serialize` / `serde::Deserialize` on generated
    /// message structs and enum types, and emit `#[serde(with = "...")]`
    /// attributes for the scalar encodings proto3 JSON treats specially
    /// (int64 as quoted string, bytes as base64, etc.). Defaults to
    /// `false`.
    ///
    /// When enabled, the downstream crate must depend on `serde` and turn
    /// on the `buffa/json` feature for the runtime helpers.
    ///
    /// Oneof fields get `#[serde(flatten)]` plus custom `Serialize` /
    /// `Deserialize` impls, so each variant shows up as a top-level JSON
    /// field (the inline oneof encoding of proto3 JSON).
    pub generate_json: bool,
    /// Emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types. Defaults to `false`.
    ///
    /// When enabled, the downstream crate must declare `arbitrary` as an
    /// optional dependency and turn on the `buffa/arbitrary` feature. The
    /// Cargo feature gating `arbitrary` in the downstream crate has to be
    /// named exactly `"arbitrary"`: the generated `cfg_attr` hard-codes
    /// that string and it cannot be customized. This holds both for the
    /// struct-level `derive(Arbitrary)` and for the per-field
    /// `#[arbitrary(with = ...)]` attributes emitted for
    /// `bytes_fields`-typed fields.
    ///
    /// `bytes::Bytes` has no `Arbitrary` impl, so `bytes_fields`-typed
    /// fields receive `#[arbitrary(with = ...)]` pointing at helpers in
    /// `::buffa::__private`. Singular, optional, and repeated bytes fields
    /// are all covered. Map values are always `Vec<u8>` regardless of
    /// `bytes_fields` and need no special handling.
    pub generate_arbitrary: bool,
    /// Mappings from proto paths to externally defined Rust types.
    /// Defaults to empty.
    ///
    /// Each entry pairs a fully-qualified protobuf path prefix (e.g.,
    /// `".my.common"`) with a Rust module path (e.g., `"::common_protos"`).
    /// Types under the proto prefix are referenced at the extern Rust path
    /// instead of being generated, which lets a shared proto package be
    /// compiled once in a dedicated crate and reused from others.
    ///
    /// Well-known types (`google.protobuf.*`) are mapped to
    /// `::buffa_types::google::protobuf::*` automatically, with no entry
    /// needed here. To substitute a custom implementation, add an
    /// `extern_path` for `.google.protobuf` that points at your crate.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields are emitted
    /// as `bytes::Bytes` rather than `Vec<u8>`. Defaults to empty.
    ///
    /// Every entry is matched as a path prefix: `".my.pkg.MyMessage.data"`
    /// selects one specific field, while `"."` selects every bytes field
    /// in every message.
    pub bytes_fields: Vec<String>,
    /// Map string fields that carry `features.utf8_validation = NONE` to
    /// `Vec<u8>` / `&[u8]` rather than `String` / `&str`.
    ///
    /// With `false` (the default) every string field is emitted as
    /// `String` and buffa **validates UTF-8 on decode** — stricter than
    /// proto2 requires, but ergonomic and safe.
    ///
    /// With `true`, string fields whose `utf8_validation` is `NONE` (all
    /// proto2 strings by default, plus editions fields that opt into
    /// `NONE`) become `Vec<u8>` / `&[u8]`. Decode skips validation and the
    /// caller chooses at the call site between `std::str::from_utf8`
    /// (checked) and `from_utf8_unchecked` (trusted-input fast path). This
    /// is the only sound Rust mapping when strings may actually contain
    /// non-UTF-8 bytes.
    ///
    /// **Enabling this is a breaking change for proto2** — do so only for
    /// new code or when profiling identifies UTF-8 validation as a
    /// bottleneck.
    pub strict_utf8_mapping: bool,
    /// Accept input messages that set
    /// `option message_set_wire_format = true`.
    ///
    /// MessageSet is a legacy Google-internal wire format that wraps each
    /// extension in a group structure instead of using regular field tags.
    /// With `false` (the default), meeting such a message is a codegen
    /// error. The flag exists to make MessageSet use explicit, because the
    /// format is obsolete outside of interop with very old Google protos.
    pub allow_message_set: bool,
    /// Emit `impl buffa::text::TextFormat` on generated message structs,
    /// for textproto (human-readable text format) encoding and decoding.
    /// Defaults to `false`.
    ///
    /// When enabled, the downstream crate must turn on the `buffa/text`
    /// feature for the runtime encoder/decoder.
    pub generate_text: bool,
    /// Have the per-package `.mod.rs` stitcher emit
    /// `__buffa::register_types(&mut TypeRegistry)`. Defaults to `true`.
    ///
    /// The fn aggregates the `Any` type entries and extension entries of
    /// every message in the package. Set this to `false` for crates that
    /// use neither extensions nor `Any`, or that hand-roll registration
    /// (e.g. `register_wkt_types` in `buffa-types`, which knows the
    /// JSON-Any `is_wkt` special-casing the generic fn does not). The
    /// per-message `__*_JSON_ANY` / `__*_TEXT_ANY` consts are emitted
    /// either way; only the aggregating fn is suppressed.
    pub emit_register_fn: bool,
    /// Emit a single `<dotted.package>.rs` per proto package in place of
    /// the per-proto-file content set plus `<pkg>.mod.rs` stitcher.
    /// Defaults to `false`.
    ///
    /// The single file inlines what the stitcher would otherwise
    /// `include!`, so the resulting `__buffa::{view,oneof,ext,...}` module
    /// structure is the same. Intended for Buf Schema Registry generated
    /// SDKs, whose `lib.rs` synthesis builds the module tree from
    /// `<dotted.package>.rs` filenames.
    ///
    /// Under `strategy: directory` each invocation sees only one
    /// directory's files, so the input module must be
    /// `PACKAGE_DIRECTORY_MATCH`-clean (one package per directory) for the
    /// output to be complete. BSR-hosted modules satisfy this by lint
    /// default. If a package spans several directories, each invocation
    /// emits its own `<pkg>.rs` and the last write wins — silent partial
    /// output, not a codegen error.
    pub file_per_package: bool,
    /// Extra attributes injected on generated types, both messages and
    /// enums. Defaults to empty.
    ///
    /// Each entry is `(proto_path, attribute)`. `proto_path` is matched as
    /// a prefix of the fully-qualified proto name: `"."` selects all
    /// types, `".my.pkg"` the types of that package, and
    /// `".my.pkg.MyMessage"` one specific type. `attribute` is a raw Rust
    /// attribute string (e.g., `"#[derive(serde::Serialize)]"`).
    pub type_attributes: Vec<(String, String)>,
    /// Extra attributes injected on generated struct fields. Defaults to
    /// empty.
    ///
    /// Each entry is `(proto_path, attribute)`. `proto_path` is matched as
    /// a prefix of the fully-qualified field path (e.g.,
    /// `".my.pkg.MyMessage.my_field"`); `"."` selects all fields.
    pub field_attributes: Vec<(String, String)>,
    /// Extra attributes injected on generated message structs only, never
    /// on enums. Defaults to empty.
    ///
    /// Paths are matched as for `type_attributes`. Useful for struct-only
    /// attributes such as `#[serde(default)]`.
    pub message_attributes: Vec<(String, String)>,
    /// Extra attributes injected on generated enum types only, never on
    /// messages. Defaults to empty.
    ///
    /// Paths are matched as for `type_attributes`. Useful for enum-only
    /// attributes such as `#[derive(strum::EnumIter)]` when the same
    /// attribute should not land on every message in the matched scope.
    pub enum_attributes: Vec<(String, String)>,
    /// Wrap generated `impl`s in `#[cfg(feature = "...")]` rather than
    /// emitting them unconditionally. Defaults to `false`.
    ///
    /// With `true`, the impls controlled by [`generate_json`],
    /// [`generate_views`], and [`generate_text`] are wrapped in
    /// `#[cfg(feature = "json" | "views" | "text")]` (or
    /// `#[cfg_attr(feature = ..., ...)]` for derives and field
    /// attributes). The consuming crate must then define matching Cargo
    /// features that enable the corresponding runtime support, e.g.:
    ///
    /// ```toml
    /// [features]
    /// json  = ["buffa/json", "dep:serde", "dep:serde_json"]
    /// views = []
    /// text  = ["buffa/text"]
    /// ```
    ///
    /// The [`generate_*`] flags still decide *whether* an impl kind is
    /// emitted at all; this flag only decides whether it is `cfg`-gated.
    /// `generate_arbitrary` output is always `cfg_attr`-gated on
    /// `feature = "arbitrary"` regardless of this flag, because
    /// `arbitrary` is an optional dependency by design.
    ///
    /// This is the mechanism that lets `buffa-descriptor` and
    /// `buffa-types` ship every impl while the codegen toolchain
    /// (`buffa-codegen`/`buffa-build`/`protoc-gen-buffa`) stays lean:
    /// those crates depend on `buffa-descriptor` with
    /// `default-features = false` and therefore do not pull in
    /// `serde`/`serde_json`/`base64`. Most consumers do not need this —
    /// they decide at build-script time whether to generate JSON, and if
    /// they say yes, they want `impl Serialize` to just exist.
    ///
    /// [`generate_json`]: Self::generate_json
    /// [`generate_views`]: Self::generate_views
    /// [`generate_text`]: Self::generate_text
    /// [`generate_*`]: Self::generate_json
    pub gate_impls_on_crate_features: bool,
    /// Generate builder-style `with_*` setter methods for
    /// explicit-presence fields. Defaults to `true`.
    ///
    /// Every explicit-presence scalar, bytes, or enum field gets a
    /// `pub fn with_<name>(mut self, value: T) -> Self` method that wraps
    /// the value in `Some` and returns `self`, which allows chained
    /// construction:
    ///
    /// ```ignore
    /// let req = MyRequest::default()
    ///     .with_name("alice")
    ///     .with_timeout_ms(30_000);
    /// ```
    ///
    /// **Fields that receive a setter:** proto3 `optional`, proto2
    /// `optional`, and editions fields with `field_presence = EXPLICIT`.
    ///
    /// **Fields that do not receive a setter:** message fields
    /// (`MessageField<T>`), repeated fields, map fields, oneof variant
    /// fields, proto2 `required` fields, and any implicit-presence field.
    ///
    /// No `clear_<name>` companion is generated — to clear a field,
    /// assign `None` directly: `msg.name = None;`.
    pub generate_with_setters: bool,
}
374
375impl Default for CodeGenConfig {
376    fn default() -> Self {
377        Self {
378            generate_views: true,
379            preserve_unknown_fields: true,
380            generate_json: false,
381            generate_arbitrary: false,
382            extern_paths: Vec::new(),
383            bytes_fields: Vec::new(),
384            strict_utf8_mapping: false,
385            allow_message_set: false,
386            generate_text: false,
387            emit_register_fn: true,
388            file_per_package: false,
389            type_attributes: Vec::new(),
390            field_attributes: Vec::new(),
391            message_attributes: Vec::new(),
392            enum_attributes: Vec::new(),
393            gate_impls_on_crate_features: false,
394            generate_with_setters: true,
395        }
396    }
397}
398
399impl CodeGenConfig {
400    /// Active [`feature_gates::FeatureGates`] for this config.
401    ///
402    /// Recomputed on each call (cheap — three boolean ANDs); call once at
403    /// the top of a generation function and thread through, or call inline
404    /// at each use site, whichever reads better.
405    pub(crate) fn feature_gates(&self) -> feature_gates::FeatureGates {
406        feature_gates::FeatureGates::for_config(self)
407    }
408}
409
410/// Compute the effective extern path list by starting with user-provided
411/// mappings and adding the default WKT mapping if appropriate.
412///
413/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
414/// is added unless:
415/// - The user already provided an extern_path covering `.google.protobuf`
416/// - Any of the files being generated are in the `google.protobuf` package
417///   (i.e., we're building `buffa-types` itself)
418pub(crate) fn effective_extern_paths(
419    file_descriptors: &[FileDescriptorProto],
420    files_to_generate: &[String],
421    config: &CodeGenConfig,
422) -> Vec<(String, String)> {
423    let mut paths = config.extern_paths.clone();
424
425    // Only an EXACT .google.protobuf mapping suppresses auto-injection.
426    // A sub-package mapping like .google.protobuf.compiler does NOT cover
427    // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
428    // lets both coexist, so we still inject the parent mapping.
429    let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
430
431    if !has_wkt_mapping {
432        // Check if we're generating google.protobuf files ourselves
433        // (e.g., building buffa-types). If so, don't auto-map.
434        let generating_wkts = file_descriptors
435            .iter()
436            .filter(|fd| {
437                fd.name
438                    .as_deref()
439                    .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
440            })
441            .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
442
443        if !generating_wkts {
444            paths.push((
445                ".google.protobuf".to_string(),
446                "::buffa_types::google::protobuf".to_string(),
447            ));
448        }
449    }
450
451    paths
452}
453
454/// Compute the effective file-level extern path list.
455///
456/// File-level mappings route a specific `.proto` file to a Rust module root,
457/// taking priority over the package-level mappings from
458/// [`effective_extern_paths`]. They exist to resolve a structural problem:
459/// `descriptor.proto` is in the same `google.protobuf` package as the
460/// JSON-mappable WKTs (`Timestamp`, `Any`, …), but its types live in
461/// `buffa-descriptor`, not `buffa-types`. A single package-keyed
462/// `.google.protobuf` extern_path can route the package to one crate or the
463/// other; it can't split it. The file-level mapping splits it.
464///
465/// Auto-injected mappings (when not suppressed):
466///
467/// | Proto file | Rust module |
468/// |---|---|
469/// | `google/protobuf/descriptor.proto` | `::buffa_descriptor::generated::descriptor` |
470/// | `google/protobuf/compiler/plugin.proto` | `::buffa_descriptor::generated::compiler` |
471///
472/// Suppression conditions, evaluated **per file**:
473///
474/// - **A user-provided `extern_path` covers the file's package.** That
475///   override has covered the file's types since the package mapping was
476///   introduced; auto-injecting a higher-priority file-level mapping would
477///   silently redirect them away from the user's crate. Matching is via
478///   the same longest-prefix logic the package resolver uses, so both an
479///   exact `.google.protobuf` mapping and a sub-package
480///   `.google.protobuf.compiler` mapping suppress the entries they cover —
481///   `.google.protobuf` suppresses both, `.google.protobuf.compiler`
482///   suppresses only `plugin.proto`.
483/// - **The proto file itself is in `files_to_generate`.** When building
484///   `buffa-descriptor` (or any local copy of `descriptor.proto`), its types
485///   must resolve to the local module, not externally.
486///
487/// Currently internal-only — there is no `CodeGenConfig` field for
488/// user-provided file-level mappings. The user-facing `extern_path` API
489/// remains package-prefix keyed; per-file or per-type overrides may be added
490/// later as a public feature if a concrete need arises.
491pub(crate) fn effective_file_extern_paths(
492    files_to_generate: &[String],
493    config: &CodeGenConfig,
494) -> Vec<(String, String)> {
495    // (proto file path, proto package, Rust module root). The package is
496    // recorded alongside the file so the user-override suppression check
497    // is per-file: a `.google.protobuf.compiler` extern_path covers only
498    // `plugin.proto`, while `.google.protobuf` covers both.
499    const DESCRIPTOR_FILES: [(&str, &str, &str); 2] = [
500        (
501            "google/protobuf/descriptor.proto",
502            "google.protobuf",
503            "::buffa_descriptor::generated::descriptor",
504        ),
505        (
506            "google/protobuf/compiler/plugin.proto",
507            "google.protobuf.compiler",
508            "::buffa_descriptor::generated::compiler",
509        ),
510    ];
511
512    DESCRIPTOR_FILES
513        .into_iter()
514        .filter(|(proto_file, package, _)| {
515            // Yield to a user package-level extern_path that already covers
516            // this file's package: anyone who wrote
517            // `extern_path(".google.protobuf", "::my_crate")` (or a
518            // sub-package mapping) today routes these types to their crate;
519            // the auto-injected file-level mapping must not silently
520            // outrank it.
521            if context::resolve_extern_prefix(package, &config.extern_paths).is_some() {
522                return false;
523            }
524            // Don't externalize a file we're generating locally.
525            !files_to_generate.iter().any(|f| f == proto_file)
526        })
527        .map(|(proto_file, _, rust_module)| (proto_file.to_string(), rust_module.to_string()))
528        .collect()
529}
530
531/// Generate Rust source files from a set of file descriptors.
532///
533/// `files_to_generate` is the set of file names that were explicitly requested
534/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
535/// dependencies may be present in `file_descriptors` but won't produce output
536/// files unless they appear in `files_to_generate`.
537///
538/// Each `.proto` emits up to five content files (kinds with no content
539/// are omitted); each distinct package emits one `<pkg>.mod.rs`
540/// stitcher. Packages are processed in sorted order for deterministic
541/// output.
542pub fn generate(
543    file_descriptors: &[FileDescriptorProto],
544    files_to_generate: &[String],
545    config: &CodeGenConfig,
546) -> Result<Vec<GeneratedFile>, CodeGenError> {
547    let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
548
549    // Group requested files by package. BTreeMap → deterministic output order.
550    let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
551        std::collections::BTreeMap::new();
552    for file_name in files_to_generate {
553        let file_desc = file_descriptors
554            .iter()
555            .find(|f| f.name.as_deref() == Some(file_name.as_str()))
556            .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
557        let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
558        by_package.entry(pkg).or_default().push(file_desc);
559    }
560
561    let mut output = Vec::new();
562    for (package, files) in by_package {
563        generate_package(&ctx, &package, &files, &mut output)?;
564    }
565
566    Ok(output)
567}
568
569/// Generate a module tree that assembles per-package `.mod.rs` files into
570/// nested `pub mod` blocks matching the protobuf package hierarchy.
571///
572/// Each entry is a `(mod_file_name, package)` pair where `package` is the
573/// dot-separated protobuf package name (e.g., `"google.api"`) and
574/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
575/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
576/// content files are reached via `include!` from the stitcher).
577///
578/// `include_mode` controls how `include!` paths are emitted.
579///
580/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
581/// valid when the output is used directly as a module file (`mod.rs`),
582/// invalid when consumed via `include!`.
583pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
584    entries: &[(F, P)],
585    include_mode: IncludeMode<'_>,
586    emit_inner_allow: bool,
587) -> String {
588    use std::collections::BTreeMap;
589    use std::fmt::Write;
590
591    use crate::idents::escape_mod_ident;
592
593    #[derive(Default)]
594    struct ModNode {
595        files: Vec<String>,
596        children: BTreeMap<String, Self>,
597    }
598
599    let mut root = ModNode::default();
600
601    for (file_name, package) in entries {
602        let package = package.as_ref();
603        let pkg_parts: Vec<&str> = if package.is_empty() {
604            vec![]
605        } else {
606            package.split('.').collect()
607        };
608
609        let mut node = &mut root;
610        for seg in &pkg_parts {
611            node = node.children.entry(seg.to_string()).or_default();
612        }
613        node.files.push(file_name.as_ref().to_string());
614    }
615
616    let lints = ALLOW_LINTS.join(", ");
617    let mut out = String::new();
618    let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
619    if emit_inner_allow {
620        let _ = writeln!(out, "#![allow({lints})]");
621    }
622    let _ = writeln!(out);
623
624    fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
625        let indent = "    ".repeat(depth);
626
627        for file in &node.files {
628            match mode {
629                IncludeMode::Relative(prefix) => {
630                    let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
631                }
632                IncludeMode::OutDir => {
633                    let _ = writeln!(
634                        out,
635                        r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
636                    );
637                }
638            }
639        }
640
641        for (name, child) in &node.children {
642            let escaped = escape_mod_ident(name);
643            let _ = writeln!(out, "{indent}#[allow({lints})]");
644            let _ = writeln!(out, "{indent}pub mod {escaped} {{");
645            let _ = writeln!(out, "{indent}    use super::*;");
646            emit(out, child, depth + 1, mode, lints);
647            let _ = writeln!(out, "{indent}}}");
648        }
649    }
650
651    emit(&mut out, &root, 0, include_mode, &lints);
652    out
653}
654
/// Selects the form of the `include!` paths written by
/// [`generate_module_tree`].
#[derive(Debug, Clone, Copy)]
pub enum IncludeMode<'a> {
    /// Emit `include!("<prefix><file>")`, resolved relative to the
    /// including file. The prefix is typically `""` or `"gen/"`.
    Relative(&'a str),
    /// Emit `include!(concat!(env!("OUT_DIR"), "/<file>"))`, for output
    /// written by a `build.rs`.
    OutDir,
}
664
665/// Validate one input descriptor before generating code for it.
666///
667/// Checks, in one walk of the message tree:
668///
669/// - **Reserved field names**: no field starts with `__buffa_` (would clash
670///   with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
671/// - **Module-name conflicts**: no two sibling messages snake_case to the
672///   same module name (e.g. `HTTPRequest` vs `HttpRequest`).
673/// - **Reserved sentinel**: no package segment, message-module name, or
674///   file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
675///   Ancillary types live under `pkg::__buffa::…`; a proto element
676///   emitting an item named `__buffa` at package root would produce
677///   E0428 against `pub mod __buffa`. This is the only name buffa
678///   reserves in user namespace.
679fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
680    use std::collections::HashMap;
681
682    let sentinel = context::SENTINEL_MOD;
683    let package = file.package.as_deref().unwrap_or("");
684    if package.split('.').any(|seg| seg == sentinel) {
685        return Err(CodeGenError::ReservedModuleName {
686            name: sentinel.to_string(),
687            location: format!("package '{package}'"),
688        });
689    }
690    // File-level enums emit `pub enum <name>` at package root with the
691    // proto name preserved verbatim (no PascalCase normalization), so a
692    // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
693    // enums live inside their owner message's module and cannot collide
694    // with the package-root sentinel, so only file-level is checked.
695    for enum_type in &file.enum_type {
696        let name = enum_type.name.as_deref().unwrap_or("");
697        if name == sentinel {
698            return Err(CodeGenError::ReservedModuleName {
699                name: sentinel.to_string(),
700                location: format!("enum '{package}.{name}'"),
701            });
702        }
703    }
704
705    fn walk(
706        messages: &[crate::generated::descriptor::DescriptorProto],
707        scope: &str,
708        sentinel: &str,
709    ) -> Result<(), CodeGenError> {
710        // snake_case module name → original proto name (for conflict diag).
711        let mut seen: HashMap<String, &str> = HashMap::new();
712
713        for msg in messages {
714            let name = msg.name.as_deref().unwrap_or("");
715            let fqn = if scope.is_empty() {
716                name.to_string()
717            } else {
718                format!("{scope}.{name}")
719            };
720
721            for field in &msg.field {
722                if let Some(fname) = &field.name {
723                    if fname.starts_with("__buffa_") {
724                        return Err(CodeGenError::ReservedFieldName {
725                            message_name: fqn,
726                            field_name: fname.clone(),
727                        });
728                    }
729                }
730            }
731
732            let module_name = crate::oneof::to_snake_case(name);
733            if module_name == sentinel {
734                return Err(CodeGenError::ReservedModuleName {
735                    name: sentinel.to_string(),
736                    location: format!("message '{fqn}'"),
737                });
738            }
739            if let Some(existing) = seen.get(&module_name) {
740                return Err(CodeGenError::ModuleNameConflict {
741                    scope: scope.to_string(),
742                    name_a: existing.to_string(),
743                    name_b: name.to_string(),
744                    module_name,
745                });
746            }
747            seen.insert(module_name, name);
748
749            walk(&msg.nested_type, &fqn, sentinel)?;
750        }
751        Ok(())
752    }
753
754    walk(&file.message_type, package, sentinel)
755}
756
/// Per-proto content streams plus the file stem, ready to be formatted.
///
/// Produced by `generate_proto_content` for a single `.proto` file and
/// consumed by `generate_package`, which either writes each non-empty stream
/// to its own content file or appends it in-line to the package sections.
struct ProtoContent {
    /// Content-file stem derived from the proto file path via
    /// `proto_path_to_stem`.
    stem: String,
    /// Items emitted at package root: file-level enums, top-level message
    /// items, and the per-message `pub mod` wrappers.
    owned: TokenStream,
    /// View items collected from every top-level message.
    view: TokenStream,
    /// Oneof items collected from every top-level message.
    oneof: TokenStream,
    /// View-side oneof items collected from every top-level message.
    view_oneof: TokenStream,
    /// Items generated for the file-level `extend` declarations.
    ext: TokenStream,
    /// Candidate `pub use` re-exports targeting the package root (top-level
    /// view structs, file-level extension consts). Filtered against the
    /// package-wide root namespace in [`generate_package_mod`] — the package
    /// can span multiple `.proto` files, so collisions are only knowable at
    /// the stitcher level.
    root_reexports: Vec<message::ReexportCandidate>,
}
772
773/// Generate the per-`.proto` content token streams for one input file.
774/// Each ancillary kind that has no content yields an empty stream and
775/// is dropped at the file-emission stage.
776fn generate_proto_content(
777    ctx: &context::CodeGenContext,
778    current_package: &str,
779    file: &FileDescriptorProto,
780    reg: &mut message::RegistryPaths,
781) -> Result<ProtoContent, CodeGenError> {
782    use crate::idents::make_field_ident;
783    use crate::message::MessageOutput;
784
785    validate_file(file)?;
786
787    let resolver = imports::ImportResolver::new();
788    let features = crate::features::for_file(file);
789
790    let mut owned = TokenStream::new();
791    let mut view = TokenStream::new();
792    let mut oneof = TokenStream::new();
793    let mut view_oneof = TokenStream::new();
794    let mut ext = TokenStream::new();
795    let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
796    let sentinel = make_field_ident(context::SENTINEL_MOD);
797
798    for enum_type in &file.enum_type {
799        let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
800        let enum_fqn = if current_package.is_empty() {
801            enum_rust_name.to_string()
802        } else {
803            format!("{}.{}", current_package, enum_rust_name)
804        };
805        owned.extend(enumeration::generate_enum(
806            ctx,
807            enum_type,
808            enum_rust_name,
809            &enum_fqn,
810            &features,
811            &resolver,
812        )?);
813    }
814
815    for message_type in &file.message_type {
816        let top_level_name = message_type.name.as_deref().unwrap_or("");
817        let proto_fqn = if current_package.is_empty() {
818            top_level_name.to_string()
819        } else {
820            format!("{}.{}", current_package, top_level_name)
821        };
822        let MessageOutput {
823            owned_top,
824            owned_mod,
825            oneof_tree: msg_oneof,
826            view_tree: msg_view,
827            view_oneof_tree: msg_view_oneof,
828            reg: msg_reg,
829        } = message::generate_message(
830            ctx,
831            message_type,
832            current_package,
833            top_level_name,
834            &proto_fqn,
835            &features,
836            &resolver,
837        )?;
838        owned.extend(owned_top);
839        let mod_ident = make_field_ident(&crate::oneof::to_snake_case(top_level_name));
840        for p in msg_reg.json_ext {
841            reg.json_ext.push(quote! { #mod_ident :: #p });
842        }
843        for p in msg_reg.text_ext {
844            reg.text_ext.push(quote! { #mod_ident :: #p });
845        }
846        reg.json_any.extend(msg_reg.json_any);
847        reg.text_any.extend(msg_reg.text_any);
848
849        if !owned_mod.is_empty() {
850            owned.extend(quote! {
851                pub mod #mod_ident {
852                    #[allow(unused_imports)]
853                    use super::*;
854                    #owned_mod
855                }
856            });
857        }
858        oneof.extend(msg_oneof);
859        view.extend(msg_view);
860        view_oneof.extend(msg_view_oneof);
861
862        // Top-level message view → re-export at package root. The leading
863        // `self::` is load-bearing: when consumers nest packages with
864        // `pub mod a { use super::*; pub mod a_b { use super::*; … } }`
865        // (`buffa-build`'s `_include.rs` does this), a parent package's
866        // `__buffa` is in scope via the glob, and Rust's import-resolution
867        // pass treats a glob-imported name as ambiguous against a
868        // **macro-expanded** local one (the `pub mod __buffa` block arrives
869        // via `include!()`), even though a non-macro local definition would
870        // shadow the glob — see rustc E0659. `self::` resolves it
871        // deterministically. `#[doc(inline)]` makes rustdoc render the type's
872        // full page at the natural path instead of a "Re-export of …" stub.
873        if ctx.config.generate_views {
874            let view_ident = format_ident!("{top_level_name}View");
875            root_reexports.push(message::ReexportCandidate {
876                name: view_ident.to_string(),
877                tokens: feature_gates::cfg_block(
878                    quote! {
879                        #[doc(inline)]
880                        pub use self :: #sentinel :: view :: #view_ident;
881                    },
882                    ctx.config.feature_gates().views,
883                ),
884            });
885        }
886    }
887
888    // File-level `extend` declarations → `__buffa::ext::` (depth 2).
889    let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
890        ctx,
891        &file.extension,
892        current_package,
893        2,
894        &features,
895        current_package,
896    )?;
897    ext.extend(file_ext_tokens);
898    for id in file_ext_json {
899        reg.json_ext.push(quote! { #sentinel :: ext :: #id });
900    }
901    for id in file_ext_text {
902        reg.text_ext.push(quote! { #sentinel :: ext :: #id });
903    }
904    // File-level extension consts → re-export at package root. `self::` and
905    // `#[doc(inline)]` for the same reasons as the view re-exports above.
906    for ext_field in &file.extension {
907        let const_ident = extension::extension_const_ident(ext_field.name.as_deref().unwrap_or(""));
908        root_reexports.push(message::ReexportCandidate {
909            name: const_ident.to_string(),
910            tokens: quote! {
911                #[doc(inline)]
912                pub use self :: #sentinel :: ext :: #const_ident;
913            },
914        });
915    }
916
917    Ok(ProtoContent {
918        stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
919        owned,
920        view,
921        oneof,
922        view_oneof,
923        ext,
924        root_reexports,
925    })
926}
927
/// Per-section token streams for one package, ready for the stitcher.
///
/// In per-file mode each section holds `include!("<stem>...rs")` calls; in
/// `file_per_package` mode each holds the actual generated items.
///
/// Each vector has at most one entry per `.proto` file of the package;
/// files that contribute nothing to a section add no entry to it.
#[derive(Default)]
struct PackageSections {
    /// Entries spliced in at package root.
    owned: Vec<TokenStream>,
    /// Entries spliced into `__buffa::view`.
    view: Vec<TokenStream>,
    /// Entries spliced into `__buffa::oneof`.
    oneof: Vec<TokenStream>,
    /// Entries spliced into `__buffa::view::oneof`.
    view_oneof: Vec<TokenStream>,
    /// Entries spliced into `__buffa::ext`.
    ext: Vec<TokenStream>,
}
940
941impl PackageSections {
942    /// Append one proto file's generated items in-line.
943    ///
944    /// Empty streams are skipped so each section's emptiness reflects
945    /// "the package has no content of this kind" — symmetric with the
946    /// per-file branch that filters at file-emission time.
947    fn push_inline(&mut self, pc: ProtoContent) {
948        let push_if_nonempty = |dst: &mut Vec<TokenStream>, ts: TokenStream| {
949            if !ts.is_empty() {
950                dst.push(ts);
951            }
952        };
953        push_if_nonempty(&mut self.owned, pc.owned);
954        push_if_nonempty(&mut self.view, pc.view);
955        push_if_nonempty(&mut self.oneof, pc.oneof);
956        push_if_nonempty(&mut self.view_oneof, pc.view_oneof);
957        push_if_nonempty(&mut self.ext, pc.ext);
958    }
959}
960
961/// Generate all output files for one proto package: up to five content
962/// files per `.proto` (empty ancillary kinds are skipped) plus one
963/// `<pkg>.mod.rs` stitcher, or a single `<pkg>.rs` when
964/// [`CodeGenConfig::file_per_package`] is set.
965fn generate_package(
966    ctx: &context::CodeGenContext,
967    current_package: &str,
968    files: &[&FileDescriptorProto],
969    out: &mut Vec<GeneratedFile>,
970) -> Result<(), CodeGenError> {
971    // Registry paths are package-root-relative; `register_types` lives at
972    // `__buffa::register_types` (one level deep), so each path gets a
973    // single `super::` prefix when emitted into the fn body.
974    let mut reg = message::RegistryPaths::default();
975    let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
976
977    let sections = if ctx.config.file_per_package {
978        let mut sections = PackageSections::default();
979        for file in files {
980            let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
981            root_reexports.append(&mut pc.root_reexports);
982            sections.push_inline(pc);
983        }
984        sections
985    } else {
986        let mut sections = PackageSections::default();
987        for file in files {
988            let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
989            root_reexports.append(&mut pc.root_reexports);
990            let source = file.name.as_deref().unwrap_or("");
991            let stem = pc.stem;
992
993            // Empty ancillary token streams are skipped — neither the
994            // content file nor the stitcher's `include!` is emitted.
995            let emit = |suffix: &str,
996                        kind: GeneratedFileKind,
997                        tokens: TokenStream,
998                        section: &mut Vec<TokenStream>,
999                        out: &mut Vec<GeneratedFile>|
1000             -> Result<(), CodeGenError> {
1001                if tokens.is_empty() {
1002                    return Ok(());
1003                }
1004                let name = format!("{stem}{suffix}.rs");
1005                section.push(quote! { include!(#name); });
1006                out.push(GeneratedFile {
1007                    name,
1008                    package: current_package.to_string(),
1009                    kind,
1010                    content: format_tokens(tokens, source)?,
1011                });
1012                Ok(())
1013            };
1014            emit(
1015                "",
1016                GeneratedFileKind::Owned,
1017                pc.owned,
1018                &mut sections.owned,
1019                out,
1020            )?;
1021            emit(
1022                ".__view",
1023                GeneratedFileKind::View,
1024                pc.view,
1025                &mut sections.view,
1026                out,
1027            )?;
1028            emit(
1029                ".__oneof",
1030                GeneratedFileKind::Oneof,
1031                pc.oneof,
1032                &mut sections.oneof,
1033                out,
1034            )?;
1035            emit(
1036                ".__view_oneof",
1037                GeneratedFileKind::ViewOneof,
1038                pc.view_oneof,
1039                &mut sections.view_oneof,
1040                out,
1041            )?;
1042            emit(
1043                ".__ext",
1044                GeneratedFileKind::Ext,
1045                pc.ext,
1046                &mut sections.ext,
1047                out,
1048            )?;
1049        }
1050        sections
1051    };
1052
1053    let reexport_block = surviving_root_reexports(ctx, files, &reg, root_reexports);
1054
1055    out.push(GeneratedFile {
1056        name: if ctx.config.file_per_package {
1057            package_to_filename(current_package)
1058        } else {
1059            package_to_mod_filename(current_package)
1060        },
1061        package: current_package.to_string(),
1062        kind: GeneratedFileKind::PackageMod,
1063        content: generate_package_mod(ctx, &sections, &reg, &reexport_block)?,
1064    });
1065
1066    Ok(())
1067}
1068
1069/// Filter the candidate package-root re-exports against the package's
1070/// existing root namespace and against each other, returning the surviving
1071/// `pub use` lines.
1072///
1073/// The package root is shared across every `.proto` file in the package, so
1074/// the occupied-name set must be built from *all* of them — a top-level
1075/// message named `FooView` declared in `a.proto` would shadow `Foo`'s view
1076/// re-export from `b.proto`.
1077fn surviving_root_reexports(
1078    ctx: &context::CodeGenContext,
1079    files: &[&FileDescriptorProto],
1080    reg: &message::RegistryPaths,
1081    mut candidates: Vec<message::ReexportCandidate>,
1082) -> TokenStream {
1083    use crate::idents::make_field_ident;
1084    use std::collections::BTreeSet;
1085
1086    // Names already occupied at package root by real items: top-level
1087    // messages, enums, message snake_case modules, and the `__buffa`
1088    // sentinel itself. File-level extension consts live in
1089    // `__buffa::ext::`, not at the root, so they are *candidates* (added
1090    // by `generate_proto_content`) rather than occupants.
1091    let mut occupied: BTreeSet<String> = BTreeSet::new();
1092    occupied.insert(context::SENTINEL_MOD.to_string());
1093    for file in files {
1094        for m in &file.message_type {
1095            let name = m.name.as_deref().unwrap_or("");
1096            occupied.insert(name.to_string());
1097            occupied.insert(crate::oneof::to_snake_case(name));
1098        }
1099        for e in &file.enum_type {
1100            occupied.insert(e.name.as_deref().unwrap_or("").to_string());
1101        }
1102    }
1103
1104    // `register_types`, when emitted, lives at `__buffa::register_types`.
1105    // `self::` and `#[doc(inline)]` for the same reasons as the view
1106    // re-exports above. Same `any(json, text)` gate as the fn itself.
1107    if ctx.config.emit_register_fn && !reg.is_empty() {
1108        let sentinel = make_field_ident(context::SENTINEL_MOD);
1109        let json_or_text = ctx.config.feature_gates().json_or_text();
1110        candidates.push(message::ReexportCandidate {
1111            name: "register_types".to_string(),
1112            tokens: feature_gates::cfg_block_any(
1113                quote! {
1114                    #[doc(inline)]
1115                    pub use self :: #sentinel :: register_types;
1116                },
1117                &json_or_text,
1118            ),
1119        });
1120    }
1121
1122    message::emit_surviving_reexports(candidates, &occupied)
1123}
1124
1125/// Render the per-package stitcher: owned items at root plus the
1126/// `__buffa::{view,oneof,ext,...}` module wrappers, followed by the
1127/// surviving package-root `pub use` re-exports.
1128fn generate_package_mod(
1129    ctx: &context::CodeGenContext,
1130    sections: &PackageSections,
1131    reg: &message::RegistryPaths,
1132    root_reexports: &TokenStream,
1133) -> Result<String, CodeGenError> {
1134    use crate::idents::make_field_ident;
1135
1136    let owned = &sections.owned;
1137    let view = &sections.view;
1138    let view_oneof = &sections.view_oneof;
1139    let oneof = &sections.oneof;
1140    let ext = &sections.ext;
1141
1142    // Each ancillary module is emitted only when its section has
1143    // content. The natural-path re-exports outside `__buffa` target
1144    // these modules — they are emitted only when their target items
1145    // exist, so the conditions align and re-exports never reference
1146    // a missing module.
1147    let view_oneof_mod = if !view_oneof.is_empty() {
1148        quote! {
1149            pub mod oneof {
1150                #[allow(unused_imports)]
1151                use super::*;
1152                #(#view_oneof)*
1153            }
1154        }
1155    } else {
1156        TokenStream::new()
1157    };
1158
1159    // `view_oneof` is only populated for messages that have oneofs, and
1160    // every message also contributes to `view`, so `!view.is_empty()` is
1161    // sufficient — `view_oneof` non-empty implies `view` non-empty.
1162    debug_assert!(view_oneof.is_empty() || !view.is_empty());
1163    let view_mod = if ctx.config.generate_views && !view.is_empty() {
1164        feature_gates::cfg_block(
1165            quote! {
1166                pub mod view {
1167                    #[allow(unused_imports)]
1168                    use super::*;
1169                    #(#view)*
1170                    #view_oneof_mod
1171                }
1172            },
1173            ctx.config.feature_gates().views,
1174        )
1175    } else {
1176        TokenStream::new()
1177    };
1178
1179    let oneof_mod = if !oneof.is_empty() {
1180        quote! {
1181            pub mod oneof {
1182                #[allow(unused_imports)]
1183                use super::*;
1184                #(#oneof)*
1185            }
1186        }
1187    } else {
1188        TokenStream::new()
1189    };
1190
1191    let ext_mod = if !ext.is_empty() {
1192        quote! {
1193            pub mod ext {
1194                #[allow(unused_imports)]
1195                use super::*;
1196                #(#ext)*
1197            }
1198        }
1199    } else {
1200        TokenStream::new()
1201    };
1202
1203    let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
1204        let gates = ctx.config.feature_gates();
1205        // When the gated consts (`__*_JSON_ANY` / `__*_TEXT_ANY`) are
1206        // `#[cfg(feature = "...")]`, each registration statement that
1207        // references them gets the same gate. `#[cfg]` on a statement is
1208        // allowed; the call disappears with the const.
1209        let json_regs = reg
1210            .json_any
1211            .iter()
1212            .map(|p| {
1213                feature_gates::cfg_block(quote! { reg.register_json_any(super::#p); }, gates.json)
1214            })
1215            .chain(reg.json_ext.iter().map(|p| {
1216                feature_gates::cfg_block(quote! { reg.register_json_ext(super::#p); }, gates.json)
1217            }));
1218        let text_regs = reg
1219            .text_any
1220            .iter()
1221            .map(|p| {
1222                feature_gates::cfg_block(quote! { reg.register_text_any(super::#p); }, gates.text)
1223            })
1224            .chain(reg.text_ext.iter().map(|p| {
1225                feature_gates::cfg_block(quote! { reg.register_text_ext(super::#p); }, gates.text)
1226            }));
1227        // When gating, a feature subset may leave one bucket of statements
1228        // cfg'd out while the other survives — `reg` is still used. But if
1229        // `register_types` itself is gated on `any(json, text)` (below),
1230        // the only reachable bodies have at least one statement, so `reg`
1231        // can't be unused. Keep `#[allow(unused_variables)]` defensively
1232        // anyway: it's harmless, and the alternative — proving the
1233        // invariant holds across future statement-shape changes — is
1234        // brittle.
1235        let allow_unused = if ctx.config.gate_impls_on_crate_features {
1236            quote! { #[allow(unused_variables)] }
1237        } else {
1238            quote! {}
1239        };
1240        // The fn is useless without at least one of the gated modes that
1241        // populate it — and `::buffa::type_registry::TypeRegistry` may
1242        // become feature-gated in the runtime in a future release. Gate the
1243        // fn on `any(...)` of whichever modes are active so it disappears
1244        // alongside the last entry.
1245        feature_gates::cfg_block_any(
1246            quote! {
1247                /// Register this package's `Any` type entries and extension entries.
1248                #allow_unused
1249                pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
1250                    #(#json_regs)*
1251                    #(#text_regs)*
1252                }
1253            },
1254            &gates.json_or_text(),
1255        )
1256    } else {
1257        TokenStream::new()
1258    };
1259
1260    let sentinel = make_field_ident(context::SENTINEL_MOD);
1261    // The whole `pub mod __buffa { ... }` wrapper is itself omitted
1262    // when none of its inner modules or `register_types` exist.
1263    let buffa_mod = if view_mod.is_empty()
1264        && oneof_mod.is_empty()
1265        && ext_mod.is_empty()
1266        && register_fn.is_empty()
1267    {
1268        TokenStream::new()
1269    } else {
1270        let allow = allow_lints_attr();
1271        quote! {
1272            #allow
1273            pub mod #sentinel {
1274                #[allow(unused_imports)]
1275                use super::*;
1276                #view_mod
1277                #oneof_mod
1278                #ext_mod
1279                #register_fn
1280            }
1281        }
1282    };
1283
1284    let tokens = quote! {
1285        #(#owned)*
1286        #buffa_mod
1287        #root_reexports
1288    };
1289
1290    format_tokens(tokens, "")
1291}
1292
1293/// Format a token stream into a generated-file string with the standard
1294/// header comment.
1295fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
1296    let syntax_tree =
1297        syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
1298    let formatted = prettyplease::unparse(&syntax_tree);
1299    let source_line = if source.is_empty() {
1300        String::new()
1301    } else {
1302        format!("// source: {source}\n")
1303    };
1304    Ok(format!(
1305        "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
1306    ))
1307}
1308
1309/// Convert a proto package name to its `.mod.rs` stitcher filename.
1310///
1311/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
1312/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
1313/// filename stem — `package __buffa;` is already rejected by
1314/// `validate_file`, so the unnamed-package stitcher cannot
1315/// collide with any real package's.
1316pub fn package_to_mod_filename(package: &str) -> String {
1317    if package.is_empty() {
1318        format!("{}.mod.rs", context::SENTINEL_MOD)
1319    } else {
1320        format!("{package}.mod.rs")
1321    }
1322}
1323
1324/// Convert a proto package name to its [`file_per_package`] output filename.
1325///
1326/// e.g., `"google.protobuf"` → `"google.protobuf.rs"`. The unnamed
1327/// package uses [`SENTINEL_MOD`](context::SENTINEL_MOD) — same
1328/// collision-avoidance as [`package_to_mod_filename`].
1329///
1330/// [`file_per_package`]: CodeGenConfig::file_per_package
1331pub fn package_to_filename(package: &str) -> String {
1332    if package.is_empty() {
1333        format!("{}.rs", context::SENTINEL_MOD)
1334    } else {
1335        format!("{package}.rs")
1336    }
1337}
1338
/// Derive the content-file stem from a `.proto` file path.
///
/// One trailing `.proto` is removed if present, and every `/` becomes `.`,
/// e.g. `"google/protobuf/timestamp.proto"` → `"google.protobuf.timestamp"`.
/// Content files append `""`, `".__view"`, `".__oneof"`, `".__view_oneof"`,
/// or `".__ext"` plus `".rs"` to this stem — emitted only for kinds with
/// non-empty content.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let trimmed = match proto_path.strip_suffix(".proto") {
        Some(rest) => rest,
        None => proto_path,
    };
    trimmed.split('/').collect::<Vec<_>>().join(".")
}
1349
1350/// Merge downstream [`Companion`](GeneratedFileKind::Companion) files into
1351/// the per-package stitcher produced by [`generate`].
1352///
1353/// For each companion file this function locates the
1354/// [`PackageMod`](GeneratedFileKind::PackageMod) entry in `files` with a
1355/// matching package and appends `include!("<name>");` at file scope after
1356/// buffa's own output — at package root, alongside the owned message types,
1357/// not under `__buffa::`. The companion files themselves are appended to
1358/// `files` so that build integrations can write everything to disk in one
1359/// pass.
1360///
1361/// **Call this once per build**; it does not deduplicate, so a second call
1362/// with the same companions emits a second `include!` for each, which fails
1363/// to compile downstream with a duplicate-definition error.
1364///
1365/// `name` must be a bare-sibling filename — the same convention buffa uses
1366/// for its own `include!` calls, so it resolves relative to the stitcher
1367/// without any `OUT_DIR` prefix. Names must not contain `"`, `\`, `/`, or
1368/// newlines (the function `debug_assert!`s this in debug builds), and must
1369/// not collide with any of buffa's own generated filenames for the same
1370/// package (`<stem>.rs`, `<stem>.__view.rs`, etc.) — pick an unused suffix
1371/// such as `<stem>.__myplugin.rs`.
1372///
1373/// Companion files with no matching `PackageMod` (e.g. for a package buffa
1374/// did not generate any output for) are still appended to `files` but no
1375/// `include!` is emitted; the caller is responsible for wiring them up. If
1376/// you don't expect orphans, check that every companion's `package` appears
1377/// in `files` as a `PackageMod` after calling.
1378pub fn apply_companions(files: &mut Vec<GeneratedFile>, companions: Vec<GeneratedFile>) {
1379    for comp in &companions {
1380        debug_assert!(
1381            !comp.name.contains(['"', '\\', '/', '\n']),
1382            "companion file name {:?} contains a character that would break \
1383             the generated include!() literal or its bare-sibling resolution",
1384            comp.name
1385        );
1386        if let Some(pkg_mod) = files
1387            .iter_mut()
1388            .find(|f| f.kind == GeneratedFileKind::PackageMod && f.package == comp.package)
1389        {
1390            pkg_mod
1391                .content
1392                .push_str(&format!("include!(\"{}\");\n", comp.name));
1393        }
1394    }
1395    files.extend(companions);
1396}
1397
/// Code generation error.
///
/// The enum is `#[non_exhaustive]`, so downstream `match`es need a wildcard
/// arm. The `Display` text of each variant comes from its `#[error(...)]`
/// attribute.
#[derive(Debug, Clone, thiserror::Error)]
#[non_exhaustive]
pub enum CodeGenError {
    /// A required field was absent in a descriptor.
    ///
    /// The `&'static str` names the missing field for diagnostics.
    #[error("missing required descriptor field: {0}")]
    MissingField(&'static str),
    /// A resolved type path string could not be parsed as a Rust type.
    #[error("invalid Rust type path: '{0}'")]
    InvalidTypePath(String),
    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
    ///
    /// The payload is the parser's error message.
    #[error("generated code failed to parse as Rust: {0}")]
    InvalidSyntax(String),
    /// A requested file was not present in the descriptor set.
    #[error("file_to_generate '{0}' not found in descriptor set")]
    FileNotFound(String),
    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
    /// resolved to a known descriptor field).
    #[error("codegen error: {0}")]
    Other(String),
    /// A proto field name uses the `__buffa_` reserved prefix, which would
    /// conflict with buffa's internal generated fields.
    #[error(
        "reserved field name '{field_name}' in message '{message_name}': \
             proto field names starting with '__buffa_' conflict with buffa's \
             internal fields"
    )]
    ReservedFieldName {
        /// Dotted name of the message that declares the field, prefixed by
        /// its package and any enclosing messages.
        message_name: String,
        /// The offending proto field name.
        field_name: String,
    },
    /// Two sibling messages produce the same Rust module name after
    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
    /// become `pub mod http_request`).
    #[error(
        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
         both produce module '{module_name}'"
    )]
    ModuleNameConflict {
        /// Scope that contains both messages: the package for top-level
        /// messages, or the enclosing message's dotted name for nested ones.
        scope: String,
        /// Proto name of the message seen first.
        name_a: String,
        /// Proto name of the later message that collides with it.
        name_b: String,
        /// The snake_case module name both messages map to.
        module_name: String,
    },
    /// A proto package segment, message name, or file-level enum name
    /// would emit a Rust item matching the reserved sentinel `__buffa`.
    ///
    /// This is the only name buffa reserves in user namespace. Resolve by
    /// renaming the proto element.
    ///
    /// `name` is the reserved name; `location` describes the offending
    /// element, e.g. `package '…'`, `enum '…'`, or `message '…'`.
    #[error(
        "reserved name '{name}' at {location}: this name is reserved for \
         buffa's generated ancillary types (views, oneof enums, \
         extensions). Rename the proto element."
    )]
    ReservedModuleName { name: String, location: String },
    /// The input contains a message with `option message_set_wire_format = true`
    /// but [`CodeGenConfig::allow_message_set`] was not set.
    #[error(
        "message '{message_name}' uses `option message_set_wire_format = true` \
         but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
         wire format — set allow_message_set(true) if this is intentional"
    )]
    MessageSetNotSupported {
        /// Name of the message that sets the option.
        message_name: String,
    },
    /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
    /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
    /// could not be parsed as a Rust attribute.
    #[error(
        "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
         Rust attribute ({detail})"
    )]
    InvalidCustomAttribute {
        /// Path the attribute was configured for.
        path: String,
        /// The attribute string that failed to parse.
        attribute: String,
        /// Explanation of the failure, shown in parentheses in the message.
        detail: String,
    },
}
1476
1477#[cfg(test)]
1478mod tests;