Skip to main content

buffa_codegen/
lib.rs

//! Shared code generation logic for buffa.
//!
//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
//! that uses the `buffa` runtime.
//!
//! It is used by:
//! - `protoc-gen-buffa` (protoc plugin)
//! - `buffa-build` (build.rs integration)
//!
//! # Architecture
//!
//! The code generator is intentionally decoupled from how descriptors are
//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
//! Rust source strings. This means:
//!
//! - It doesn't parse `.proto` files.
//! - It doesn't invoke `protoc`.
//! - It doesn't do import resolution or name linking.
//!
//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod features;
29#[doc(hidden)]
30pub use buffa_descriptor::generated;
31pub mod idents;
32pub(crate) mod impl_message;
33pub(crate) mod impl_text;
34pub(crate) mod imports;
35pub(crate) mod message;
36pub(crate) mod oneof;
37pub(crate) mod view;
38
39use crate::generated::descriptor::FileDescriptorProto;
40use proc_macro2::TokenStream;
41use quote::{format_ident, quote};
42
/// Lint names that generated code opts out of wherever a generated module
/// begins.
///
/// A single shared list keeps every consumer in agreement:
/// [`generate_module_tree`], the per-package `.mod.rs` stitcher, and the
/// `_include.rs` writer in `buffa-build`.
pub const ALLOW_LINTS: &[&str] = &[
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    // References between protos of one package always go through the
    // canonical `super::super::__buffa::view::…` path, even when the target
    // sits in the same generated module. The bare name would resolve too,
    // but the canonical path stays stable if a sibling proto defines a
    // natural-path re-export with the same name.
    "unused_qualifications",
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // `message View { message Inner }` in a user proto yields
    // `__buffa::view::view::InnerView` — harmless, but it trips this lint.
    "clippy::module_inception",
];
66
67/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
68pub fn allow_lints_attr() -> TokenStream {
69    let lints: Vec<TokenStream> = ALLOW_LINTS
70        .iter()
71        .map(|l| syn::parse_str(l).expect("lint name parses as path"))
72        .collect();
73    quote! { #[allow( #(#lints),* )] }
74}
75
76/// One generated output file.
77///
78/// Each `.proto` produces five **content files** (`<stem>.rs`,
79/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
80/// `<stem>.__ext.rs`) and each proto package produces one
81/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
82/// and authors the `pub mod __buffa { … }` ancillary tree.
83/// See `DESIGN.md` → "Generated code layout".
84///
85/// Consumers normally only need to wire up the
86/// [`GeneratedFileKind::PackageMod`] entries (one per package); the five
87/// per-proto content kinds are reached transitively via `include!` from
88/// the stitcher. Write all files to disk; build a module tree from only
89/// the `PackageMod` ones.
90///
91/// With [`CodeGenConfig::file_per_package`] set, the per-proto content
92/// kinds are not emitted at all — the single `<dotted.pkg>.rs` (still
93/// kind `PackageMod`) inlines what the stitcher would `include!`.
94#[derive(Debug)]
95pub struct GeneratedFile {
96    /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
97    pub name: String,
98    /// The proto package this file belongs to.
99    pub package: String,
100    /// What this file contains. Build integrations only need to wire up
101    /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
102    /// via `include!` from there.
103    pub kind: GeneratedFileKind,
104    /// The generated Rust source code.
105    pub content: String,
106}
107
/// Role a [`GeneratedFile`] plays in the generated layout.
///
/// For each input `.proto`, [`generate`] emits one file of each per-proto
/// content kind — [`Owned`](Self::Owned), [`View`](Self::View),
/// [`Oneof`](Self::Oneof), [`ViewOneof`](Self::ViewOneof) and
/// [`Ext`](Self::Ext) — and, for each package, one
/// [`PackageMod`](Self::PackageMod) stitcher. The content kinds only have to
/// be written to disk next to the stitcher, which reaches them via
/// `include!`; `PackageMod` is the only kind a build integration wires up.
/// With [`CodeGenConfig::file_per_package`] enabled, `PackageMod` is the only
/// kind emitted.
///
/// [`Companion`](Self::Companion) is never produced by [`generate`].
/// Downstream code generators create `Companion` files on their own and fold
/// them into buffa's output with [`apply_companions`].
///
/// The enum is `#[non_exhaustive]`: always include a wildcard arm when
/// matching so that new kinds can be introduced without a major version
/// bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GeneratedFileKind {
    /// `<stem>.rs` — owned message structs and enums.
    Owned,
    /// `<stem>.__view.rs` — view structs.
    View,
    /// `<stem>.__oneof.rs` — owned oneof enums.
    Oneof,
    /// `<stem>.__view_oneof.rs` — view oneof enums.
    ViewOneof,
    /// `<stem>.__ext.rs` — file-level proto-extension consts, i.e. the
    /// `pub const` `ExtensionDescriptor` items generated from `extend`
    /// blocks. Unrelated to [`Companion`](Self::Companion), which carries
    /// content supplied by downstream generators.
    Ext,
    /// `<dotted.pkg>.mod.rs` — the per-package stitcher, and the only file a
    /// build system has to wire up directly.
    PackageMod,
    /// Additional per-proto content contributed by a downstream code
    /// generator (service stubs, extra trait impls, etc.) that ships
    /// together with buffa's output.
    ///
    /// [`generate`] never produces this kind. A downstream generator builds
    /// these files itself and hands them to [`apply_companions`], which
    /// appends one `include!` per companion at file scope of the matching
    /// package's [`PackageMod`](Self::PackageMod). The include lands after
    /// buffa's own output, at package root next to the owned message types
    /// (**not** inside the `__buffa::` sentinel module), so `pub` items
    /// declared in a companion file are visible at `crate::<pkg>::*`.
    ///
    /// Unrelated to [`Ext`](Self::Ext), the buffa-generated file that holds
    /// protobuf `extend` consts.
    Companion,
}
160
/// Options that control what the code generator emits.
///
/// `#[non_exhaustive]`: start from [`Default::default`] and assign the
/// fields you need rather than using a struct literal from another crate.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Emit borrowed view types (`MyMessageView<'a>`) alongside the owned
    /// types.
    pub generate_views: bool,
    /// Keep unknown fields (default: true).
    pub preserve_unknown_fields: bool,
    /// Derive `serde::Serialize` / `serde::Deserialize` on generated message
    /// structs and enum types, and add `#[serde(with = "...")]` attributes
    /// for the special scalar encodings of proto3 JSON (int64 as a quoted
    /// string, bytes as base64, etc.).
    ///
    /// With this enabled, the downstream crate has to depend on `serde` and
    /// turn on the `buffa/json` feature so the runtime helpers exist.
    ///
    /// Oneof fields get `#[serde(flatten)]` plus custom `Serialize` /
    /// `Deserialize` impls, so every variant shows up as a top-level JSON
    /// field (the inline oneof encoding of proto3 JSON).
    pub generate_json: bool,
    /// Emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types.
    ///
    /// With this enabled, the downstream crate has to declare `arbitrary` as
    /// an optional dependency and turn on the `buffa/arbitrary` feature. The
    /// Cargo feature gating `arbitrary` in the downstream crate must be
    /// called exactly `"arbitrary"`: the generated `cfg_attr` hard-codes that
    /// literal and offers no way to change it. The same holds for the
    /// struct-level `derive(Arbitrary)` and for the per-field
    /// `#[arbitrary(with = ...)]` attributes on `bytes_fields`-typed fields.
    ///
    /// Because `bytes::Bytes` has no `Arbitrary` impl, `bytes_fields`-typed
    /// fields receive `#[arbitrary(with = ...)]` pointing at helpers in
    /// `::buffa::__private`. This covers singular, optional, and repeated
    /// bytes fields. Map values stay `Vec<u8>` whatever `bytes_fields` says,
    /// so they need no special treatment.
    pub generate_arbitrary: bool,
    /// Mappings from proto paths to externally defined Rust modules.
    ///
    /// An entry pairs a fully-qualified protobuf path prefix (e.g.
    /// `".my.common"`) with a Rust module path (e.g. `"::common_protos"`).
    /// Types below that proto prefix are referenced at the extern Rust path
    /// rather than generated, so a shared proto package can be compiled once
    /// in its own crate and used from other crates.
    ///
    /// Well-known types (`google.protobuf.*`) map to
    /// `::buffa_types::google::protobuf::*` automatically; no explicit entry
    /// is required. To substitute a custom implementation, add an
    /// `extern_path` for `.google.protobuf` that points at your crate.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields are generated
    /// as `bytes::Bytes` rather than `Vec<u8>`.
    ///
    /// Entries are proto path prefixes: `".my.pkg.MyMessage.data"` selects
    /// one field, `"."` selects all bytes fields. Matching is by prefix,
    /// which is why `"."` covers every bytes field of every message.
    pub bytes_fields: Vec<String>,
    /// Respect `features.utf8_validation = NONE` by generating `Vec<u8>` /
    /// `&[u8]` for the affected string fields in place of `String` / `&str`.
    ///
    /// `false` (the default): every string field is a `String` and buffa
    /// **validates UTF-8 on decode** — stricter than proto2 demands, but
    /// ergonomic and safe.
    ///
    /// `true`: string fields with `utf8_validation = NONE` (by default all
    /// proto2 strings, plus editions fields that opt into `NONE`) turn into
    /// `Vec<u8>` / `&[u8]`. Decoding performs no validation; each call site
    /// chooses between `std::str::from_utf8` (checked) and
    /// `from_utf8_unchecked` (trusted-input fast path). No other Rust mapping
    /// is sound when strings may really hold non-UTF-8 bytes.
    ///
    /// **This is a breaking change for proto2** — turn it on only for new
    /// code, or once profiling shows UTF-8 validation to be a bottleneck.
    pub strict_utf8_mapping: bool,
    /// Accept `option message_set_wire_format = true` on input messages.
    ///
    /// MessageSet is a legacy, Google-internal wire format in which each
    /// extension is wrapped in a group structure rather than encoded with a
    /// regular field tag. With `false` (the default) such a message is a
    /// codegen error. The flag makes MessageSet use an explicit choice, as
    /// the format is obsolete except for interop with very old Google protos.
    pub allow_message_set: bool,
    /// Emit `impl buffa::text::TextFormat` on generated message structs,
    /// enabling textproto (human-readable text format) encoding/decoding.
    ///
    /// With this enabled, the downstream crate has to turn on the
    /// `buffa/text` feature for the runtime encoder/decoder.
    pub generate_text: bool,
    /// Whether the per-package `.mod.rs` stitcher contains
    /// `__buffa::register_types(&mut TypeRegistry)`.
    ///
    /// Defaults to `true`. That fn collects the `Any` type entries and the
    /// extension entries of every message in the package. Use `false` in
    /// crates that use neither extensions nor `Any`, or that register by
    /// hand (e.g. `register_wkt_types` in `buffa-types`, which knows about
    /// the JSON-Any `is_wkt` special-casing that the generic fn lacks). Only
    /// the aggregating fn goes away; the per-message `__*_JSON_ANY` /
    /// `__*_TEXT_ANY` consts are emitted either way.
    pub emit_register_fn: bool,
    /// Produce one `<dotted.package>.rs` per proto package in place of the
    /// per-proto-file content set and its `<pkg>.mod.rs` stitcher.
    ///
    /// That single file inlines whatever the stitcher would have
    /// `include!`d and yields the same `__buffa::{view,oneof,ext,...}`
    /// module structure. It targets Buf Schema Registry generated SDKs,
    /// whose `lib.rs` synthesis derives the module tree from
    /// `<dotted.package>.rs` filenames.
    ///
    /// With `strategy: directory`, each invocation sees the files of just
    /// one directory, so the input module has to be
    /// `PACKAGE_DIRECTORY_MATCH`-clean (one package per directory) for the
    /// output to be complete. BSR-hosted modules meet this by lint default.
    /// When a package is spread over several directories, each invocation
    /// writes its own `<pkg>.rs` and the last write wins — silent partial
    /// output, not a codegen error.
    pub file_per_package: bool,
    /// Extra attributes added to generated types (messages and enums).
    ///
    /// Entries are `(proto_path, attribute)`. `proto_path` is a prefix of
    /// the fully-qualified proto name: `"."` matches all types, `".my.pkg"`
    /// matches the types of that package, `".my.pkg.MyMessage"` matches one
    /// type. `attribute` is the raw Rust attribute text, e.g.
    /// `"#[derive(serde::Serialize)]"`.
    pub type_attributes: Vec<(String, String)>,
    /// Extra attributes added to generated struct fields.
    ///
    /// Entries are `(proto_path, attribute)`. `proto_path` is a prefix of
    /// the fully-qualified field path (e.g. `".my.pkg.MyMessage.my_field"`);
    /// `"."` matches all fields.
    pub field_attributes: Vec<(String, String)>,
    /// Extra attributes added to generated message structs but not to enums.
    ///
    /// Paths match as for `type_attributes`; the attribute is applied to
    /// message structs only, never to enum types. Handy for struct-only
    /// attributes such as `#[serde(default)]`.
    pub message_attributes: Vec<(String, String)>,
    /// Extra attributes added to generated enum types but not to messages.
    ///
    /// Paths match as for `type_attributes`; the attribute is applied to
    /// enum types only. Handy for enum-only attributes such as
    /// `#[derive(strum::EnumIter)]` when the same attribute should not land
    /// on every message in the matched scope.
    pub enum_attributes: Vec<(String, String)>,
}
304
305impl Default for CodeGenConfig {
306    fn default() -> Self {
307        Self {
308            generate_views: true,
309            preserve_unknown_fields: true,
310            generate_json: false,
311            generate_arbitrary: false,
312            extern_paths: Vec::new(),
313            bytes_fields: Vec::new(),
314            strict_utf8_mapping: false,
315            allow_message_set: false,
316            generate_text: false,
317            emit_register_fn: true,
318            file_per_package: false,
319            type_attributes: Vec::new(),
320            field_attributes: Vec::new(),
321            message_attributes: Vec::new(),
322            enum_attributes: Vec::new(),
323        }
324    }
325}
326
327/// Compute the effective extern path list by starting with user-provided
328/// mappings and adding the default WKT mapping if appropriate.
329///
330/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
331/// is added unless:
332/// - The user already provided an extern_path covering `.google.protobuf`
333/// - Any of the files being generated are in the `google.protobuf` package
334///   (i.e., we're building `buffa-types` itself)
335pub(crate) fn effective_extern_paths(
336    file_descriptors: &[FileDescriptorProto],
337    files_to_generate: &[String],
338    config: &CodeGenConfig,
339) -> Vec<(String, String)> {
340    let mut paths = config.extern_paths.clone();
341
342    // Only an EXACT .google.protobuf mapping suppresses auto-injection.
343    // A sub-package mapping like .google.protobuf.compiler does NOT cover
344    // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
345    // lets both coexist, so we still inject the parent mapping.
346    let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
347
348    if !has_wkt_mapping {
349        // Check if we're generating google.protobuf files ourselves
350        // (e.g., building buffa-types). If so, don't auto-map.
351        let generating_wkts = file_descriptors
352            .iter()
353            .filter(|fd| {
354                fd.name
355                    .as_deref()
356                    .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
357            })
358            .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
359
360        if !generating_wkts {
361            paths.push((
362                ".google.protobuf".to_string(),
363                "::buffa_types::google::protobuf".to_string(),
364            ));
365        }
366    }
367
368    paths
369}
370
371/// Generate Rust source files from a set of file descriptors.
372///
373/// `files_to_generate` is the set of file names that were explicitly requested
374/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
375/// dependencies may be present in `file_descriptors` but won't produce output
376/// files unless they appear in `files_to_generate`.
377///
378/// Each `.proto` emits five content files; each distinct package emits one
379/// `<pkg>.mod.rs` stitcher. Packages are processed in sorted order for
380/// deterministic output.
381pub fn generate(
382    file_descriptors: &[FileDescriptorProto],
383    files_to_generate: &[String],
384    config: &CodeGenConfig,
385) -> Result<Vec<GeneratedFile>, CodeGenError> {
386    let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
387
388    // Group requested files by package. BTreeMap → deterministic output order.
389    let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
390        std::collections::BTreeMap::new();
391    for file_name in files_to_generate {
392        let file_desc = file_descriptors
393            .iter()
394            .find(|f| f.name.as_deref() == Some(file_name.as_str()))
395            .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
396        let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
397        by_package.entry(pkg).or_default().push(file_desc);
398    }
399
400    let mut output = Vec::new();
401    for (package, files) in by_package {
402        generate_package(&ctx, &package, &files, &mut output)?;
403    }
404
405    Ok(output)
406}
407
408/// Generate a module tree that assembles per-package `.mod.rs` files into
409/// nested `pub mod` blocks matching the protobuf package hierarchy.
410///
411/// Each entry is a `(mod_file_name, package)` pair where `package` is the
412/// dot-separated protobuf package name (e.g., `"google.api"`) and
413/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
414/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
415/// content files are reached via `include!` from the stitcher).
416///
417/// `include_mode` controls how `include!` paths are emitted.
418///
419/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
420/// valid when the output is used directly as a module file (`mod.rs`),
421/// invalid when consumed via `include!`.
422pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
423    entries: &[(F, P)],
424    include_mode: IncludeMode<'_>,
425    emit_inner_allow: bool,
426) -> String {
427    use std::collections::BTreeMap;
428    use std::fmt::Write;
429
430    use crate::idents::escape_mod_ident;
431
432    #[derive(Default)]
433    struct ModNode {
434        files: Vec<String>,
435        children: BTreeMap<String, Self>,
436    }
437
438    let mut root = ModNode::default();
439
440    for (file_name, package) in entries {
441        let package = package.as_ref();
442        let pkg_parts: Vec<&str> = if package.is_empty() {
443            vec![]
444        } else {
445            package.split('.').collect()
446        };
447
448        let mut node = &mut root;
449        for seg in &pkg_parts {
450            node = node.children.entry(seg.to_string()).or_default();
451        }
452        node.files.push(file_name.as_ref().to_string());
453    }
454
455    let lints = ALLOW_LINTS.join(", ");
456    let mut out = String::new();
457    let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
458    if emit_inner_allow {
459        let _ = writeln!(out, "#![allow({lints})]");
460    }
461    let _ = writeln!(out);
462
463    fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
464        let indent = "    ".repeat(depth);
465
466        for file in &node.files {
467            match mode {
468                IncludeMode::Relative(prefix) => {
469                    let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
470                }
471                IncludeMode::OutDir => {
472                    let _ = writeln!(
473                        out,
474                        r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
475                    );
476                }
477            }
478        }
479
480        for (name, child) in &node.children {
481            let escaped = escape_mod_ident(name);
482            let _ = writeln!(out, "{indent}#[allow({lints})]");
483            let _ = writeln!(out, "{indent}pub mod {escaped} {{");
484            let _ = writeln!(out, "{indent}    use super::*;");
485            emit(out, child, depth + 1, mode, lints);
486            let _ = writeln!(out, "{indent}}}");
487        }
488    }
489
490    emit(&mut out, &root, 0, include_mode, &lints);
491    out
492}
493
/// Form of the `include!` paths written by [`generate_module_tree`].
#[derive(Debug, Clone, Copy)]
pub enum IncludeMode<'a> {
    /// Path relative to the including file: `include!("<prefix><file>")`.
    /// Typical prefixes are `""` and `"gen/"`.
    Relative(&'a str),
    /// Path under the build output directory, for build.rs output:
    /// `include!(concat!(env!("OUT_DIR"), "/<file>"))`.
    OutDir,
}
503
504/// Validate one input descriptor before generating code for it.
505///
506/// Checks, in one walk of the message tree:
507///
508/// - **Reserved field names**: no field starts with `__buffa_` (would clash
509///   with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
510/// - **Module-name conflicts**: no two sibling messages snake_case to the
511///   same module name (e.g. `HTTPRequest` vs `HttpRequest`).
512/// - **Reserved sentinel**: no package segment, message-module name, or
513///   file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
514///   Ancillary types live under `pkg::__buffa::…`; a proto element
515///   emitting an item named `__buffa` at package root would produce
516///   E0428 against `pub mod __buffa`. This is the only name buffa
517///   reserves in user namespace.
518fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
519    use std::collections::HashMap;
520
521    let sentinel = context::SENTINEL_MOD;
522    let package = file.package.as_deref().unwrap_or("");
523    if package.split('.').any(|seg| seg == sentinel) {
524        return Err(CodeGenError::ReservedModuleName {
525            name: sentinel.to_string(),
526            location: format!("package '{package}'"),
527        });
528    }
529    // File-level enums emit `pub enum <name>` at package root with the
530    // proto name preserved verbatim (no PascalCase normalization), so a
531    // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
532    // enums live inside their owner message's module and cannot collide
533    // with the package-root sentinel, so only file-level is checked.
534    for enum_type in &file.enum_type {
535        let name = enum_type.name.as_deref().unwrap_or("");
536        if name == sentinel {
537            return Err(CodeGenError::ReservedModuleName {
538                name: sentinel.to_string(),
539                location: format!("enum '{package}.{name}'"),
540            });
541        }
542    }
543
544    fn walk(
545        messages: &[crate::generated::descriptor::DescriptorProto],
546        scope: &str,
547        sentinel: &str,
548    ) -> Result<(), CodeGenError> {
549        // snake_case module name → original proto name (for conflict diag).
550        let mut seen: HashMap<String, &str> = HashMap::new();
551
552        for msg in messages {
553            let name = msg.name.as_deref().unwrap_or("");
554            let fqn = if scope.is_empty() {
555                name.to_string()
556            } else {
557                format!("{scope}.{name}")
558            };
559
560            for field in &msg.field {
561                if let Some(fname) = &field.name {
562                    if fname.starts_with("__buffa_") {
563                        return Err(CodeGenError::ReservedFieldName {
564                            message_name: fqn,
565                            field_name: fname.clone(),
566                        });
567                    }
568                }
569            }
570
571            let module_name = crate::oneof::to_snake_case(name);
572            if module_name == sentinel {
573                return Err(CodeGenError::ReservedModuleName {
574                    name: sentinel.to_string(),
575                    location: format!("message '{fqn}'"),
576                });
577            }
578            if let Some(existing) = seen.get(&module_name) {
579                return Err(CodeGenError::ModuleNameConflict {
580                    scope: scope.to_string(),
581                    name_a: existing.to_string(),
582                    name_b: name.to_string(),
583                    module_name,
584                });
585            }
586            seen.insert(module_name, name);
587
588            walk(&msg.nested_type, &fqn, sentinel)?;
589        }
590        Ok(())
591    }
592
593    walk(&file.message_type, package, sentinel)
594}
595
596/// Per-proto content streams plus the file stem, ready to be formatted.
597struct ProtoContent {
598    stem: String,
599    owned: TokenStream,
600    view: TokenStream,
601    oneof: TokenStream,
602    view_oneof: TokenStream,
603    ext: TokenStream,
604    /// Candidate `pub use` re-exports targeting the package root (top-level
605    /// view structs, file-level extension consts). Filtered against the
606    /// package-wide root namespace in [`generate_package_mod`] — the package
607    /// can span multiple `.proto` files, so collisions are only knowable at
608    /// the stitcher level.
609    root_reexports: Vec<message::ReexportCandidate>,
610}
611
612/// Generate the five per-`.proto` content files for one input file.
613fn generate_proto_content(
614    ctx: &context::CodeGenContext,
615    current_package: &str,
616    file: &FileDescriptorProto,
617    reg: &mut message::RegistryPaths,
618) -> Result<ProtoContent, CodeGenError> {
619    use crate::idents::make_field_ident;
620    use crate::message::MessageOutput;
621
622    validate_file(file)?;
623
624    let resolver = imports::ImportResolver::new();
625    let features = crate::features::for_file(file);
626
627    let mut owned = TokenStream::new();
628    let mut view = TokenStream::new();
629    let mut oneof = TokenStream::new();
630    let mut view_oneof = TokenStream::new();
631    let mut ext = TokenStream::new();
632    let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
633    let sentinel = make_field_ident(context::SENTINEL_MOD);
634
635    for enum_type in &file.enum_type {
636        let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
637        let enum_fqn = if current_package.is_empty() {
638            enum_rust_name.to_string()
639        } else {
640            format!("{}.{}", current_package, enum_rust_name)
641        };
642        owned.extend(enumeration::generate_enum(
643            ctx,
644            enum_type,
645            enum_rust_name,
646            &enum_fqn,
647            &features,
648            &resolver,
649        )?);
650    }
651
652    for message_type in &file.message_type {
653        let top_level_name = message_type.name.as_deref().unwrap_or("");
654        let proto_fqn = if current_package.is_empty() {
655            top_level_name.to_string()
656        } else {
657            format!("{}.{}", current_package, top_level_name)
658        };
659        let MessageOutput {
660            owned_top,
661            owned_mod,
662            oneof_tree: msg_oneof,
663            view_tree: msg_view,
664            view_oneof_tree: msg_view_oneof,
665            reg: msg_reg,
666        } = message::generate_message(
667            ctx,
668            message_type,
669            current_package,
670            top_level_name,
671            &proto_fqn,
672            &features,
673            &resolver,
674        )?;
675        owned.extend(owned_top);
676        let mod_ident = make_field_ident(&crate::oneof::to_snake_case(top_level_name));
677        for p in msg_reg.json_ext {
678            reg.json_ext.push(quote! { #mod_ident :: #p });
679        }
680        for p in msg_reg.text_ext {
681            reg.text_ext.push(quote! { #mod_ident :: #p });
682        }
683        reg.json_any.extend(msg_reg.json_any);
684        reg.text_any.extend(msg_reg.text_any);
685
686        if !owned_mod.is_empty() {
687            owned.extend(quote! {
688                pub mod #mod_ident {
689                    #[allow(unused_imports)]
690                    use super::*;
691                    #owned_mod
692                }
693            });
694        }
695        oneof.extend(msg_oneof);
696        view.extend(msg_view);
697        view_oneof.extend(msg_view_oneof);
698
699        // Top-level message view → re-export at package root. The leading
700        // `self::` is load-bearing: when consumers nest packages with
701        // `pub mod a { use super::*; pub mod a_b { use super::*; … } }`
702        // (`buffa-build`'s `_include.rs` does this), a parent package's
703        // `__buffa` is in scope via the glob, and Rust's import-resolution
704        // pass treats a glob-imported name as ambiguous against a
705        // **macro-expanded** local one (the `pub mod __buffa` block arrives
706        // via `include!()`), even though a non-macro local definition would
707        // shadow the glob — see rustc E0659. `self::` resolves it
708        // deterministically. `#[doc(inline)]` makes rustdoc render the type's
709        // full page at the natural path instead of a "Re-export of …" stub.
710        if ctx.config.generate_views {
711            let view_ident = format_ident!("{top_level_name}View");
712            root_reexports.push(message::ReexportCandidate {
713                name: view_ident.to_string(),
714                tokens: quote! {
715                    #[doc(inline)]
716                    pub use self :: #sentinel :: view :: #view_ident;
717                },
718            });
719        }
720    }
721
722    // File-level `extend` declarations → `__buffa::ext::` (depth 2).
723    let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
724        ctx,
725        &file.extension,
726        current_package,
727        2,
728        &features,
729        current_package,
730    )?;
731    ext.extend(file_ext_tokens);
732    for id in file_ext_json {
733        reg.json_ext.push(quote! { #sentinel :: ext :: #id });
734    }
735    for id in file_ext_text {
736        reg.text_ext.push(quote! { #sentinel :: ext :: #id });
737    }
738    // File-level extension consts → re-export at package root. `self::` and
739    // `#[doc(inline)]` for the same reasons as the view re-exports above.
740    for ext_field in &file.extension {
741        let const_ident = extension::extension_const_ident(ext_field.name.as_deref().unwrap_or(""));
742        root_reexports.push(message::ReexportCandidate {
743            name: const_ident.to_string(),
744            tokens: quote! {
745                #[doc(inline)]
746                pub use self :: #sentinel :: ext :: #const_ident;
747            },
748        });
749    }
750
751    Ok(ProtoContent {
752        stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
753        owned,
754        view,
755        oneof,
756        view_oneof,
757        ext,
758        root_reexports,
759    })
760}
761
/// Per-section token streams for one package, ready for the stitcher.
///
/// In per-file mode each section holds `include!("<stem>...rs")` calls; in
/// `file_per_package` mode each holds the actual generated items.
///
/// Every field is a list with one entry per `.proto` file in the package;
/// the stitcher splices each list, in order, into a fixed position of the
/// package module.
#[derive(Default)]
struct PackageSections {
    /// Spliced at the root of the package file, ahead of the sentinel
    /// module. Per-file include suffix: `""`.
    owned: Vec<TokenStream>,
    /// Spliced into the sentinel module's `view` submodule, which is only
    /// emitted when view generation is enabled. Per-file include suffix:
    /// `".__view"`.
    view: Vec<TokenStream>,
    /// Spliced into the sentinel module's `oneof` submodule. Per-file
    /// include suffix: `".__oneof"`.
    oneof: Vec<TokenStream>,
    /// Spliced into the `oneof` submodule nested inside `view`. Per-file
    /// include suffix: `".__view_oneof"`.
    view_oneof: Vec<TokenStream>,
    /// Spliced into the sentinel module's `ext` submodule. Per-file include
    /// suffix: `".__ext"`.
    ext: Vec<TokenStream>,
}
774
775impl PackageSections {
776    /// Build sections of `include!` calls referencing per-file content.
777    ///
778    /// Paths are bare-sibling (no `OUT_DIR` prefix) so the same stitcher
779    /// works for both `OUT_DIR` builds (where the consumer's
780    /// `include_proto!` already prepended `OUT_DIR`) and checked-in code.
781    fn from_stems(stems: &[String]) -> Self {
782        let includes = |suffix: &str| -> Vec<TokenStream> {
783            stems
784                .iter()
785                .map(|stem| {
786                    let path = format!("{stem}{suffix}.rs");
787                    quote! { include!(#path); }
788                })
789                .collect()
790        };
791        Self {
792            owned: includes(""),
793            view: includes(".__view"),
794            oneof: includes(".__oneof"),
795            view_oneof: includes(".__view_oneof"),
796            ext: includes(".__ext"),
797        }
798    }
799
800    /// Append one proto file's generated items in-line.
801    fn push_inline(&mut self, pc: ProtoContent) {
802        self.owned.push(pc.owned);
803        self.view.push(pc.view);
804        self.oneof.push(pc.oneof);
805        self.view_oneof.push(pc.view_oneof);
806        self.ext.push(pc.ext);
807    }
808}
809
810/// Generate all output files for one proto package: five content files per
811/// `.proto` plus one `<pkg>.mod.rs` stitcher, or a single `<pkg>.rs` when
812/// [`CodeGenConfig::file_per_package`] is set.
813fn generate_package(
814    ctx: &context::CodeGenContext,
815    current_package: &str,
816    files: &[&FileDescriptorProto],
817    out: &mut Vec<GeneratedFile>,
818) -> Result<(), CodeGenError> {
819    // Registry paths are package-root-relative; `register_types` lives at
820    // `__buffa::register_types` (one level deep), so each path gets a
821    // single `super::` prefix when emitted into the fn body.
822    let mut reg = message::RegistryPaths::default();
823    let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
824
825    let sections = if ctx.config.file_per_package {
826        let mut sections = PackageSections::default();
827        for file in files {
828            let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
829            root_reexports.append(&mut pc.root_reexports);
830            sections.push_inline(pc);
831        }
832        sections
833    } else {
834        let mut stems: Vec<String> = Vec::new();
835        for file in files {
836            let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
837            root_reexports.append(&mut pc.root_reexports);
838            let source = file.name.as_deref().unwrap_or("");
839            let push = |out: &mut Vec<GeneratedFile>,
840                        suffix: &str,
841                        kind: GeneratedFileKind,
842                        tokens: TokenStream|
843             -> Result<(), CodeGenError> {
844                out.push(GeneratedFile {
845                    name: format!("{}{suffix}.rs", pc.stem),
846                    package: current_package.to_string(),
847                    kind,
848                    content: format_tokens(tokens, source)?,
849                });
850                Ok(())
851            };
852            push(out, "", GeneratedFileKind::Owned, pc.owned)?;
853            push(out, ".__view", GeneratedFileKind::View, pc.view)?;
854            push(out, ".__oneof", GeneratedFileKind::Oneof, pc.oneof)?;
855            push(
856                out,
857                ".__view_oneof",
858                GeneratedFileKind::ViewOneof,
859                pc.view_oneof,
860            )?;
861            push(out, ".__ext", GeneratedFileKind::Ext, pc.ext)?;
862            stems.push(pc.stem);
863        }
864        PackageSections::from_stems(&stems)
865    };
866
867    let reexport_block = surviving_root_reexports(ctx, files, &reg, root_reexports);
868
869    out.push(GeneratedFile {
870        name: if ctx.config.file_per_package {
871            package_to_filename(current_package)
872        } else {
873            package_to_mod_filename(current_package)
874        },
875        package: current_package.to_string(),
876        kind: GeneratedFileKind::PackageMod,
877        content: generate_package_mod(ctx, &sections, &reg, &reexport_block)?,
878    });
879
880    Ok(())
881}
882
883/// Filter the candidate package-root re-exports against the package's
884/// existing root namespace and against each other, returning the surviving
885/// `pub use` lines.
886///
887/// The package root is shared across every `.proto` file in the package, so
888/// the occupied-name set must be built from *all* of them — a top-level
889/// message named `FooView` declared in `a.proto` would shadow `Foo`'s view
890/// re-export from `b.proto`.
891fn surviving_root_reexports(
892    ctx: &context::CodeGenContext,
893    files: &[&FileDescriptorProto],
894    reg: &message::RegistryPaths,
895    mut candidates: Vec<message::ReexportCandidate>,
896) -> TokenStream {
897    use crate::idents::make_field_ident;
898    use std::collections::BTreeSet;
899
900    // Names already occupied at package root by real items: top-level
901    // messages, enums, message snake_case modules, and the `__buffa`
902    // sentinel itself. File-level extension consts live in
903    // `__buffa::ext::`, not at the root, so they are *candidates* (added
904    // by `generate_proto_content`) rather than occupants.
905    let mut occupied: BTreeSet<String> = BTreeSet::new();
906    occupied.insert(context::SENTINEL_MOD.to_string());
907    for file in files {
908        for m in &file.message_type {
909            let name = m.name.as_deref().unwrap_or("");
910            occupied.insert(name.to_string());
911            occupied.insert(crate::oneof::to_snake_case(name));
912        }
913        for e in &file.enum_type {
914            occupied.insert(e.name.as_deref().unwrap_or("").to_string());
915        }
916    }
917
918    // `register_types`, when emitted, lives at `__buffa::register_types`.
919    // `self::` and `#[doc(inline)]` for the same reasons as the view
920    // re-exports above.
921    if ctx.config.emit_register_fn && !reg.is_empty() {
922        let sentinel = make_field_ident(context::SENTINEL_MOD);
923        candidates.push(message::ReexportCandidate {
924            name: "register_types".to_string(),
925            tokens: quote! {
926                #[doc(inline)]
927                pub use self :: #sentinel :: register_types;
928            },
929        });
930    }
931
932    message::emit_surviving_reexports(candidates, &occupied)
933}
934
935/// Render the per-package stitcher: owned items at root plus the
936/// `__buffa::{view,oneof,ext,...}` module wrappers, followed by the
937/// surviving package-root `pub use` re-exports.
938fn generate_package_mod(
939    ctx: &context::CodeGenContext,
940    sections: &PackageSections,
941    reg: &message::RegistryPaths,
942    root_reexports: &TokenStream,
943) -> Result<String, CodeGenError> {
944    use crate::idents::make_field_ident;
945
946    let owned = &sections.owned;
947    let view = &sections.view;
948    let view_oneof = &sections.view_oneof;
949    let oneof = &sections.oneof;
950    let ext = &sections.ext;
951
952    let view_mod = if ctx.config.generate_views {
953        quote! {
954            pub mod view {
955                #[allow(unused_imports)]
956                use super::*;
957                #(#view)*
958                pub mod oneof {
959                    #[allow(unused_imports)]
960                    use super::*;
961                    #(#view_oneof)*
962                }
963            }
964        }
965    } else {
966        TokenStream::new()
967    };
968
969    let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
970        let json_any = &reg.json_any;
971        let json_ext = &reg.json_ext;
972        let text_any = &reg.text_any;
973        let text_ext = &reg.text_ext;
974        quote! {
975            /// Register this package's `Any` type entries and extension entries.
976            pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
977                #( reg.register_json_any(super::#json_any); )*
978                #( reg.register_json_ext(super::#json_ext); )*
979                #( reg.register_text_any(super::#text_any); )*
980                #( reg.register_text_ext(super::#text_ext); )*
981            }
982        }
983    } else {
984        TokenStream::new()
985    };
986
987    let allow = allow_lints_attr();
988    let sentinel = make_field_ident(context::SENTINEL_MOD);
989    let tokens = quote! {
990        #(#owned)*
991        #allow
992        pub mod #sentinel {
993            #[allow(unused_imports)]
994            use super::*;
995            #view_mod
996            pub mod oneof {
997                #[allow(unused_imports)]
998                use super::*;
999                #(#oneof)*
1000            }
1001            pub mod ext {
1002                #[allow(unused_imports)]
1003                use super::*;
1004                #(#ext)*
1005            }
1006            #register_fn
1007        }
1008        #root_reexports
1009    };
1010
1011    format_tokens(tokens, "")
1012}
1013
1014/// Format a token stream into a generated-file string with the standard
1015/// header comment.
1016fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
1017    let syntax_tree =
1018        syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
1019    let formatted = prettyplease::unparse(&syntax_tree);
1020    let source_line = if source.is_empty() {
1021        String::new()
1022    } else {
1023        format!("// source: {source}\n")
1024    };
1025    Ok(format!(
1026        "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
1027    ))
1028}
1029
1030/// Convert a proto package name to its `.mod.rs` stitcher filename.
1031///
1032/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
1033/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
1034/// filename stem — `package __buffa;` is already rejected by
1035/// `validate_file`, so the unnamed-package stitcher cannot
1036/// collide with any real package's.
1037pub fn package_to_mod_filename(package: &str) -> String {
1038    if package.is_empty() {
1039        format!("{}.mod.rs", context::SENTINEL_MOD)
1040    } else {
1041        format!("{package}.mod.rs")
1042    }
1043}
1044
1045/// Convert a proto package name to its [`file_per_package`] output filename.
1046///
1047/// e.g., `"google.protobuf"` → `"google.protobuf.rs"`. The unnamed
1048/// package uses [`SENTINEL_MOD`](context::SENTINEL_MOD) — same
1049/// collision-avoidance as [`package_to_mod_filename`].
1050///
1051/// [`file_per_package`]: CodeGenConfig::file_per_package
1052pub fn package_to_filename(package: &str) -> String {
1053    if package.is_empty() {
1054        format!("{}.rs", context::SENTINEL_MOD)
1055    } else {
1056        format!("{package}.rs")
1057    }
1058}
1059
/// Derive the content-file stem from a `.proto` file path.
///
/// A single trailing `.proto` is removed (if present) and every `/` is
/// turned into `.`, e.g. `"google/protobuf/timestamp.proto"` becomes
/// `"google.protobuf.timestamp"`. The five content files are named by
/// appending `""`, `".__view"`, `".__oneof"`, `".__view_oneof"` or
/// `".__ext"` to the stem, followed by `".rs"`.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let base = match proto_path.strip_suffix(".proto") {
        Some(trimmed) => trimmed,
        None => proto_path,
    };
    base.chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect()
}
1069
1070/// Merge downstream [`Companion`](GeneratedFileKind::Companion) files into
1071/// the per-package stitcher produced by [`generate`].
1072///
1073/// For each companion file this function locates the
1074/// [`PackageMod`](GeneratedFileKind::PackageMod) entry in `files` with a
1075/// matching package and appends `include!("<name>");` at file scope after
1076/// buffa's own output — at package root, alongside the owned message types,
1077/// not under `__buffa::`. The companion files themselves are appended to
1078/// `files` so that build integrations can write everything to disk in one
1079/// pass.
1080///
1081/// **Call this once per build**; it does not deduplicate, so a second call
1082/// with the same companions emits a second `include!` for each, which fails
1083/// to compile downstream with a duplicate-definition error.
1084///
1085/// `name` must be a bare-sibling filename — the same convention buffa uses
1086/// for its own `include!` calls, so it resolves relative to the stitcher
1087/// without any `OUT_DIR` prefix. Names must not contain `"`, `\`, `/`, or
1088/// newlines (the function `debug_assert!`s this in debug builds), and must
1089/// not collide with any of buffa's own generated filenames for the same
1090/// package (`<stem>.rs`, `<stem>.__view.rs`, etc.) — pick an unused suffix
1091/// such as `<stem>.__myplugin.rs`.
1092///
1093/// Companion files with no matching `PackageMod` (e.g. for a package buffa
1094/// did not generate any output for) are still appended to `files` but no
1095/// `include!` is emitted; the caller is responsible for wiring them up. If
1096/// you don't expect orphans, check that every companion's `package` appears
1097/// in `files` as a `PackageMod` after calling.
1098pub fn apply_companions(files: &mut Vec<GeneratedFile>, companions: Vec<GeneratedFile>) {
1099    for comp in &companions {
1100        debug_assert!(
1101            !comp.name.contains(['"', '\\', '/', '\n']),
1102            "companion file name {:?} contains a character that would break \
1103             the generated include!() literal or its bare-sibling resolution",
1104            comp.name
1105        );
1106        if let Some(pkg_mod) = files
1107            .iter_mut()
1108            .find(|f| f.kind == GeneratedFileKind::PackageMod && f.package == comp.package)
1109        {
1110            pkg_mod
1111                .content
1112                .push_str(&format!("include!(\"{}\");\n", comp.name));
1113        }
1114    }
1115    files.extend(companions);
1116}
1117
/// Code generation error.
///
/// Each variant's user-facing message comes from its `#[error(...)]`
/// attribute. The enum is `#[non_exhaustive]`, so code matching on it from
/// outside this crate needs a wildcard arm.
#[derive(Debug, Clone, thiserror::Error)]
#[non_exhaustive]
pub enum CodeGenError {
    /// A required field was absent in a descriptor.
    ///
    /// The `&'static str` names the missing field for diagnostics.
    #[error("missing required descriptor field: {0}")]
    MissingField(&'static str),
    /// A resolved type path string could not be parsed as a Rust type.
    ///
    /// The payload is the offending path, quoted in the message.
    #[error("invalid Rust type path: '{0}'")]
    InvalidTypePath(String),
    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
    ///
    /// The payload is the parser's error text.
    #[error("generated code failed to parse as Rust: {0}")]
    InvalidSyntax(String),
    /// A requested file was not present in the descriptor set.
    ///
    /// The payload is the requested file name, quoted in the message.
    #[error("file_to_generate '{0}' not found in descriptor set")]
    FileNotFound(String),
    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
    /// resolved to a known descriptor field).
    #[error("codegen error: {0}")]
    Other(String),
    /// A proto field name uses the `__buffa_` reserved prefix, which would
    /// conflict with buffa's internal generated fields.
    #[error(
        "reserved field name '{field_name}' in message '{message_name}': \
             proto field names starting with '__buffa_' conflict with buffa's \
             internal fields"
    )]
    ReservedFieldName {
        /// Message that declares the offending field.
        message_name: String,
        /// The proto field name carrying the reserved prefix.
        field_name: String,
    },
    /// Two sibling messages produce the same Rust module name after
    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
    /// become `pub mod http_request`).
    #[error(
        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
         both produce module '{module_name}'"
    )]
    ModuleNameConflict {
        /// Scope in which the two messages are siblings.
        scope: String,
        /// First of the two conflicting message names.
        name_a: String,
        /// Second of the two conflicting message names.
        name_b: String,
        /// The module name both messages map to.
        module_name: String,
    },
    /// A proto package segment, message name, or file-level enum name
    /// would emit a Rust item matching the reserved sentinel `__buffa`.
    ///
    /// This is the only name buffa reserves in user namespace. Resolve by
    /// renaming the proto element.
    ///
    /// `name` is the reserved name that was encountered and `location`
    /// describes where it was found; both are interpolated into the
    /// message.
    #[error(
        "reserved name '{name}' at {location}: this name is reserved for \
         buffa's generated ancillary types (views, oneof enums, \
         extensions). Rename the proto element."
    )]
    ReservedModuleName { name: String, location: String },
    /// The input contains a message with `option message_set_wire_format = true`
    /// but [`CodeGenConfig::allow_message_set`] was not set.
    ///
    /// `message_name` identifies the message that uses the option.
    #[error(
        "message '{message_name}' uses `option message_set_wire_format = true` \
         but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
         wire format — set allow_message_set(true) if this is intentional"
    )]
    MessageSetNotSupported { message_name: String },
    /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
    /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
    /// could not be parsed as a Rust attribute.
    #[error(
        "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
         Rust attribute ({detail})"
    )]
    InvalidCustomAttribute {
        /// Path the attribute was configured for.
        path: String,
        /// The attribute string that failed to parse.
        attribute: String,
        /// Extra detail shown in parentheses at the end of the message;
        /// presumably the parser's error text — confirm at the
        /// construction site.
        detail: String,
    },
}
1196
1197#[cfg(test)]
1198mod tests;