buffa_codegen/lib.rs
1//! Shared code generation logic for buffa.
2//!
3//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
4//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
5//! that uses the `buffa` runtime.
6//!
7//! It is used by:
8//! - `protoc-gen-buffa` (protoc plugin)
9//! - `buffa-build` (build.rs integration)
10//!
11//! # Architecture
12//!
13//! The code generator is intentionally decoupled from how descriptors are
14//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
15//! Rust source strings. This means:
16//!
17//! - It doesn't parse `.proto` files.
18//! - It doesn't invoke `protoc`.
19//! - It doesn't do import resolution or name linking.
20//!
21//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod features;
29#[doc(hidden)]
30pub use buffa_descriptor::generated;
31pub mod idents;
32pub(crate) mod impl_message;
33pub(crate) mod impl_text;
34pub(crate) mod imports;
35pub(crate) mod message;
36pub(crate) mod oneof;
37pub(crate) mod view;
38
39use crate::generated::descriptor::FileDescriptorProto;
40use proc_macro2::TokenStream;
41use quote::{format_ident, quote};
42
/// Lints suppressed on generated code at module boundaries.
///
/// Consumed by [`generate_module_tree`], the per-package `.mod.rs`
/// stitcher, and `buffa-build`'s `_include.rs` writer. One list keeps
/// them in sync.
///
/// Each entry is parsed with `syn::parse_str` by [`allow_lints_attr`]
/// (which panics if an entry does not parse) and joined verbatim with
/// `", "` by [`generate_module_tree`], so entries must be spelled exactly
/// as they would appear inside `#[allow(...)]`.
pub const ALLOW_LINTS: &[&str] = &[
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // A user `message View { message Inner }` produces
    // `__buffa::view::view::InnerView`; harmless but trips this lint.
    "clippy::module_inception",
];
60
61/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
62pub fn allow_lints_attr() -> TokenStream {
63 let lints: Vec<TokenStream> = ALLOW_LINTS
64 .iter()
65 .map(|l| syn::parse_str(l).expect("lint name parses as path"))
66 .collect();
67 quote! { #[allow( #(#lints),* )] }
68}
69
/// One generated output file.
///
/// Each `.proto` produces five **content files** (`<stem>.rs`,
/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
/// `<stem>.__ext.rs`) and each proto package produces one
/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
/// and authors the `pub mod __buffa { … }` ancillary tree.
/// See `DESIGN.md` → "Generated code layout".
///
/// Consumers normally only need to wire up the
/// [`GeneratedFileKind::PackageMod`] entries (one per package); the five
/// per-proto content kinds are reached transitively via `include!` from
/// the stitcher. Write all files to disk; build a module tree from only
/// the `PackageMod` ones.
///
/// With [`CodeGenConfig::file_per_package`] set, the per-proto content
/// kinds are not emitted at all — the single `<dotted.pkg>.rs` (still
/// kind `PackageMod`) inlines what the stitcher would `include!`.
#[derive(Debug)]
pub struct GeneratedFile {
    /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
    pub name: String,
    /// The proto package this file belongs to. This is the empty string
    /// when the source `.proto` declares no package ([`generate`] groups
    /// such files under `""`).
    pub package: String,
    /// What this file contains. Build integrations only need to wire up
    /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
    /// via `include!` from there.
    pub kind: GeneratedFileKind,
    /// The generated Rust source code.
    pub content: String,
}
101
/// Kind of [`GeneratedFile`].
///
/// [`generate`] produces five per-proto content kinds — one each of
/// [`Owned`](Self::Owned), [`View`](Self::View), [`Oneof`](Self::Oneof),
/// [`ViewOneof`](Self::ViewOneof), and [`Ext`](Self::Ext) per input
/// `.proto` file — plus one [`PackageMod`](Self::PackageMod) stitcher per
/// package. Build integrations only need to wire up `PackageMod` entries;
/// the per-proto content kinds are reached via `include!` from the stitcher
/// and need only be written to disk alongside it. Under
/// [`CodeGenConfig::file_per_package`] only `PackageMod` is emitted.
///
/// [`Companion`](Self::Companion) is the one kind *not* produced by
/// [`generate`]: downstream code generators construct `Companion` files
/// themselves and merge them into buffa's output via
/// [`apply_companions`].
///
/// This enum is `#[non_exhaustive]` — match with a wildcard arm so new
/// kinds can be added without a major version bump.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[non_exhaustive]
pub enum GeneratedFileKind {
    /// Owned message structs and enums (`<stem>.rs`).
    Owned,
    /// View structs (`<stem>.__view.rs`).
    View,
    /// Owned oneof enums (`<stem>.__oneof.rs`).
    Oneof,
    /// View oneof enums (`<stem>.__view_oneof.rs`).
    ViewOneof,
    /// File-level proto-extension consts (`<stem>.__ext.rs`) — the
    /// `pub const` `ExtensionDescriptor` items generated from `extend`
    /// blocks. Not to be confused with [`Companion`](Self::Companion),
    /// which is unrelated downstream-supplied content.
    Ext,
    /// Per-package stitcher (`<dotted.pkg>.mod.rs`, or the single
    /// `<dotted.pkg>.rs` under [`CodeGenConfig::file_per_package`]). The
    /// only file build systems need to wire up directly.
    PackageMod,
    /// Extra per-proto content from a downstream code generator (service
    /// stubs, extra trait impls, etc.) that travels with buffa's output.
    ///
    /// Not produced by [`generate`]. Construct these in your own generator
    /// and pass them to [`apply_companions`], which appends an `include!`
    /// for each one at file scope in the matching package's
    /// [`PackageMod`](Self::PackageMod) — after buffa's own output, at
    /// package root alongside the owned message types (**not** under the
    /// `__buffa::` sentinel module). Items declared `pub` in a companion
    /// file are visible at `crate::<pkg>::*`.
    ///
    /// Not to be confused with [`Ext`](Self::Ext), which is the buffa-
    /// generated file holding protobuf `extend` consts.
    Companion,
}
154
/// Configuration for code generation.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal: start from [`CodeGenConfig::default`]
/// and assign the fields you want to change.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Whether to generate borrowed view types (`MyMessageView<'a>`) in
    /// addition to owned types (default: true).
    pub generate_views: bool,
    /// Whether to preserve unknown fields (default: true).
    pub preserve_unknown_fields: bool,
    /// Whether to derive `serde::Serialize` / `serde::Deserialize` on
    /// generated message structs and enum types, and emit `#[serde(with = "...")]`
    /// attributes for proto3 JSON's special scalar encodings (int64 as quoted
    /// string, bytes as base64, etc.). Default: false.
    ///
    /// When this is `true`, the downstream crate must depend on `serde` and
    /// must enable the `buffa/json` feature for the runtime helpers.
    ///
    /// Oneof fields use `#[serde(flatten)]` with custom `Serialize` /
    /// `Deserialize` impls so that each variant appears as a top-level
    /// JSON field (proto3 JSON inline oneof encoding).
    pub generate_json: bool,
    /// Whether to emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types. Default: false.
    ///
    /// When this is `true`, the downstream crate must add `arbitrary` as an
    /// optional dependency and enable the `buffa/arbitrary` feature. The
    /// downstream crate's Cargo feature that gates `arbitrary` must be named
    /// exactly `"arbitrary"` — the generated `cfg_attr` uses that literal
    /// string and cannot be customized. This applies to both the struct-level
    /// `derive(Arbitrary)` and the per-field `#[arbitrary(with = ...)]`
    /// attributes emitted for `bytes_fields`-typed fields.
    ///
    /// For `bytes_fields`-typed fields, codegen emits `#[arbitrary(with = ...)]`
    /// using helpers in `::buffa::__private` since `bytes::Bytes` has no
    /// `Arbitrary` impl. Singular, optional, and repeated bytes fields are all
    /// covered. Map values are always `Vec<u8>` regardless of `bytes_fields`
    /// and require no special handling.
    pub generate_arbitrary: bool,
    /// External type path mappings (default: empty).
    ///
    /// Each entry maps a fully-qualified protobuf path prefix (e.g.,
    /// `".my.common"`) to a Rust module path (e.g., `"::common_protos"`).
    /// Types under the proto prefix will reference the extern Rust path
    /// instead of being generated, allowing shared proto packages to be
    /// compiled once in a dedicated crate and referenced from others.
    ///
    /// Well-known types (`google.protobuf.*`) are automatically mapped to
    /// `::buffa_types::google::protobuf::*` without needing an explicit
    /// entry here. To override with a custom implementation, add an
    /// `extern_path` for `.google.protobuf` pointing to your crate.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields should use
    /// `bytes::Bytes` instead of `Vec<u8>` (default: empty).
    ///
    /// Each entry is a proto path prefix (e.g., `".my.pkg.MyMessage.data"` for
    /// a specific field, or `"."` for all bytes fields). The path is matched
    /// as a prefix, so `"."` applies to every bytes field in every message.
    pub bytes_fields: Vec<String>,
    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
    /// for such string fields instead of `String` / `&str`.
    ///
    /// When `false` (the default), buffa emits `String` for all string fields
    /// and **validates UTF-8 on decode** — stricter than proto2 requires, but
    /// ergonomic and safe.
    ///
    /// When `true`, string fields with `utf8_validation = NONE` (all proto2
    /// strings by default, and editions fields that opt into `NONE`) become
    /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller decides at the
    /// call site whether to `std::str::from_utf8` (checked) or
    /// `from_utf8_unchecked` (trusted-input fast path). This is the only
    /// sound Rust mapping when strings may actually contain non-UTF-8 bytes.
    ///
    /// **This is a breaking change for proto2** — enable only for new code or
    /// when profiling identifies UTF-8 validation as a bottleneck.
    pub strict_utf8_mapping: bool,
    /// Permit `option message_set_wire_format = true` on input messages.
    ///
    /// MessageSet is a legacy Google-internal wire format that wraps each
    /// extension in a group structure instead of using regular field tags.
    /// When `false` (the default), encountering such a message is a codegen
    /// error — the flag exists to make MessageSet use explicit, since the
    /// format is obsolete outside of interop with very old Google protos.
    pub allow_message_set: bool,
    /// Whether to emit `impl buffa::text::TextFormat` on generated message
    /// structs for textproto (human-readable text format) encoding/decoding.
    /// Default: false.
    ///
    /// When this is `true`, the downstream crate must enable the `buffa/text`
    /// feature for the runtime encoder/decoder.
    pub generate_text: bool,
    /// Whether the per-package `.mod.rs` stitcher emits
    /// `__buffa::register_types(&mut TypeRegistry)`.
    ///
    /// Default `true`. The fn aggregates `Any` type entries and extension
    /// entries for every message in the package. Set to `false` for
    /// crates that don't use extensions/`Any`, or that hand-roll
    /// registration (e.g. `buffa-types`' `register_wkt_types`, which
    /// knows the JSON-Any `is_wkt` special-casing the generic fn does
    /// not). The per-message `__*_JSON_ANY` / `__*_TEXT_ANY` consts are
    /// still emitted; only the aggregating fn is suppressed.
    pub emit_register_fn: bool,
    /// Emit one `<dotted.package>.rs` per proto package instead of the
    /// per-proto-file content set plus `<pkg>.mod.rs` stitcher.
    /// Default: false.
    ///
    /// The single file inlines what the stitcher would otherwise `include!`,
    /// producing the same `__buffa::{view,oneof,ext,...}` module structure.
    /// Intended for Buf Schema Registry generated SDKs, whose `lib.rs`
    /// synthesis builds the module tree from `<dotted.package>.rs` filenames.
    ///
    /// Under `strategy: directory` this only sees one directory's files per
    /// invocation, so the input module must be `PACKAGE_DIRECTORY_MATCH`-clean
    /// (one package per directory) for the output to be complete. BSR-hosted
    /// modules satisfy this by lint default. If a package spans multiple
    /// directories, separate invocations each emit their own `<pkg>.rs` and
    /// the last write wins — silent partial output, not a codegen error.
    pub file_per_package: bool,
    /// Custom attributes to inject on generated types (messages and enums).
    /// Default: empty.
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified proto name: `"."` applies to
    /// all types, `".my.pkg"` to types in that package, `".my.pkg.MyMessage"`
    /// to a specific type. The `attribute` is a raw Rust attribute string
    /// (e.g., `"#[derive(serde::Serialize)]"`).
    pub type_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated struct fields.
    /// Default: empty.
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified field path (e.g.,
    /// `".my.pkg.MyMessage.my_field"`). `"."` applies to all fields.
    pub field_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated message structs only (not enums).
    /// Default: empty.
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// message structs, not enum types. Useful for struct-only attributes like
    /// `#[serde(default)]`.
    pub message_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated enum types only (not messages).
    /// Default: empty.
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// enum types. Useful for enum-only attributes like
    /// `#[derive(strum::EnumIter)]` when the user does not want to apply the
    /// same attribute to every message in the matched scope.
    pub enum_attributes: Vec<(String, String)>,
}
298
299impl Default for CodeGenConfig {
300 fn default() -> Self {
301 Self {
302 generate_views: true,
303 preserve_unknown_fields: true,
304 generate_json: false,
305 generate_arbitrary: false,
306 extern_paths: Vec::new(),
307 bytes_fields: Vec::new(),
308 strict_utf8_mapping: false,
309 allow_message_set: false,
310 generate_text: false,
311 emit_register_fn: true,
312 file_per_package: false,
313 type_attributes: Vec::new(),
314 field_attributes: Vec::new(),
315 message_attributes: Vec::new(),
316 enum_attributes: Vec::new(),
317 }
318 }
319}
320
321/// Compute the effective extern path list by starting with user-provided
322/// mappings and adding the default WKT mapping if appropriate.
323///
324/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
325/// is added unless:
326/// - The user already provided an extern_path covering `.google.protobuf`
327/// - Any of the files being generated are in the `google.protobuf` package
328/// (i.e., we're building `buffa-types` itself)
329pub(crate) fn effective_extern_paths(
330 file_descriptors: &[FileDescriptorProto],
331 files_to_generate: &[String],
332 config: &CodeGenConfig,
333) -> Vec<(String, String)> {
334 let mut paths = config.extern_paths.clone();
335
336 // Only an EXACT .google.protobuf mapping suppresses auto-injection.
337 // A sub-package mapping like .google.protobuf.compiler does NOT cover
338 // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
339 // lets both coexist, so we still inject the parent mapping.
340 let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
341
342 if !has_wkt_mapping {
343 // Check if we're generating google.protobuf files ourselves
344 // (e.g., building buffa-types). If so, don't auto-map.
345 let generating_wkts = file_descriptors
346 .iter()
347 .filter(|fd| {
348 fd.name
349 .as_deref()
350 .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
351 })
352 .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
353
354 if !generating_wkts {
355 paths.push((
356 ".google.protobuf".to_string(),
357 "::buffa_types::google::protobuf".to_string(),
358 ));
359 }
360 }
361
362 paths
363}
364
365/// Generate Rust source files from a set of file descriptors.
366///
367/// `files_to_generate` is the set of file names that were explicitly requested
368/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
369/// dependencies may be present in `file_descriptors` but won't produce output
370/// files unless they appear in `files_to_generate`.
371///
372/// Each `.proto` emits five content files; each distinct package emits one
373/// `<pkg>.mod.rs` stitcher. Packages are processed in sorted order for
374/// deterministic output.
375pub fn generate(
376 file_descriptors: &[FileDescriptorProto],
377 files_to_generate: &[String],
378 config: &CodeGenConfig,
379) -> Result<Vec<GeneratedFile>, CodeGenError> {
380 let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
381
382 // Group requested files by package. BTreeMap → deterministic output order.
383 let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
384 std::collections::BTreeMap::new();
385 for file_name in files_to_generate {
386 let file_desc = file_descriptors
387 .iter()
388 .find(|f| f.name.as_deref() == Some(file_name.as_str()))
389 .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
390 let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
391 by_package.entry(pkg).or_default().push(file_desc);
392 }
393
394 let mut output = Vec::new();
395 for (package, files) in by_package {
396 generate_package(&ctx, &package, &files, &mut output)?;
397 }
398
399 Ok(output)
400}
401
402/// Generate a module tree that assembles per-package `.mod.rs` files into
403/// nested `pub mod` blocks matching the protobuf package hierarchy.
404///
405/// Each entry is a `(mod_file_name, package)` pair where `package` is the
406/// dot-separated protobuf package name (e.g., `"google.api"`) and
407/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
408/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
409/// content files are reached via `include!` from the stitcher).
410///
411/// `include_mode` controls how `include!` paths are emitted.
412///
413/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
414/// valid when the output is used directly as a module file (`mod.rs`),
415/// invalid when consumed via `include!`.
416pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
417 entries: &[(F, P)],
418 include_mode: IncludeMode<'_>,
419 emit_inner_allow: bool,
420) -> String {
421 use std::collections::BTreeMap;
422 use std::fmt::Write;
423
424 use crate::idents::escape_mod_ident;
425
426 #[derive(Default)]
427 struct ModNode {
428 files: Vec<String>,
429 children: BTreeMap<String, Self>,
430 }
431
432 let mut root = ModNode::default();
433
434 for (file_name, package) in entries {
435 let package = package.as_ref();
436 let pkg_parts: Vec<&str> = if package.is_empty() {
437 vec![]
438 } else {
439 package.split('.').collect()
440 };
441
442 let mut node = &mut root;
443 for seg in &pkg_parts {
444 node = node.children.entry(seg.to_string()).or_default();
445 }
446 node.files.push(file_name.as_ref().to_string());
447 }
448
449 let lints = ALLOW_LINTS.join(", ");
450 let mut out = String::new();
451 let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
452 if emit_inner_allow {
453 let _ = writeln!(out, "#![allow({lints})]");
454 }
455 let _ = writeln!(out);
456
457 fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
458 let indent = " ".repeat(depth);
459
460 for file in &node.files {
461 match mode {
462 IncludeMode::Relative(prefix) => {
463 let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
464 }
465 IncludeMode::OutDir => {
466 let _ = writeln!(
467 out,
468 r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
469 );
470 }
471 }
472 }
473
474 for (name, child) in &node.children {
475 let escaped = escape_mod_ident(name);
476 let _ = writeln!(out, "{indent}#[allow({lints})]");
477 let _ = writeln!(out, "{indent}pub mod {escaped} {{");
478 let _ = writeln!(out, "{indent} use super::*;");
479 emit(out, child, depth + 1, mode, lints);
480 let _ = writeln!(out, "{indent}}}");
481 }
482 }
483
484 emit(&mut out, &root, 0, include_mode, &lints);
485 out
486}
487
/// How [`generate_module_tree`] emits `include!` paths.
#[derive(Debug, Clone, Copy)]
pub enum IncludeMode<'a> {
    /// `include!("<prefix><file>")` — relative to the including file.
    /// Prefix is typically `""` or `"gen/"`. The prefix and file name are
    /// concatenated as-is, so a directory prefix must carry its own
    /// trailing `/`.
    Relative(&'a str),
    /// `include!(concat!(env!("OUT_DIR"), "/<file>"))` — for build.rs output.
    OutDir,
}
497
498/// Validate one input descriptor before generating code for it.
499///
500/// Checks, in one walk of the message tree:
501///
502/// - **Reserved field names**: no field starts with `__buffa_` (would clash
503/// with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
504/// - **Module-name conflicts**: no two sibling messages snake_case to the
505/// same module name (e.g. `HTTPRequest` vs `HttpRequest`).
506/// - **Reserved sentinel**: no package segment, message-module name, or
507/// file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
508/// Ancillary types live under `pkg::__buffa::…`; a proto element
509/// emitting an item named `__buffa` at package root would produce
510/// E0428 against `pub mod __buffa`. This is the only name buffa
511/// reserves in user namespace.
512fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
513 use std::collections::HashMap;
514
515 let sentinel = context::SENTINEL_MOD;
516 let package = file.package.as_deref().unwrap_or("");
517 if package.split('.').any(|seg| seg == sentinel) {
518 return Err(CodeGenError::ReservedModuleName {
519 name: sentinel.to_string(),
520 location: format!("package '{package}'"),
521 });
522 }
523 // File-level enums emit `pub enum <name>` at package root with the
524 // proto name preserved verbatim (no PascalCase normalization), so a
525 // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
526 // enums live inside their owner message's module and cannot collide
527 // with the package-root sentinel, so only file-level is checked.
528 for enum_type in &file.enum_type {
529 let name = enum_type.name.as_deref().unwrap_or("");
530 if name == sentinel {
531 return Err(CodeGenError::ReservedModuleName {
532 name: sentinel.to_string(),
533 location: format!("enum '{package}.{name}'"),
534 });
535 }
536 }
537
538 fn walk(
539 messages: &[crate::generated::descriptor::DescriptorProto],
540 scope: &str,
541 sentinel: &str,
542 ) -> Result<(), CodeGenError> {
543 // snake_case module name → original proto name (for conflict diag).
544 let mut seen: HashMap<String, &str> = HashMap::new();
545
546 for msg in messages {
547 let name = msg.name.as_deref().unwrap_or("");
548 let fqn = if scope.is_empty() {
549 name.to_string()
550 } else {
551 format!("{scope}.{name}")
552 };
553
554 for field in &msg.field {
555 if let Some(fname) = &field.name {
556 if fname.starts_with("__buffa_") {
557 return Err(CodeGenError::ReservedFieldName {
558 message_name: fqn,
559 field_name: fname.clone(),
560 });
561 }
562 }
563 }
564
565 let module_name = crate::oneof::to_snake_case(name);
566 if module_name == sentinel {
567 return Err(CodeGenError::ReservedModuleName {
568 name: sentinel.to_string(),
569 location: format!("message '{fqn}'"),
570 });
571 }
572 if let Some(existing) = seen.get(&module_name) {
573 return Err(CodeGenError::ModuleNameConflict {
574 scope: scope.to_string(),
575 name_a: existing.to_string(),
576 name_b: name.to_string(),
577 module_name,
578 });
579 }
580 seen.insert(module_name, name);
581
582 walk(&msg.nested_type, &fqn, sentinel)?;
583 }
584 Ok(())
585 }
586
587 walk(&file.message_type, package, sentinel)
588}
589
/// Per-proto content streams plus the file stem, ready to be formatted.
///
/// Built by [`generate_proto_content`] for a single input `.proto` file.
struct ProtoContent {
    /// File-name stem computed by `proto_path_to_stem` from the `.proto`
    /// file's name; output file names are `<stem><suffix>.rs`.
    stem: String,
    /// File-level enums, top-level message items, and the per-message
    /// `pub mod` blocks (content of `<stem>.rs`).
    owned: TokenStream,
    /// View items (content of `<stem>.__view.rs`).
    view: TokenStream,
    /// Owned oneof items (content of `<stem>.__oneof.rs`).
    oneof: TokenStream,
    /// View oneof items (content of `<stem>.__view_oneof.rs`).
    view_oneof: TokenStream,
    /// File-level extension items (content of `<stem>.__ext.rs`).
    ext: TokenStream,
    /// Candidate `pub use` re-exports targeting the package root (top-level
    /// view structs, file-level extension consts). Filtered against the
    /// package-wide root namespace in [`generate_package_mod`] — the package
    /// can span multiple `.proto` files, so collisions are only knowable at
    /// the stitcher level.
    root_reexports: Vec<message::ReexportCandidate>,
}
605
606/// Generate the five per-`.proto` content files for one input file.
607fn generate_proto_content(
608 ctx: &context::CodeGenContext,
609 current_package: &str,
610 file: &FileDescriptorProto,
611 reg: &mut message::RegistryPaths,
612) -> Result<ProtoContent, CodeGenError> {
613 use crate::idents::make_field_ident;
614 use crate::message::MessageOutput;
615
616 validate_file(file)?;
617
618 let resolver = imports::ImportResolver::new();
619 let features = crate::features::for_file(file);
620
621 let mut owned = TokenStream::new();
622 let mut view = TokenStream::new();
623 let mut oneof = TokenStream::new();
624 let mut view_oneof = TokenStream::new();
625 let mut ext = TokenStream::new();
626 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
627 let sentinel = make_field_ident(context::SENTINEL_MOD);
628
629 for enum_type in &file.enum_type {
630 let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
631 let enum_fqn = if current_package.is_empty() {
632 enum_rust_name.to_string()
633 } else {
634 format!("{}.{}", current_package, enum_rust_name)
635 };
636 owned.extend(enumeration::generate_enum(
637 ctx,
638 enum_type,
639 enum_rust_name,
640 &enum_fqn,
641 &features,
642 &resolver,
643 )?);
644 }
645
646 for message_type in &file.message_type {
647 let top_level_name = message_type.name.as_deref().unwrap_or("");
648 let proto_fqn = if current_package.is_empty() {
649 top_level_name.to_string()
650 } else {
651 format!("{}.{}", current_package, top_level_name)
652 };
653 let MessageOutput {
654 owned_top,
655 owned_mod,
656 oneof_tree: msg_oneof,
657 view_tree: msg_view,
658 view_oneof_tree: msg_view_oneof,
659 reg: msg_reg,
660 } = message::generate_message(
661 ctx,
662 message_type,
663 current_package,
664 top_level_name,
665 &proto_fqn,
666 &features,
667 &resolver,
668 )?;
669 owned.extend(owned_top);
670 let mod_ident = make_field_ident(&crate::oneof::to_snake_case(top_level_name));
671 for p in msg_reg.json_ext {
672 reg.json_ext.push(quote! { #mod_ident :: #p });
673 }
674 for p in msg_reg.text_ext {
675 reg.text_ext.push(quote! { #mod_ident :: #p });
676 }
677 reg.json_any.extend(msg_reg.json_any);
678 reg.text_any.extend(msg_reg.text_any);
679
680 if !owned_mod.is_empty() {
681 owned.extend(quote! {
682 pub mod #mod_ident {
683 #[allow(unused_imports)]
684 use super::*;
685 #owned_mod
686 }
687 });
688 }
689 oneof.extend(msg_oneof);
690 view.extend(msg_view);
691 view_oneof.extend(msg_view_oneof);
692
693 // Top-level message view → re-export at package root. The leading
694 // `self::` is load-bearing: when consumers nest packages with
695 // `pub mod a { use super::*; pub mod a_b { use super::*; … } }`
696 // (`buffa-build`'s `_include.rs` does this), a parent package's
697 // `__buffa` is in scope via the glob, and Rust's import-resolution
698 // pass treats a glob-imported name as ambiguous against a
699 // **macro-expanded** local one (the `pub mod __buffa` block arrives
700 // via `include!()`), even though a non-macro local definition would
701 // shadow the glob — see rustc E0659. `self::` resolves it
702 // deterministically. `#[doc(inline)]` makes rustdoc render the type's
703 // full page at the natural path instead of a "Re-export of …" stub.
704 if ctx.config.generate_views {
705 let view_ident = format_ident!("{top_level_name}View");
706 root_reexports.push(message::ReexportCandidate {
707 name: view_ident.to_string(),
708 tokens: quote! {
709 #[doc(inline)]
710 pub use self :: #sentinel :: view :: #view_ident;
711 },
712 });
713 }
714 }
715
716 // File-level `extend` declarations → `__buffa::ext::` (depth 2).
717 let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
718 ctx,
719 &file.extension,
720 current_package,
721 2,
722 &features,
723 current_package,
724 )?;
725 ext.extend(file_ext_tokens);
726 for id in file_ext_json {
727 reg.json_ext.push(quote! { #sentinel :: ext :: #id });
728 }
729 for id in file_ext_text {
730 reg.text_ext.push(quote! { #sentinel :: ext :: #id });
731 }
732 // File-level extension consts → re-export at package root. `self::` and
733 // `#[doc(inline)]` for the same reasons as the view re-exports above.
734 for ext_field in &file.extension {
735 let const_ident = extension::extension_const_ident(ext_field.name.as_deref().unwrap_or(""));
736 root_reexports.push(message::ReexportCandidate {
737 name: const_ident.to_string(),
738 tokens: quote! {
739 #[doc(inline)]
740 pub use self :: #sentinel :: ext :: #const_ident;
741 },
742 });
743 }
744
745 Ok(ProtoContent {
746 stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
747 owned,
748 view,
749 oneof,
750 view_oneof,
751 ext,
752 root_reexports,
753 })
754}
755
/// Per-section token streams for one package, ready for the stitcher.
///
/// In per-file mode each section holds `include!("<stem>...rs")` calls; in
/// `file_per_package` mode each holds the actual generated items.
///
/// Every vector carries one entry per `.proto` file of the package, in the
/// order the files were added.
#[derive(Default)]
struct PackageSections {
    /// Owned section (`<stem>.rs`).
    owned: Vec<TokenStream>,
    /// View section (`<stem>.__view.rs`).
    view: Vec<TokenStream>,
    /// Owned oneof section (`<stem>.__oneof.rs`).
    oneof: Vec<TokenStream>,
    /// View oneof section (`<stem>.__view_oneof.rs`).
    view_oneof: Vec<TokenStream>,
    /// Extension section (`<stem>.__ext.rs`).
    ext: Vec<TokenStream>,
}
768
769impl PackageSections {
770 /// Build sections of `include!` calls referencing per-file content.
771 ///
772 /// Paths are bare-sibling (no `OUT_DIR` prefix) so the same stitcher
773 /// works for both `OUT_DIR` builds (where the consumer's
774 /// `include_proto!` already prepended `OUT_DIR`) and checked-in code.
775 fn from_stems(stems: &[String]) -> Self {
776 let includes = |suffix: &str| -> Vec<TokenStream> {
777 stems
778 .iter()
779 .map(|stem| {
780 let path = format!("{stem}{suffix}.rs");
781 quote! { include!(#path); }
782 })
783 .collect()
784 };
785 Self {
786 owned: includes(""),
787 view: includes(".__view"),
788 oneof: includes(".__oneof"),
789 view_oneof: includes(".__view_oneof"),
790 ext: includes(".__ext"),
791 }
792 }
793
794 /// Append one proto file's generated items in-line.
795 fn push_inline(&mut self, pc: ProtoContent) {
796 self.owned.push(pc.owned);
797 self.view.push(pc.view);
798 self.oneof.push(pc.oneof);
799 self.view_oneof.push(pc.view_oneof);
800 self.ext.push(pc.ext);
801 }
802}
803
804/// Generate all output files for one proto package: five content files per
805/// `.proto` plus one `<pkg>.mod.rs` stitcher, or a single `<pkg>.rs` when
806/// [`CodeGenConfig::file_per_package`] is set.
807fn generate_package(
808 ctx: &context::CodeGenContext,
809 current_package: &str,
810 files: &[&FileDescriptorProto],
811 out: &mut Vec<GeneratedFile>,
812) -> Result<(), CodeGenError> {
813 // Registry paths are package-root-relative; `register_types` lives at
814 // `__buffa::register_types` (one level deep), so each path gets a
815 // single `super::` prefix when emitted into the fn body.
816 let mut reg = message::RegistryPaths::default();
817 let mut root_reexports: Vec<message::ReexportCandidate> = Vec::new();
818
819 let sections = if ctx.config.file_per_package {
820 let mut sections = PackageSections::default();
821 for file in files {
822 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
823 root_reexports.append(&mut pc.root_reexports);
824 sections.push_inline(pc);
825 }
826 sections
827 } else {
828 let mut stems: Vec<String> = Vec::new();
829 for file in files {
830 let mut pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
831 root_reexports.append(&mut pc.root_reexports);
832 let source = file.name.as_deref().unwrap_or("");
833 let push = |out: &mut Vec<GeneratedFile>,
834 suffix: &str,
835 kind: GeneratedFileKind,
836 tokens: TokenStream|
837 -> Result<(), CodeGenError> {
838 out.push(GeneratedFile {
839 name: format!("{}{suffix}.rs", pc.stem),
840 package: current_package.to_string(),
841 kind,
842 content: format_tokens(tokens, source)?,
843 });
844 Ok(())
845 };
846 push(out, "", GeneratedFileKind::Owned, pc.owned)?;
847 push(out, ".__view", GeneratedFileKind::View, pc.view)?;
848 push(out, ".__oneof", GeneratedFileKind::Oneof, pc.oneof)?;
849 push(
850 out,
851 ".__view_oneof",
852 GeneratedFileKind::ViewOneof,
853 pc.view_oneof,
854 )?;
855 push(out, ".__ext", GeneratedFileKind::Ext, pc.ext)?;
856 stems.push(pc.stem);
857 }
858 PackageSections::from_stems(&stems)
859 };
860
861 let reexport_block = surviving_root_reexports(ctx, files, ®, root_reexports);
862
863 out.push(GeneratedFile {
864 name: if ctx.config.file_per_package {
865 package_to_filename(current_package)
866 } else {
867 package_to_mod_filename(current_package)
868 },
869 package: current_package.to_string(),
870 kind: GeneratedFileKind::PackageMod,
871 content: generate_package_mod(ctx, §ions, ®, &reexport_block)?,
872 });
873
874 Ok(())
875}
876
877/// Filter the candidate package-root re-exports against the package's
878/// existing root namespace and against each other, returning the surviving
879/// `pub use` lines.
880///
881/// The package root is shared across every `.proto` file in the package, so
882/// the occupied-name set must be built from *all* of them — a top-level
883/// message named `FooView` declared in `a.proto` would shadow `Foo`'s view
884/// re-export from `b.proto`.
885fn surviving_root_reexports(
886 ctx: &context::CodeGenContext,
887 files: &[&FileDescriptorProto],
888 reg: &message::RegistryPaths,
889 mut candidates: Vec<message::ReexportCandidate>,
890) -> TokenStream {
891 use crate::idents::make_field_ident;
892 use std::collections::BTreeSet;
893
894 // Names already occupied at package root by real items: top-level
895 // messages, enums, message snake_case modules, and the `__buffa`
896 // sentinel itself. File-level extension consts live in
897 // `__buffa::ext::`, not at the root, so they are *candidates* (added
898 // by `generate_proto_content`) rather than occupants.
899 let mut occupied: BTreeSet<String> = BTreeSet::new();
900 occupied.insert(context::SENTINEL_MOD.to_string());
901 for file in files {
902 for m in &file.message_type {
903 let name = m.name.as_deref().unwrap_or("");
904 occupied.insert(name.to_string());
905 occupied.insert(crate::oneof::to_snake_case(name));
906 }
907 for e in &file.enum_type {
908 occupied.insert(e.name.as_deref().unwrap_or("").to_string());
909 }
910 }
911
912 // `register_types`, when emitted, lives at `__buffa::register_types`.
913 // `self::` and `#[doc(inline)]` for the same reasons as the view
914 // re-exports above.
915 if ctx.config.emit_register_fn && !reg.is_empty() {
916 let sentinel = make_field_ident(context::SENTINEL_MOD);
917 candidates.push(message::ReexportCandidate {
918 name: "register_types".to_string(),
919 tokens: quote! {
920 #[doc(inline)]
921 pub use self :: #sentinel :: register_types;
922 },
923 });
924 }
925
926 message::emit_surviving_reexports(candidates, &occupied)
927}
928
929/// Render the per-package stitcher: owned items at root plus the
930/// `__buffa::{view,oneof,ext,...}` module wrappers, followed by the
931/// surviving package-root `pub use` re-exports.
932fn generate_package_mod(
933 ctx: &context::CodeGenContext,
934 sections: &PackageSections,
935 reg: &message::RegistryPaths,
936 root_reexports: &TokenStream,
937) -> Result<String, CodeGenError> {
938 use crate::idents::make_field_ident;
939
940 let owned = §ions.owned;
941 let view = §ions.view;
942 let view_oneof = §ions.view_oneof;
943 let oneof = §ions.oneof;
944 let ext = §ions.ext;
945
946 let view_mod = if ctx.config.generate_views {
947 quote! {
948 pub mod view {
949 #[allow(unused_imports)]
950 use super::*;
951 #(#view)*
952 pub mod oneof {
953 #[allow(unused_imports)]
954 use super::*;
955 #(#view_oneof)*
956 }
957 }
958 }
959 } else {
960 TokenStream::new()
961 };
962
963 let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
964 let json_any = ®.json_any;
965 let json_ext = ®.json_ext;
966 let text_any = ®.text_any;
967 let text_ext = ®.text_ext;
968 quote! {
969 /// Register this package's `Any` type entries and extension entries.
970 pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
971 #( reg.register_json_any(super::#json_any); )*
972 #( reg.register_json_ext(super::#json_ext); )*
973 #( reg.register_text_any(super::#text_any); )*
974 #( reg.register_text_ext(super::#text_ext); )*
975 }
976 }
977 } else {
978 TokenStream::new()
979 };
980
981 let allow = allow_lints_attr();
982 let sentinel = make_field_ident(context::SENTINEL_MOD);
983 let tokens = quote! {
984 #(#owned)*
985 #allow
986 pub mod #sentinel {
987 #[allow(unused_imports)]
988 use super::*;
989 #view_mod
990 pub mod oneof {
991 #[allow(unused_imports)]
992 use super::*;
993 #(#oneof)*
994 }
995 pub mod ext {
996 #[allow(unused_imports)]
997 use super::*;
998 #(#ext)*
999 }
1000 #register_fn
1001 }
1002 #root_reexports
1003 };
1004
1005 format_tokens(tokens, "")
1006}
1007
1008/// Format a token stream into a generated-file string with the standard
1009/// header comment.
1010fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
1011 let syntax_tree =
1012 syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
1013 let formatted = prettyplease::unparse(&syntax_tree);
1014 let source_line = if source.is_empty() {
1015 String::new()
1016 } else {
1017 format!("// source: {source}\n")
1018 };
1019 Ok(format!(
1020 "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
1021 ))
1022}
1023
1024/// Convert a proto package name to its `.mod.rs` stitcher filename.
1025///
1026/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
1027/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
1028/// filename stem — `package __buffa;` is already rejected by
1029/// `validate_file`, so the unnamed-package stitcher cannot
1030/// collide with any real package's.
1031pub fn package_to_mod_filename(package: &str) -> String {
1032 if package.is_empty() {
1033 format!("{}.mod.rs", context::SENTINEL_MOD)
1034 } else {
1035 format!("{package}.mod.rs")
1036 }
1037}
1038
1039/// Convert a proto package name to its [`file_per_package`] output filename.
1040///
1041/// e.g., `"google.protobuf"` → `"google.protobuf.rs"`. The unnamed
1042/// package uses [`SENTINEL_MOD`](context::SENTINEL_MOD) — same
1043/// collision-avoidance as [`package_to_mod_filename`].
1044///
1045/// [`file_per_package`]: CodeGenConfig::file_per_package
1046pub fn package_to_filename(package: &str) -> String {
1047 if package.is_empty() {
1048 format!("{}.rs", context::SENTINEL_MOD)
1049 } else {
1050 format!("{package}.rs")
1051 }
1052}
1053
/// Derive the content-file stem for a `.proto` file path.
///
/// A trailing `.proto` is dropped (once) and every `/` becomes `.`, e.g.
/// `"google/protobuf/timestamp.proto"` → `"google.protobuf.timestamp"`.
/// The five content files are named by appending `""`, `".__view"`,
/// `".__oneof"`, `".__view_oneof"` or `".__ext"`, followed by `".rs"`.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let trimmed = match proto_path.strip_suffix(".proto") {
        Some(rest) => rest,
        None => proto_path,
    };
    trimmed
        .chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect()
}
1063
1064/// Merge downstream [`Companion`](GeneratedFileKind::Companion) files into
1065/// the per-package stitcher produced by [`generate`].
1066///
1067/// For each companion file this function locates the
1068/// [`PackageMod`](GeneratedFileKind::PackageMod) entry in `files` with a
1069/// matching package and appends `include!("<name>");` at file scope after
1070/// buffa's own output — at package root, alongside the owned message types,
1071/// not under `__buffa::`. The companion files themselves are appended to
1072/// `files` so that build integrations can write everything to disk in one
1073/// pass.
1074///
1075/// **Call this once per build**; it does not deduplicate, so a second call
1076/// with the same companions emits a second `include!` for each, which fails
1077/// to compile downstream with a duplicate-definition error.
1078///
1079/// `name` must be a bare-sibling filename — the same convention buffa uses
1080/// for its own `include!` calls, so it resolves relative to the stitcher
1081/// without any `OUT_DIR` prefix. Names must not contain `"`, `\`, `/`, or
1082/// newlines (the function `debug_assert!`s this in debug builds), and must
1083/// not collide with any of buffa's own generated filenames for the same
1084/// package (`<stem>.rs`, `<stem>.__view.rs`, etc.) — pick an unused suffix
1085/// such as `<stem>.__myplugin.rs`.
1086///
1087/// Companion files with no matching `PackageMod` (e.g. for a package buffa
1088/// did not generate any output for) are still appended to `files` but no
1089/// `include!` is emitted; the caller is responsible for wiring them up. If
1090/// you don't expect orphans, check that every companion's `package` appears
1091/// in `files` as a `PackageMod` after calling.
1092pub fn apply_companions(files: &mut Vec<GeneratedFile>, companions: Vec<GeneratedFile>) {
1093 for comp in &companions {
1094 debug_assert!(
1095 !comp.name.contains(['"', '\\', '/', '\n']),
1096 "companion file name {:?} contains a character that would break \
1097 the generated include!() literal or its bare-sibling resolution",
1098 comp.name
1099 );
1100 if let Some(pkg_mod) = files
1101 .iter_mut()
1102 .find(|f| f.kind == GeneratedFileKind::PackageMod && f.package == comp.package)
1103 {
1104 pkg_mod
1105 .content
1106 .push_str(&format!("include!(\"{}\");\n", comp.name));
1107 }
1108 }
1109 files.extend(companions);
1110}
1111
/// Code generation error.
///
/// The enum is `#[non_exhaustive]`, so matches outside this crate need a
/// wildcard arm.
#[derive(Debug, Clone, thiserror::Error)]
#[non_exhaustive]
pub enum CodeGenError {
    /// A required field was absent in a descriptor.
    ///
    /// The `&'static str` names the missing field for diagnostics.
    #[error("missing required descriptor field: {0}")]
    MissingField(&'static str),
    /// A resolved type path string could not be parsed as a Rust type.
    ///
    /// The payload is the offending path string.
    #[error("invalid Rust type path: '{0}'")]
    InvalidTypePath(String),
    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
    ///
    /// The payload is the parser's error message.
    #[error("generated code failed to parse as Rust: {0}")]
    InvalidSyntax(String),
    /// A requested file was not present in the descriptor set.
    ///
    /// The payload is the requested file name.
    #[error("file_to_generate '{0}' not found in descriptor set")]
    FileNotFound(String),
    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
    /// resolved to a known descriptor field).
    #[error("codegen error: {0}")]
    Other(String),
    /// A proto field name uses the `__buffa_` reserved prefix, which would
    /// conflict with buffa's internal generated fields.
    #[error(
        "reserved field name '{field_name}' in message '{message_name}': \
         proto field names starting with '__buffa_' conflict with buffa's \
         internal fields"
    )]
    ReservedFieldName {
        /// Message reported as containing the offending field.
        message_name: String,
        /// The offending field name.
        field_name: String,
    },
    /// Two sibling messages produce the same Rust module name after
    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
    /// become `pub mod http_request`).
    #[error(
        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
         both produce module '{module_name}'"
    )]
    ModuleNameConflict {
        /// Scope reported as containing both messages.
        scope: String,
        /// One of the two conflicting message names.
        name_a: String,
        /// The other conflicting message name.
        name_b: String,
        /// The module name both messages map to.
        module_name: String,
    },
    /// A proto package segment, message name, or file-level enum name
    /// would emit a Rust item matching the reserved sentinel `__buffa`.
    ///
    /// This is the only name buffa reserves in user namespace. Resolve by
    /// renaming the proto element.
    #[error(
        "reserved name '{name}' at {location}: this name is reserved for \
         buffa's generated ancillary types (views, oneof enums, \
         extensions). Rename the proto element."
    )]
    ReservedModuleName {
        /// The reserved name that was encountered.
        name: String,
        /// Where the name was found, as shown in the error text.
        location: String,
    },
    /// The input contains a message with `option message_set_wire_format = true`
    /// but [`CodeGenConfig::allow_message_set`] was not set.
    #[error(
        "message '{message_name}' uses `option message_set_wire_format = true` \
         but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
         wire format — set allow_message_set(true) if this is intentional"
    )]
    MessageSetNotSupported {
        /// The message that sets the option.
        message_name: String,
    },
    /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
    /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
    /// could not be parsed as a Rust attribute.
    #[error(
        "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
         Rust attribute ({detail})"
    )]
    InvalidCustomAttribute {
        /// The path the attribute was configured for.
        path: String,
        /// The attribute string that failed to parse.
        attribute: String,
        /// Extra detail shown in parentheses at the end of the error text.
        detail: String,
    },
}
1190
1191#[cfg(test)]
1192mod tests;