Skip to main content

buffa_codegen/
lib.rs

1//! Shared code generation logic for buffa.
2//!
3//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
4//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
5//! that uses the `buffa` runtime.
6//!
7//! It is used by:
8//! - `protoc-gen-buffa` (protoc plugin)
9//! - `buffa-build` (build.rs integration)
10//!
11//! # Architecture
12//!
13//! The code generator is intentionally decoupled from how descriptors are
14//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
15//! Rust source strings. This means:
16//!
17//! - It doesn't parse `.proto` files.
18//! - It doesn't invoke `protoc`.
19//! - It doesn't do import resolution or name linking.
20//!
21//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub(crate) mod comments;
24pub mod context;
25pub(crate) mod defaults;
26pub(crate) mod enumeration;
27pub(crate) mod extension;
28pub(crate) mod features;
29#[doc(hidden)]
30pub use buffa_descriptor::generated;
31pub mod idents;
32pub(crate) mod impl_message;
33pub(crate) mod impl_text;
34pub(crate) mod imports;
35pub(crate) mod message;
36pub(crate) mod oneof;
37pub(crate) mod view;
38
39use crate::generated::descriptor::FileDescriptorProto;
40use proc_macro2::TokenStream;
41use quote::quote;
42
/// Lints suppressed on generated code at module boundaries.
///
/// Consumed by [`generate_module_tree`], the per-package `.mod.rs`
/// stitcher, and `buffa-build`'s `_include.rs` writer. One list keeps
/// them in sync.
///
/// Entries are bare lint names without the surrounding `#[allow(…)]`.
/// [`generate_module_tree`] joins them with `", "`, and
/// [`allow_lints_attr`] parses each one into tokens (panicking if an entry
/// does not parse), so every entry must be valid Rust token syntax.
pub const ALLOW_LINTS: &[&str] = &[
    "non_camel_case_types",
    "dead_code",
    "unused_imports",
    "clippy::derivable_impls",
    "clippy::match_single_binding",
    "clippy::uninlined_format_args",
    "clippy::doc_lazy_continuation",
    // A user `message View { message Inner }` produces
    // `__buffa::view::view::InnerView`; harmless but trips this lint.
    "clippy::module_inception",
];
60
61/// Render [`ALLOW_LINTS`] as a `#[allow(…)]` attribute token stream.
62pub fn allow_lints_attr() -> TokenStream {
63    let lints: Vec<TokenStream> = ALLOW_LINTS
64        .iter()
65        .map(|l| syn::parse_str(l).expect("lint name parses as path"))
66        .collect();
67    quote! { #[allow( #(#lints),* )] }
68}
69
/// One generated output file.
///
/// Each `.proto` produces five **content files** (`<stem>.rs`,
/// `<stem>.__view.rs`, `<stem>.__oneof.rs`, `<stem>.__view_oneof.rs`,
/// `<stem>.__ext.rs`) and each proto package produces one
/// `<dotted.pkg>.mod.rs` **stitcher** that `include!`s the content files
/// and authors the `pub mod __buffa { … }` ancillary tree.
/// See `DESIGN.md` → "Generated code layout".
///
/// Consumers normally only need to wire up the
/// [`GeneratedFileKind::PackageMod`] entries (one per package); the five
/// per-proto content kinds are reached transitively via `include!` from
/// the stitcher. Write all files to disk; build a module tree from only
/// the `PackageMod` ones.
#[derive(Debug)]
pub struct GeneratedFile {
    /// The output file path (e.g., `"my.pkg.foo.rs"` or `"my.pkg.mod.rs"`).
    pub name: String,
    /// The proto package this file belongs to. Empty when the source
    /// `.proto` declares no package.
    pub package: String,
    /// What this file contains. Build integrations only need to wire up
    /// [`GeneratedFileKind::PackageMod`] files; everything else is reached
    /// via `include!` from there.
    pub kind: GeneratedFileKind,
    /// The generated Rust source code, already formatted and prefixed with
    /// the `// @generated by buffa-codegen. DO NOT EDIT.` header.
    pub content: String,
}
97
/// Kind of [`GeneratedFile`]. The five content kinds are 1:1 with input
/// `.proto` files; `PackageMod` is 1:1 with packages.
///
/// Build integrations only need to wire up [`PackageMod`](Self::PackageMod)
/// entries — the per-proto content kinds are reached via `include!` from
/// the stitcher and need only be written to disk alongside it.
///
/// All five content kinds are produced for every requested `.proto`,
/// whatever the configuration; the stitcher only `include!`s the
/// [`View`](Self::View) and [`ViewOneof`](Self::ViewOneof) files when
/// [`CodeGenConfig::generate_views`] is set.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum GeneratedFileKind {
    /// Owned message structs and enums (`<stem>.rs`).
    Owned,
    /// View structs (`<stem>.__view.rs`).
    View,
    /// Owned oneof enums (`<stem>.__oneof.rs`).
    Oneof,
    /// View oneof enums (`<stem>.__view_oneof.rs`).
    ViewOneof,
    /// File-level extension consts (`<stem>.__ext.rs`).
    Ext,
    /// Per-package stitcher (`<dotted.pkg>.mod.rs`). The only file build
    /// systems need to wire up directly.
    PackageMod,
}
120
/// Configuration for code generation.
///
/// The struct is `#[non_exhaustive]`, so code outside this crate cannot
/// build it with a struct literal: start from [`CodeGenConfig::default`]
/// and assign the public fields that need to differ.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Whether to generate borrowed view types (`MyMessageView<'a>`) in
    /// addition to owned types.
    ///
    /// Default: `true`.
    pub generate_views: bool,
    /// Whether to preserve unknown fields (default: true).
    pub preserve_unknown_fields: bool,
    /// Whether to derive `serde::Serialize` / `serde::Deserialize` on
    /// generated message structs and enum types, and emit `#[serde(with = "...")]`
    /// attributes for proto3 JSON's special scalar encodings (int64 as quoted
    /// string, bytes as base64, etc.).
    ///
    /// When this is `true`, the downstream crate must depend on `serde` and
    /// must enable the `buffa/json` feature for the runtime helpers.
    ///
    /// Oneof fields use `#[serde(flatten)]` with custom `Serialize` /
    /// `Deserialize` impls so that each variant appears as a top-level
    /// JSON field (proto3 JSON inline oneof encoding).
    ///
    /// Default: `false`.
    pub generate_json: bool,
    /// Whether to emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types.
    ///
    /// When this is `true`, the downstream crate must add `arbitrary` as an
    /// optional dependency and enable the `buffa/arbitrary` feature.
    ///
    /// Default: `false`.
    pub generate_arbitrary: bool,
    /// External type path mappings.
    ///
    /// Each entry maps a fully-qualified protobuf path prefix (e.g.,
    /// `".my.common"`) to a Rust module path (e.g., `"::common_protos"`).
    /// Types under the proto prefix will reference the extern Rust path
    /// instead of being generated, allowing shared proto packages to be
    /// compiled once in a dedicated crate and referenced from others.
    ///
    /// Well-known types (`google.protobuf.*`) are automatically mapped to
    /// `::buffa_types::google::protobuf::*` without needing an explicit
    /// entry here. To override with a custom implementation, add an
    /// `extern_path` for `.google.protobuf` pointing to your crate.
    ///
    /// Default: empty.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields should use
    /// `bytes::Bytes` instead of `Vec<u8>`.
    ///
    /// Each entry is a proto path prefix (e.g., `".my.pkg.MyMessage.data"` for
    /// a specific field, or `"."` for all bytes fields). The path is matched
    /// as a prefix, so `"."` applies to every bytes field in every message.
    ///
    /// Default: empty.
    pub bytes_fields: Vec<String>,
    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
    /// for such string fields instead of `String` / `&str`.
    ///
    /// When `false` (the default), buffa emits `String` for all string fields
    /// and **validates UTF-8 on decode** — stricter than proto2 requires, but
    /// ergonomic and safe.
    ///
    /// When `true`, string fields with `utf8_validation = NONE` (all proto2
    /// strings by default, and editions fields that opt into `NONE`) become
    /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller decides at the
    /// call site whether to `std::str::from_utf8` (checked) or
    /// `from_utf8_unchecked` (trusted-input fast path). This is the only
    /// sound Rust mapping when strings may actually contain non-UTF-8 bytes.
    ///
    /// **This is a breaking change for proto2** — enable only for new code or
    /// when profiling identifies UTF-8 validation as a bottleneck.
    pub strict_utf8_mapping: bool,
    /// Permit `option message_set_wire_format = true` on input messages.
    ///
    /// MessageSet is a legacy Google-internal wire format that wraps each
    /// extension in a group structure instead of using regular field tags.
    /// When `false` (the default), encountering such a message is a codegen
    /// error — the flag exists to make MessageSet use explicit, since the
    /// format is obsolete outside of interop with very old Google protos.
    pub allow_message_set: bool,
    /// Whether to emit `impl buffa::text::TextFormat` on generated message
    /// structs for textproto (human-readable text format) encoding/decoding.
    ///
    /// When this is `true`, the downstream crate must enable the `buffa/text`
    /// feature for the runtime encoder/decoder.
    ///
    /// Default: `false`.
    pub generate_text: bool,
    /// Whether the per-package `.mod.rs` stitcher emits
    /// `__buffa::register_types(&mut TypeRegistry)`.
    ///
    /// Default `true`. The fn aggregates `Any` type entries and extension
    /// entries for every message in the package. Set to `false` for
    /// crates that don't use extensions/`Any`, or that hand-roll
    /// registration (e.g. `buffa-types`' `register_wkt_types`, which
    /// knows the JSON-Any `is_wkt` special-casing the generic fn does
    /// not). The per-message `__*_JSON_ANY` / `__*_TEXT_ANY` consts are
    /// still emitted; only the aggregating fn is suppressed.
    pub emit_register_fn: bool,
    /// Custom attributes to inject on generated types (messages and enums).
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified proto name: `"."` applies to
    /// all types, `".my.pkg"` to types in that package, `".my.pkg.MyMessage"`
    /// to a specific type. The `attribute` is a raw Rust attribute string
    /// (e.g., `"#[derive(serde::Serialize)]"`).
    ///
    /// Default: empty.
    pub type_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated struct fields.
    ///
    /// Each entry is `(proto_path, attribute)`. The `proto_path` is matched
    /// as a prefix against the fully-qualified field path (e.g.,
    /// `".my.pkg.MyMessage.my_field"`). `"."` applies to all fields.
    ///
    /// Default: empty.
    pub field_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated message structs only (not enums).
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// message structs, not enum types. Useful for struct-only attributes like
    /// `#[serde(default)]`.
    ///
    /// Default: empty.
    pub message_attributes: Vec<(String, String)>,
    /// Custom attributes to inject on generated enum types only (not messages).
    ///
    /// Same path-matching semantics as `type_attributes`, but only applied to
    /// enum types. Useful for enum-only attributes like
    /// `#[derive(strum::EnumIter)]` when the user does not want to apply the
    /// same attribute to every message in the matched scope.
    ///
    /// Default: empty.
    pub enum_attributes: Vec<(String, String)>,
}
238
239impl Default for CodeGenConfig {
240    fn default() -> Self {
241        Self {
242            generate_views: true,
243            preserve_unknown_fields: true,
244            generate_json: false,
245            generate_arbitrary: false,
246            extern_paths: Vec::new(),
247            bytes_fields: Vec::new(),
248            strict_utf8_mapping: false,
249            allow_message_set: false,
250            generate_text: false,
251            emit_register_fn: true,
252            type_attributes: Vec::new(),
253            field_attributes: Vec::new(),
254            message_attributes: Vec::new(),
255            enum_attributes: Vec::new(),
256        }
257    }
258}
259
260/// Compute the effective extern path list by starting with user-provided
261/// mappings and adding the default WKT mapping if appropriate.
262///
263/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
264/// is added unless:
265/// - The user already provided an extern_path covering `.google.protobuf`
266/// - Any of the files being generated are in the `google.protobuf` package
267///   (i.e., we're building `buffa-types` itself)
268pub(crate) fn effective_extern_paths(
269    file_descriptors: &[FileDescriptorProto],
270    files_to_generate: &[String],
271    config: &CodeGenConfig,
272) -> Vec<(String, String)> {
273    let mut paths = config.extern_paths.clone();
274
275    // Only an EXACT .google.protobuf mapping suppresses auto-injection.
276    // A sub-package mapping like .google.protobuf.compiler does NOT cover
277    // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
278    // lets both coexist, so we still inject the parent mapping.
279    let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
280
281    if !has_wkt_mapping {
282        // Check if we're generating google.protobuf files ourselves
283        // (e.g., building buffa-types). If so, don't auto-map.
284        let generating_wkts = file_descriptors
285            .iter()
286            .filter(|fd| {
287                fd.name
288                    .as_deref()
289                    .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
290            })
291            .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
292
293        if !generating_wkts {
294            paths.push((
295                ".google.protobuf".to_string(),
296                "::buffa_types::google::protobuf".to_string(),
297            ));
298        }
299    }
300
301    paths
302}
303
304/// Generate Rust source files from a set of file descriptors.
305///
306/// `files_to_generate` is the set of file names that were explicitly requested
307/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
308/// dependencies may be present in `file_descriptors` but won't produce output
309/// files unless they appear in `files_to_generate`.
310///
311/// Each `.proto` emits five content files; each distinct package emits one
312/// `<pkg>.mod.rs` stitcher. Packages are processed in sorted order for
313/// deterministic output.
314pub fn generate(
315    file_descriptors: &[FileDescriptorProto],
316    files_to_generate: &[String],
317    config: &CodeGenConfig,
318) -> Result<Vec<GeneratedFile>, CodeGenError> {
319    let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
320
321    // Group requested files by package. BTreeMap → deterministic output order.
322    let mut by_package: std::collections::BTreeMap<String, Vec<&FileDescriptorProto>> =
323        std::collections::BTreeMap::new();
324    for file_name in files_to_generate {
325        let file_desc = file_descriptors
326            .iter()
327            .find(|f| f.name.as_deref() == Some(file_name.as_str()))
328            .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
329        let pkg = file_desc.package.as_deref().unwrap_or("").to_string();
330        by_package.entry(pkg).or_default().push(file_desc);
331    }
332
333    let mut output = Vec::new();
334    for (package, files) in by_package {
335        generate_package(&ctx, &package, &files, &mut output)?;
336    }
337
338    Ok(output)
339}
340
341/// Generate a module tree that assembles per-package `.mod.rs` files into
342/// nested `pub mod` blocks matching the protobuf package hierarchy.
343///
344/// Each entry is a `(mod_file_name, package)` pair where `package` is the
345/// dot-separated protobuf package name (e.g., `"google.api"`) and
346/// `mod_file_name` is the corresponding `<pkg>.mod.rs` (only
347/// [`GeneratedFileKind::PackageMod`] outputs need wiring; per-proto
348/// content files are reached via `include!` from the stitcher).
349///
350/// `include_mode` controls how `include!` paths are emitted.
351///
352/// `emit_inner_allow` adds a `#![allow(...)]` inner attribute at the top —
353/// valid when the output is used directly as a module file (`mod.rs`),
354/// invalid when consumed via `include!`.
355pub fn generate_module_tree<F: AsRef<str>, P: AsRef<str>>(
356    entries: &[(F, P)],
357    include_mode: IncludeMode<'_>,
358    emit_inner_allow: bool,
359) -> String {
360    use std::collections::BTreeMap;
361    use std::fmt::Write;
362
363    use crate::idents::escape_mod_ident;
364
365    #[derive(Default)]
366    struct ModNode {
367        files: Vec<String>,
368        children: BTreeMap<String, Self>,
369    }
370
371    let mut root = ModNode::default();
372
373    for (file_name, package) in entries {
374        let package = package.as_ref();
375        let pkg_parts: Vec<&str> = if package.is_empty() {
376            vec![]
377        } else {
378            package.split('.').collect()
379        };
380
381        let mut node = &mut root;
382        for seg in &pkg_parts {
383            node = node.children.entry(seg.to_string()).or_default();
384        }
385        node.files.push(file_name.as_ref().to_string());
386    }
387
388    let lints = ALLOW_LINTS.join(", ");
389    let mut out = String::new();
390    let _ = writeln!(out, "// @generated by buffa-codegen. DO NOT EDIT.");
391    if emit_inner_allow {
392        let _ = writeln!(out, "#![allow({lints})]");
393    }
394    let _ = writeln!(out);
395
396    fn emit(out: &mut String, node: &ModNode, depth: usize, mode: IncludeMode<'_>, lints: &str) {
397        let indent = "    ".repeat(depth);
398
399        for file in &node.files {
400            match mode {
401                IncludeMode::Relative(prefix) => {
402                    let _ = writeln!(out, r#"{indent}include!("{prefix}{file}");"#);
403                }
404                IncludeMode::OutDir => {
405                    let _ = writeln!(
406                        out,
407                        r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
408                    );
409                }
410            }
411        }
412
413        for (name, child) in &node.children {
414            let escaped = escape_mod_ident(name);
415            let _ = writeln!(out, "{indent}#[allow({lints})]");
416            let _ = writeln!(out, "{indent}pub mod {escaped} {{");
417            let _ = writeln!(out, "{indent}    use super::*;");
418            emit(out, child, depth + 1, mode, lints);
419            let _ = writeln!(out, "{indent}}}");
420        }
421    }
422
423    emit(&mut out, &root, 0, include_mode, &lints);
424    out
425}
426
/// How [`generate_module_tree`] emits `include!` paths.
#[derive(Debug, Clone, Copy)]
pub enum IncludeMode<'a> {
    /// `include!("<prefix><file>")` — relative to the including file.
    /// Prefix is typically `""` or `"gen/"`. It is concatenated with the
    /// file name verbatim, so a directory prefix must carry its own
    /// trailing `/`.
    Relative(&'a str),
    /// `include!(concat!(env!("OUT_DIR"), "/<file>"))` — for build.rs output.
    OutDir,
}
436
437/// Validate one input descriptor before generating code for it.
438///
439/// Checks, in one walk of the message tree:
440///
441/// - **Reserved field names**: no field starts with `__buffa_` (would clash
442///   with generated `__buffa_unknown_fields` / `__buffa_cached_size`).
443/// - **Module-name conflicts**: no two sibling messages snake_case to the
444///   same module name (e.g. `HTTPRequest` vs `HttpRequest`).
445/// - **Reserved sentinel**: no package segment, message-module name, or
446///   file-level enum name equals [`SENTINEL_MOD`](context::SENTINEL_MOD).
447///   Ancillary types live under `pkg::__buffa::…`; a proto element
448///   emitting an item named `__buffa` at package root would produce
449///   E0428 against `pub mod __buffa`. This is the only name buffa
450///   reserves in user namespace.
451fn validate_file(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
452    use std::collections::HashMap;
453
454    let sentinel = context::SENTINEL_MOD;
455    let package = file.package.as_deref().unwrap_or("");
456    if package.split('.').any(|seg| seg == sentinel) {
457        return Err(CodeGenError::ReservedModuleName {
458            name: sentinel.to_string(),
459            location: format!("package '{package}'"),
460        });
461    }
462    // File-level enums emit `pub enum <name>` at package root with the
463    // proto name preserved verbatim (no PascalCase normalization), so a
464    // proto `enum __buffa` would land beside `pub mod __buffa`. Nested
465    // enums live inside their owner message's module and cannot collide
466    // with the package-root sentinel, so only file-level is checked.
467    for enum_type in &file.enum_type {
468        let name = enum_type.name.as_deref().unwrap_or("");
469        if name == sentinel {
470            return Err(CodeGenError::ReservedModuleName {
471                name: sentinel.to_string(),
472                location: format!("enum '{package}.{name}'"),
473            });
474        }
475    }
476
477    fn walk(
478        messages: &[crate::generated::descriptor::DescriptorProto],
479        scope: &str,
480        sentinel: &str,
481    ) -> Result<(), CodeGenError> {
482        // snake_case module name → original proto name (for conflict diag).
483        let mut seen: HashMap<String, &str> = HashMap::new();
484
485        for msg in messages {
486            let name = msg.name.as_deref().unwrap_or("");
487            let fqn = if scope.is_empty() {
488                name.to_string()
489            } else {
490                format!("{scope}.{name}")
491            };
492
493            for field in &msg.field {
494                if let Some(fname) = &field.name {
495                    if fname.starts_with("__buffa_") {
496                        return Err(CodeGenError::ReservedFieldName {
497                            message_name: fqn,
498                            field_name: fname.clone(),
499                        });
500                    }
501                }
502            }
503
504            let module_name = crate::oneof::to_snake_case(name);
505            if module_name == sentinel {
506                return Err(CodeGenError::ReservedModuleName {
507                    name: sentinel.to_string(),
508                    location: format!("message '{fqn}'"),
509                });
510            }
511            if let Some(existing) = seen.get(&module_name) {
512                return Err(CodeGenError::ModuleNameConflict {
513                    scope: scope.to_string(),
514                    name_a: existing.to_string(),
515                    name_b: name.to_string(),
516                    module_name,
517                });
518            }
519            seen.insert(module_name, name);
520
521            walk(&msg.nested_type, &fqn, sentinel)?;
522        }
523        Ok(())
524    }
525
526    walk(&file.message_type, package, sentinel)
527}
528
/// Per-proto content streams plus the file stem, ready to be formatted.
///
/// Produced by `generate_proto_content`; `generate_package` formats each
/// stream into its own output file named `<stem><suffix>.rs`.
struct ProtoContent {
    // File-name stem derived from the `.proto` path via `proto_path_to_stem`.
    stem: String,
    // Written to `<stem>.rs`.
    owned: TokenStream,
    // Written to `<stem>.__view.rs`.
    view: TokenStream,
    // Written to `<stem>.__oneof.rs`.
    oneof: TokenStream,
    // Written to `<stem>.__view_oneof.rs`.
    view_oneof: TokenStream,
    // Written to `<stem>.__ext.rs`.
    ext: TokenStream,
}
538
539/// Generate the five per-`.proto` content files for one input file.
540fn generate_proto_content(
541    ctx: &context::CodeGenContext,
542    current_package: &str,
543    file: &FileDescriptorProto,
544    reg: &mut message::RegistryPaths,
545) -> Result<ProtoContent, CodeGenError> {
546    use crate::idents::make_field_ident;
547    use crate::message::MessageOutput;
548
549    validate_file(file)?;
550
551    let resolver = imports::ImportResolver::new();
552    let features = crate::features::for_file(file);
553
554    let mut owned = TokenStream::new();
555    let mut view = TokenStream::new();
556    let mut oneof = TokenStream::new();
557    let mut view_oneof = TokenStream::new();
558    let mut ext = TokenStream::new();
559
560    for enum_type in &file.enum_type {
561        let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
562        let enum_fqn = if current_package.is_empty() {
563            enum_rust_name.to_string()
564        } else {
565            format!("{}.{}", current_package, enum_rust_name)
566        };
567        owned.extend(enumeration::generate_enum(
568            ctx,
569            enum_type,
570            enum_rust_name,
571            &enum_fqn,
572            &features,
573            &resolver,
574        )?);
575    }
576
577    for message_type in &file.message_type {
578        let top_level_name = message_type.name.as_deref().unwrap_or("");
579        let proto_fqn = if current_package.is_empty() {
580            top_level_name.to_string()
581        } else {
582            format!("{}.{}", current_package, top_level_name)
583        };
584        let MessageOutput {
585            owned_top,
586            owned_mod,
587            oneof_tree: msg_oneof,
588            view_tree: msg_view,
589            view_oneof_tree: msg_view_oneof,
590            reg: msg_reg,
591        } = message::generate_message(
592            ctx,
593            message_type,
594            current_package,
595            top_level_name,
596            &proto_fqn,
597            &features,
598            &resolver,
599        )?;
600        owned.extend(owned_top);
601        let mod_ident = make_field_ident(&crate::oneof::to_snake_case(top_level_name));
602        for p in msg_reg.json_ext {
603            reg.json_ext.push(quote! { #mod_ident :: #p });
604        }
605        for p in msg_reg.text_ext {
606            reg.text_ext.push(quote! { #mod_ident :: #p });
607        }
608        reg.json_any.extend(msg_reg.json_any);
609        reg.text_any.extend(msg_reg.text_any);
610
611        if !owned_mod.is_empty() {
612            owned.extend(quote! {
613                pub mod #mod_ident {
614                    #[allow(unused_imports)]
615                    use super::*;
616                    #owned_mod
617                }
618            });
619        }
620        oneof.extend(msg_oneof);
621        view.extend(msg_view);
622        view_oneof.extend(msg_view_oneof);
623    }
624
625    // File-level `extend` declarations → `__buffa::ext::` (depth 2).
626    let (file_ext_tokens, file_ext_json, file_ext_text) = extension::generate_extensions(
627        ctx,
628        &file.extension,
629        current_package,
630        2,
631        &features,
632        current_package,
633    )?;
634    ext.extend(file_ext_tokens);
635    let sentinel = make_field_ident(context::SENTINEL_MOD);
636    for id in file_ext_json {
637        reg.json_ext.push(quote! { #sentinel :: ext :: #id });
638    }
639    for id in file_ext_text {
640        reg.text_ext.push(quote! { #sentinel :: ext :: #id });
641    }
642
643    Ok(ProtoContent {
644        stem: proto_path_to_stem(file.name.as_deref().unwrap_or("")),
645        owned,
646        view,
647        oneof,
648        view_oneof,
649        ext,
650    })
651}
652
653/// Generate all output files for one proto package: five content files per
654/// `.proto` plus one `<pkg>.mod.rs` stitcher.
655fn generate_package(
656    ctx: &context::CodeGenContext,
657    current_package: &str,
658    files: &[&FileDescriptorProto],
659    out: &mut Vec<GeneratedFile>,
660) -> Result<(), CodeGenError> {
661    // Registry paths are package-root-relative; `register_types` lives at
662    // `__buffa::register_types` (one level deep), so each path gets a
663    // single `super::` prefix when emitted into the fn body.
664    let mut reg = message::RegistryPaths::default();
665    let mut stems: Vec<String> = Vec::new();
666
667    for file in files {
668        let pc = generate_proto_content(ctx, current_package, file, &mut reg)?;
669        let source = file.name.as_deref().unwrap_or("");
670        let push = |out: &mut Vec<GeneratedFile>,
671                    suffix: &str,
672                    kind: GeneratedFileKind,
673                    tokens: TokenStream|
674         -> Result<(), CodeGenError> {
675            out.push(GeneratedFile {
676                name: format!("{}{suffix}.rs", pc.stem),
677                package: current_package.to_string(),
678                kind,
679                content: format_tokens(tokens, source)?,
680            });
681            Ok(())
682        };
683        push(out, "", GeneratedFileKind::Owned, pc.owned)?;
684        push(out, ".__view", GeneratedFileKind::View, pc.view)?;
685        push(out, ".__oneof", GeneratedFileKind::Oneof, pc.oneof)?;
686        push(
687            out,
688            ".__view_oneof",
689            GeneratedFileKind::ViewOneof,
690            pc.view_oneof,
691        )?;
692        push(out, ".__ext", GeneratedFileKind::Ext, pc.ext)?;
693        stems.push(pc.stem);
694    }
695
696    out.push(GeneratedFile {
697        name: package_to_mod_filename(current_package),
698        package: current_package.to_string(),
699        kind: GeneratedFileKind::PackageMod,
700        content: generate_package_mod(ctx, &stems, &reg)?,
701    });
702
703    Ok(())
704}
705
706/// Render the per-package `<pkg>.mod.rs` stitcher.
707///
708/// `include!` paths are bare-sibling (no `OUT_DIR` prefix) so the same
709/// stitcher works for both `OUT_DIR` builds (where the consumer's
710/// `include_proto!` already prepended `OUT_DIR`) and checked-in code.
711fn generate_package_mod(
712    ctx: &context::CodeGenContext,
713    stems: &[String],
714    reg: &message::RegistryPaths,
715) -> Result<String, CodeGenError> {
716    use crate::idents::make_field_ident;
717
718    let includes = |suffix: &str| -> Vec<TokenStream> {
719        stems
720            .iter()
721            .map(|stem| {
722                let path = format!("{stem}{suffix}.rs");
723                quote! { include!(#path); }
724            })
725            .collect()
726    };
727
728    let owned = includes("");
729    let view = includes(".__view");
730    let view_oneof = includes(".__view_oneof");
731    let oneof = includes(".__oneof");
732    let ext = includes(".__ext");
733
734    let view_mod = if ctx.config.generate_views {
735        quote! {
736            pub mod view {
737                #[allow(unused_imports)]
738                use super::*;
739                #(#view)*
740                pub mod oneof {
741                    #[allow(unused_imports)]
742                    use super::*;
743                    #(#view_oneof)*
744                }
745            }
746        }
747    } else {
748        TokenStream::new()
749    };
750
751    let register_fn = if ctx.config.emit_register_fn && !reg.is_empty() {
752        let json_any = &reg.json_any;
753        let json_ext = &reg.json_ext;
754        let text_any = &reg.text_any;
755        let text_ext = &reg.text_ext;
756        quote! {
757            /// Register this package's `Any` type entries and extension entries.
758            pub fn register_types(reg: &mut ::buffa::type_registry::TypeRegistry) {
759                #( reg.register_json_any(super::#json_any); )*
760                #( reg.register_json_ext(super::#json_ext); )*
761                #( reg.register_text_any(super::#text_any); )*
762                #( reg.register_text_ext(super::#text_ext); )*
763            }
764        }
765    } else {
766        TokenStream::new()
767    };
768
769    let allow = allow_lints_attr();
770    let sentinel = make_field_ident(context::SENTINEL_MOD);
771    let tokens = quote! {
772        #(#owned)*
773        #allow
774        pub mod #sentinel {
775            #[allow(unused_imports)]
776            use super::*;
777            #view_mod
778            pub mod oneof {
779                #[allow(unused_imports)]
780                use super::*;
781                #(#oneof)*
782            }
783            pub mod ext {
784                #[allow(unused_imports)]
785                use super::*;
786                #(#ext)*
787            }
788            #register_fn
789        }
790    };
791
792    format_tokens(tokens, "")
793}
794
795/// Format a token stream into a generated-file string with the standard
796/// header comment.
797fn format_tokens(tokens: TokenStream, source: &str) -> Result<String, CodeGenError> {
798    let syntax_tree =
799        syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
800    let formatted = prettyplease::unparse(&syntax_tree);
801    let source_line = if source.is_empty() {
802        String::new()
803    } else {
804        format!("// source: {source}\n")
805    };
806    Ok(format!(
807        "// @generated by buffa-codegen. DO NOT EDIT.\n{source_line}\n{formatted}"
808    ))
809}
810
811/// Convert a proto package name to its `.mod.rs` stitcher filename.
812///
813/// e.g., `"google.protobuf"` → `"google.protobuf.mod.rs"`. The unnamed
814/// package uses the [`SENTINEL_MOD`](context::SENTINEL_MOD) name as its
815/// filename stem — `package __buffa;` is already rejected by
816/// [`validate_file`], so the unnamed-package stitcher cannot
817/// collide with any real package's.
818pub fn package_to_mod_filename(package: &str) -> String {
819    if package.is_empty() {
820        format!("{}.mod.rs", context::SENTINEL_MOD)
821    } else {
822        format!("{package}.mod.rs")
823    }
824}
825
/// Derive the content-file stem for a `.proto` file path.
///
/// A trailing `".proto"` is dropped when present, and every `/` separator
/// becomes a `.`; e.g. `"google/protobuf/timestamp.proto"` yields
/// `"google.protobuf.timestamp"`. A path without the `.proto` suffix is
/// only dot-joined.
///
/// The five content files are named by appending `""`, `".__view"`,
/// `".__oneof"`, `".__view_oneof"`, or `".__ext"` to this stem, followed by
/// `".rs"`.
pub fn proto_path_to_stem(proto_path: &str) -> String {
    let trimmed = match proto_path.strip_suffix(".proto") {
        Some(rest) => rest,
        None => proto_path,
    };
    trimmed
        .chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect()
}
835
836/// Code generation error.
837#[derive(Debug, Clone, thiserror::Error)]
838#[non_exhaustive]
839pub enum CodeGenError {
840    /// A required field was absent in a descriptor.
841    ///
842    /// The `&'static str` names the missing field for diagnostics.
843    #[error("missing required descriptor field: {0}")]
844    MissingField(&'static str),
845    /// A resolved type path string could not be parsed as a Rust type.
846    #[error("invalid Rust type path: '{0}'")]
847    InvalidTypePath(String),
848    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
849    #[error("generated code failed to parse as Rust: {0}")]
850    InvalidSyntax(String),
851    /// A requested file was not present in the descriptor set.
852    #[error("file_to_generate '{0}' not found in descriptor set")]
853    FileNotFound(String),
854    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
855    /// resolved to a known descriptor field).
856    #[error("codegen error: {0}")]
857    Other(String),
858    /// A proto field name uses the `__buffa_` reserved prefix, which would
859    /// conflict with buffa's internal generated fields.
860    #[error(
861        "reserved field name '{field_name}' in message '{message_name}': \
862             proto field names starting with '__buffa_' conflict with buffa's \
863             internal fields"
864    )]
865    ReservedFieldName {
866        message_name: String,
867        field_name: String,
868    },
869    /// Two sibling messages produce the same Rust module name after
870    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
871    /// become `pub mod http_request`).
872    #[error(
873        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
874         both produce module '{module_name}'"
875    )]
876    ModuleNameConflict {
877        scope: String,
878        name_a: String,
879        name_b: String,
880        module_name: String,
881    },
882    /// A proto package segment, message name, or file-level enum name
883    /// would emit a Rust item matching the reserved sentinel `__buffa`.
884    ///
885    /// This is the only name buffa reserves in user namespace. Resolve by
886    /// renaming the proto element.
887    #[error(
888        "reserved name '{name}' at {location}: this name is reserved for \
889         buffa's generated ancillary types (views, oneof enums, \
890         extensions). Rename the proto element."
891    )]
892    ReservedModuleName { name: String, location: String },
893    /// The input contains a message with `option message_set_wire_format = true`
894    /// but [`CodeGenConfig::allow_message_set`] was not set.
895    #[error(
896        "message '{message_name}' uses `option message_set_wire_format = true` \
897         but CodeGenConfig::allow_message_set is false; MessageSet is a legacy \
898         wire format — set allow_message_set(true) if this is intentional"
899    )]
900    MessageSetNotSupported { message_name: String },
901    /// A custom attribute string configured via [`CodeGenConfig::type_attributes`],
902    /// [`CodeGenConfig::field_attributes`], or [`CodeGenConfig::message_attributes`]
903    /// could not be parsed as a Rust attribute.
904    #[error(
905        "invalid custom attribute for path '{path}': '{attribute}' is not a valid \
906         Rust attribute ({detail})"
907    )]
908    InvalidCustomAttribute {
909        path: String,
910        attribute: String,
911        detail: String,
912    },
913}
914
915#[cfg(test)]
916mod tests;