Skip to main content

buffa_codegen/
lib.rs

//! Shared code generation logic for buffa.
//!
//! This crate takes protobuf descriptors (`google.protobuf.FileDescriptorProto`,
//! decoded from binary `FileDescriptorSet` data) and emits Rust source code
//! that uses the `buffa` runtime.
//!
//! It is used by:
//! - `protoc-gen-buffa` (protoc plugin)
//! - `buffa-build` (build.rs integration)
//!
//! # Architecture
//!
//! The code generator is intentionally decoupled from how descriptors are
//! obtained. It receives fully-resolved `FileDescriptorProto`s and produces
//! Rust source strings. This means:
//!
//! - It doesn't parse `.proto` files.
//! - It doesn't invoke `protoc`.
//! - It doesn't do import resolution or name linking.
//!
//! All of that is handled upstream (by protoc, buf, or a future parser).
22
23pub mod context;
24pub(crate) mod defaults;
25pub(crate) mod enumeration;
26pub(crate) mod features;
27#[doc(hidden)]
28pub mod generated;
29pub mod idents;
30pub(crate) mod impl_message;
31pub(crate) mod imports;
32pub(crate) mod message;
33pub(crate) mod oneof;
34pub(crate) mod view;
35
36use crate::generated::descriptor::FileDescriptorProto;
37use proc_macro2::TokenStream;
38use quote::quote;
39
/// Result of generating Rust code for a single `.proto` file.
///
/// This is plain data (two owned strings), so it is cloneable and
/// comparable: callers can cache generator output, diff it against a
/// previous run, or assert on it in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GeneratedFile {
    /// The output file path. [`generate`] derives it from the proto file
    /// name (e.g., `"google/protobuf/timestamp.proto"` becomes
    /// `"google.protobuf.timestamp.rs"`).
    pub name: String,
    /// The generated Rust source code.
    pub content: String,
}
48
/// Knobs that control what the code generator emits.
///
/// Construct with [`Default::default`] and override individual fields; the
/// struct is `#[non_exhaustive]` so new options can be added later.
#[derive(Debug, Clone)]
#[non_exhaustive]
pub struct CodeGenConfig {
    /// Emit borrowed view types (`MyMessageView<'a>`) alongside the owned
    /// message types (default: `true`).
    pub generate_views: bool,
    /// Preserve unknown fields (default: `true`).
    pub preserve_unknown_fields: bool,
    /// Derive `serde::Serialize` / `serde::Deserialize` on generated message
    /// structs and enum types, and emit `#[serde(with = "...")]` attributes
    /// for proto3 JSON's special scalar encodings (int64 as quoted string,
    /// bytes as base64, etc.). Default: `false`.
    ///
    /// When enabled, the downstream crate must depend on `serde` and must
    /// turn on the `buffa/json` feature for the runtime helpers.
    ///
    /// Oneof fields use `#[serde(flatten)]` with custom `Serialize` /
    /// `Deserialize` impls so that each variant appears as a top-level
    /// JSON field (proto3 JSON inline oneof encoding).
    pub generate_json: bool,
    /// Emit `#[cfg_attr(feature = "arbitrary", derive(arbitrary::Arbitrary))]`
    /// on generated message structs and enum types (default: `false`).
    ///
    /// When enabled, the downstream crate must add `arbitrary` as an
    /// optional dependency and enable the `buffa/arbitrary` feature.
    pub generate_arbitrary: bool,
    /// External type path mappings (default: empty).
    ///
    /// Each `(proto_prefix, rust_path)` entry maps a fully-qualified
    /// protobuf path prefix (e.g., `".my.common"`) to a Rust module path
    /// (e.g., `"::common_protos"`). Types under the proto prefix reference
    /// the extern Rust path instead of being generated, so a shared proto
    /// package can be compiled once in a dedicated crate and referenced
    /// from others.
    ///
    /// Well-known types (`google.protobuf.*`) are mapped to
    /// `::buffa_types::google::protobuf::*` automatically, without an
    /// explicit entry here. To substitute a custom implementation, add an
    /// `extern_path` for `.google.protobuf` pointing to your crate.
    pub extern_paths: Vec<(String, String)>,
    /// Fully-qualified proto field paths whose `bytes` fields should be
    /// generated as `bytes::Bytes` rather than `Vec<u8>` (default: empty).
    ///
    /// Entries are matched as path prefixes: `".my.pkg.MyMessage.data"`
    /// selects one specific field, while `"."` selects every bytes field
    /// in every message.
    pub bytes_fields: Vec<String>,
    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` /
    /// `&[u8]` for such string fields instead of `String` / `&str`.
    ///
    /// With `false` (the default), buffa emits `String` for all string
    /// fields and **validates UTF-8 on decode** — stricter than proto2
    /// requires, but ergonomic and safe.
    ///
    /// With `true`, string fields whose `utf8_validation` is `NONE` (all
    /// proto2 strings by default, and editions fields that opt into `NONE`)
    /// become `Vec<u8>` / `&[u8]`. Decode skips validation; the caller
    /// decides at the call site whether to use `std::str::from_utf8`
    /// (checked) or `from_utf8_unchecked` (trusted-input fast path). This
    /// is the only sound Rust mapping when strings may actually contain
    /// non-UTF-8 bytes.
    ///
    /// **This is a breaking change for proto2** — enable only for new code
    /// or when profiling identifies UTF-8 validation as a bottleneck.
    pub strict_utf8_mapping: bool,
}
114
115impl Default for CodeGenConfig {
116    fn default() -> Self {
117        Self {
118            generate_views: true,
119            preserve_unknown_fields: true,
120            generate_json: false,
121            generate_arbitrary: false,
122            extern_paths: Vec::new(),
123            bytes_fields: Vec::new(),
124            strict_utf8_mapping: false,
125        }
126    }
127}
128
129/// Compute the effective extern path list by starting with user-provided
130/// mappings and adding the default WKT mapping if appropriate.
131///
132/// The default mapping `".google.protobuf" → "::buffa_types::google::protobuf"`
133/// is added unless:
134/// - The user already provided an extern_path covering `.google.protobuf`
135/// - Any of the files being generated are in the `google.protobuf` package
136///   (i.e., we're building `buffa-types` itself)
137pub(crate) fn effective_extern_paths(
138    file_descriptors: &[FileDescriptorProto],
139    files_to_generate: &[String],
140    config: &CodeGenConfig,
141) -> Vec<(String, String)> {
142    let mut paths = config.extern_paths.clone();
143
144    // Only an EXACT .google.protobuf mapping suppresses auto-injection.
145    // A sub-package mapping like .google.protobuf.compiler does NOT cover
146    // WKTs like Timestamp — resolve_extern_prefix's longest-prefix matching
147    // lets both coexist, so we still inject the parent mapping.
148    let has_wkt_mapping = paths.iter().any(|(proto, _)| proto == ".google.protobuf");
149
150    if !has_wkt_mapping {
151        // Check if we're generating google.protobuf files ourselves
152        // (e.g., building buffa-types). If so, don't auto-map.
153        let generating_wkts = file_descriptors
154            .iter()
155            .filter(|fd| {
156                fd.name
157                    .as_deref()
158                    .is_some_and(|n| files_to_generate.iter().any(|f| f == n))
159            })
160            .any(|fd| fd.package.as_deref() == Some("google.protobuf"));
161
162        if !generating_wkts {
163            paths.push((
164                ".google.protobuf".to_string(),
165                "::buffa_types::google::protobuf".to_string(),
166            ));
167        }
168    }
169
170    paths
171}
172
173/// Generate Rust source files from a set of file descriptors.
174///
175/// `files_to_generate` is the set of file names that were explicitly requested
176/// (matching `CodeGeneratorRequest.file_to_generate`). Descriptors for
177/// dependencies may be present in `file_descriptors` but won't produce output
178/// files unless they appear in `files_to_generate`.
179pub fn generate(
180    file_descriptors: &[FileDescriptorProto],
181    files_to_generate: &[String],
182    config: &CodeGenConfig,
183) -> Result<Vec<GeneratedFile>, CodeGenError> {
184    let ctx = context::CodeGenContext::for_generate(file_descriptors, files_to_generate, config);
185
186    let mut output = Vec::new();
187    for file_name in files_to_generate {
188        let file_desc = file_descriptors
189            .iter()
190            .find(|f| f.name.as_deref() == Some(file_name.as_str()))
191            .ok_or_else(|| CodeGenError::FileNotFound(file_name.clone()))?;
192
193        let content = generate_file(&ctx, file_desc)?;
194        let rust_filename = proto_path_to_rust_module(file_name);
195        output.push(GeneratedFile {
196            name: rust_filename,
197            content,
198        });
199    }
200
201    Ok(output)
202}
203
204/// Generate a module tree that assembles generated `.rs` files into
205/// nested `pub mod` blocks matching the protobuf package hierarchy.
206///
207/// Each entry is a `(file_name, package)` pair where `package` is the
208/// dot-separated protobuf package name (e.g., `"google.api"`). The module
209/// tree is built from the **package** hierarchy so that `super::`-based
210/// cross-package references resolve correctly.
211///
212/// `include_prefix` is prepended to file names in `include!` directives.
213/// Use `""` for relative paths or `concat!(env!("OUT_DIR"), "/")` style
214/// for build.rs output.
215///
216/// When `emit_inner_allow` is true, a `#![allow(...)]` inner attribute is
217/// emitted at the top of the file. This is appropriate when the output is
218/// used directly as a module file (e.g., `mod.rs`) but NOT when the output
219/// is consumed via `include!` (inner attributes are not valid in that
220/// context).
221pub fn generate_module_tree(
222    entries: &[(&str, &str)],
223    include_prefix: &str,
224    emit_inner_allow: bool,
225) -> String {
226    use std::collections::BTreeMap;
227    use std::fmt::Write;
228
229    use crate::idents::escape_mod_ident;
230
231    #[derive(Default)]
232    struct ModNode {
233        files: Vec<String>,
234        children: BTreeMap<String, ModNode>,
235    }
236
237    let mut root = ModNode::default();
238
239    for (file_name, package) in entries {
240        let pkg_parts: Vec<&str> = if package.is_empty() {
241            vec![]
242        } else {
243            package.split('.').collect()
244        };
245
246        let mut node = &mut root;
247        for seg in &pkg_parts {
248            node = node.children.entry(seg.to_string()).or_default();
249        }
250        node.files.push(file_name.to_string());
251    }
252
253    let mut out = String::new();
254    writeln!(out, "// @generated by buffa. DO NOT EDIT.").unwrap();
255    const ALLOW_LINTS: &str = "non_camel_case_types, dead_code, unused_imports, \
256        clippy::derivable_impls, clippy::match_single_binding, \
257        clippy::uninlined_format_args, clippy::doc_lazy_continuation";
258
259    if emit_inner_allow {
260        writeln!(out, "#![allow({ALLOW_LINTS})]").unwrap();
261    }
262    writeln!(out).unwrap();
263
264    fn emit(out: &mut String, node: &ModNode, depth: usize, prefix: &str, lints: &str) {
265        let indent = "    ".repeat(depth);
266
267        for file in &node.files {
268            writeln!(out, r#"{indent}include!("{prefix}{file}");"#).unwrap();
269        }
270
271        for (name, child) in &node.children {
272            let escaped = escape_mod_ident(name);
273            writeln!(out, "{indent}#[allow({lints})]").unwrap();
274            writeln!(out, "{indent}pub mod {escaped} {{").unwrap();
275            writeln!(out, "{indent}    use super::*;").unwrap();
276            emit(out, child, depth + 1, prefix, lints);
277            writeln!(out, "{indent}}}").unwrap();
278        }
279    }
280
281    emit(&mut out, &root, 0, include_prefix, ALLOW_LINTS);
282    out
283}
284
285/// Check that no fields in the file use the `__buffa_` reserved prefix.
286fn check_reserved_field_names(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
287    fn check_message(
288        msg: &crate::generated::descriptor::DescriptorProto,
289        parent_name: &str,
290    ) -> Result<(), CodeGenError> {
291        let msg_name = msg.name.as_deref().unwrap_or("");
292        let fqn = if parent_name.is_empty() {
293            msg_name.to_string()
294        } else {
295            format!("{}.{}", parent_name, msg_name)
296        };
297
298        for field in &msg.field {
299            if let Some(name) = &field.name {
300                if name.starts_with("__buffa_") {
301                    return Err(CodeGenError::ReservedFieldName {
302                        message_name: fqn,
303                        field_name: name.clone(),
304                    });
305                }
306            }
307        }
308
309        for nested in &msg.nested_type {
310            check_message(nested, &fqn)?;
311        }
312
313        Ok(())
314    }
315
316    let package = file.package.as_deref().unwrap_or("");
317    for msg in &file.message_type {
318        check_message(msg, package)?;
319    }
320    Ok(())
321}
322
323/// Check that no sibling messages produce the same snake_case module name.
324///
325/// For example, `HTTPRequest` and `HttpRequest` both produce
326/// `pub mod http_request`, which would be a compile error.
327fn check_module_name_conflicts(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
328    use std::collections::HashMap;
329
330    fn check_siblings(
331        messages: &[crate::generated::descriptor::DescriptorProto],
332        scope: &str,
333    ) -> Result<(), CodeGenError> {
334        // Map from snake_case module name → original proto name.
335        let mut seen: HashMap<String, &str> = HashMap::new();
336
337        for msg in messages {
338            let name = msg.name.as_deref().unwrap_or("");
339            let module_name = crate::oneof::to_snake_case(name);
340
341            if let Some(existing) = seen.get(&module_name) {
342                return Err(CodeGenError::ModuleNameConflict {
343                    scope: scope.to_string(),
344                    name_a: existing.to_string(),
345                    name_b: name.to_string(),
346                    module_name,
347                });
348            }
349            seen.insert(module_name, name);
350
351            // Recurse into nested messages.
352            let child_scope = if scope.is_empty() {
353                name.to_string()
354            } else {
355                format!("{}.{}", scope, name)
356            };
357            check_siblings(&msg.nested_type, &child_scope)?;
358        }
359
360        Ok(())
361    }
362
363    let package = file.package.as_deref().unwrap_or("");
364    check_siblings(&file.message_type, package)
365}
366
367/// Check that nested type names don't collide with oneof enum names.
368///
369/// Nested messages/enums and oneof enums coexist in the message's module.
370/// A nested `message MyField` and `oneof my_field` both produce `MyField`.
371fn check_nested_type_oneof_conflicts(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
372    use std::collections::HashSet;
373
374    fn check_message(
375        msg: &crate::generated::descriptor::DescriptorProto,
376        scope: &str,
377    ) -> Result<(), CodeGenError> {
378        let msg_name = msg.name.as_deref().unwrap_or("");
379        let fqn = if scope.is_empty() {
380            msg_name.to_string()
381        } else {
382            format!("{}.{}", scope, msg_name)
383        };
384
385        // Collect names that nested types/enums will occupy in the module.
386        let mut nested_names: HashSet<&str> = HashSet::new();
387        for nested in &msg.nested_type {
388            if let Some(name) = &nested.name {
389                nested_names.insert(name);
390            }
391        }
392        for nested_enum in &msg.enum_type {
393            if let Some(name) = &nested_enum.name {
394                nested_names.insert(name);
395            }
396        }
397
398        // Check each oneof's PascalCase name against nested type names.
399        for oneof in &msg.oneof_decl {
400            if let Some(oneof_name) = &oneof.name {
401                let rust_name = crate::oneof::to_pascal_case(oneof_name);
402                if nested_names.contains(rust_name.as_str()) {
403                    return Err(CodeGenError::NestedTypeOneofConflict {
404                        scope: fqn,
405                        nested_name: rust_name.clone(),
406                        oneof_name: oneof_name.clone(),
407                        rust_name,
408                    });
409                }
410            }
411        }
412
413        // Recurse into nested messages.
414        for nested in &msg.nested_type {
415            check_message(nested, &fqn)?;
416        }
417
418        Ok(())
419    }
420
421    let package = file.package.as_deref().unwrap_or("");
422    for msg in &file.message_type {
423        check_message(msg, package)?;
424    }
425    Ok(())
426}
427
428/// Check that no message named `FooView` collides with the generated view
429/// type for a sibling message `Foo`.
430fn check_view_name_conflicts(file: &FileDescriptorProto) -> Result<(), CodeGenError> {
431    use std::collections::HashSet;
432
433    fn check_siblings(
434        messages: &[crate::generated::descriptor::DescriptorProto],
435        scope: &str,
436    ) -> Result<(), CodeGenError> {
437        // Collect all message names at this level.
438        let names: HashSet<&str> = messages.iter().filter_map(|m| m.name.as_deref()).collect();
439
440        // For each message Foo, check if FooView also exists.
441        for msg in messages {
442            let name = msg.name.as_deref().unwrap_or("");
443            let view_name = format!("{}View", name);
444            if names.contains(view_name.as_str()) {
445                return Err(CodeGenError::ViewNameConflict {
446                    scope: scope.to_string(),
447                    owned_msg: name.to_string(),
448                    view_msg: view_name,
449                });
450            }
451        }
452
453        // Recurse into nested messages.
454        for msg in messages {
455            let name = msg.name.as_deref().unwrap_or("");
456            let child_scope = if scope.is_empty() {
457                name.to_string()
458            } else {
459                format!("{}.{}", scope, name)
460            };
461            check_siblings(&msg.nested_type, &child_scope)?;
462        }
463
464        Ok(())
465    }
466
467    let package = file.package.as_deref().unwrap_or("");
468    check_siblings(&file.message_type, package)
469}
470
471/// Generate Rust source for a single `.proto` file.
472fn generate_file(
473    ctx: &context::CodeGenContext,
474    file: &FileDescriptorProto,
475) -> Result<String, CodeGenError> {
476    // Validate descriptors before generating code.
477    check_reserved_field_names(file)?;
478    check_module_name_conflicts(file)?;
479    check_nested_type_oneof_conflicts(file)?;
480    if ctx.config.generate_views {
481        check_view_name_conflicts(file)?;
482    }
483
484    let resolver = imports::ImportResolver::for_file(file);
485    let mut tokens = resolver.generate_use_block();
486    let current_package = file.package.as_deref().unwrap_or("");
487    let features = crate::features::for_file(file);
488
489    for enum_type in &file.enum_type {
490        let enum_rust_name = enum_type.name.as_deref().unwrap_or("");
491        tokens.extend(enumeration::generate_enum(
492            ctx,
493            enum_type,
494            enum_rust_name,
495            &features,
496            &resolver,
497        )?);
498    }
499    for message_type in &file.message_type {
500        let top_level_name = message_type.name.as_deref().unwrap_or("");
501        let proto_fqn = if current_package.is_empty() {
502            top_level_name.to_string()
503        } else {
504            format!("{}.{}", current_package, top_level_name)
505        };
506        let (msg_top, msg_mod) = message::generate_message(
507            ctx,
508            message_type,
509            current_package,
510            top_level_name,
511            &proto_fqn,
512            &features,
513            &resolver,
514        )?;
515        tokens.extend(msg_top);
516
517        let view_mod = if ctx.config.generate_views {
518            let (view_top, view_mod) = view::generate_view(
519                ctx,
520                message_type,
521                current_package,
522                top_level_name,
523                &proto_fqn,
524                &features,
525            )?;
526            tokens.extend(view_top);
527            view_mod
528        } else {
529            TokenStream::new()
530        };
531
532        // Combine message and view module items into a single `pub mod`.
533        let mod_name = crate::oneof::to_snake_case(top_level_name);
534        let mod_ident = crate::message::make_field_ident(&mod_name);
535        if !msg_mod.is_empty() || !view_mod.is_empty() {
536            tokens.extend(quote! {
537                pub mod #mod_ident {
538                    #[allow(unused_imports)]
539                    use super::*;
540                    #msg_mod
541                    #view_mod
542                }
543            });
544        }
545    }
546
547    // Parse the token stream into a syn::File and format with prettyplease.
548    // Regular `//` comments cannot appear inside quote! blocks, so the file
549    // header is prepended as a raw string after formatting.
550    let syntax_tree =
551        syn::parse2::<syn::File>(tokens).map_err(|e| CodeGenError::InvalidSyntax(e.to_string()))?;
552    let formatted = prettyplease::unparse(&syntax_tree);
553
554    let source_line = file
555        .name
556        .as_ref()
557        .map_or(String::new(), |n| format!("// source: {n}\n"));
558
559    Ok(format!(
560        "// @generated by protoc-gen-buffa. DO NOT EDIT.\n{source_line}\n{formatted}"
561    ))
562}
563
/// Convert a `.proto` file path to the name of the generated Rust file.
///
/// A trailing `.proto` extension is removed (once, if present), every `/`
/// becomes `.`, and `.rs` is appended.
///
/// e.g., `"google/protobuf/timestamp.proto"` → `"google.protobuf.timestamp.rs"`
pub fn proto_path_to_rust_module(proto_path: &str) -> String {
    let stem = match proto_path.strip_suffix(".proto") {
        Some(trimmed) => trimmed,
        None => proto_path,
    };

    let mut file_name: String = stem
        .chars()
        .map(|ch| if ch == '/' { '.' } else { ch })
        .collect();
    file_name.push_str(".rs");
    file_name
}
574
575/// Code generation error.
576#[derive(Debug, Clone, thiserror::Error)]
577#[non_exhaustive]
578pub enum CodeGenError {
579    /// A required field was absent in a descriptor.
580    ///
581    /// The `&'static str` names the missing field for diagnostics.
582    #[error("missing required descriptor field: {0}")]
583    MissingField(&'static str),
584    /// A resolved type path string could not be parsed as a Rust type.
585    #[error("invalid Rust type path: '{0}'")]
586    InvalidTypePath(String),
587    /// The accumulated `TokenStream` failed to parse as valid Rust syntax.
588    #[error("generated code failed to parse as Rust: {0}")]
589    InvalidSyntax(String),
590    /// A requested file was not present in the descriptor set.
591    #[error("file_to_generate '{0}' not found in descriptor set")]
592    FileNotFound(String),
593    /// Unexpected descriptor state (e.g. a map entry or oneof that cannot be
594    /// resolved to a known descriptor field).
595    #[error("codegen error: {0}")]
596    Other(String),
597    /// A proto field name uses the `__buffa_` reserved prefix, which would
598    /// conflict with buffa's internal generated fields.
599    #[error(
600        "reserved field name '{field_name}' in message '{message_name}': \
601             proto field names starting with '__buffa_' conflict with buffa's \
602             internal fields"
603    )]
604    ReservedFieldName {
605        message_name: String,
606        field_name: String,
607    },
608    /// Two sibling messages produce the same Rust module name after
609    /// snake_case conversion (e.g., `HTTPRequest` and `HttpRequest` both
610    /// become `pub mod http_request`).
611    #[error(
612        "module name conflict in '{scope}': messages '{name_a}' and '{name_b}' \
613         both produce module '{module_name}'"
614    )]
615    ModuleNameConflict {
616        scope: String,
617        name_a: String,
618        name_b: String,
619        module_name: String,
620    },
621    /// A nested message/enum name collides with a oneof enum name inside
622    /// the same message module.
623    #[error(
624        "name conflict in '{scope}': nested type '{nested_name}' and \
625         oneof '{oneof_name}' both produce '{rust_name}' in the message module"
626    )]
627    NestedTypeOneofConflict {
628        scope: String,
629        nested_name: String,
630        oneof_name: String,
631        rust_name: String,
632    },
633    /// A message named `FooView` collides with the generated view type for
634    /// message `Foo`.
635    #[error(
636        "name conflict in '{scope}': message '{view_msg}' collides with \
637         the generated view type for message '{owned_msg}'"
638    )]
639    ViewNameConflict {
640        scope: String,
641        owned_msg: String,
642        view_msg: String,
643    },
644}
645
646#[cfg(test)]
647mod tests;