Skip to main content

buffa_build/
lib.rs

//! Build-time integration for buffa.
//!
//! Use this crate in your `build.rs` to compile `.proto` files into Rust code
//! at build time. It parses `.proto` files into a `FileDescriptorSet` (via
//! `protoc` or `buf`), then uses `buffa-codegen` to generate Rust source.
//!
//! # Example
//!
//! ```rust,ignore
//! // build.rs
//! fn main() {
//!     buffa_build::Config::new()
//!         .files(&["proto/my_service.proto"])
//!         .includes(&["proto/"])
//!         .compile()
//!         .unwrap();
//! }
//! ```
//!
//! # Requirements
//!
//! By default, requires `protoc` on the system PATH (or set via the `PROTOC`
//! environment variable) — the same as `prost-build` and `tonic-build`.
//!
//! If `protoc` is unavailable or outdated on your platform, `buf` can be
//! used instead — see [`Config::use_buf()`]. Alternatively, feed a
//! pre-compiled descriptor set via [`Config::descriptor_set()`].
28
29use std::io::Write;
30use std::path::{Path, PathBuf};
31use std::process::Command;
32
33use buffa::Message;
34use buffa_codegen::generated::descriptor::FileDescriptorSet;
35
36use buffa_codegen::CodeGenConfig;
37
/// How to produce a `FileDescriptorSet` from `.proto` files.
///
/// Selected through [`Config::use_buf`] and [`Config::descriptor_set`];
/// [`Config::compile`] dispatches on it both to obtain the descriptor bytes
/// and to decide which `cargo:rerun-if-changed` directives to print.
#[derive(Debug, Clone, Default)]
enum DescriptorSource {
    /// Invoke `protoc` (default). Requires `protoc` on PATH or `PROTOC` env var.
    #[default]
    Protoc,
    /// Invoke `buf build --as-file-descriptor-set`. Requires `buf` on PATH.
    Buf,
    /// Read a pre-built `FileDescriptorSet` from a file.
    Precompiled(PathBuf),
}
49
/// Builder for configuring and running protobuf compilation.
///
/// Start from [`Config::new`], chain the setter methods, and finish with
/// [`Config::compile`].
pub struct Config {
    /// Proto files requested via [`Config::files`]. In `protoc` mode these are
    /// filesystem paths; in `buf` / precompiled mode they are matched as-is
    /// against the descriptor file names.
    files: Vec<PathBuf>,
    /// Include directories passed to `protoc` as `--proto_path`.
    includes: Vec<PathBuf>,
    /// Explicit output directory; `None` means fall back to `$OUT_DIR`.
    out_dir: Option<PathBuf>,
    /// Options forwarded to `buffa_codegen::generate`.
    codegen_config: CodeGenConfig,
    /// Where the `FileDescriptorSet` comes from (`protoc` by default).
    descriptor_source: DescriptorSource,
    /// If set, generate a module-tree include file with this name in the
    /// output directory. Users can then `include!` this single file instead
    /// of manually setting up `pub mod` nesting.
    include_file: Option<String>,
}
62
63impl Config {
64    /// Create a new configuration with defaults.
65    pub fn new() -> Self {
66        Self {
67            files: Vec::new(),
68            includes: Vec::new(),
69            out_dir: None,
70            codegen_config: CodeGenConfig::default(),
71            descriptor_source: DescriptorSource::default(),
72            include_file: None,
73        }
74    }
75
76    /// Add `.proto` files to compile.
77    #[must_use]
78    pub fn files(mut self, files: &[impl AsRef<Path>]) -> Self {
79        self.files
80            .extend(files.iter().map(|f| f.as_ref().to_path_buf()));
81        self
82    }
83
84    /// Add include directories for protoc to search for imports.
85    #[must_use]
86    pub fn includes(mut self, includes: &[impl AsRef<Path>]) -> Self {
87        self.includes
88            .extend(includes.iter().map(|i| i.as_ref().to_path_buf()));
89        self
90    }
91
92    /// Set the output directory for generated files.
93    /// Defaults to `$OUT_DIR` if not set.
94    #[must_use]
95    pub fn out_dir(mut self, dir: impl Into<PathBuf>) -> Self {
96        self.out_dir = Some(dir.into());
97        self
98    }
99
100    /// Enable or disable view type generation (default: true).
101    #[must_use]
102    pub fn generate_views(mut self, enabled: bool) -> Self {
103        self.codegen_config.generate_views = enabled;
104        self
105    }
106
107    /// Enable or disable serde Serialize/Deserialize derive generation
108    /// for generated message structs and enum types (default: false).
109    ///
110    /// When enabled, the downstream crate must depend on `serde` and enable
111    /// the `buffa/json` feature for the runtime helpers.
112    #[must_use]
113    pub fn generate_json(mut self, enabled: bool) -> Self {
114        self.codegen_config.generate_json = enabled;
115        self
116    }
117
118    /// Enable or disable `#[derive(arbitrary::Arbitrary)]` on generated
119    /// types (default: false).
120    ///
121    /// The derive is gated behind `#[cfg_attr(feature = "arbitrary", ...)]`
122    /// so the downstream crate compiles with or without the feature enabled.
123    #[must_use]
124    pub fn generate_arbitrary(mut self, enabled: bool) -> Self {
125        self.codegen_config.generate_arbitrary = enabled;
126        self
127    }
128
129    /// Enable or disable unknown field preservation (default: true).
130    ///
131    /// When enabled (the default), unrecognized fields encountered during
132    /// decode are stored and re-emitted on encode — essential for proxy /
133    /// middleware services and round-trip fidelity across schema versions.
134    ///
135    /// **Disabling is primarily a memory optimization** (24 bytes/message for
136    /// the `UnknownFields` Vec header), not a throughput one. When no unknown
137    /// fields appear on the wire — the common case for schema-aligned
138    /// services — decode and encode costs are effectively identical in
139    /// either mode. Consider disabling for embedded / `no_std` targets or
140    /// large in-memory collections of small messages.
141    #[must_use]
142    pub fn preserve_unknown_fields(mut self, enabled: bool) -> Self {
143        self.codegen_config.preserve_unknown_fields = enabled;
144        self
145    }
146
147    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
148    /// for such string fields instead of `String` / `&str` (default: false).
149    ///
150    /// When disabled (the default), all string fields map to `String` and
151    /// UTF-8 is validated on decode — stricter than proto2 requires, but
152    /// ergonomic and safe.
153    ///
154    /// When enabled, string fields with `utf8_validation = NONE` become
155    /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller chooses
156    /// whether to `std::str::from_utf8` (checked) or `from_utf8_unchecked`
157    /// (trusted-input fast path). This is the only sound Rust mapping when
158    /// strings may actually contain non-UTF-8 bytes.
159    ///
160    /// **Note for proto2 users**: proto2's default is `utf8_validation = NONE`,
161    /// so enabling this turns ALL proto2 string fields into `Vec<u8>`. Use
162    /// only for new code or when profiling identifies UTF-8 validation as a
163    /// bottleneck (it can be 10%+ of decode CPU for string-heavy messages).
164    ///
165    /// **JSON note**: fields normalized to bytes serialize as base64 in JSON
166    /// (the proto3 JSON encoding for `bytes`). Keep strict mapping disabled
167    /// for fields that need JSON string interop with other implementations.
168    #[must_use]
169    pub fn strict_utf8_mapping(mut self, enabled: bool) -> Self {
170        self.codegen_config.strict_utf8_mapping = enabled;
171        self
172    }
173
174    /// Declare an external type path mapping.
175    ///
176    /// Types under the given protobuf path prefix will reference the specified
177    /// Rust module path instead of being generated. This allows shared proto
178    /// packages to be compiled once in a dedicated crate and referenced from
179    /// others.
180    ///
181    /// `proto_path` is a fully-qualified protobuf package path, e.g.,
182    /// `".my.common"` or `"my.common"` (the leading dot is optional and will
183    /// be added automatically). `rust_path` is the Rust module path where
184    /// those types are accessible (e.g., `"::common_protos"`).
185    ///
186    /// # Example
187    ///
188    /// ```rust,ignore
189    /// buffa_build::Config::new()
190    ///     .extern_path(".my.common", "::common_protos")
191    ///     .files(&["proto/my_service.proto"])
192    ///     .includes(&["proto/"])
193    ///     .compile()
194    ///     .unwrap();
195    /// ```
196    #[must_use]
197    pub fn extern_path(
198        mut self,
199        proto_path: impl Into<String>,
200        rust_path: impl Into<String>,
201    ) -> Self {
202        let mut proto_path = proto_path.into();
203        // Normalize: ensure the proto path is fully-qualified (leading dot).
204        // Accept both ".my.package" and "my.package" for convenience.
205        if !proto_path.starts_with('.') {
206            proto_path.insert(0, '.');
207        }
208        self.codegen_config
209            .extern_paths
210            .push((proto_path, rust_path.into()));
211        self
212    }
213
214    /// Configure `bytes` fields to use `bytes::Bytes` instead of `Vec<u8>`.
215    ///
216    /// Each path is a fully-qualified proto path prefix. Use `"."` to apply
217    /// to all bytes fields, or specify individual field paths like
218    /// `".my.pkg.MyMessage.data"`.
219    ///
220    /// # Example
221    ///
222    /// ```rust,ignore
223    /// buffa_build::Config::new()
224    ///     .bytes(&["."])  // all bytes fields use Bytes
225    ///     .files(&["proto/my_service.proto"])
226    ///     .includes(&["proto/"])
227    ///     .compile()
228    ///     .unwrap();
229    /// ```
230    #[must_use]
231    pub fn use_bytes_type_in(mut self, paths: &[impl AsRef<str>]) -> Self {
232        self.codegen_config
233            .bytes_fields
234            .extend(paths.iter().map(|p| p.as_ref().to_string()));
235        self
236    }
237
238    /// Use `bytes::Bytes` for all `bytes` fields in all messages.
239    ///
240    /// This is a convenience for `.use_bytes_type_in(&["."])`. Use `.use_bytes_type_in(&[...])` with
241    /// specific proto paths if you only want `Bytes` for certain fields.
242    #[must_use]
243    pub fn use_bytes_type(mut self) -> Self {
244        self.codegen_config.bytes_fields.push(".".to_string());
245        self
246    }
247
248    /// Use `buf build` instead of `protoc` for descriptor generation.
249    ///
250    /// `buf` is often easier to install and keep current than `protoc`
251    /// (which many distros pin to old versions). This mode is intended for
252    /// the **single-crate case**: a `buf.yaml` at the crate root defining
253    /// the module layout.
254    ///
255    /// Requires `buf` on PATH and a `buf.yaml` at the crate root. The
256    /// [`includes()`](Self::includes) setting is ignored — buf resolves
257    /// imports via its own module configuration.
258    ///
259    /// Each path given to [`files()`](Self::files) must be **relative to its
260    /// owning module's directory** (the `path:` value inside `buf.yaml`), not
261    /// the crate root where `buf.yaml` itself lives. buf strips the module
262    /// path when producing `FileDescriptorProto.name`, so for
263    /// `modules: [{path: proto}]` and a file on disk at
264    /// `proto/api/v1/service.proto`, the descriptor name is
265    /// `api/v1/service.proto` — that is what `.files()` must contain.
266    /// Multiple modules in one `buf.yaml` work fine; buf enforces that
267    /// module-relative names are unique across the workspace.
268    ///
269    /// # Monorepo / multi-module setups
270    ///
271    /// For a workspace-root `buf.yaml` with many modules, this mode is a
272    /// poor fit. Prefer running `buf generate` with the `protoc-gen-buffa`
273    /// plugin and checking in the generated code, or use
274    /// [`descriptor_set()`](Self::descriptor_set) with the output of
275    /// `buf build --as-file-descriptor-set -o fds.binpb <module-path>`
276    /// run as a pre-build step.
277    ///
278    /// # Example
279    ///
280    /// ```rust,ignore
281    /// // buf.yaml (at crate root):
282    /// //   version: v2
283    /// //   modules:
284    /// //     - path: proto
285    /// //
286    /// // build.rs:
287    /// buffa_build::Config::new()
288    ///     .use_buf()
289    ///     .files(&["api/v1/service.proto"])  // relative to module root
290    ///     .compile()
291    ///     .unwrap();
292    /// ```
293    #[must_use]
294    pub fn use_buf(mut self) -> Self {
295        self.descriptor_source = DescriptorSource::Buf;
296        self
297    }
298
299    /// Use a pre-compiled `FileDescriptorSet` binary file as input.
300    ///
301    /// Skips invoking `protoc` or `buf` entirely. The file must contain a
302    /// serialized `google.protobuf.FileDescriptorSet` (as produced by
303    /// `protoc --descriptor_set_out` or `buf build --as-file-descriptor-set`).
304    ///
305    /// When using this, `.files()` specifies which proto files in the
306    /// descriptor set to generate code for (matching by proto file name).
307    #[must_use]
308    pub fn descriptor_set(mut self, path: impl Into<PathBuf>) -> Self {
309        self.descriptor_source = DescriptorSource::Precompiled(path.into());
310        self
311    }
312
313    /// Generate a module-tree include file alongside the per-package `.rs`
314    /// files.
315    ///
316    /// The include file contains nested `pub mod` declarations with
317    /// `include!()` directives that assemble the generated code into a
318    /// module hierarchy matching the protobuf package structure. Users can
319    /// then include this single file instead of manually creating the
320    /// module tree.
321    ///
322    /// The form of the emitted `include!` directives depends on whether
323    /// [`out_dir`](Self::out_dir) was set:
324    ///
325    /// - **Default (`$OUT_DIR`)**: emits
326    ///   `include!(concat!(env!("OUT_DIR"), "/foo.rs"))`, for use from
327    ///   `build.rs` via `include!(concat!(env!("OUT_DIR"), "/<name>"))`.
328    /// - **Explicit `out_dir`**: emits sibling-relative `include!("foo.rs")`,
329    ///   for checking the generated code into the source tree and referencing
330    ///   it as a module (e.g. `mod gen;`).
331    ///
332    /// # Example — `build.rs` / `$OUT_DIR`
333    ///
334    /// ```rust,ignore
335    /// // build.rs
336    /// buffa_build::Config::new()
337    ///     .files(&["proto/my_service.proto"])
338    ///     .includes(&["proto/"])
339    ///     .include_file("_include.rs")
340    ///     .compile()
341    ///     .unwrap();
342    ///
343    /// // lib.rs
344    /// include!(concat!(env!("OUT_DIR"), "/_include.rs"));
345    /// ```
346    ///
347    /// # Example — checked-in source
348    ///
349    /// ```rust,ignore
350    /// // codegen.rs (run manually, not from build.rs)
351    /// buffa_build::Config::new()
352    ///     .files(&["proto/my_service.proto"])
353    ///     .includes(&["proto/"])
354    ///     .out_dir("src/gen")
355    ///     .include_file("mod.rs")
356    ///     .compile()
357    ///     .unwrap();
358    ///
359    /// // lib.rs
360    /// mod gen;
361    /// ```
362    #[must_use]
363    pub fn include_file(mut self, name: impl Into<String>) -> Self {
364        self.include_file = Some(name.into());
365        self
366    }
367
368    /// Compile proto files and generate Rust source.
369    ///
370    /// # Errors
371    ///
372    /// Returns an error if:
373    /// - `OUT_DIR` is not set and no `out_dir` was configured
374    /// - `protoc` or `buf` cannot be found on `PATH` (when using those sources)
375    /// - the proto compiler exits with a non-zero status (syntax errors,
376    ///   missing imports, etc.)
377    /// - a precompiled descriptor set file cannot be read
378    /// - the descriptor set bytes cannot be decoded as a `FileDescriptorSet`
379    /// - code generation fails (e.g. unsupported proto feature)
380    /// - the output directory cannot be created or written to
381    pub fn compile(self) -> Result<(), Box<dyn std::error::Error>> {
382        // When out_dir is explicitly set, the include file should use
383        // relative `include!("foo.rs")` paths (the index is a sibling of the
384        // generated files). When defaulted to $OUT_DIR, keep the
385        // `concat!(env!("OUT_DIR"), ...)` form so that
386        // `include!(concat!(env!("OUT_DIR"), "/_include.rs"))` from src/
387        // still resolves to absolute paths.
388        let relative_includes = self.out_dir.is_some();
389        let out_dir = self
390            .out_dir
391            .or_else(|| std::env::var("OUT_DIR").ok().map(PathBuf::from))
392            .ok_or("OUT_DIR not set and no out_dir configured")?;
393
394        // Produce a FileDescriptorSet from the configured source.
395        let descriptor_bytes = match &self.descriptor_source {
396            DescriptorSource::Protoc => invoke_protoc(&self.files, &self.includes)?,
397            DescriptorSource::Buf => invoke_buf()?,
398            DescriptorSource::Precompiled(path) => std::fs::read(path).map_err(|e| {
399                format!("failed to read descriptor set '{}': {}", path.display(), e)
400            })?,
401        };
402        let fds = FileDescriptorSet::decode_from_slice(&descriptor_bytes)
403            .map_err(|e| format!("failed to decode FileDescriptorSet: {}", e))?;
404
405        // Determine which files were explicitly requested.
406        //
407        // `FileDescriptorProto.name` contains the path relative to the proto
408        // source root (protoc: `--proto_path`; buf: the module root). For
409        // Precompiled and Buf mode, `.files()` are expected to already be
410        // proto-relative names. For Protoc mode, strip the longest matching
411        // include prefix.
412        let files_to_generate: Vec<String> = if matches!(
413            self.descriptor_source,
414            DescriptorSource::Precompiled(_) | DescriptorSource::Buf
415        ) {
416            self.files
417                .iter()
418                .filter_map(|f| f.to_str().map(str::to_string))
419                .collect()
420        } else {
421            self.files
422                .iter()
423                .map(|f| proto_relative_name(f, &self.includes))
424                .filter(|s| !s.is_empty())
425                .collect()
426        };
427
428        // Generate Rust source.
429        let generated =
430            buffa_codegen::generate(&fds.file, &files_to_generate, &self.codegen_config)?;
431
432        // Build a map from generated file name to proto package for the
433        // module tree generator.
434        let file_to_package: std::collections::HashMap<String, String> = fds
435            .file
436            .iter()
437            .map(|fd| {
438                let proto_name = fd.name.as_deref().unwrap_or("");
439                let rs_name = buffa_codegen::proto_path_to_rust_module(proto_name);
440                let package = fd.package.as_deref().unwrap_or("").to_string();
441                (rs_name, package)
442            })
443            .collect();
444
445        // Write output files and collect (name, package) pairs.
446        let mut output_entries: Vec<(String, String)> = Vec::new();
447        for file in generated {
448            let path = out_dir.join(&file.name);
449            if let Some(parent) = path.parent() {
450                std::fs::create_dir_all(parent)?;
451            }
452            let mut f = std::fs::File::create(&path)?;
453            f.write_all(file.content.as_bytes())?;
454            let package = file_to_package.get(&file.name).cloned().unwrap_or_default();
455            output_entries.push((file.name, package));
456        }
457
458        // Generate the include file if requested.
459        if let Some(ref include_name) = self.include_file {
460            let include_content = generate_include_file(&output_entries, relative_includes);
461            let include_path = out_dir.join(include_name);
462            let mut f = std::fs::File::create(&include_path)?;
463            f.write_all(include_content.as_bytes())?;
464        }
465
466        // Tell cargo to re-run if any proto file changes.
467        //
468        // For Buf mode, `self.files` are module-root-relative and cargo can't
469        // stat them — use `buf ls-files` instead, which lists all workspace
470        // protos with workspace-relative paths. This also catches changes to
471        // transitively-imported protos (a gap in the Protoc mode, which only
472        // watches explicitly-listed files).
473        match self.descriptor_source {
474            DescriptorSource::Buf => emit_buf_rerun_if_changed(),
475            DescriptorSource::Protoc => {
476                // Rerun if PROTOC changes (different binary may accept
477                // protos the previous one rejected, e.g. newer editions).
478                println!("cargo:rerun-if-env-changed=PROTOC");
479                for proto_file in &self.files {
480                    println!("cargo:rerun-if-changed={}", proto_file.display());
481                }
482            }
483            DescriptorSource::Precompiled(ref path) => {
484                println!("cargo:rerun-if-changed={}", path.display());
485            }
486        }
487
488        Ok(())
489    }
490}
491
492impl Default for Config {
493    fn default() -> Self {
494        Self::new()
495    }
496}
497
498/// Invoke `protoc` to produce a `FileDescriptorSet` (serialized bytes).
499fn invoke_protoc(
500    files: &[PathBuf],
501    includes: &[PathBuf],
502) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
503    let protoc = std::env::var("PROTOC").unwrap_or_else(|_| "protoc".to_string());
504
505    let descriptor_file =
506        tempfile::NamedTempFile::new().map_err(|e| format!("failed to create temp file: {}", e))?;
507    let descriptor_path = descriptor_file.path().to_path_buf();
508
509    let mut cmd = Command::new(&protoc);
510    cmd.arg("--include_imports");
511    cmd.arg(format!(
512        "--descriptor_set_out={}",
513        descriptor_path.display()
514    ));
515
516    for include in includes {
517        cmd.arg(format!("--proto_path={}", include.display()));
518    }
519
520    for file in files {
521        cmd.arg(file.as_os_str());
522    }
523
524    let output = cmd
525        .output()
526        .map_err(|e| format!("failed to run protoc ({}): {}", protoc, e))?;
527
528    if !output.status.success() {
529        let stderr = String::from_utf8_lossy(&output.stderr);
530        return Err(format!("protoc failed: {}", stderr).into());
531    }
532
533    let bytes = std::fs::read(&descriptor_path)
534        .map_err(|e| format!("failed to read descriptor set: {}", e))?;
535
536    Ok(bytes)
537}
538
/// Run `buf build` and return the serialized `FileDescriptorSet` it prints.
///
/// `buf` is started in the build script's working directory, so a `buf.yaml`
/// must be discoverable from there. The whole workspace is built — there is
/// deliberately no `--path` filtering: buf's `--path` flag expects
/// workspace-relative paths while `FileDescriptorProto.name` is
/// module-root-relative, so user paths cannot satisfy both. The caller
/// narrows the result afterwards by matching `files_to_generate`.
///
/// # Errors
///
/// Fails if `buf` cannot be started or exits unsuccessfully; in the latter
/// case its stderr is included in the message.
fn invoke_buf() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // `-o -` makes buf write the descriptor set to stdout.
    let mut buf = Command::new("buf");
    buf.args(["build", "--as-file-descriptor-set", "-o", "-"]);

    let output = buf
        .output()
        .map_err(|e| format!("failed to run buf (is it installed and on PATH?): {e}"))?;

    if output.status.success() {
        return Ok(output.stdout);
    }

    let stderr = String::from_utf8_lossy(&output.stderr);
    Err(format!("buf build failed (is buf.yaml present at crate root?): {stderr}").into())
}
564
/// Print `cargo:rerun-if-changed` directives for a buf workspace.
///
/// Always watches `buf.yaml`, and `buf.lock` when it exists (cargo treats a
/// missing rerun-if-changed path as always-dirty, so it is not listed
/// unconditionally). The proto files themselves come from `buf ls-files`,
/// which prints workspace-relative paths that cargo can stat.
///
/// Best effort: if `buf ls-files` cannot be run or fails, no per-file
/// directives are printed and no error is reported.
fn emit_buf_rerun_if_changed() {
    println!("cargo:rerun-if-changed=buf.yaml");
    if Path::new("buf.lock").exists() {
        println!("cargo:rerun-if-changed=buf.lock");
    }

    let listing = match Command::new("buf").arg("ls-files").output() {
        Ok(out) if out.status.success() => out.stdout,
        // ls-files failed; cargo already knows about buf.yaml above.
        // If buf itself is missing, invoke_buf() will error clearly.
        _ => return,
    };

    String::from_utf8_lossy(&listing)
        .lines()
        .map(str::trim)
        .filter(|path| !path.is_empty())
        .for_each(|path| println!("cargo:rerun-if-changed={path}"));
}
592
/// Convert a filesystem proto path to the name protoc uses in the descriptor.
///
/// `FileDescriptorProto.name` is relative to the `--proto_path` include
/// directory. This strips the longest matching include prefix; if no include
/// matches, returns the path as-is (not just file_name — that would break
/// nested proto directories).
///
/// Two normalizations mirror what protoc does when it canonicalizes paths:
///
/// - leading `./` components on the file and on each include are ignored, so
///   `./proto/a.proto` still matches the include `proto`;
/// - on Windows, `\` separators in the result are replaced with `/`, because
///   descriptor names always use forward slashes.
///
/// Returns an empty string if the resulting path is not valid UTF-8.
fn proto_relative_name(file: &Path, includes: &[PathBuf]) -> String {
    /// Drop any leading `.` components (`./a/b` -> `a/b`).
    fn without_cur_dir(path: &Path) -> &Path {
        let mut path = path;
        // Each successful strip removes one component, so this terminates.
        while let Ok(rest) = path.strip_prefix(".") {
            path = rest;
        }
        path
    }

    let file = without_cur_dir(file);

    // Longest prefix wins: a file under both "proto/" and "proto/vendor/"
    // should strip "proto/vendor/" for a correct relative name. The longest
    // prefix is the one leaving the shortest remainder; `min_by_key` keeps
    // the first candidate on ties.
    let relative = includes
        .iter()
        .filter_map(|include| file.strip_prefix(without_cur_dir(include)).ok())
        .min_by_key(|rel| rel.as_os_str().len())
        .unwrap_or(file);

    let name = relative.to_str().unwrap_or("");
    if cfg!(windows) {
        // Only on Windows: elsewhere `\` is an ordinary file-name character.
        name.replace('\\', "/")
    } else {
        name.to_string()
    }
}
613
/// Build the text of the include file: a nested `pub mod` tree mirroring the
/// protobuf package hierarchy, with each generated `.rs` file `include!`d in
/// the module of its package.
///
/// `entries` holds `(generated file name, proto package)` pairs. A file with
/// an empty package is included at the top level. Within a module, files are
/// emitted first (in `entries` order), then child modules sorted by name.
/// Package segments that are Rust keywords are escaped (`type` -> `r#type`;
/// `self`, `super`, `Self` and `crate` get a trailing underscore instead,
/// because they cannot be raw identifiers).
///
/// For example, entries `[("foo.bar.rs", "foo"), ("foo.baz.rs", "foo")]`
/// produce:
/// ```text
/// // @generated by buffa-build. DO NOT EDIT.
///
/// #[allow(non_camel_case_types, dead_code, unused_imports, clippy::derivable_impls, clippy::match_single_binding)]
/// pub mod foo {
///     use super::*;
///     include!(concat!(env!("OUT_DIR"), "/foo.bar.rs"));
///     include!(concat!(env!("OUT_DIR"), "/foo.baz.rs"));
/// }
/// ```
///
/// When `relative` is true (the caller set [`Config::out_dir`] explicitly),
/// `include!` directives use bare sibling paths (`include!("foo.bar.rs")`)
/// instead of the `env!("OUT_DIR")` prefix, so the include file works when
/// checked into the source tree and referenced via `mod`.
fn generate_include_file(entries: &[(String, String)], relative: bool) -> String {
    use std::collections::BTreeMap;

    /// Lint attribute placed on every generated `pub mod`.
    const MOD_ALLOW: &str = "#[allow(non_camel_case_types, dead_code, unused_imports, \
                             clippy::derivable_impls, clippy::match_single_binding)]";

    /// One package level: the files living directly in it plus its
    /// sub-packages, kept sorted by name for deterministic output.
    #[derive(Default)]
    struct PackageTree {
        files: Vec<String>,
        children: BTreeMap<String, PackageTree>,
    }

    /// Turn a package segment into a usable module identifier.
    fn module_ident(segment: &str) -> String {
        match segment {
            // Path keywords cannot be written as raw identifiers.
            "self" | "super" | "Self" | "crate" => format!("{segment}_"),
            // Strict, edition-specific and reserved keywords.
            "as" | "break" | "const" | "continue" | "else" | "enum" | "extern" | "false"
            | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod" | "move"
            | "mut" | "pub" | "ref" | "return" | "static" | "struct" | "trait" | "true"
            | "type" | "unsafe" | "use" | "where" | "while" | "async" | "await" | "dyn"
            | "gen" | "abstract" | "become" | "box" | "do" | "final" | "macro" | "override"
            | "priv" | "try" | "typeof" | "unsized" | "virtual" | "yield" => {
                format!("r#{segment}")
            }
            _ => segment.to_owned(),
        }
    }

    /// Append `tree` to `out`, indented by `depth` levels of four spaces.
    fn render(tree: &PackageTree, depth: usize, relative: bool, out: &mut String) {
        let pad = "    ".repeat(depth);

        for file in &tree.files {
            let directive = if relative {
                format!("include!(\"{file}\");")
            } else {
                format!("include!(concat!(env!(\"OUT_DIR\"), \"/{file}\"));")
            };
            out.push_str(&pad);
            out.push_str(&directive);
            out.push('\n');
        }

        for (segment, child) in &tree.children {
            let ident = module_ident(segment);
            out.push_str(&pad);
            out.push_str(MOD_ALLOW);
            out.push('\n');
            out.push_str(&format!("{pad}pub mod {ident} {{\n"));
            out.push_str(&format!("{pad}    use super::*;\n"));
            render(child, depth + 1, relative, out);
            out.push_str(&format!("{pad}}}\n"));
        }
    }

    // Sort every file into the tree node of its package.
    let mut root = PackageTree::default();
    for (file_name, package) in entries {
        let mut cursor = &mut root;
        if !package.is_empty() {
            for segment in package.split('.') {
                cursor = cursor.children.entry(segment.to_owned()).or_default();
            }
        }
        cursor.files.push(file_name.clone());
    }

    // Header line followed by one blank line, then the tree.
    let mut out = String::from("// @generated by buffa-build. DO NOT EDIT.\n\n");
    render(&root, 0, relative, &mut out);
    out
}
714
// Unit tests for the two pure helpers in this file: `proto_relative_name`
// (filesystem path -> descriptor file name) and `generate_include_file`
// (module-tree text). Nothing here spawns `protoc` or `buf`.
#[cfg(test)]
mod tests {
    use super::*;

    // A single matching include directory is stripped from the front.
    #[test]
    fn proto_relative_name_strips_include() {
        let got = proto_relative_name(
            Path::new("proto/my/service.proto"),
            &[PathBuf::from("proto/")],
        );
        assert_eq!(got, "my/service.proto");
    }

    // With overlapping includes the result must not depend on their order.
    #[test]
    fn proto_relative_name_longest_prefix_wins() {
        // Overlapping includes: file under both proto/ and proto/vendor/.
        // Must strip the LONGER prefix for the correct relative name.
        let got = proto_relative_name(
            Path::new("proto/vendor/ext.proto"),
            &[PathBuf::from("proto/"), PathBuf::from("proto/vendor/")],
        );
        assert_eq!(got, "ext.proto");
        // Same with reversed include order.
        let got = proto_relative_name(
            Path::new("proto/vendor/ext.proto"),
            &[PathBuf::from("proto/vendor/"), PathBuf::from("proto/")],
        );
        assert_eq!(got, "ext.proto");
    }

    // No includes at all: the path comes back untouched.
    #[test]
    fn proto_relative_name_no_match_returns_full_path() {
        // Regression: previously fell back to file_name(), which stripped
        // directory components and broke descriptor_set() mode with nested
        // proto packages. Now returns the full path as-is.
        let got = proto_relative_name(Path::new("my/pkg/service.proto"), &[]);
        assert_eq!(got, "my/pkg/service.proto");
    }

    // Includes exist but none is a prefix of the file: same fallback.
    #[test]
    fn proto_relative_name_no_match_with_unrelated_includes() {
        let got = proto_relative_name(
            Path::new("src/my.proto"),
            &[PathBuf::from("other/"), PathBuf::from("third/")],
        );
        assert_eq!(got, "src/my.proto");
    }

    // `relative = false` is the `$OUT_DIR` mode: every include goes through
    // `concat!(env!("OUT_DIR"), ...)`, for packaged and package-less files.
    #[test]
    fn include_file_out_dir_mode_uses_env_var() {
        let entries = vec![
            ("foo.bar.rs".to_string(), "foo".to_string()),
            ("root.rs".to_string(), String::new()),
        ];
        let out = generate_include_file(&entries, false);
        assert!(
            out.contains(r#"include!(concat!(env!("OUT_DIR"), "/foo.bar.rs"));"#),
            "nested-package file should use env!(OUT_DIR): {out}"
        );
        assert!(
            out.contains(r#"include!(concat!(env!("OUT_DIR"), "/root.rs"));"#),
            "empty-package file should use env!(OUT_DIR): {out}"
        );
        assert!(!out.contains(r#"include!("foo.bar.rs")"#));
    }

    // `relative = true` is the explicit-`out_dir` mode: bare sibling paths
    // and no mention of OUT_DIR anywhere in the output.
    #[test]
    fn include_file_relative_mode_uses_sibling_paths() {
        let entries = vec![
            ("foo.bar.rs".to_string(), "foo".to_string()),
            ("root.rs".to_string(), String::new()),
        ];
        let out = generate_include_file(&entries, true);
        assert!(
            out.contains(r#"include!("foo.bar.rs");"#),
            "nested-package file should use relative path: {out}"
        );
        assert!(
            out.contains(r#"include!("root.rs");"#),
            "empty-package file should use relative path: {out}"
        );
        assert!(
            !out.contains("OUT_DIR"),
            "relative mode must not reference OUT_DIR: {out}"
        );
    }

    // Files sharing a nested package must be merged into one module chain.
    #[test]
    fn include_file_relative_mode_nested_packages() {
        // Two files in the same depth-2 package: verifies the relative flag
        // propagates through recursive emit() calls and both files land in
        // the same innermost mod.
        let entries = vec![
            ("a.b.one.rs".to_string(), "a.b".to_string()),
            ("a.b.two.rs".to_string(), "a.b".to_string()),
        ];
        let out = generate_include_file(&entries, true);
        // Both includes should appear once, at the same depth-2 indent,
        // inside a single `pub mod b { ... }`.
        let indent = "        "; // depth 2 = 8 spaces
        assert!(
            out.contains(&format!(r#"{indent}include!("a.b.one.rs");"#)),
            "first file at depth 2: {out}"
        );
        assert!(
            out.contains(&format!(r#"{indent}include!("a.b.two.rs");"#)),
            "second file at depth 2: {out}"
        );
        assert_eq!(
            out.matches("pub mod b {").count(),
            1,
            "both files share one `mod b`: {out}"
        );
        assert!(!out.contains("OUT_DIR"));
    }
}