Skip to main content

buffa_build/
lib.rs

1//! Build-time integration for buffa.
2//!
3//! Use this crate in your `build.rs` to compile `.proto` files into Rust code
4//! at build time. Parses `.proto` files into a `FileDescriptorSet` (via
5//! `protoc` or `buf`), then uses `buffa-codegen` to generate Rust source.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! // build.rs
11//! fn main() {
12//!     buffa_build::Config::new()
13//!         .files(&["proto/my_service.proto"])
14//!         .includes(&["proto/"])
15//!         .compile()
16//!         .unwrap();
17//! }
18//! ```
19//!
20//! # Requirements
21//!
22//! By default, requires `protoc` on the system PATH (or set via the `PROTOC`
23//! environment variable) — the same as `prost-build` and `tonic-build`.
24//!
25//! If `protoc` is unavailable or outdated on your platform, `buf` can be
26//! used instead — see [`Config::use_buf()`]. Alternatively, feed a
27//! pre-compiled descriptor set via [`Config::descriptor_set()`].
28
29use std::path::{Path, PathBuf};
30use std::process::Command;
31
32use buffa::Message;
33use buffa_codegen::generated::descriptor::FileDescriptorSet;
34
35use buffa_codegen::CodeGenConfig;
36
/// How to produce a `FileDescriptorSet` from `.proto` files.
///
/// Selected through the [`Config`] builder: `Protoc` is the default,
/// [`Config::use_buf()`] switches to `Buf`, and
/// [`Config::descriptor_set()`] switches to `Precompiled`.
#[derive(Debug, Clone, Default)]
enum DescriptorSource {
    /// Invoke `protoc` (default). Requires `protoc` on PATH or `PROTOC` env var.
    #[default]
    Protoc,
    /// Invoke `buf build --as-file-descriptor-set`. Requires `buf` on PATH.
    Buf,
    /// Read a pre-built `FileDescriptorSet` from a file.
    ///
    /// The payload is the path of the serialized descriptor set; no external
    /// compiler is invoked in this mode.
    Precompiled(PathBuf),
}
48
/// Builder for configuring and running protobuf compilation.
///
/// Construct with [`Config::new()`], chain the builder methods, and finish
/// with [`Config::compile()`].
pub struct Config {
    /// Proto files to generate code for. In `protoc` mode these are
    /// filesystem paths; in buf / precompiled mode they are expected to be
    /// proto-relative names already.
    files: Vec<PathBuf>,
    /// Include directories handed to `protoc` as `--proto_path` and used to
    /// derive proto-relative names. Not consulted in buf / precompiled mode.
    includes: Vec<PathBuf>,
    /// Explicit output directory; `None` means "fall back to `$OUT_DIR`".
    out_dir: Option<PathBuf>,
    /// Options forwarded to `buffa_codegen::generate`.
    codegen_config: CodeGenConfig,
    /// Where the `FileDescriptorSet` comes from (`protoc`, `buf`, or a file).
    descriptor_source: DescriptorSource,
    /// If set, generate a module-tree include file with this name in the
    /// output directory. Users can then `include!` this single file instead
    /// of manually setting up `pub mod` nesting.
    include_file: Option<String>,
}
61
62impl Config {
63    /// Create a new configuration with defaults.
64    pub fn new() -> Self {
65        Self {
66            files: Vec::new(),
67            includes: Vec::new(),
68            out_dir: None,
69            codegen_config: CodeGenConfig::default(),
70            descriptor_source: DescriptorSource::default(),
71            include_file: None,
72        }
73    }
74
75    /// Add `.proto` files to compile.
76    #[must_use]
77    pub fn files(mut self, files: &[impl AsRef<Path>]) -> Self {
78        self.files
79            .extend(files.iter().map(|f| f.as_ref().to_path_buf()));
80        self
81    }
82
83    /// Add include directories for protoc to search for imports.
84    #[must_use]
85    pub fn includes(mut self, includes: &[impl AsRef<Path>]) -> Self {
86        self.includes
87            .extend(includes.iter().map(|i| i.as_ref().to_path_buf()));
88        self
89    }
90
91    /// Set the output directory for generated files.
92    /// Defaults to `$OUT_DIR` if not set.
93    #[must_use]
94    pub fn out_dir(mut self, dir: impl Into<PathBuf>) -> Self {
95        self.out_dir = Some(dir.into());
96        self
97    }
98
99    /// Enable or disable view type generation (default: true).
100    #[must_use]
101    pub fn generate_views(mut self, enabled: bool) -> Self {
102        self.codegen_config.generate_views = enabled;
103        self
104    }
105
106    /// Enable or disable serde Serialize/Deserialize derive generation
107    /// for generated message structs and enum types (default: false).
108    ///
109    /// When enabled, the downstream crate must depend on `serde` and enable
110    /// the `buffa/json` feature for the runtime helpers.
111    #[must_use]
112    pub fn generate_json(mut self, enabled: bool) -> Self {
113        self.codegen_config.generate_json = enabled;
114        self
115    }
116
117    /// Enable or disable `impl buffa::text::TextFormat` on generated message
118    /// structs (default: false).
119    ///
120    /// When enabled, the downstream crate must enable the `buffa/text`
121    /// feature for the runtime textproto encoder/decoder.
122    #[must_use]
123    pub fn generate_text(mut self, enabled: bool) -> Self {
124        self.codegen_config.generate_text = enabled;
125        self
126    }
127
128    /// Enable or disable `#[derive(arbitrary::Arbitrary)]` on generated
129    /// types (default: false).
130    ///
131    /// The derive is gated behind `#[cfg_attr(feature = "arbitrary", ...)]`
132    /// so the downstream crate compiles with or without the feature enabled.
133    #[must_use]
134    pub fn generate_arbitrary(mut self, enabled: bool) -> Self {
135        self.codegen_config.generate_arbitrary = enabled;
136        self
137    }
138
139    /// Enable or disable unknown field preservation (default: true).
140    ///
141    /// When enabled (the default), unrecognized fields encountered during
142    /// decode are stored and re-emitted on encode — essential for proxy /
143    /// middleware services and round-trip fidelity across schema versions.
144    ///
145    /// **Disabling is primarily a memory optimization** (24 bytes/message for
146    /// the `UnknownFields` Vec header), not a throughput one. When no unknown
147    /// fields appear on the wire — the common case for schema-aligned
148    /// services — decode and encode costs are effectively identical in
149    /// either mode. Consider disabling for embedded / `no_std` targets or
150    /// large in-memory collections of small messages.
151    #[must_use]
152    pub fn preserve_unknown_fields(mut self, enabled: bool) -> Self {
153        self.codegen_config.preserve_unknown_fields = enabled;
154        self
155    }
156
157    /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
158    /// for such string fields instead of `String` / `&str` (default: false).
159    ///
160    /// When disabled (the default), all string fields map to `String` and
161    /// UTF-8 is validated on decode — stricter than proto2 requires, but
162    /// ergonomic and safe.
163    ///
164    /// When enabled, string fields with `utf8_validation = NONE` become
165    /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller chooses
166    /// whether to `std::str::from_utf8` (checked) or `from_utf8_unchecked`
167    /// (trusted-input fast path). This is the only sound Rust mapping when
168    /// strings may actually contain non-UTF-8 bytes.
169    ///
170    /// **Note for proto2 users**: proto2's default is `utf8_validation = NONE`,
171    /// so enabling this turns ALL proto2 string fields into `Vec<u8>`. Use
172    /// only for new code or when profiling identifies UTF-8 validation as a
173    /// bottleneck (it can be 10%+ of decode CPU for string-heavy messages).
174    ///
175    /// **JSON note**: fields normalized to bytes serialize as base64 in JSON
176    /// (the proto3 JSON encoding for `bytes`). Keep strict mapping disabled
177    /// for fields that need JSON string interop with other implementations.
178    #[must_use]
179    pub fn strict_utf8_mapping(mut self, enabled: bool) -> Self {
180        self.codegen_config.strict_utf8_mapping = enabled;
181        self
182    }
183
184    /// Permit `option message_set_wire_format = true` on input messages.
185    ///
186    /// MessageSet is a legacy Google-internal wire format. Default: `false`
187    /// (such messages produce a codegen error). Set to `true` only when
188    /// compiling protos that interoperate with old Google-internal services.
189    #[must_use]
190    pub fn allow_message_set(mut self, enabled: bool) -> Self {
191        self.codegen_config.allow_message_set = enabled;
192        self
193    }
194
195    /// Declare an external type path mapping.
196    ///
197    /// Types under the given protobuf path prefix will reference the specified
198    /// Rust module path instead of being generated. This allows shared proto
199    /// packages to be compiled once in a dedicated crate and referenced from
200    /// others.
201    ///
202    /// `proto_path` is a fully-qualified protobuf package path, e.g.,
203    /// `".my.common"` or `"my.common"` (the leading dot is optional and will
204    /// be added automatically). `rust_path` is the Rust module path where
205    /// those types are accessible (e.g., `"::common_protos"`).
206    ///
207    /// # Example
208    ///
209    /// ```rust,ignore
210    /// buffa_build::Config::new()
211    ///     .extern_path(".my.common", "::common_protos")
212    ///     .files(&["proto/my_service.proto"])
213    ///     .includes(&["proto/"])
214    ///     .compile()
215    ///     .unwrap();
216    /// ```
217    #[must_use]
218    pub fn extern_path(
219        mut self,
220        proto_path: impl Into<String>,
221        rust_path: impl Into<String>,
222    ) -> Self {
223        let mut proto_path = proto_path.into();
224        // Normalize: ensure the proto path is fully-qualified (leading dot).
225        // Accept both ".my.package" and "my.package" for convenience.
226        if !proto_path.starts_with('.') {
227            proto_path.insert(0, '.');
228        }
229        self.codegen_config
230            .extern_paths
231            .push((proto_path, rust_path.into()));
232        self
233    }
234
235    /// Configure `bytes` fields to use `bytes::Bytes` instead of `Vec<u8>`.
236    ///
237    /// Each path is a fully-qualified proto path prefix. Use `"."` to apply
238    /// to all bytes fields, or specify individual field paths like
239    /// `".my.pkg.MyMessage.data"`.
240    ///
241    /// # Example
242    ///
243    /// ```rust,ignore
244    /// buffa_build::Config::new()
245    ///     .bytes(&["."])  // all bytes fields use Bytes
246    ///     .files(&["proto/my_service.proto"])
247    ///     .includes(&["proto/"])
248    ///     .compile()
249    ///     .unwrap();
250    /// ```
251    #[must_use]
252    pub fn use_bytes_type_in(mut self, paths: &[impl AsRef<str>]) -> Self {
253        self.codegen_config
254            .bytes_fields
255            .extend(paths.iter().map(|p| p.as_ref().to_string()));
256        self
257    }
258
259    /// Use `bytes::Bytes` for all `bytes` fields in all messages.
260    ///
261    /// This is a convenience for `.use_bytes_type_in(&["."])`. Use `.use_bytes_type_in(&[...])` with
262    /// specific proto paths if you only want `Bytes` for certain fields.
263    #[must_use]
264    pub fn use_bytes_type(mut self) -> Self {
265        self.codegen_config.bytes_fields.push(".".to_string());
266        self
267    }
268
269    /// Use `buf build` instead of `protoc` for descriptor generation.
270    ///
271    /// `buf` is often easier to install and keep current than `protoc`
272    /// (which many distros pin to old versions). This mode is intended for
273    /// the **single-crate case**: a `buf.yaml` at the crate root defining
274    /// the module layout.
275    ///
276    /// Requires `buf` on PATH and a `buf.yaml` at the crate root. The
277    /// [`includes()`](Self::includes) setting is ignored — buf resolves
278    /// imports via its own module configuration.
279    ///
280    /// Each path given to [`files()`](Self::files) must be **relative to its
281    /// owning module's directory** (the `path:` value inside `buf.yaml`), not
282    /// the crate root where `buf.yaml` itself lives. buf strips the module
283    /// path when producing `FileDescriptorProto.name`, so for
284    /// `modules: [{path: proto}]` and a file on disk at
285    /// `proto/api/v1/service.proto`, the descriptor name is
286    /// `api/v1/service.proto` — that is what `.files()` must contain.
287    /// Multiple modules in one `buf.yaml` work fine; buf enforces that
288    /// module-relative names are unique across the workspace.
289    ///
290    /// # Monorepo / multi-module setups
291    ///
292    /// For a workspace-root `buf.yaml` with many modules, this mode is a
293    /// poor fit. Prefer running `buf generate` with the `protoc-gen-buffa`
294    /// plugin and checking in the generated code, or use
295    /// [`descriptor_set()`](Self::descriptor_set) with the output of
296    /// `buf build --as-file-descriptor-set -o fds.binpb <module-path>`
297    /// run as a pre-build step.
298    ///
299    /// # Example
300    ///
301    /// ```rust,ignore
302    /// // buf.yaml (at crate root):
303    /// //   version: v2
304    /// //   modules:
305    /// //     - path: proto
306    /// //
307    /// // build.rs:
308    /// buffa_build::Config::new()
309    ///     .use_buf()
310    ///     .files(&["api/v1/service.proto"])  // relative to module root
311    ///     .compile()
312    ///     .unwrap();
313    /// ```
314    #[must_use]
315    pub fn use_buf(mut self) -> Self {
316        self.descriptor_source = DescriptorSource::Buf;
317        self
318    }
319
320    /// Use a pre-compiled `FileDescriptorSet` binary file as input.
321    ///
322    /// Skips invoking `protoc` or `buf` entirely. The file must contain a
323    /// serialized `google.protobuf.FileDescriptorSet` (as produced by
324    /// `protoc --descriptor_set_out` or `buf build --as-file-descriptor-set`).
325    ///
326    /// When using this, `.files()` specifies which proto files in the
327    /// descriptor set to generate code for (matching by proto file name).
328    #[must_use]
329    pub fn descriptor_set(mut self, path: impl Into<PathBuf>) -> Self {
330        self.descriptor_source = DescriptorSource::Precompiled(path.into());
331        self
332    }
333
334    /// Generate a module-tree include file alongside the per-package `.rs`
335    /// files.
336    ///
337    /// The include file contains nested `pub mod` declarations with
338    /// `include!()` directives that assemble the generated code into a
339    /// module hierarchy matching the protobuf package structure. Users can
340    /// then include this single file instead of manually creating the
341    /// module tree.
342    ///
343    /// The form of the emitted `include!` directives depends on whether
344    /// [`out_dir`](Self::out_dir) was set:
345    ///
346    /// - **Default (`$OUT_DIR`)**: emits
347    ///   `include!(concat!(env!("OUT_DIR"), "/foo.rs"))`, for use from
348    ///   `build.rs` via `include!(concat!(env!("OUT_DIR"), "/<name>"))`.
349    /// - **Explicit `out_dir`**: emits sibling-relative `include!("foo.rs")`,
350    ///   for checking the generated code into the source tree and referencing
351    ///   it as a module (e.g. `mod gen;`).
352    ///
353    /// # Example — `build.rs` / `$OUT_DIR`
354    ///
355    /// ```rust,ignore
356    /// // build.rs
357    /// buffa_build::Config::new()
358    ///     .files(&["proto/my_service.proto"])
359    ///     .includes(&["proto/"])
360    ///     .include_file("_include.rs")
361    ///     .compile()
362    ///     .unwrap();
363    ///
364    /// // lib.rs
365    /// include!(concat!(env!("OUT_DIR"), "/_include.rs"));
366    /// ```
367    ///
368    /// # Example — checked-in source
369    ///
370    /// ```rust,ignore
371    /// // codegen.rs (run manually, not from build.rs)
372    /// buffa_build::Config::new()
373    ///     .files(&["proto/my_service.proto"])
374    ///     .includes(&["proto/"])
375    ///     .out_dir("src/gen")
376    ///     .include_file("mod.rs")
377    ///     .compile()
378    ///     .unwrap();
379    ///
380    /// // lib.rs
381    /// mod gen;
382    /// ```
383    #[must_use]
384    pub fn include_file(mut self, name: impl Into<String>) -> Self {
385        self.include_file = Some(name.into());
386        self
387    }
388
389    /// Compile proto files and generate Rust source.
390    ///
391    /// # Errors
392    ///
393    /// Returns an error if:
394    /// - `OUT_DIR` is not set and no `out_dir` was configured
395    /// - `protoc` or `buf` cannot be found on `PATH` (when using those sources)
396    /// - the proto compiler exits with a non-zero status (syntax errors,
397    ///   missing imports, etc.)
398    /// - a precompiled descriptor set file cannot be read
399    /// - the descriptor set bytes cannot be decoded as a `FileDescriptorSet`
400    /// - code generation fails (e.g. unsupported proto feature)
401    /// - the output directory cannot be created or written to
402    pub fn compile(self) -> Result<(), Box<dyn std::error::Error>> {
403        // When out_dir is explicitly set, the include file should use
404        // relative `include!("foo.rs")` paths (the index is a sibling of the
405        // generated files). When defaulted to $OUT_DIR, keep the
406        // `concat!(env!("OUT_DIR"), ...)` form so that
407        // `include!(concat!(env!("OUT_DIR"), "/_include.rs"))` from src/
408        // still resolves to absolute paths.
409        let relative_includes = self.out_dir.is_some();
410        let out_dir = self
411            .out_dir
412            .or_else(|| std::env::var("OUT_DIR").ok().map(PathBuf::from))
413            .ok_or("OUT_DIR not set and no out_dir configured")?;
414
415        // Produce a FileDescriptorSet from the configured source.
416        let descriptor_bytes = match &self.descriptor_source {
417            DescriptorSource::Protoc => invoke_protoc(&self.files, &self.includes)?,
418            DescriptorSource::Buf => invoke_buf()?,
419            DescriptorSource::Precompiled(path) => std::fs::read(path).map_err(|e| {
420                format!("failed to read descriptor set '{}': {}", path.display(), e)
421            })?,
422        };
423        let fds = FileDescriptorSet::decode_from_slice(&descriptor_bytes)
424            .map_err(|e| format!("failed to decode FileDescriptorSet: {}", e))?;
425
426        // Determine which files were explicitly requested.
427        //
428        // `FileDescriptorProto.name` contains the path relative to the proto
429        // source root (protoc: `--proto_path`; buf: the module root). For
430        // Precompiled and Buf mode, `.files()` are expected to already be
431        // proto-relative names. For Protoc mode, strip the longest matching
432        // include prefix.
433        let files_to_generate: Vec<String> = if matches!(
434            self.descriptor_source,
435            DescriptorSource::Precompiled(_) | DescriptorSource::Buf
436        ) {
437            self.files
438                .iter()
439                .filter_map(|f| f.to_str().map(str::to_string))
440                .collect()
441        } else {
442            self.files
443                .iter()
444                .map(|f| proto_relative_name(f, &self.includes))
445                .filter(|s| !s.is_empty())
446                .collect()
447        };
448
449        // Generate Rust source.
450        let generated =
451            buffa_codegen::generate(&fds.file, &files_to_generate, &self.codegen_config)?;
452
453        // Build a map from generated file name to proto package for the
454        // module tree generator.
455        let file_to_package: std::collections::HashMap<String, String> = fds
456            .file
457            .iter()
458            .map(|fd| {
459                let proto_name = fd.name.as_deref().unwrap_or("");
460                let rs_name = buffa_codegen::proto_path_to_rust_module(proto_name);
461                let package = fd.package.as_deref().unwrap_or("").to_string();
462                (rs_name, package)
463            })
464            .collect();
465
466        // Write output files and collect (name, package) pairs.
467        let mut output_entries: Vec<(String, String)> = Vec::new();
468        for file in generated {
469            let path = out_dir.join(&file.name);
470            if let Some(parent) = path.parent() {
471                std::fs::create_dir_all(parent)?;
472            }
473            write_if_changed(&path, file.content.as_bytes())?;
474            let package = file_to_package.get(&file.name).cloned().unwrap_or_default();
475            output_entries.push((file.name, package));
476        }
477
478        // Generate the include file if requested.
479        if let Some(ref include_name) = self.include_file {
480            let include_content = generate_include_file(&output_entries, relative_includes);
481            let include_path = out_dir.join(include_name);
482            write_if_changed(&include_path, include_content.as_bytes())?;
483        }
484
485        // Tell cargo to re-run if any proto file changes.
486        //
487        // For Buf mode, `self.files` are module-root-relative and cargo can't
488        // stat them — use `buf ls-files` instead, which lists all workspace
489        // protos with workspace-relative paths. This also catches changes to
490        // transitively-imported protos (a gap in the Protoc mode, which only
491        // watches explicitly-listed files).
492        match self.descriptor_source {
493            DescriptorSource::Buf => emit_buf_rerun_if_changed(),
494            DescriptorSource::Protoc => {
495                // Rerun if PROTOC changes (different binary may accept
496                // protos the previous one rejected, e.g. newer editions).
497                println!("cargo:rerun-if-env-changed=PROTOC");
498                for proto_file in &self.files {
499                    println!("cargo:rerun-if-changed={}", proto_file.display());
500                }
501            }
502            DescriptorSource::Precompiled(ref path) => {
503                println!("cargo:rerun-if-changed={}", path.display());
504            }
505        }
506
507        Ok(())
508    }
509}
510
511impl Default for Config {
512    fn default() -> Self {
513        Self::new()
514    }
515}
516
/// Store `content` at `path`, skipping the write when the file already holds
/// exactly those bytes.
///
/// Leaving an up-to-date file untouched keeps its timestamp stable, which
/// avoids needless downstream recompilation. A file that cannot be read
/// (for example because it does not exist yet) is simply (re)written.
///
/// # Errors
///
/// Propagates any I/O error from the write itself.
fn write_if_changed(path: &Path, content: &[u8]) -> std::io::Result<()> {
    let up_to_date = matches!(std::fs::read(path), Ok(current) if current == content);
    if up_to_date {
        Ok(())
    } else {
        std::fs::write(path, content)
    }
}
528
529/// Invoke `protoc` to produce a `FileDescriptorSet` (serialized bytes).
530fn invoke_protoc(
531    files: &[PathBuf],
532    includes: &[PathBuf],
533) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
534    let protoc = std::env::var("PROTOC").unwrap_or_else(|_| "protoc".to_string());
535
536    let descriptor_file =
537        tempfile::NamedTempFile::new().map_err(|e| format!("failed to create temp file: {}", e))?;
538    let descriptor_path = descriptor_file.path().to_path_buf();
539
540    let mut cmd = Command::new(&protoc);
541    cmd.arg("--include_imports");
542    cmd.arg("--include_source_info");
543    cmd.arg(format!(
544        "--descriptor_set_out={}",
545        descriptor_path.display()
546    ));
547
548    for include in includes {
549        cmd.arg(format!("--proto_path={}", include.display()));
550    }
551
552    for file in files {
553        cmd.arg(file.as_os_str());
554    }
555
556    let output = cmd
557        .output()
558        .map_err(|e| format!("failed to run protoc ({}): {}", protoc, e))?;
559
560    if !output.status.success() {
561        let stderr = String::from_utf8_lossy(&output.stderr);
562        return Err(format!("protoc failed: {}", stderr).into());
563    }
564
565    let bytes = std::fs::read(&descriptor_path)
566        .map_err(|e| format!("failed to read descriptor set: {}", e))?;
567
568    Ok(bytes)
569}
570
/// Run `buf build` and return the serialized `FileDescriptorSet` it prints.
///
/// A `buf.yaml` must be discoverable from the build script's working
/// directory. The whole workspace is built: `--path` is deliberately not
/// used, because buf expects workspace-relative paths there while
/// `FileDescriptorProto.name` is module-root-relative, so the user-supplied
/// names cannot serve both purposes. Selection of what to generate is done
/// by the caller through `files_to_generate` matching.
///
/// # Errors
///
/// Returns an error if `buf` cannot be started or exits unsuccessfully (its
/// stderr is included in the message).
fn invoke_buf() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // No explicit source-info flag is needed: buf build emits SourceCodeInfo
    // unless `--exclude-source-info` is passed, so proto comments reach the
    // generated code by default.
    let mut buf_cmd = Command::new("buf");
    buf_cmd.args(["build", "--as-file-descriptor-set", "-o", "-"]);

    let result = match buf_cmd.output() {
        Ok(result) => result,
        Err(e) => {
            return Err(format!("failed to run buf (is it installed and on PATH?): {e}").into());
        }
    };

    if result.status.success() {
        return Ok(result.stdout);
    }

    let stderr = String::from_utf8_lossy(&result.stderr);
    Err(format!("buf build failed (is buf.yaml present at crate root?): {stderr}").into())
}
599
/// Print the `cargo:rerun-if-changed` directives for a buf workspace.
///
/// `buf.yaml` is always watched. `buf.lock` is watched only when present,
/// since cargo treats a missing rerun-if-changed path as always dirty. The
/// proto files themselves are discovered with `buf ls-files`, which reports
/// workspace-relative paths that cargo can stat.
///
/// Best effort: if `buf ls-files` cannot be run or fails, nothing further is
/// printed and no error is raised.
fn emit_buf_rerun_if_changed() {
    println!("cargo:rerun-if-changed=buf.yaml");
    if Path::new("buf.lock").exists() {
        println!("cargo:rerun-if-changed=buf.lock");
    }

    // A failed listing is tolerated on purpose: buf.yaml is already watched
    // above, and a missing `buf` binary is reported by `invoke_buf()`.
    if let Ok(listing) = Command::new("buf").arg("ls-files").output() {
        if listing.status.success() {
            String::from_utf8_lossy(&listing.stdout)
                .lines()
                .map(str::trim)
                .filter(|entry| !entry.is_empty())
                .for_each(|entry| println!("cargo:rerun-if-changed={entry}"));
        }
    }
}
627
/// Convert a filesystem proto path to the name protoc uses in the descriptor.
///
/// `FileDescriptorProto.name` is relative to the `--proto_path` include
/// directory. This strips the longest matching include prefix; if no include
/// matches, returns the path as-is (not just file_name — that would break
/// nested proto directories).
///
/// On Windows, native `\` separators are rewritten to `/`, because
/// descriptor file names always use forward slashes; a name containing
/// backslashes could never match one. On other platforms the text of the
/// path is returned unchanged.
///
/// Returns an empty string when the resulting path is not valid UTF-8 (or
/// when `file` is itself one of the include directories).
fn proto_relative_name(file: &Path, includes: &[PathBuf]) -> String {
    // Longest prefix wins: a file under both "proto/" and "proto/vendor/"
    // should strip "proto/vendor/" for a correct relative name. The longest
    // prefix is the one leaving the shortest remainder; `min_by_key` keeps
    // the first candidate on ties.
    let relative = includes
        .iter()
        .filter_map(|include| file.strip_prefix(include).ok())
        .min_by_key(|rel| rel.as_os_str().len())
        .unwrap_or(file);

    let name = relative.to_str().unwrap_or("");
    if cfg!(windows) {
        name.replace('\\', "/")
    } else {
        name.to_string()
    }
}
648
/// Build the text of an include file that stitches generated `.rs` files
/// into a nested module tree mirroring the protobuf package hierarchy.
///
/// Each entry is a `(generated file name, proto package)` pair. The package
/// is split on `.`; every segment becomes a `pub mod` wrapper and the file
/// is `include!`d inside the innermost one. Files with an empty package are
/// included at the top level. Sibling modules are emitted in sorted order,
/// files in the order given. Segments that are Rust keywords are escaped:
/// raw identifiers (`r#type`) where allowed, a trailing underscore for
/// `self`, `super`, `Self` and `crate`.
///
/// For example, entries `[("foo.bar.rs", "foo"), ("foo.baz.rs", "foo")]`
/// with `relative == false` produce:
/// ```text
/// // @generated by buffa-build. DO NOT EDIT.
///
/// #[allow(non_camel_case_types, dead_code, unused_imports, clippy::derivable_impls, clippy::match_single_binding)]
/// pub mod foo {
///     use super::*;
///     include!(concat!(env!("OUT_DIR"), "/foo.bar.rs"));
///     include!(concat!(env!("OUT_DIR"), "/foo.baz.rs"));
/// }
/// ```
///
/// When `relative` is true (the caller set [`Config::out_dir`] explicitly),
/// `include!` directives use bare sibling paths (`include!("foo.bar.rs")`)
/// instead of the `env!("OUT_DIR")` prefix, so the include file works when
/// checked into the source tree and referenced via `mod`.
fn generate_include_file(entries: &[(String, String)], relative: bool) -> String {
    use std::collections::BTreeMap;

    /// Lint allowances attached to every emitted module.
    const MODULE_ATTR: &str = "#[allow(non_camel_case_types, dead_code, unused_imports, \
                               clippy::derivable_impls, clippy::match_single_binding)]";

    /// One level of the module tree: files included here plus child modules.
    #[derive(Default)]
    struct ModNode {
        files: Vec<String>,
        children: BTreeMap<String, Self>,
    }

    /// Turn a package segment into a usable module identifier.
    fn escape_mod_name(name: &str) -> String {
        match name {
            // These cannot be written as raw identifiers.
            "self" | "super" | "Self" | "crate" => format!("{name}_"),
            "as" | "break" | "const" | "continue" | "else" | "enum" | "extern" | "false"
            | "fn" | "for" | "if" | "impl" | "in" | "let" | "loop" | "match" | "mod" | "move"
            | "mut" | "pub" | "ref" | "return" | "static" | "struct" | "trait" | "true"
            | "type" | "unsafe" | "use" | "where" | "while" | "async" | "await" | "dyn"
            | "gen" | "abstract" | "become" | "box" | "do" | "final" | "macro" | "override"
            | "priv" | "try" | "typeof" | "unsized" | "virtual" | "yield" => {
                format!("r#{name}")
            }
            _ => name.to_string(),
        }
    }

    /// Append `node`'s includes, then each child module, at nesting `depth`.
    fn render(buf: &mut String, node: &ModNode, depth: usize, relative: bool) {
        let pad = "    ".repeat(depth);
        for file in &node.files {
            let directive = if relative {
                format!(r#"include!("{file}");"#)
            } else {
                format!(r#"include!(concat!(env!("OUT_DIR"), "/{file}"));"#)
            };
            buf.push_str(&pad);
            buf.push_str(&directive);
            buf.push('\n');
        }
        for (segment, child) in &node.children {
            let module = escape_mod_name(segment);
            buf.push_str(&format!("{pad}{MODULE_ATTR}\n"));
            buf.push_str(&format!("{pad}pub mod {module} {{\n"));
            buf.push_str(&format!("{pad}    use super::*;\n"));
            render(buf, child, depth + 1, relative);
            buf.push_str(&format!("{pad}}}\n"));
        }
    }

    // Place every file at the node addressed by its package path.
    let mut tree = ModNode::default();
    for (file_name, package) in entries {
        let mut cursor = &mut tree;
        if !package.is_empty() {
            for segment in package.split('.') {
                cursor = cursor.children.entry(segment.to_owned()).or_default();
            }
        }
        cursor.files.push(file_name.clone());
    }

    let mut text = String::from("// @generated by buffa-build. DO NOT EDIT.\n\n");
    render(&mut text, &tree, 0, relative);
    text
}
749
#[cfg(test)]
mod tests {
    use super::*;

    /// `(file name, package)` fixture shared by the include-file mode tests:
    /// one file in package `foo` and one file with an empty package.
    fn nested_and_root_entries() -> Vec<(String, String)> {
        vec![
            ("foo.bar.rs".to_string(), "foo".to_string()),
            ("root.rs".to_string(), String::new()),
        ]
    }

    /// Reads the last-modified timestamp of `path`, panicking on any I/O error.
    fn modified_at(path: &Path) -> std::time::SystemTime {
        std::fs::metadata(path).unwrap().modified().unwrap()
    }

    /// A single matching include directory is stripped from the proto path.
    #[test]
    fn proto_relative_name_strips_include() {
        let includes = [PathBuf::from("proto/")];
        let got = proto_relative_name(Path::new("proto/my/service.proto"), &includes);
        assert_eq!(got, "my/service.proto");
    }

    /// With overlapping includes (`proto/` and `proto/vendor/`) the LONGER
    /// prefix must be stripped, regardless of the order the includes are
    /// listed in.
    #[test]
    fn proto_relative_name_longest_prefix_wins() {
        let file = Path::new("proto/vendor/ext.proto");
        let orderings = [
            [PathBuf::from("proto/"), PathBuf::from("proto/vendor/")],
            [PathBuf::from("proto/vendor/"), PathBuf::from("proto/")],
        ];
        for includes in &orderings {
            assert_eq!(proto_relative_name(file, includes), "ext.proto");
        }
    }

    /// Regression test: the fallback used to be `file_name()`, which dropped
    /// the directory components and broke `descriptor_set()` mode for nested
    /// proto packages. With no includes the full path must come back as-is.
    #[test]
    fn proto_relative_name_no_match_returns_full_path() {
        let got = proto_relative_name(Path::new("my/pkg/service.proto"), &[]);
        assert_eq!(got, "my/pkg/service.proto");
    }

    /// Includes that are not a prefix of the file leave the path untouched.
    #[test]
    fn proto_relative_name_no_match_with_unrelated_includes() {
        let includes = [PathBuf::from("other/"), PathBuf::from("third/")];
        let got = proto_relative_name(Path::new("src/my.proto"), &includes);
        assert_eq!(got, "src/my.proto");
    }

    /// Non-relative mode: every include goes through `env!("OUT_DIR")`.
    #[test]
    fn include_file_out_dir_mode_uses_env_var() {
        let entries = nested_and_root_entries();
        let out = generate_include_file(&entries, false);

        let cases = [
            ("foo.bar.rs", "nested-package"),
            ("root.rs", "empty-package"),
        ];
        for (file, label) in cases {
            let expected = format!(r#"include!(concat!(env!("OUT_DIR"), "/{file}"));"#);
            assert!(
                out.contains(&expected),
                "{label} file should use env!(OUT_DIR): {out}"
            );
        }
        assert!(!out.contains(r#"include!("foo.bar.rs")"#));
    }

    /// Relative mode: includes name sibling files and never mention OUT_DIR.
    #[test]
    fn include_file_relative_mode_uses_sibling_paths() {
        let entries = nested_and_root_entries();
        let out = generate_include_file(&entries, true);

        let cases = [
            ("foo.bar.rs", "nested-package"),
            ("root.rs", "empty-package"),
        ];
        for (file, label) in cases {
            let expected = format!(r#"include!("{file}");"#);
            assert!(
                out.contains(&expected),
                "{label} file should use relative path: {out}"
            );
        }
        assert!(
            !out.contains("OUT_DIR"),
            "relative mode must not reference OUT_DIR: {out}"
        );
    }

    /// Two files in the same depth-2 package: checks that the relative flag
    /// survives the recursive `emit()` calls and that both includes end up
    /// inside one shared innermost `pub mod b { ... }`.
    #[test]
    fn include_file_relative_mode_nested_packages() {
        let entries = vec![
            ("a.b.one.rs".to_string(), "a.b".to_string()),
            ("a.b.two.rs".to_string(), "a.b".to_string()),
        ];
        let out = generate_include_file(&entries, true);

        // Depth 2 means two indent levels of four spaces each.
        let indent = " ".repeat(8);
        let cases = [("a.b.one.rs", "first"), ("a.b.two.rs", "second")];
        for (file, ordinal) in cases {
            let expected = format!(r#"{indent}include!("{file}");"#);
            assert!(
                out.contains(&expected),
                "{ordinal} file at depth 2: {out}"
            );
        }

        let mod_b_count = out.matches("pub mod b {").count();
        assert_eq!(mod_b_count, 1, "both files share one `mod b`: {out}");
        assert!(!out.contains("OUT_DIR"));
    }

    /// A path that does not exist yet is created with the given bytes.
    #[test]
    fn write_if_changed_creates_new_file() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("new.rs");

        write_if_changed(&target, b"hello").unwrap();

        assert_eq!(std::fs::read(&target).unwrap(), b"hello");
    }

    /// Writing bytes identical to what is already on disk must leave the
    /// file's modification time unchanged.
    #[test]
    fn write_if_changed_skips_identical_content() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("same.rs");
        std::fs::write(&target, b"content").unwrap();
        let mtime_before = modified_at(&target);

        // Pause so that an actual rewrite would show up as a newer mtime.
        std::thread::sleep(std::time::Duration::from_millis(50));

        write_if_changed(&target, b"content").unwrap();
        let mtime_after = modified_at(&target);
        assert_eq!(mtime_before, mtime_after);
    }

    /// Differing bytes replace the previous file content.
    #[test]
    fn write_if_changed_overwrites_different_content() {
        let tmp = tempfile::tempdir().unwrap();
        let target = tmp.path().join("changed.rs");
        std::fs::write(&target, b"old").unwrap();

        write_if_changed(&target, b"new").unwrap();

        assert_eq!(std::fs::read(&target).unwrap(), b"new");
    }
}