buffa_build/lib.rs
1//! Build-time integration for buffa.
2//!
3//! Use this crate in your `build.rs` to compile `.proto` files into Rust code
4//! at build time. Parses `.proto` files into a `FileDescriptorSet` (via
5//! `protoc` or `buf`), then uses `buffa-codegen` to generate Rust source.
6//!
7//! # Example
8//!
9//! ```rust,ignore
10//! // build.rs
11//! fn main() {
12//! buffa_build::Config::new()
13//! .files(&["proto/my_service.proto"])
14//! .includes(&["proto/"])
15//! .compile()
16//! .unwrap();
17//! }
18//! ```
19//!
20//! # Requirements
21//!
22//! By default, requires `protoc` on the system PATH (or set via the `PROTOC`
23//! environment variable) — the same as `prost-build` and `tonic-build`.
24//!
25//! If `protoc` is unavailable or outdated on your platform, `buf` can be
26//! used instead — see [`Config::use_buf()`]. Alternatively, feed a
27//! pre-compiled descriptor set via [`Config::descriptor_set()`].
28
29use std::path::{Path, PathBuf};
30use std::process::Command;
31
32use buffa::Message;
33use buffa_codegen::generated::descriptor::FileDescriptorSet;
34
35use buffa_codegen::CodeGenConfig;
36
/// Where the serialized `FileDescriptorSet` handed to codegen comes from.
#[derive(Clone, Debug, Default)]
enum DescriptorSource {
    /// Run `protoc` (the default). `protoc` must be on PATH or named by the
    /// `PROTOC` env var.
    #[default]
    Protoc,
    /// Run `buf build --as-file-descriptor-set`. `buf` must be on PATH.
    Buf,
    /// Load an already-built `FileDescriptorSet` from the given file.
    Precompiled(PathBuf),
}
48
/// Builder for configuring and running protobuf compilation.
///
/// Settings are accumulated through the chained builder methods and consumed
/// by [`Config::compile`].
pub struct Config {
    /// `.proto` inputs accumulated by [`Config::files`].
    files: Vec<PathBuf>,
    /// Import search directories accumulated by [`Config::includes`].
    includes: Vec<PathBuf>,
    /// Explicit output directory; when `None`, `compile` falls back to the
    /// `OUT_DIR` environment variable.
    out_dir: Option<PathBuf>,
    /// Options forwarded to `buffa_codegen::generate`.
    codegen_config: CodeGenConfig,
    /// Which tool (or pre-built file) supplies the `FileDescriptorSet`.
    descriptor_source: DescriptorSource,
    /// If set, generate a module-tree include file with this name in the
    /// output directory. Users can then `include!` this single file instead
    /// of manually setting up `pub mod` nesting.
    include_file: Option<String>,
}
61
62impl Config {
63 /// Create a new configuration with defaults.
64 pub fn new() -> Self {
65 Self {
66 files: Vec::new(),
67 includes: Vec::new(),
68 out_dir: None,
69 codegen_config: CodeGenConfig::default(),
70 descriptor_source: DescriptorSource::default(),
71 include_file: None,
72 }
73 }
74
75 /// Add `.proto` files to compile.
76 #[must_use]
77 pub fn files(mut self, files: &[impl AsRef<Path>]) -> Self {
78 self.files
79 .extend(files.iter().map(|f| f.as_ref().to_path_buf()));
80 self
81 }
82
83 /// Add include directories for protoc to search for imports.
84 #[must_use]
85 pub fn includes(mut self, includes: &[impl AsRef<Path>]) -> Self {
86 self.includes
87 .extend(includes.iter().map(|i| i.as_ref().to_path_buf()));
88 self
89 }
90
91 /// Set the output directory for generated files.
92 /// Defaults to `$OUT_DIR` if not set.
93 #[must_use]
94 pub fn out_dir(mut self, dir: impl Into<PathBuf>) -> Self {
95 self.out_dir = Some(dir.into());
96 self
97 }
98
99 /// Enable or disable view type generation (default: true).
100 #[must_use]
101 pub fn generate_views(mut self, enabled: bool) -> Self {
102 self.codegen_config.generate_views = enabled;
103 self
104 }
105
106 /// Enable or disable serde Serialize/Deserialize derive generation
107 /// for generated message structs and enum types (default: false).
108 ///
109 /// When enabled, the downstream crate must depend on `serde` and enable
110 /// the `buffa/json` feature for the runtime helpers.
111 #[must_use]
112 pub fn generate_json(mut self, enabled: bool) -> Self {
113 self.codegen_config.generate_json = enabled;
114 self
115 }
116
117 /// Enable or disable `impl buffa::text::TextFormat` on generated message
118 /// structs (default: false).
119 ///
120 /// When enabled, the downstream crate must enable the `buffa/text`
121 /// feature for the runtime textproto encoder/decoder.
122 #[must_use]
123 pub fn generate_text(mut self, enabled: bool) -> Self {
124 self.codegen_config.generate_text = enabled;
125 self
126 }
127
128 /// Enable or disable `#[derive(arbitrary::Arbitrary)]` on generated
129 /// types (default: false).
130 ///
131 /// The derive is gated behind `#[cfg_attr(feature = "arbitrary", ...)]`
132 /// so the downstream crate compiles with or without the feature enabled.
133 #[must_use]
134 pub fn generate_arbitrary(mut self, enabled: bool) -> Self {
135 self.codegen_config.generate_arbitrary = enabled;
136 self
137 }
138
139 /// Enable or disable unknown field preservation (default: true).
140 ///
141 /// When enabled (the default), unrecognized fields encountered during
142 /// decode are stored and re-emitted on encode — essential for proxy /
143 /// middleware services and round-trip fidelity across schema versions.
144 ///
145 /// **Disabling is primarily a memory optimization** (24 bytes/message for
146 /// the `UnknownFields` Vec header), not a throughput one. When no unknown
147 /// fields appear on the wire — the common case for schema-aligned
148 /// services — decode and encode costs are effectively identical in
149 /// either mode. Consider disabling for embedded / `no_std` targets or
150 /// large in-memory collections of small messages.
151 #[must_use]
152 pub fn preserve_unknown_fields(mut self, enabled: bool) -> Self {
153 self.codegen_config.preserve_unknown_fields = enabled;
154 self
155 }
156
157 /// Honor `features.utf8_validation = NONE` by emitting `Vec<u8>` / `&[u8]`
158 /// for such string fields instead of `String` / `&str` (default: false).
159 ///
160 /// When disabled (the default), all string fields map to `String` and
161 /// UTF-8 is validated on decode — stricter than proto2 requires, but
162 /// ergonomic and safe.
163 ///
164 /// When enabled, string fields with `utf8_validation = NONE` become
165 /// `Vec<u8>` / `&[u8]`. Decode skips validation; the caller chooses
166 /// whether to `std::str::from_utf8` (checked) or `from_utf8_unchecked`
167 /// (trusted-input fast path). This is the only sound Rust mapping when
168 /// strings may actually contain non-UTF-8 bytes.
169 ///
170 /// **Note for proto2 users**: proto2's default is `utf8_validation = NONE`,
171 /// so enabling this turns ALL proto2 string fields into `Vec<u8>`. Use
172 /// only for new code or when profiling identifies UTF-8 validation as a
173 /// bottleneck (it can be 10%+ of decode CPU for string-heavy messages).
174 ///
175 /// **JSON note**: fields normalized to bytes serialize as base64 in JSON
176 /// (the proto3 JSON encoding for `bytes`). Keep strict mapping disabled
177 /// for fields that need JSON string interop with other implementations.
178 #[must_use]
179 pub fn strict_utf8_mapping(mut self, enabled: bool) -> Self {
180 self.codegen_config.strict_utf8_mapping = enabled;
181 self
182 }
183
184 /// Permit `option message_set_wire_format = true` on input messages.
185 ///
186 /// MessageSet is a legacy Google-internal wire format. Default: `false`
187 /// (such messages produce a codegen error). Set to `true` only when
188 /// compiling protos that interoperate with old Google-internal services.
189 #[must_use]
190 pub fn allow_message_set(mut self, enabled: bool) -> Self {
191 self.codegen_config.allow_message_set = enabled;
192 self
193 }
194
195 /// Declare an external type path mapping.
196 ///
197 /// Types under the given protobuf path prefix will reference the specified
198 /// Rust module path instead of being generated. This allows shared proto
199 /// packages to be compiled once in a dedicated crate and referenced from
200 /// others.
201 ///
202 /// `proto_path` is a fully-qualified protobuf package path, e.g.,
203 /// `".my.common"` or `"my.common"` (the leading dot is optional and will
204 /// be added automatically). `rust_path` is the Rust module path where
205 /// those types are accessible (e.g., `"::common_protos"`).
206 ///
207 /// # Example
208 ///
209 /// ```rust,ignore
210 /// buffa_build::Config::new()
211 /// .extern_path(".my.common", "::common_protos")
212 /// .files(&["proto/my_service.proto"])
213 /// .includes(&["proto/"])
214 /// .compile()
215 /// .unwrap();
216 /// ```
217 #[must_use]
218 pub fn extern_path(
219 mut self,
220 proto_path: impl Into<String>,
221 rust_path: impl Into<String>,
222 ) -> Self {
223 let mut proto_path = proto_path.into();
224 // Normalize: ensure the proto path is fully-qualified (leading dot).
225 // Accept both ".my.package" and "my.package" for convenience.
226 if !proto_path.starts_with('.') {
227 proto_path.insert(0, '.');
228 }
229 self.codegen_config
230 .extern_paths
231 .push((proto_path, rust_path.into()));
232 self
233 }
234
235 /// Configure `bytes` fields to use `bytes::Bytes` instead of `Vec<u8>`.
236 ///
237 /// Each path is a fully-qualified proto path prefix. Use `"."` to apply
238 /// to all bytes fields, or specify individual field paths like
239 /// `".my.pkg.MyMessage.data"`.
240 ///
241 /// # Example
242 ///
243 /// ```rust,ignore
244 /// buffa_build::Config::new()
245 /// .bytes(&["."]) // all bytes fields use Bytes
246 /// .files(&["proto/my_service.proto"])
247 /// .includes(&["proto/"])
248 /// .compile()
249 /// .unwrap();
250 /// ```
251 #[must_use]
252 pub fn use_bytes_type_in(mut self, paths: &[impl AsRef<str>]) -> Self {
253 self.codegen_config
254 .bytes_fields
255 .extend(paths.iter().map(|p| p.as_ref().to_string()));
256 self
257 }
258
259 /// Use `bytes::Bytes` for all `bytes` fields in all messages.
260 ///
261 /// This is a convenience for `.use_bytes_type_in(&["."])`. Use `.use_bytes_type_in(&[...])` with
262 /// specific proto paths if you only want `Bytes` for certain fields.
263 #[must_use]
264 pub fn use_bytes_type(mut self) -> Self {
265 self.codegen_config.bytes_fields.push(".".to_string());
266 self
267 }
268
269 /// Use `buf build` instead of `protoc` for descriptor generation.
270 ///
271 /// `buf` is often easier to install and keep current than `protoc`
272 /// (which many distros pin to old versions). This mode is intended for
273 /// the **single-crate case**: a `buf.yaml` at the crate root defining
274 /// the module layout.
275 ///
276 /// Requires `buf` on PATH and a `buf.yaml` at the crate root. The
277 /// [`includes()`](Self::includes) setting is ignored — buf resolves
278 /// imports via its own module configuration.
279 ///
280 /// Each path given to [`files()`](Self::files) must be **relative to its
281 /// owning module's directory** (the `path:` value inside `buf.yaml`), not
282 /// the crate root where `buf.yaml` itself lives. buf strips the module
283 /// path when producing `FileDescriptorProto.name`, so for
284 /// `modules: [{path: proto}]` and a file on disk at
285 /// `proto/api/v1/service.proto`, the descriptor name is
286 /// `api/v1/service.proto` — that is what `.files()` must contain.
287 /// Multiple modules in one `buf.yaml` work fine; buf enforces that
288 /// module-relative names are unique across the workspace.
289 ///
290 /// # Monorepo / multi-module setups
291 ///
292 /// For a workspace-root `buf.yaml` with many modules, this mode is a
293 /// poor fit. Prefer running `buf generate` with the `protoc-gen-buffa`
294 /// plugin and checking in the generated code, or use
295 /// [`descriptor_set()`](Self::descriptor_set) with the output of
296 /// `buf build --as-file-descriptor-set -o fds.binpb <module-path>`
297 /// run as a pre-build step.
298 ///
299 /// # Example
300 ///
301 /// ```rust,ignore
302 /// // buf.yaml (at crate root):
303 /// // version: v2
304 /// // modules:
305 /// // - path: proto
306 /// //
307 /// // build.rs:
308 /// buffa_build::Config::new()
309 /// .use_buf()
310 /// .files(&["api/v1/service.proto"]) // relative to module root
311 /// .compile()
312 /// .unwrap();
313 /// ```
314 #[must_use]
315 pub fn use_buf(mut self) -> Self {
316 self.descriptor_source = DescriptorSource::Buf;
317 self
318 }
319
320 /// Use a pre-compiled `FileDescriptorSet` binary file as input.
321 ///
322 /// Skips invoking `protoc` or `buf` entirely. The file must contain a
323 /// serialized `google.protobuf.FileDescriptorSet` (as produced by
324 /// `protoc --descriptor_set_out` or `buf build --as-file-descriptor-set`).
325 ///
326 /// When using this, `.files()` specifies which proto files in the
327 /// descriptor set to generate code for (matching by proto file name).
328 #[must_use]
329 pub fn descriptor_set(mut self, path: impl Into<PathBuf>) -> Self {
330 self.descriptor_source = DescriptorSource::Precompiled(path.into());
331 self
332 }
333
334 /// Generate a module-tree include file alongside the per-package `.rs`
335 /// files.
336 ///
337 /// The include file contains nested `pub mod` declarations with
338 /// `include!()` directives that assemble the generated code into a
339 /// module hierarchy matching the protobuf package structure. Users can
340 /// then include this single file instead of manually creating the
341 /// module tree.
342 ///
343 /// The form of the emitted `include!` directives depends on whether
344 /// [`out_dir`](Self::out_dir) was set:
345 ///
346 /// - **Default (`$OUT_DIR`)**: emits
347 /// `include!(concat!(env!("OUT_DIR"), "/foo.rs"))`, for use from
348 /// `build.rs` via `include!(concat!(env!("OUT_DIR"), "/<name>"))`.
349 /// - **Explicit `out_dir`**: emits sibling-relative `include!("foo.rs")`,
350 /// for checking the generated code into the source tree and referencing
351 /// it as a module (e.g. `mod gen;`).
352 ///
353 /// # Example — `build.rs` / `$OUT_DIR`
354 ///
355 /// ```rust,ignore
356 /// // build.rs
357 /// buffa_build::Config::new()
358 /// .files(&["proto/my_service.proto"])
359 /// .includes(&["proto/"])
360 /// .include_file("_include.rs")
361 /// .compile()
362 /// .unwrap();
363 ///
364 /// // lib.rs
365 /// include!(concat!(env!("OUT_DIR"), "/_include.rs"));
366 /// ```
367 ///
368 /// # Example — checked-in source
369 ///
370 /// ```rust,ignore
371 /// // codegen.rs (run manually, not from build.rs)
372 /// buffa_build::Config::new()
373 /// .files(&["proto/my_service.proto"])
374 /// .includes(&["proto/"])
375 /// .out_dir("src/gen")
376 /// .include_file("mod.rs")
377 /// .compile()
378 /// .unwrap();
379 ///
380 /// // lib.rs
381 /// mod gen;
382 /// ```
383 #[must_use]
384 pub fn include_file(mut self, name: impl Into<String>) -> Self {
385 self.include_file = Some(name.into());
386 self
387 }
388
389 /// Compile proto files and generate Rust source.
390 ///
391 /// # Errors
392 ///
393 /// Returns an error if:
394 /// - `OUT_DIR` is not set and no `out_dir` was configured
395 /// - `protoc` or `buf` cannot be found on `PATH` (when using those sources)
396 /// - the proto compiler exits with a non-zero status (syntax errors,
397 /// missing imports, etc.)
398 /// - a precompiled descriptor set file cannot be read
399 /// - the descriptor set bytes cannot be decoded as a `FileDescriptorSet`
400 /// - code generation fails (e.g. unsupported proto feature)
401 /// - the output directory cannot be created or written to
402 pub fn compile(self) -> Result<(), Box<dyn std::error::Error>> {
403 // When out_dir is explicitly set, the include file should use
404 // relative `include!("foo.rs")` paths (the index is a sibling of the
405 // generated files). When defaulted to $OUT_DIR, keep the
406 // `concat!(env!("OUT_DIR"), ...)` form so that
407 // `include!(concat!(env!("OUT_DIR"), "/_include.rs"))` from src/
408 // still resolves to absolute paths.
409 let relative_includes = self.out_dir.is_some();
410 let out_dir = self
411 .out_dir
412 .or_else(|| std::env::var("OUT_DIR").ok().map(PathBuf::from))
413 .ok_or("OUT_DIR not set and no out_dir configured")?;
414
415 // Produce a FileDescriptorSet from the configured source.
416 let descriptor_bytes = match &self.descriptor_source {
417 DescriptorSource::Protoc => invoke_protoc(&self.files, &self.includes)?,
418 DescriptorSource::Buf => invoke_buf()?,
419 DescriptorSource::Precompiled(path) => std::fs::read(path).map_err(|e| {
420 format!("failed to read descriptor set '{}': {}", path.display(), e)
421 })?,
422 };
423 let fds = FileDescriptorSet::decode_from_slice(&descriptor_bytes)
424 .map_err(|e| format!("failed to decode FileDescriptorSet: {}", e))?;
425
426 // Determine which files were explicitly requested.
427 //
428 // `FileDescriptorProto.name` contains the path relative to the proto
429 // source root (protoc: `--proto_path`; buf: the module root). For
430 // Precompiled and Buf mode, `.files()` are expected to already be
431 // proto-relative names. For Protoc mode, strip the longest matching
432 // include prefix.
433 let files_to_generate: Vec<String> = if matches!(
434 self.descriptor_source,
435 DescriptorSource::Precompiled(_) | DescriptorSource::Buf
436 ) {
437 self.files
438 .iter()
439 .filter_map(|f| f.to_str().map(str::to_string))
440 .collect()
441 } else {
442 self.files
443 .iter()
444 .map(|f| proto_relative_name(f, &self.includes))
445 .filter(|s| !s.is_empty())
446 .collect()
447 };
448
449 // Generate Rust source.
450 let generated =
451 buffa_codegen::generate(&fds.file, &files_to_generate, &self.codegen_config)?;
452
453 // Build a map from generated file name to proto package for the
454 // module tree generator.
455 let file_to_package: std::collections::HashMap<String, String> = fds
456 .file
457 .iter()
458 .map(|fd| {
459 let proto_name = fd.name.as_deref().unwrap_or("");
460 let rs_name = buffa_codegen::proto_path_to_rust_module(proto_name);
461 let package = fd.package.as_deref().unwrap_or("").to_string();
462 (rs_name, package)
463 })
464 .collect();
465
466 // Write output files and collect (name, package) pairs.
467 let mut output_entries: Vec<(String, String)> = Vec::new();
468 for file in generated {
469 let path = out_dir.join(&file.name);
470 if let Some(parent) = path.parent() {
471 std::fs::create_dir_all(parent)?;
472 }
473 write_if_changed(&path, file.content.as_bytes())?;
474 let package = file_to_package.get(&file.name).cloned().unwrap_or_default();
475 output_entries.push((file.name, package));
476 }
477
478 // Generate the include file if requested.
479 if let Some(ref include_name) = self.include_file {
480 let include_content = generate_include_file(&output_entries, relative_includes);
481 let include_path = out_dir.join(include_name);
482 write_if_changed(&include_path, include_content.as_bytes())?;
483 }
484
485 // Tell cargo to re-run if any proto file changes.
486 //
487 // For Buf mode, `self.files` are module-root-relative and cargo can't
488 // stat them — use `buf ls-files` instead, which lists all workspace
489 // protos with workspace-relative paths. This also catches changes to
490 // transitively-imported protos (a gap in the Protoc mode, which only
491 // watches explicitly-listed files).
492 match self.descriptor_source {
493 DescriptorSource::Buf => emit_buf_rerun_if_changed(),
494 DescriptorSource::Protoc => {
495 // Rerun if PROTOC changes (different binary may accept
496 // protos the previous one rejected, e.g. newer editions).
497 println!("cargo:rerun-if-env-changed=PROTOC");
498 for proto_file in &self.files {
499 println!("cargo:rerun-if-changed={}", proto_file.display());
500 }
501 }
502 DescriptorSource::Precompiled(ref path) => {
503 println!("cargo:rerun-if-changed={}", path.display());
504 }
505 }
506
507 Ok(())
508 }
509}
510
511impl Default for Config {
512 fn default() -> Self {
513 Self::new()
514 }
515}
516
/// Store `content` at `path`, skipping the write when the file already holds
/// exactly those bytes.
///
/// Leaving an up-to-date file untouched keeps its timestamp stable, which
/// avoids triggering needless downstream recompilation. A file that is
/// missing or cannot be read is simply (re)written.
fn write_if_changed(path: &Path, content: &[u8]) -> std::io::Result<()> {
    let up_to_date = matches!(std::fs::read(path), Ok(current) if current == content);
    if up_to_date {
        Ok(())
    } else {
        std::fs::write(path, content)
    }
}
528
529/// Invoke `protoc` to produce a `FileDescriptorSet` (serialized bytes).
530fn invoke_protoc(
531 files: &[PathBuf],
532 includes: &[PathBuf],
533) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
534 let protoc = std::env::var("PROTOC").unwrap_or_else(|_| "protoc".to_string());
535
536 let descriptor_file =
537 tempfile::NamedTempFile::new().map_err(|e| format!("failed to create temp file: {}", e))?;
538 let descriptor_path = descriptor_file.path().to_path_buf();
539
540 let mut cmd = Command::new(&protoc);
541 cmd.arg("--include_imports");
542 cmd.arg("--include_source_info");
543 cmd.arg(format!(
544 "--descriptor_set_out={}",
545 descriptor_path.display()
546 ));
547
548 for include in includes {
549 cmd.arg(format!("--proto_path={}", include.display()));
550 }
551
552 for file in files {
553 cmd.arg(file.as_os_str());
554 }
555
556 let output = cmd
557 .output()
558 .map_err(|e| format!("failed to run protoc ({}): {}", protoc, e))?;
559
560 if !output.status.success() {
561 let stderr = String::from_utf8_lossy(&output.stderr);
562 return Err(format!("protoc failed: {}", stderr).into());
563 }
564
565 let bytes = std::fs::read(&descriptor_path)
566 .map_err(|e| format!("failed to read descriptor set: {}", e))?;
567
568 Ok(bytes)
569}
570
/// Run `buf build` and return the serialized `FileDescriptorSet` it prints
/// to stdout.
///
/// A `buf.yaml` must be discoverable from the build script's working
/// directory. The whole workspace is built: `--path` is deliberately not
/// used, because buf expects workspace-relative paths there while
/// `FileDescriptorProto.name` is module-root-relative, so the user's paths
/// cannot serve both purposes. Narrowing to the requested files is done by
/// the caller through `files_to_generate` matching.
///
/// # Errors
///
/// Fails if `buf` cannot be spawned or exits unsuccessfully (its stderr is
/// included in the message).
fn invoke_buf() -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // No explicit opt-in is needed for proto comments to reach the generated
    // code: buf build emits SourceCodeInfo unless --exclude-source-info is
    // passed.
    let mut buf = Command::new("buf");
    buf.args(["build", "--as-file-descriptor-set", "-o", "-"]);

    let result = buf
        .output()
        .map_err(|e| format!("failed to run buf (is it installed and on PATH?): {e}"))?;

    if result.status.success() {
        return Ok(result.stdout);
    }

    let stderr = String::from_utf8_lossy(&result.stderr);
    Err(format!("buf build failed (is buf.yaml present at crate root?): {stderr}").into())
}
599
/// Print the `cargo:rerun-if-changed` directives for a buf workspace.
///
/// `buf.yaml` is always watched; `buf.lock` only when it exists, since cargo
/// treats a missing rerun-if-changed path as always-dirty. The proto files
/// themselves come from `buf ls-files`, which reports workspace-relative
/// paths that cargo can stat. A failing `buf ls-files` is tolerated: the
/// worst case is that cargo reruns the build script on every build.
fn emit_buf_rerun_if_changed() {
    println!("cargo:rerun-if-changed=buf.yaml");
    if Path::new("buf.lock").exists() {
        println!("cargo:rerun-if-changed=buf.lock");
    }

    // Nothing more to emit if ls-files cannot run or fails: buf.yaml is
    // already registered above, and a missing `buf` binary is reported
    // clearly by invoke_buf().
    let listing = Command::new("buf")
        .arg("ls-files")
        .output()
        .ok()
        .filter(|out| out.status.success());

    if let Some(out) = listing {
        let stdout = String::from_utf8_lossy(&out.stdout);
        stdout
            .lines()
            .map(str::trim)
            .filter(|path| !path.is_empty())
            .for_each(|path| println!("cargo:rerun-if-changed={path}"));
    }
}
627
/// Convert a filesystem proto path to the name protoc uses in the descriptor.
///
/// `FileDescriptorProto.name` is relative to the `--proto_path` include
/// directory. This strips the longest matching include prefix; if no include
/// matches, returns the path as-is (not just file_name — that would break
/// nested proto directories).
///
/// Descriptor names use `/` as the separator, so on Windows any `\` in the
/// result is converted to `/`; otherwise a path assembled with `Path::join`
/// would never match the name protoc recorded.
///
/// Returns an empty string when the resulting path is not valid UTF-8.
fn proto_relative_name(file: &Path, includes: &[PathBuf]) -> String {
    // Longest prefix wins: a file under both "proto/" and "proto/vendor/"
    // should strip "proto/vendor/" for a correct relative name. The longest
    // prefix leaves the shortest remainder, and `min_by_key` keeps the first
    // candidate on ties.
    let relative = includes
        .iter()
        .filter_map(|include| file.strip_prefix(include).ok())
        .min_by_key(|rel| rel.as_os_str().len())
        .unwrap_or(file);

    let name = relative.to_str().unwrap_or("");
    if cfg!(windows) {
        name.replace('\\', "/")
    } else {
        name.to_string()
    }
}
648
/// Generate the content of an include file that assembles generated `.rs`
/// files into a nested module tree matching the protobuf package hierarchy.
///
/// `entries` holds `(generated file name, proto package)` pairs. The
/// dot-separated package segments become nested `pub mod` wrappers (Rust
/// keywords are escaped), and each file is `include!`d inside the module of
/// its package. Files with an empty package are included at the top level.
/// Modules are emitted in sorted order; files keep the order of `entries`.
///
/// For example, files `foo.bar.rs` and `foo.baz.rs`, both in package `foo`,
/// produce:
/// ```text
/// // @generated by buffa-build. DO NOT EDIT.
///
/// #[allow(non_camel_case_types, dead_code, unused_imports, clippy::derivable_impls, clippy::match_single_binding)]
/// pub mod foo {
///     use super::*;
///     include!(concat!(env!("OUT_DIR"), "/foo.bar.rs"));
///     include!(concat!(env!("OUT_DIR"), "/foo.baz.rs"));
/// }
/// ```
///
/// When `relative` is true (the caller set [`Config::out_dir`] explicitly),
/// `include!` directives use bare sibling paths (`include!("foo.bar.rs")`)
/// instead of the `env!("OUT_DIR")` prefix, so the include file works when
/// checked into the source tree and referenced via `mod`.
fn generate_include_file(entries: &[(String, String)], relative: bool) -> String {
    use std::collections::BTreeMap;
    use std::fmt::Write;

    /// One nesting level in the emitted file: four spaces, so a file at
    /// module depth 2 is indented by eight.
    const INDENT: &str = "    ";

    /// Lint allowances placed on every emitted module; generated code
    /// routinely trips these.
    const MOD_ALLOW: &str = "#[allow(non_camel_case_types, dead_code, unused_imports, \
                             clippy::derivable_impls, clippy::match_single_binding)]";

    /// Turn a package segment into a valid module identifier: keywords become
    /// raw identifiers, except the path keywords, which cannot be raw and get
    /// a trailing underscore instead.
    fn escape_mod_name(name: &str) -> String {
        const KEYWORDS: &[&str] = &[
            "as", "break", "const", "continue", "crate", "else", "enum", "extern", "false", "fn",
            "for", "if", "impl", "in", "let", "loop", "match", "mod", "move", "mut", "pub", "ref",
            "return", "self", "Self", "static", "struct", "super", "trait", "true", "type",
            "unsafe", "use", "where", "while", "async", "await", "dyn", "gen", "abstract",
            "become", "box", "do", "final", "macro", "override", "priv", "try", "typeof",
            "unsized", "virtual", "yield",
        ];
        if KEYWORDS.contains(&name) {
            if matches!(name, "self" | "super" | "Self" | "crate") {
                format!("{name}_")
            } else {
                format!("r#{name}")
            }
        } else {
            name.to_string()
        }
    }

    /// A module in the tree: the files included directly in it plus its
    /// child modules, keyed (and therefore emitted) in sorted order.
    #[derive(Default)]
    struct ModNode {
        files: Vec<String>,
        children: BTreeMap<String, Self>,
    }

    // Build the tree by walking each file's package path from the root.
    let mut root = ModNode::default();
    for (file_name, package) in entries {
        let mut node = &mut root;
        if !package.is_empty() {
            for seg in package.split('.') {
                node = node.children.entry(seg.to_string()).or_default();
            }
        }
        node.files.push(file_name.clone());
    }

    let mut out = String::new();
    writeln!(out, "// @generated by buffa-build. DO NOT EDIT.").unwrap();
    writeln!(out).unwrap();

    /// Emit `node`'s own includes followed by its child modules, recursively.
    fn emit(out: &mut String, node: &ModNode, depth: usize, relative: bool) {
        let indent = INDENT.repeat(depth);
        for file in &node.files {
            if relative {
                writeln!(out, r#"{indent}include!("{file}");"#).unwrap();
            } else {
                writeln!(
                    out,
                    r#"{indent}include!(concat!(env!("OUT_DIR"), "/{file}"));"#
                )
                .unwrap();
            }
        }
        for (name, child) in &node.children {
            let escaped = escape_mod_name(name);
            writeln!(out, "{}{}", indent, MOD_ALLOW).unwrap();
            writeln!(out, "{indent}pub mod {escaped} {{").unwrap();
            // Re-export the parent scope so generated code can refer to
            // sibling packages through `super`-relative paths.
            writeln!(out, "{}{}use super::*;", indent, INDENT).unwrap();
            emit(out, child, depth + 1, relative);
            writeln!(out, "{indent}}}").unwrap();
        }
    }

    emit(&mut out, &root, 0, relative);
    out
}
749
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the include-directory list handed to `proto_relative_name`.
    fn include_dirs(dirs: &[&str]) -> Vec<PathBuf> {
        dirs.iter().map(|dir| PathBuf::from(*dir)).collect()
    }

    #[test]
    fn proto_relative_name_strips_include() {
        let includes = include_dirs(&["proto/"]);
        let name = proto_relative_name(Path::new("proto/my/service.proto"), &includes);
        assert_eq!(name, "my/service.proto");
    }

    #[test]
    fn proto_relative_name_longest_prefix_wins() {
        // The file sits under both proto/ and proto/vendor/; the LONGER
        // prefix must be stripped regardless of the order of the includes.
        let file = Path::new("proto/vendor/ext.proto");

        let short_first = include_dirs(&["proto/", "proto/vendor/"]);
        assert_eq!(proto_relative_name(file, &short_first), "ext.proto");

        let long_first = include_dirs(&["proto/vendor/", "proto/"]);
        assert_eq!(proto_relative_name(file, &long_first), "ext.proto");
    }

    #[test]
    fn proto_relative_name_no_match_returns_full_path() {
        // Regression: an earlier fallback to file_name() dropped the
        // directory components and broke descriptor_set() mode with nested
        // proto packages. The full path must come back unchanged.
        let name = proto_relative_name(Path::new("my/pkg/service.proto"), &[]);
        assert_eq!(name, "my/pkg/service.proto");
    }

    #[test]
    fn proto_relative_name_no_match_with_unrelated_includes() {
        let includes = include_dirs(&["other/", "third/"]);
        let name = proto_relative_name(Path::new("src/my.proto"), &includes);
        assert_eq!(name, "src/my.proto");
    }

    #[test]
    fn include_file_out_dir_mode_uses_env_var() {
        let entries = vec![
            ("foo.bar.rs".to_string(), "foo".to_string()),
            ("root.rs".to_string(), String::new()),
        ];
        let rendered = generate_include_file(&entries, false);
        assert!(
            rendered.contains(r#"include!(concat!(env!("OUT_DIR"), "/foo.bar.rs"));"#),
            "nested-package file should use env!(OUT_DIR): {rendered}"
        );
        assert!(
            rendered.contains(r#"include!(concat!(env!("OUT_DIR"), "/root.rs"));"#),
            "empty-package file should use env!(OUT_DIR): {rendered}"
        );
        assert!(!rendered.contains(r#"include!("foo.bar.rs")"#));
    }

    #[test]
    fn include_file_relative_mode_uses_sibling_paths() {
        let entries = vec![
            ("foo.bar.rs".to_string(), "foo".to_string()),
            ("root.rs".to_string(), String::new()),
        ];
        let rendered = generate_include_file(&entries, true);
        assert!(
            rendered.contains(r#"include!("foo.bar.rs");"#),
            "nested-package file should use relative path: {rendered}"
        );
        assert!(
            rendered.contains(r#"include!("root.rs");"#),
            "empty-package file should use relative path: {rendered}"
        );
        assert!(
            !rendered.contains("OUT_DIR"),
            "relative mode must not reference OUT_DIR: {rendered}"
        );
    }

    #[test]
    fn include_file_relative_mode_nested_packages() {
        // Two files in one depth-2 package: checks that the relative flag
        // survives the recursive emit() calls and that both files end up in
        // the same innermost module.
        let entries = vec![
            ("a.b.one.rs".to_string(), "a.b".to_string()),
            ("a.b.two.rs".to_string(), "a.b".to_string()),
        ];
        let rendered = generate_include_file(&entries, true);
        // NOTE(review): the includes are expected at depth 2 (documented as
        // 8 spaces). `contains` also accepts a shorter prefix, so confirm the
        // width of this literal matches the generator's indent unit.
        let indent = " ";
        let first = format!(r#"{indent}include!("a.b.one.rs");"#);
        let second = format!(r#"{indent}include!("a.b.two.rs");"#);
        assert!(rendered.contains(&first), "first file at depth 2: {rendered}");
        assert!(
            rendered.contains(&second),
            "second file at depth 2: {rendered}"
        );
        assert_eq!(
            rendered.matches("pub mod b {").count(),
            1,
            "both files share one `mod b`: {rendered}"
        );
        assert!(!rendered.contains("OUT_DIR"));
    }

    #[test]
    fn write_if_changed_creates_new_file() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("new.rs");
        write_if_changed(&target, b"hello").unwrap();
        assert_eq!(std::fs::read(&target).unwrap(), b"hello");
    }

    #[test]
    fn write_if_changed_skips_identical_content() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("same.rs");
        std::fs::write(&target, b"content").unwrap();
        let modified = |path: &Path| std::fs::metadata(path).unwrap().modified().unwrap();
        let before = modified(&target);

        // Pause so that a rewrite, had it happened, would show up as a
        // different mtime.
        std::thread::sleep(std::time::Duration::from_millis(50));

        write_if_changed(&target, b"content").unwrap();
        let after = modified(&target);
        assert_eq!(before, after);
    }

    #[test]
    fn write_if_changed_overwrites_different_content() {
        let scratch = tempfile::tempdir().unwrap();
        let target = scratch.path().join("changed.rs");
        std::fs::write(&target, b"old").unwrap();

        write_if_changed(&target, b"new").unwrap();
        assert_eq!(std::fs::read(&target).unwrap(), b"new");
    }
}