Skip to main content

obs_build/
config.rs

//! `obs_build::Config` — the build-script entry point.
//!
//! Walks the FDS via `buffa-reflect`, reads `(obs.v1.event)` /
//! `(obs.v1.field)` custom options out of the
//! `__buffa_unknown_fields` byte stream (per
//! `docs/research/spike-buffa-reflect.md`), and emits four files into
//! `$OUT_DIR/obs/`:
//!
//! - `schemas.rs`  — `EventSchemaErased` impls + `linkme` registrations
//! - `builders.rs` — fluent setter + `.emit()` per event
//! - `lints.rs`    — const-eval lint asserts (L001/L002/L003/L011)
//! - `arrow_schema.rs` — Arrow fragment dispatch table (Phase-2 stub)
//!
//! The user wires every file in via:
//!
//! ```ignore
//! obs::include_schemas!("myapp.v1");   // one macro, four `include!`s
//! ```
//!
//! Spec 12 § 3.1 + § 4.
21
22use std::{
23    path::{Path, PathBuf},
24    process::Command,
25};
26
27use buffa::Message;
28use buffa_descriptor::generated::descriptor::FileDescriptorSet;
29use buffa_reflect::{DescriptorPool, Kind};
30
31use crate::{
32    codegen::{
33        EventDecl, FieldDecl, render_arrow_schema, render_builders, render_lints, render_schemas,
34    },
35    lints::LintProtoType,
36    options::{CodegenError, read_event_options, read_field_options},
37};
38
/// Source of the `FileDescriptorSet` (FDS) that obs codegen reads.
/// Spec 12 § 4.
///
/// The enum is `#[non_exhaustive]`, so downstream `match`es need a
/// wildcard arm.
#[derive(Debug, Clone, Default)]
#[non_exhaustive]
pub enum DescriptorSource {
    /// Invoke `protoc` (default) to produce the descriptor set. Requires
    /// the binary on `PATH` or named via the `PROTOC` env var.
    #[default]
    Protoc,
    /// Use a pre-built FDS file (skips protoc invocation). The file is
    /// read as-is and decoded as a serialized `FileDescriptorSet`; when
    /// `.proto` files are also configured, the same path is handed to
    /// `buffa-build` as its descriptor set.
    Precompiled(PathBuf),
}
50
/// Build-script entry point.
///
/// Construct with [`Config::new`] to get the documented toggle defaults.
/// The derived `Default` impl leaves every `bool` toggle `false`, so
/// `Config::default()` and `Config::new()` are NOT equivalent.
#[derive(Debug, Default)]
pub struct Config {
    /// `.proto` inputs handed to `buffa-build` and `protoc`.
    files: Vec<PathBuf>,
    /// Include directories; may contain the embedded-options sentinel
    /// pushed by `include_obs_options`, which `compile` replaces with a
    /// real directory.
    includes: Vec<PathBuf>,
    /// Explicit output directory; `None` means "read `OUT_DIR` from the
    /// environment".
    out_dir: Option<PathBuf>,
    /// Where the `FileDescriptorSet` comes from.
    descriptor_source: DescriptorSource,
    /// Event prefix override for lint L011; `None` falls back to the
    /// `OBS_EVENT_PREFIX` env var and then `"Obs"`.
    event_prefix: Option<String>,
    /// Codegen feature toggles per spec 12 § 4. `Config::new` turns
    /// `arrow_schema`, `payload_scrub` and `otel_attribute_view` on and
    /// leaves `json_render` off. Schemas, builders and lints are not
    /// toggleable: `compile` always emits them.
    arrow_schema: bool,
    json_render: bool,
    payload_scrub: bool,
    otel_attribute_view: bool,
}
67
68impl Config {
69    /// New config with sane defaults (lints + schemas + builders + arrow on).
70    #[must_use]
71    pub fn new() -> Self {
72        Self {
73            arrow_schema: true,
74            json_render: false,
75            payload_scrub: true,
76            otel_attribute_view: true,
77            ..Self::default()
78        }
79    }
80
81    /// Add proto file paths.
82    #[must_use]
83    pub fn files(mut self, files: &[impl AsRef<Path>]) -> Self {
84        self.files
85            .extend(files.iter().map(|p| p.as_ref().to_owned()));
86        self
87    }
88
89    /// Add include directories searched by `protoc`.
90    #[must_use]
91    pub fn include(mut self, dir: impl AsRef<Path>) -> Self {
92        self.includes.push(dir.as_ref().to_owned());
93        self
94    }
95
96    /// Override the output directory. Defaults to `$OUT_DIR`.
97    #[must_use]
98    pub fn out_dir(mut self, dir: impl AsRef<Path>) -> Self {
99        self.out_dir = Some(dir.as_ref().to_owned());
100        self
101    }
102
103    /// Pull `obs/v1/options.proto` from the embedded `obs-build`
104    /// package so the user does not need to vendor it. The proto file
105    /// is bundled at compile time and extracted to `$OUT_DIR/obs/include/`.
106    #[must_use]
107    pub fn include_obs_options(mut self) -> Self {
108        self.includes
109            .push(PathBuf::from("__obs_build_embedded_options__"));
110        self
111    }
112
113    /// Use a pre-compiled FDS file (skips protoc).
114    #[must_use]
115    pub fn descriptor_source(mut self, src: DescriptorSource) -> Self {
116        self.descriptor_source = src;
117        self
118    }
119
120    /// Override the workspace event prefix used by lint L011. Defaults
121    /// to reading `OBS_EVENT_PREFIX` env var, then falling back to
122    /// `"Obs"`. Spec 12 § 3.4.
123    #[must_use]
124    pub fn event_prefix(mut self, prefix: impl Into<String>) -> Self {
125        self.event_prefix = Some(prefix.into());
126        self
127    }
128
129    /// Toggle Arrow schema fragment emission. On by default. Spec 12 § 4.
130    #[must_use]
131    pub fn with_arrow_schema(mut self, on: bool) -> Self {
132        self.arrow_schema = on;
133        self
134    }
135
136    /// Toggle JSON-render dispatcher. Off by default; lands in Phase 4. Spec 12 § 4.
137    #[must_use]
138    pub fn with_json_render(mut self, on: bool) -> Self {
139        self.json_render = on;
140        self
141    }
142
143    /// Toggle payload scrub dispatcher. On by default. Spec 12 § 4.
144    #[must_use]
145    pub fn with_payload_scrub(mut self, on: bool) -> Self {
146        self.payload_scrub = on;
147        self
148    }
149
150    /// Toggle OTel attribute view emission. On by default. Spec 12 § 4.
151    #[must_use]
152    pub fn with_otel_attribute_view(mut self, on: bool) -> Self {
153        self.otel_attribute_view = on;
154        self
155    }
156
157    /// Run codegen.
158    ///
159    /// Two-stage pipeline per spec 12 § 3:
160    ///
161    /// 1. **Stage 1 — `buffa-build`**: compile the `.proto` files into Rust wire types under
162    ///    `$OUT_DIR/obs_buffa.rs` (one entry-point file via `include_file`). Skipped when the user
163    ///    pre-built the FDS via `descriptor_source(Precompiled(_))` and there are no `.proto` files
164    ///    set on `Config` (the test path).
165    /// 2. **Stage 2 — obs codegen**: read `(obs.v1.event)` / `(obs.v1.field)` annotations from the
166    ///    descriptor pool and emit `$OUT_DIR/obs/{schemas,builders,lints,arrow_schema}.rs`.
167    ///
168    /// # Errors
169    ///
170    /// Returns `CodegenError` for any step that fails: `protoc`
171    /// invocation, `buffa-build` invocation, FDS decode, option scan,
172    /// generated-file IO.
173    pub fn compile(self) -> Result<(), CodegenError> {
174        let out_dir = self
175            .out_dir
176            .clone()
177            .or_else(|| std::env::var("OUT_DIR").ok().map(PathBuf::from))
178            .ok_or_else(|| CodegenError::Protoc("OUT_DIR not set".into()))?;
179        std::fs::create_dir_all(out_dir.join("obs")).map_err(CodegenError::OutputIo)?;
180
181        // Materialise embedded include dir (obs/v1/options.proto) once
182        // so both the buffa-build and protoc invocations share it.
183        let mut effective_includes = self.includes.clone();
184        if effective_includes
185            .iter()
186            .any(|p| p.as_os_str() == "__obs_build_embedded_options__")
187        {
188            let embed_dir = out_dir.join("obs").join("include");
189            materialise_embedded_options(&embed_dir).map_err(CodegenError::OutputIo)?;
190            effective_includes.retain(|p| p.as_os_str() != "__obs_build_embedded_options__");
191            effective_includes.push(embed_dir);
192        }
193
194        // Stage 1: buffa-build for wire types. Skip when the user has
195        // no .proto files (the test path uses a precompiled FDS only).
196        if !self.files.is_empty() {
197            self.invoke_buffa_build(&out_dir, &effective_includes)?;
198        }
199
200        // Stage 2: obs codegen.
201        let fds_bytes = self.produce_fds(&out_dir, &effective_includes)?;
202        let fds = FileDescriptorSet::decode_from_slice(&fds_bytes)
203            .map_err(|e| CodegenError::DescriptorDecode(e.to_string()))?;
204        let pool = DescriptorPool::from_file_descriptor_set(fds)
205            .map_err(|e| CodegenError::DescriptorDecode(e.to_string()))?;
206
207        let events = collect_event_decls(&pool)?;
208
209        let event_prefix = self
210            .event_prefix
211            .clone()
212            .or_else(|| std::env::var("OBS_EVENT_PREFIX").ok())
213            .unwrap_or_else(|| "Obs".to_string());
214
215        // Always emit schemas + builders + lints. Arrow stub gated by toggle.
216        std::fs::write(
217            out_dir.join("obs").join("schemas.rs"),
218            render_schemas(&events),
219        )
220        .map_err(CodegenError::OutputIo)?;
221        std::fs::write(
222            out_dir.join("obs").join("builders.rs"),
223            render_builders(&events),
224        )
225        .map_err(CodegenError::OutputIo)?;
226        std::fs::write(
227            out_dir.join("obs").join("lints.rs"),
228            render_lints(&events, &event_prefix),
229        )
230        .map_err(CodegenError::OutputIo)?;
231        if self.arrow_schema {
232            std::fs::write(
233                out_dir.join("obs").join("arrow_schema.rs"),
234                render_arrow_schema(&events),
235            )
236            .map_err(CodegenError::OutputIo)?;
237        } else {
238            // Always create the file with an empty stub so
239            // `include_schemas!` references resolve regardless of
240            // toggle. Spec 12 § 3.1.
241            std::fs::write(
242                out_dir.join("obs").join("arrow_schema.rs"),
243                "// arrow_schema disabled by `with_arrow_schema(false)`\n",
244            )
245            .map_err(CodegenError::OutputIo)?;
246        }
247        Ok(())
248    }
249
250    fn invoke_buffa_build(
251        &self,
252        _out_dir: &Path,
253        effective_includes: &[PathBuf],
254    ) -> Result<(), CodegenError> {
255        // buffa-build's `include_file` mode is path-aware:
256        //
257        // - When `out_dir` is **unset** (default), buffa reads `OUT_DIR` from the env and emits the
258        //   entry file with `include!(concat!(env!("OUT_DIR"), "/foo.rs"))` paths. The user can
259        //   therefore `include!` the entry from any source file in their crate.
260        // - When `out_dir` is **explicitly set**, buffa emits sibling-relative `include!("foo.rs")`
261        //   paths, which only resolve correctly when the user `mod foo;`'s the parent.
262        //
263        // We want the first form (env!("OUT_DIR")-rooted) so
264        // `obs::include_schemas!` can drop the entry into any
265        // `src/*.rs`. Therefore: only pass `.out_dir(...)` to
266        // buffa-build when the user explicitly overrode `obs-build`'s
267        // `Config::out_dir`. The OS env `OUT_DIR` cargo supplies
268        // covers the production path; tests that override `out_dir`
269        // are responsible for `include!`'ing via `mod`s instead.
270        let mut cfg = buffa_build::Config::new()
271            .files(&self.files)
272            .includes(effective_includes)
273            .include_file("obs_buffa.rs")
274            .generate_views(true);
275        if let Some(explicit_out) = &self.out_dir {
276            cfg = cfg.out_dir(explicit_out);
277        }
278        if let DescriptorSource::Precompiled(path) = &self.descriptor_source {
279            cfg = cfg.descriptor_set(path);
280        }
281        cfg.compile()
282            .map_err(|e| CodegenError::Buffa(e.to_string()))?;
283        Ok(())
284    }
285
286    fn produce_fds(
287        &self,
288        out_dir: &Path,
289        effective_includes: &[PathBuf],
290    ) -> Result<Vec<u8>, CodegenError> {
291        match &self.descriptor_source {
292            DescriptorSource::Protoc => self.invoke_protoc(out_dir, effective_includes),
293            DescriptorSource::Precompiled(path) => {
294                std::fs::read(path).map_err(CodegenError::DescriptorIo)
295            }
296        }
297    }
298
299    fn invoke_protoc(
300        &self,
301        out_dir: &Path,
302        effective_includes: &[PathBuf],
303    ) -> Result<Vec<u8>, CodegenError> {
304        let protoc = std::env::var("PROTOC").unwrap_or_else(|_| "protoc".to_string());
305        let fds_path = out_dir.join("obs").join("fds.bin");
306        let mut cmd = Command::new(&protoc);
307        cmd.arg("--include_imports");
308        cmd.arg(format!("--descriptor_set_out={}", fds_path.display()));
309        for inc in effective_includes {
310            cmd.arg(format!("--proto_path={}", inc.display()));
311        }
312        for f in &self.files {
313            cmd.arg(f);
314        }
315        let status = cmd
316            .status()
317            .map_err(|e| CodegenError::Protoc(format!("failed to spawn protoc: {e}")))?;
318        if !status.success() {
319            return Err(CodegenError::Protoc(format!("protoc exit status {status}")));
320        }
321        std::fs::read(&fds_path).map_err(CodegenError::DescriptorIo)
322    }
323}
324
325fn collect_event_decls(pool: &DescriptorPool) -> Result<Vec<EventDecl>, CodegenError> {
326    let mut events: Vec<EventDecl> = Vec::new();
327    for msg in pool.all_messages() {
328        let dp = msg.descriptor_proto();
329        if !dp.options.is_set() {
330            continue;
331        }
332        let mut bytes = Vec::new();
333        dp.options.__buffa_unknown_fields.write_to(&mut bytes);
334        let Some(event_opts) = read_event_options(&bytes, msg.full_name())? else {
335            continue;
336        };
337        let mut decl = EventDecl {
338            full_name: msg.full_name().to_string(),
339            event: event_opts,
340            fields: Vec::new(),
341        };
342        for f in msg.fields() {
343            let fdp = f.descriptor_proto();
344            let mut fbytes = Vec::new();
345            if fdp.options.is_set() {
346                fdp.options.__buffa_unknown_fields.write_to(&mut fbytes);
347            }
348            let opts = read_field_options(&fbytes, &format!("{}/{}", msg.full_name(), f.name()))?
349                .unwrap_or_default();
350            let proto_type = Some(map_kind_to_lint_type(&f.kind()));
351            let wire_rust_type = map_kind_to_rust_type(&f.kind());
352            let enum_rust_path = match f.kind() {
353                Kind::Enum(enum_desc) => Some(enum_to_rust_path(&enum_desc)),
354                _ => None,
355            };
356            decl.fields.push(FieldDecl {
357                name: f.name().to_string(),
358                number: f.number(),
359                options: opts,
360                proto_type,
361                wire_rust_type,
362                enum_rust_path,
363            });
364        }
365        events.push(decl);
366    }
367    // Stable order so generated bytes are deterministic across runs.
368    events.sort_by(|a, b| a.full_name.cmp(&b.full_name));
369    Ok(events)
370}
371
372/// Translate a `buffa_reflect::EnumDescriptor` into the Rust path
373/// buffa's message codegen emits for it. Top-level enums land at
374/// `<pkg>::EnumName`; nested enums get the parent message's
375/// snake_case name as the intermediate module, matching buffa's
376/// `pub mod <snake_parent> { pub enum EnumName { … } }` convention.
377fn enum_to_rust_path(enum_desc: &buffa_reflect::EnumDescriptor) -> String {
378    // Walk parent messages outermost-first, converting each parent's
379    // leaf `name()` to snake_case.
380    let mut parents: Vec<String> = Vec::new();
381    let mut cursor = enum_desc.parent_message();
382    while let Some(msg) = cursor {
383        parents.push(heck::AsSnakeCase(msg.name()).to_string());
384        cursor = msg.parent_message();
385    }
386    parents.reverse();
387
388    let file = enum_desc.parent_file();
389    let package = file.package().trim_start_matches('.');
390    let mut path = String::new();
391    if !package.is_empty() {
392        for seg in package.split('.') {
393            if !path.is_empty() {
394                path.push_str("::");
395            }
396            path.push_str(seg);
397        }
398    }
399    for p in &parents {
400        if !path.is_empty() {
401            path.push_str("::");
402        }
403        path.push_str(p);
404    }
405    if !path.is_empty() {
406        path.push_str("::");
407    }
408    path.push_str(enum_desc.name());
409    path
410}
411
412fn map_kind_to_rust_type(k: &Kind) -> Option<&'static str> {
413    match k {
414        Kind::Bool => Some("bool"),
415        Kind::Int32 | Kind::Sint32 | Kind::Sfixed32 => Some("i32"),
416        Kind::Int64 | Kind::Sint64 | Kind::Sfixed64 => Some("i64"),
417        Kind::Uint32 | Kind::Fixed32 => Some("u32"),
418        Kind::Uint64 | Kind::Fixed64 => Some("u64"),
419        Kind::Float => Some("f32"),
420        Kind::Double => Some("f64"),
421        _ => None,
422    }
423}
424
425fn map_kind_to_lint_type(k: &Kind) -> LintProtoType {
426    match k {
427        Kind::String => LintProtoType::String,
428        Kind::Bytes => LintProtoType::Bytes,
429        Kind::Bool => LintProtoType::Bool,
430        Kind::Double | Kind::Float => LintProtoType::Float,
431        Kind::Int32
432        | Kind::Int64
433        | Kind::Sint32
434        | Kind::Sint64
435        | Kind::Sfixed32
436        | Kind::Sfixed64 => LintProtoType::SignedInteger,
437        Kind::Uint32 | Kind::Uint64 | Kind::Fixed32 | Kind::Fixed64 => {
438            LintProtoType::UnsignedInteger
439        }
440        Kind::Enum(_) => LintProtoType::Other("enum".to_string()),
441        Kind::Message(_) => LintProtoType::Other("message".to_string()),
442        _ => LintProtoType::Other("unknown".to_string()),
443    }
444}
445
// Vendored copies of the canonical protos in `obs-proto/proto/obs/v1/`.
// `include_str!` paths must resolve inside the crate's packaged
// tarball — `cargo publish` verifies each crate in isolation, so a
// sibling-relative path like `../../obs-proto/proto/...` would break
// on `cargo publish --dry-run` and on any consumer that pulls
// `obs-build` from crates.io. The `build.rs` keeps these two copies
// in sync with `obs-proto/proto/obs/v1/{options,enums}.proto` and
// fails the build if they diverge.
//
// Both constants are written back to disk by
// `materialise_embedded_options`.

/// Embedded canonical text of `obs/v1/options.proto`, re-exported so
/// downstream consumers (e.g. `obs-cli`'s `schema_source`) can reuse
/// it without re-vendoring.
pub const EMBEDDED_OPTIONS_PROTO: &str = include_str!("../proto/obs/v1/options.proto");
/// Embedded canonical text of `obs/v1/enums.proto`, re-exported for
/// the same reason as [`EMBEDDED_OPTIONS_PROTO`].
pub const EMBEDDED_ENUMS_PROTO: &str = include_str!("../proto/obs/v1/enums.proto");
461
462/// Write the embedded `obs/v1/{options,enums}.proto` pair into
463/// `{dir}/obs/v1/` so a downstream protoc invocation can include
464/// them. Used by both `Config::compile` above and `obs-cli`'s
465/// schema-source helper.
466///
467/// # Errors
468///
469/// Returns any IO error encountered while creating the directory
470/// tree or writing the proto files.
471pub fn materialise_embedded_options(dir: &Path) -> std::io::Result<()> {
472    let target = dir.join("obs").join("v1");
473    std::fs::create_dir_all(&target)?;
474    std::fs::write(target.join("options.proto"), EMBEDDED_OPTIONS_PROTO)?;
475    std::fs::write(target.join("enums.proto"), EMBEDDED_ENUMS_PROTO)?;
476    Ok(())
477}