Skip to main content

obs_build/
lib.rs

1//! Build-time codegen helpers for the proto-first authoring path.
2//!
3//! Phase-1 surface (impl-plan task 1.10):
4//!
5//! - [`Config::compile`] runs `protoc` (or accepts a precompiled FDS), hands the FDS to
6//!   `buffa-build` for Rust-type generation, then walks the FDS via `buffa-reflect` to extract the
7//!   `(obs.v1.event)` and `(obs.v1.field)` custom options out of each message's
8//!   `MessageOptions.__buffa_unknown_fields`. From those bytes the codegen emits `EventSchema`
9//!   impls, `EventSchemaErased` impls, and `linkme` registrations into `OUT_DIR/obs/schemas.rs`.
//! - The byte-identical-output property described in spec 12 § 1.2 (proto-first vs Rust-first land
//!   on the same generated bytes when the schemas are equivalent) is verified via Phase-2 task 2.1
//!   integration tests; Phase 1 ships the codegen, and the test harness builds on top.
13//!
14//! See spec 12 § 4 + spec 14 § 7.
15
16#![forbid(unsafe_code)]
17#![warn(rust_2024_compatibility, missing_debug_implementations)]
18// `obs-build` runs in the user's `build.rs` (sync, before the async
19// runtime exists), so the workspace's tokio-fs / no-expect lints don't
20// apply here. Tests use `.unwrap()` freely.
21#![allow(
22    missing_docs,
23    clippy::disallowed_types,
24    clippy::disallowed_methods,
25    clippy::expect_used,
26    clippy::indexing_slicing
27)]
28#![cfg_attr(test, allow(clippy::unwrap_used))]
29
30mod codegen;
31mod config;
32pub mod lints;
33mod options;
34
35pub use config::{
36    Config, DescriptorSource, EMBEDDED_ENUMS_PROTO, EMBEDDED_OPTIONS_PROTO,
37    materialise_embedded_options,
38};
39pub use lints::{
40    LintError, LintField, LintInput, LintProtoType, emit_cross_event_lints, emit_lints,
41};
42pub use options::{CodegenError, EventOptions, FieldOptions, MetricSpec};
43
44/// Reflection-based reader of `(obs.v1.event)` and `(obs.v1.field)`
45/// options from a `buffa-reflect` descriptor pool. Re-exported so
46/// `obs-cli`'s `lint` / `validate` / `schema show` subcommands can
47/// share the parser with `Config::compile`.
48pub mod reflect {
49    use buffa_reflect::DescriptorPool;
50    use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Severity, Tier};
51
52    use crate::options::{
53        CodegenError, EventOptions, FieldOptions, read_event_options, read_field_options,
54    };
55
56    /// One annotated event scanned from a descriptor pool.
57    #[derive(Debug)]
58    #[non_exhaustive]
59    pub struct AnnotatedEvent {
60        /// Fully qualified message name (`myapp.v1.ObsXxx`).
61        pub full_name: String,
62        /// Decoded `(obs.v1.event)` options.
63        pub event: EventOptions,
64        /// Per-field `(obs.v1.field)` options paired with name + tag.
65        pub fields: Vec<AnnotatedField>,
66    }
67
68    /// One field of an [`AnnotatedEvent`].
69    #[derive(Debug)]
70    #[non_exhaustive]
71    pub struct AnnotatedField {
72        /// Field name in the proto.
73        pub name: String,
74        /// Proto tag number.
75        pub number: u32,
76        /// Decoded `(obs.v1.field)` options.
77        pub options: FieldOptions,
78    }
79
80    impl AnnotatedField {
81        /// Effective field kind, defaulting to ATTRIBUTE.
82        #[must_use]
83        pub fn kind(&self) -> FieldKind {
84            self.options.kind.unwrap_or(FieldKind::Attribute)
85        }
86        /// Effective cardinality, defaulting to UNSPECIFIED.
87        #[must_use]
88        pub fn cardinality(&self) -> Cardinality {
89            self.options.cardinality.unwrap_or(Cardinality::Unspecified)
90        }
91        /// Effective classification, defaulting to INTERNAL.
92        #[must_use]
93        pub fn classification(&self) -> Classification {
94            self.options
95                .classification
96                .unwrap_or(Classification::Internal)
97        }
98    }
99
100    impl AnnotatedEvent {
101        /// Effective tier, defaulting to LOG.
102        #[must_use]
103        pub fn tier(&self) -> Tier {
104            self.event.tier.unwrap_or(Tier::Log)
105        }
106        /// Effective default severity, defaulting to INFO.
107        #[must_use]
108        pub fn default_sev(&self) -> Severity {
109            self.event.default_sev.unwrap_or(Severity::Info)
110        }
111        /// First 8 bytes of BLAKE3 over the canonical descriptor string.
112        /// Mirrors `obs-build::codegen::EventDecl::schema_hash` and the
113        /// proc-macro's `compute_schema_hash` so codegen, runtime, and
114        /// migrate paths agree byte-for-byte. Spec 12 § 3.5 / spec 93
115        /// P1-9 (`obs migrate clickhouse` populates schema_hash).
116        #[must_use]
117        pub fn schema_hash(&self) -> u64 {
118            let mut s = String::new();
119            s.push_str(&self.full_name);
120            s.push('|');
121            s.push_str(self.tier().as_str());
122            s.push('|');
123            s.push_str(self.default_sev().as_str());
124            s.push('|');
125            for f in &self.fields {
126                s.push_str(&f.name);
127                s.push(':');
128                s.push_str(f.kind().as_str());
129                s.push(':');
130                s.push_str(f.cardinality().as_str());
131                s.push(':');
132                s.push_str(f.classification().as_str());
133                s.push(',');
134            }
135            let h = blake3::hash(s.as_bytes());
136            let bytes = h.as_bytes();
137            let arr = <[u8; 8]>::try_from(&bytes[..8]).expect("blake3 always produces 32 bytes");
138            u64::from_le_bytes(arr)
139        }
140    }
141
142    /// Walk a descriptor pool and collect every annotated `(obs.v1.event)`.
143    /// Skips messages without the option. Returns events in stable
144    /// `full_name` order (deterministic for diff/lint output).
145    ///
146    /// # Errors
147    ///
148    /// Returns [`CodegenError::OptionDecode`] when a sub-message has
149    /// malformed bytes.
150    pub fn scan_pool(pool: &DescriptorPool) -> Result<Vec<AnnotatedEvent>, CodegenError> {
151        let mut events: Vec<AnnotatedEvent> = Vec::new();
152        for msg in pool.all_messages() {
153            let dp = msg.descriptor_proto();
154            if !dp.options.is_set() {
155                continue;
156            }
157            let mut bytes = Vec::new();
158            dp.options.__buffa_unknown_fields.write_to(&mut bytes);
159            let Some(event_opts) = read_event_options(&bytes, msg.full_name())? else {
160                continue;
161            };
162            let mut decl = AnnotatedEvent {
163                full_name: msg.full_name().to_string(),
164                event: event_opts,
165                fields: Vec::new(),
166            };
167            for f in msg.fields() {
168                let fdp = f.descriptor_proto();
169                let mut fbytes = Vec::new();
170                if fdp.options.is_set() {
171                    fdp.options.__buffa_unknown_fields.write_to(&mut fbytes);
172                }
173                let opts =
174                    read_field_options(&fbytes, &format!("{}/{}", msg.full_name(), f.name()))?
175                        .unwrap_or_default();
176                decl.fields.push(AnnotatedField {
177                    name: f.name().to_string(),
178                    number: f.number(),
179                    options: opts,
180                });
181            }
182            events.push(decl);
183        }
184        events.sort_by(|a, b| a.full_name.cmp(&b.full_name));
185        Ok(events)
186    }
187}