Skip to main content

obs_build/
lib.rs

//! Build-time codegen helpers for the proto-first authoring path.
//!
//! Phase-1 surface (impl-plan task 1.10):
//!
//! - [`Config::compile`] runs `protoc` (or accepts a precompiled FDS), hands the FDS to
//!   `buffa-build` for Rust-type generation, then walks the FDS via `buffa-reflect` to extract the
//!   `(obs.v1.event)` and `(obs.v1.field)` custom options out of each message's
//!   `MessageOptions.__buffa_unknown_fields`. From those bytes the codegen emits `EventSchema`
//!   impls, `EventSchemaErased` impls, and `linkme` registrations into `OUT_DIR/obs/schemas.rs`.
//! - The byte-identical-output property described in spec 12 § 1.2 (proto-first vs Rust-first land
//!   on the same generated bytes when the schemas are equivalent) is verified via Phase-2 task 2.1
//!   integration tests; Phase 1 ships the codegen, and the test harness builds on top.
//!
//! See spec 12 § 4 + spec 14 § 7.
15
16#![forbid(unsafe_code)]
17#![warn(rust_2024_compatibility, missing_debug_implementations)]
18// `obs-build` runs in the user's `build.rs` (sync, before the async
19// runtime exists), so the workspace's tokio-fs / no-expect lints don't
20// apply here. Tests use `.unwrap()` freely.
21#![allow(
22    missing_docs,
23    clippy::disallowed_types,
24    clippy::disallowed_methods,
25    clippy::expect_used,
26    clippy::indexing_slicing
27)]
28#![cfg_attr(test, allow(clippy::unwrap_used))]
29
30mod codegen;
31mod config;
32pub mod lints;
33mod options;
34
35pub use config::{
36    Config, DescriptorSource, EMBEDDED_ENUMS_PROTO, EMBEDDED_OPTIONS_PROTO,
37    materialise_embedded_options,
38};
39pub use lints::{
40    LintError, LintField, LintInput, LintProtoType, emit_cross_event_lints, emit_lints,
41};
42pub use options::{CodegenError, EventOptions, FieldOptions, MetricSpec};
43
44/// Reflection-based reader of `(obs.v1.event)` and `(obs.v1.field)`
45/// options from a `buffa-reflect` descriptor pool. Re-exported so
46/// `obs-cli`'s `lint` / `validate` / `schema show` subcommands can
47/// share the parser with `Config::compile`.
48pub mod reflect {
49    use buffa_reflect::{DescriptorPool, Kind};
50    use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Severity, Tier};
51
52    use crate::{
53        lints::LintProtoType,
54        options::{
55            CodegenError, EventOptions, FieldOptions, read_event_options, read_field_options,
56        },
57    };
58
59    /// One annotated event scanned from a descriptor pool.
60    #[derive(Debug)]
61    #[non_exhaustive]
62    pub struct AnnotatedEvent {
63        /// Fully qualified message name (`myapp.v1.ObsXxx`).
64        pub full_name: String,
65        /// Decoded `(obs.v1.event)` options.
66        pub event: EventOptions,
67        /// Per-field `(obs.v1.field)` options paired with name + tag.
68        pub fields: Vec<AnnotatedField>,
69    }
70
71    /// One field of an [`AnnotatedEvent`].
72    #[derive(Debug)]
73    #[non_exhaustive]
74    pub struct AnnotatedField {
75        /// Field name in the proto.
76        pub name: String,
77        /// Proto tag number.
78        pub number: u32,
79        /// Decoded `(obs.v1.field)` options.
80        pub options: FieldOptions,
81        /// Reflected proto scalar type, used by CLI lint/migrate/decode
82        /// paths that do not run codegen.
83        pub proto_type: LintProtoType,
84    }
85
86    impl AnnotatedField {
87        /// Effective field kind, defaulting to ATTRIBUTE.
88        #[must_use]
89        pub fn kind(&self) -> FieldKind {
90            self.options.kind.unwrap_or(FieldKind::Attribute)
91        }
92        /// Effective cardinality, defaulting to UNSPECIFIED.
93        #[must_use]
94        pub fn cardinality(&self) -> Cardinality {
95            self.options.cardinality.unwrap_or(Cardinality::Unspecified)
96        }
97        /// Effective classification, defaulting to INTERNAL.
98        #[must_use]
99        pub fn classification(&self) -> Classification {
100            self.options
101                .classification
102                .unwrap_or(Classification::Internal)
103        }
104    }
105
106    impl AnnotatedEvent {
107        /// Effective tier, defaulting to LOG.
108        #[must_use]
109        pub fn tier(&self) -> Tier {
110            self.event.tier.unwrap_or(Tier::Log)
111        }
112        /// Effective default severity, defaulting to INFO.
113        #[must_use]
114        pub fn default_sev(&self) -> Severity {
115            self.event.default_sev.unwrap_or(Severity::Info)
116        }
117        /// First 8 bytes of BLAKE3 over the canonical descriptor string.
118        /// Mirrors `obs-build::codegen::EventDecl::schema_hash` and the
119        /// proc-macro's `compute_schema_hash` so codegen, runtime, and
120        /// migrate paths agree byte-for-byte. Spec 12 § 3.5 / spec 93
121        /// P1-9 (`obs migrate clickhouse` populates schema_hash).
122        #[must_use]
123        pub fn schema_hash(&self) -> u64 {
124            let mut s = String::new();
125            s.push_str(&self.full_name);
126            s.push('|');
127            s.push_str(self.tier().as_str());
128            s.push('|');
129            s.push_str(self.default_sev().as_str());
130            s.push('|');
131            for f in &self.fields {
132                s.push_str(&f.name);
133                s.push(':');
134                s.push_str(f.kind().as_str());
135                s.push(':');
136                s.push_str(f.cardinality().as_str());
137                s.push(':');
138                s.push_str(f.classification().as_str());
139                s.push(',');
140            }
141            let h = blake3::hash(s.as_bytes());
142            let bytes = h.as_bytes();
143            let arr = <[u8; 8]>::try_from(&bytes[..8]).expect("blake3 always produces 32 bytes");
144            u64::from_le_bytes(arr)
145        }
146    }
147
148    /// Walk a descriptor pool and collect every annotated `(obs.v1.event)`.
149    /// Skips messages without the option. Returns events in stable
150    /// `full_name` order (deterministic for diff/lint output).
151    ///
152    /// # Errors
153    ///
154    /// Returns [`CodegenError::OptionDecode`] when a sub-message has
155    /// malformed bytes.
156    pub fn scan_pool(pool: &DescriptorPool) -> Result<Vec<AnnotatedEvent>, CodegenError> {
157        let mut events: Vec<AnnotatedEvent> = Vec::new();
158        for msg in pool.all_messages() {
159            let dp = msg.descriptor_proto();
160            if !dp.options.is_set() {
161                continue;
162            }
163            let mut bytes = Vec::new();
164            dp.options.__buffa_unknown_fields.write_to(&mut bytes);
165            let Some(event_opts) = read_event_options(&bytes, msg.full_name())? else {
166                continue;
167            };
168            let mut decl = AnnotatedEvent {
169                full_name: msg.full_name().to_string(),
170                event: event_opts,
171                fields: Vec::new(),
172            };
173            for f in msg.fields() {
174                let fdp = f.descriptor_proto();
175                let mut fbytes = Vec::new();
176                if fdp.options.is_set() {
177                    fdp.options.__buffa_unknown_fields.write_to(&mut fbytes);
178                }
179                let opts =
180                    read_field_options(&fbytes, &format!("{}/{}", msg.full_name(), f.name()))?
181                        .unwrap_or_default();
182                decl.fields.push(AnnotatedField {
183                    name: f.name().to_string(),
184                    number: f.number(),
185                    options: opts,
186                    proto_type: map_kind_to_lint_type(&f.kind()),
187                });
188            }
189            events.push(decl);
190        }
191        events.sort_by(|a, b| a.full_name.cmp(&b.full_name));
192        Ok(events)
193    }
194
195    fn map_kind_to_lint_type(kind: &Kind) -> LintProtoType {
196        match kind {
197            Kind::String => LintProtoType::String,
198            Kind::Bytes => LintProtoType::Bytes,
199            Kind::Bool => LintProtoType::Bool,
200            Kind::Double | Kind::Float => LintProtoType::Float,
201            Kind::Int32
202            | Kind::Int64
203            | Kind::Sint32
204            | Kind::Sint64
205            | Kind::Sfixed32
206            | Kind::Sfixed64 => LintProtoType::SignedInteger,
207            Kind::Uint32 | Kind::Uint64 | Kind::Fixed32 | Kind::Fixed64 => {
208                LintProtoType::UnsignedInteger
209            }
210            Kind::Enum(e) => LintProtoType::Other(e.full_name().to_string()),
211            Kind::Message(m) => LintProtoType::Other(m.full_name().to_string()),
212            other => LintProtoType::Other(format!("{other:?}")),
213        }
214    }
215}