obs-build 0.2.1

Build-time codegen helpers for the obs SDK; reads .proto via buffa-build + buffa-reflect.
Documentation
//! Build-time codegen helpers for the proto-first authoring path.
//!
//! Phase-1 surface (impl-plan task 1.10):
//!
//! - [`Config::compile`] runs `protoc` (or accepts a precompiled FDS), hands the FDS to
//!   `buffa-build` for Rust-type generation, then walks the FDS via `buffa-reflect` to extract the
//!   `(obs.v1.event)` and `(obs.v1.field)` custom options out of each message's
//!   `MessageOptions.__buffa_unknown_fields`. From those bytes the codegen emits `EventSchema`
//!   impls, `EventSchemaErased` impls, and `linkme` registrations into `OUT_DIR/obs/schemas.rs`.
//! - The byte-identical-output property described in spec 12 § 1.2 (proto-first and Rust-first
//!   authoring produce the same generated bytes when the schemas are equivalent) is verified by
//!   the Phase-2 task 2.1 integration tests; Phase 1 ships the codegen, and the test harness
//!   builds on top of it.
//!
//! See spec 12 § 4 + spec 14 § 7.

#![forbid(unsafe_code)]
#![warn(rust_2024_compatibility, missing_debug_implementations)]
// `obs-build` runs in the user's `build.rs` (sync, before the async
// runtime exists), so the workspace's tokio-fs / no-expect lints don't
// apply here. Tests use `.unwrap()` freely.
#![allow(
    missing_docs,
    clippy::disallowed_types,
    clippy::disallowed_methods,
    clippy::expect_used,
    clippy::indexing_slicing
)]
#![cfg_attr(test, allow(clippy::unwrap_used))]

mod codegen;
mod config;
pub mod lints;
mod options;

pub use config::{
    Config, DescriptorSource, EMBEDDED_ENUMS_PROTO, EMBEDDED_OPTIONS_PROTO,
    materialise_embedded_options,
};
pub use lints::{
    LintError, LintField, LintInput, LintProtoType, emit_cross_event_lints, emit_lints,
};
pub use options::{CodegenError, EventOptions, FieldOptions, MetricSpec};

/// Reflection-based reader of `(obs.v1.event)` and `(obs.v1.field)`
/// options from a `buffa-reflect` descriptor pool. Re-exported so
/// `obs-cli`'s `lint` / `validate` / `schema show` subcommands can
/// share the parser with `Config::compile`.
pub mod reflect {
    use buffa_reflect::{DescriptorPool, Kind};
    use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Severity, Tier};

    use crate::{
        lints::LintProtoType,
        options::{
            CodegenError, EventOptions, FieldOptions, read_event_options, read_field_options,
        },
    };

    /// One annotated event scanned from a descriptor pool.
    ///
    /// [`scan_pool`] produces one value per message whose options decode to
    /// an `(obs.v1.event)` annotation; messages without the option yield
    /// nothing.
    #[derive(Debug)]
    #[non_exhaustive]
    pub struct AnnotatedEvent {
        /// Fully qualified message name (`myapp.v1.ObsXxx`).
        pub full_name: String,
        /// Decoded `(obs.v1.event)` options.
        pub event: EventOptions,
        /// Per-field `(obs.v1.field)` options paired with name + tag.
        ///
        /// [`scan_pool`] records every field the message descriptor yields,
        /// in that order; fields without an annotation carry default options.
        pub fields: Vec<AnnotatedField>,
    }

    /// One field of an [`AnnotatedEvent`].
    ///
    /// Use the [`kind`](Self::kind), [`cardinality`](Self::cardinality) and
    /// [`classification`](Self::classification) accessors to read the
    /// effective values with defaults applied.
    #[derive(Debug)]
    #[non_exhaustive]
    pub struct AnnotatedField {
        /// Field name in the proto.
        pub name: String,
        /// Proto tag number.
        pub number: u32,
        /// Decoded `(obs.v1.field)` options; the `Default` value when the
        /// field carries no such annotation.
        pub options: FieldOptions,
        /// Reflected proto type, used by CLI lint/migrate/decode paths that
        /// do not run codegen. Scalars map to a dedicated variant; enum and
        /// message fields are carried as `LintProtoType::Other` holding the
        /// referenced type's full name.
        pub proto_type: LintProtoType,
    }

    impl AnnotatedField {
        /// Field kind declared on the annotation, or ATTRIBUTE when the
        /// option was left unset.
        #[must_use]
        pub fn kind(&self) -> FieldKind {
            let declared = self.options.kind;
            declared.unwrap_or(FieldKind::Attribute)
        }

        /// Cardinality declared on the annotation, or UNSPECIFIED when the
        /// option was left unset.
        #[must_use]
        pub fn cardinality(&self) -> Cardinality {
            let declared = self.options.cardinality;
            declared.unwrap_or(Cardinality::Unspecified)
        }

        /// Classification declared on the annotation, or INTERNAL when the
        /// option was left unset.
        #[must_use]
        pub fn classification(&self) -> Classification {
            let declared = self.options.classification;
            declared.unwrap_or(Classification::Internal)
        }
    }

    impl AnnotatedEvent {
        /// Tier declared on the event annotation, or LOG when the option was
        /// left unset.
        #[must_use]
        pub fn tier(&self) -> Tier {
            let declared = self.event.tier;
            declared.unwrap_or(Tier::Log)
        }

        /// Default severity declared on the event annotation, or INFO when
        /// the option was left unset.
        #[must_use]
        pub fn default_sev(&self) -> Severity {
            let declared = self.event.default_sev;
            declared.unwrap_or(Severity::Info)
        }

        /// First 8 bytes of BLAKE3 over the canonical descriptor string,
        /// read as a little-endian `u64`.
        ///
        /// The canonical string is `full_name|tier|default_sev|` followed by
        /// one `name:kind:cardinality:classification,` record per entry of
        /// `self.fields`, using the effective (defaulted) values.
        ///
        /// Mirrors `obs-build::codegen::EventDecl::schema_hash` and the
        /// proc-macro's `compute_schema_hash` so codegen, runtime, and
        /// migrate paths agree byte-for-byte. Spec 12 § 3.5 / spec 93
        /// P1-9 (`obs migrate clickhouse` populates schema_hash).
        #[must_use]
        pub fn schema_hash(&self) -> u64 {
            // Header section; the trailing '|' separates it from the records.
            let mut canonical = [
                self.full_name.as_str(),
                self.tier().as_str(),
                self.default_sev().as_str(),
            ]
            .join("|");
            canonical.push('|');

            // Field records, each terminated (not separated) by ','.
            for field in &self.fields {
                let record = [
                    field.name.as_str(),
                    field.kind().as_str(),
                    field.cardinality().as_str(),
                    field.classification().as_str(),
                ]
                .join(":");
                canonical.push_str(&record);
                canonical.push(',');
            }

            // A BLAKE3 digest is 32 bytes, so taking the leading 8 cannot fail.
            let digest = blake3::hash(canonical.as_bytes());
            let mut prefix = [0_u8; 8];
            prefix.copy_from_slice(&digest.as_bytes()[..8]);
            u64::from_le_bytes(prefix)
        }
    }

    /// Collect every message in `pool` that carries an `(obs.v1.event)`
    /// annotation, together with the `(obs.v1.field)` options of its fields.
    ///
    /// Messages whose options are unset, or whose options do not decode to
    /// an event annotation, are skipped. Fields without an annotation are
    /// still recorded, with default [`FieldOptions`]. The result is sorted
    /// by `full_name` so diff/lint output is deterministic.
    ///
    /// # Errors
    ///
    /// Propagates any [`CodegenError`] returned while decoding event or
    /// field options ([`CodegenError::OptionDecode`] when a sub-message has
    /// malformed bytes).
    pub fn scan_pool(pool: &DescriptorPool) -> Result<Vec<AnnotatedEvent>, CodegenError> {
        let mut annotated = Vec::<AnnotatedEvent>::new();

        for message in pool.all_messages() {
            let message_name = message.full_name();
            let message_proto = message.descriptor_proto();
            if !message_proto.options.is_set() {
                continue;
            }

            // Custom options are unknown to the descriptor schema, so they
            // are re-serialised from the unknown-field set and decoded here.
            let mut event_bytes = Vec::new();
            message_proto.options.__buffa_unknown_fields.write_to(&mut event_bytes);
            let Some(event) = read_event_options(&event_bytes, message_name)? else {
                continue;
            };

            let mut fields = Vec::new();
            for field in message.fields() {
                let field_proto = field.descriptor_proto();
                // Left empty when the field has no options at all.
                let mut field_bytes = Vec::new();
                if field_proto.options.is_set() {
                    field_proto.options.__buffa_unknown_fields.write_to(&mut field_bytes);
                }

                // `message/field` label handed to the decoder for diagnostics.
                let context = format!("{}/{}", message_name, field.name());
                let options = read_field_options(&field_bytes, &context)?.unwrap_or_default();

                fields.push(AnnotatedField {
                    name: field.name().to_string(),
                    number: field.number(),
                    options,
                    proto_type: map_kind_to_lint_type(&field.kind()),
                });
            }

            annotated.push(AnnotatedEvent {
                full_name: message_name.to_string(),
                event,
                fields,
            });
        }

        annotated.sort_by(|lhs, rhs| lhs.full_name.cmp(&rhs.full_name));
        Ok(annotated)
    }

    /// Translate a reflected field [`Kind`] into the coarser
    /// [`LintProtoType`] recorded on [`AnnotatedField::proto_type`].
    ///
    /// Scalars collapse into string / bytes / bool / float / signed /
    /// unsigned buckets. Enum and message fields become
    /// `LintProtoType::Other` carrying the referenced type's full name, and
    /// any remaining kind becomes `Other` carrying its `Debug` rendering.
    fn map_kind_to_lint_type(kind: &Kind) -> LintProtoType {
        match kind {
            // Non-numeric scalars.
            Kind::Bool => LintProtoType::Bool,
            Kind::String => LintProtoType::String,
            Kind::Bytes => LintProtoType::Bytes,

            // Numeric scalars, bucketed by floating point / signedness.
            Kind::Float | Kind::Double => LintProtoType::Float,
            Kind::Uint32 | Kind::Fixed32 | Kind::Uint64 | Kind::Fixed64 => {
                LintProtoType::UnsignedInteger
            }
            Kind::Int32
            | Kind::Sint32
            | Kind::Sfixed32
            | Kind::Int64
            | Kind::Sint64
            | Kind::Sfixed64 => LintProtoType::SignedInteger,

            // Named types keep their fully qualified name.
            Kind::Message(message) => LintProtoType::Other(message.full_name().to_string()),
            Kind::Enum(enumeration) => LintProtoType::Other(enumeration.full_name().to_string()),

            // Anything else is preserved via its Debug form.
            unrecognised => LintProtoType::Other(format!("{unrecognised:?}")),
        }
    }
}