Skip to main content

obs_build/
lints.rs

1//! Shared lint emission for the proto-first (`obs-build::codegen`) and
2//! Rust-first (`obs-macros::derive_event`) authoring paths.
3//!
4//! Decision D8-1 (spec 95 § 5): both paths build a [`LintInput`] and
5//! call [`emit_lints`]. Each [`LintError`] carries a stable code
6//! (`L001`..`L014`) and the same human-readable message regardless of
7//! which path produced it; the consumer (codegen.rs / derive_event.rs)
8//! formats the errors into either Rust source text or `proc_macro2`
9//! tokens.
10//!
11//! This module has no `proc_macro2`/`quote`/`syn` dep so it can be
12//! linked from both an ordinary library crate (obs-build's codegen
13//! path) and a proc-macro crate (obs-macros's derive path) without
14//! pulling each other's heavy transitive deps.
15
16use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Severity, Tier};
17
/// Proto wire type a field declares. The proto-first path fills this
/// from `FieldDescriptorProto::r#type`; the derive path fills it from
/// the Rust syntactic type via [`LintProtoType::from_rust_token`]. When
/// the type cannot be inferred it is `Other(_)` and the type-checking
/// portion of L014 simply does not fire.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintProtoType {
    /// `string` / `String` / `str` behind a reference (`&str`,
    /// `&'static str`, ...).
    String,
    /// `bytes` / `Vec<u8>` / `Bytes`.
    Bytes,
    /// Signed integer scalar.
    SignedInteger,
    /// Unsigned integer scalar.
    UnsignedInteger,
    /// Floating-point scalar.
    Float,
    /// Numeric scalar whose signedness/width is not known.
    Numeric,
    /// `bool`.
    Bool,
    /// Unknown; type-aware lints skip. Carries the whitespace-stripped
    /// token text that could not be classified.
    Other(String),
}

impl LintProtoType {
    /// Map a Rust type's `ToTokens` form to a `LintProtoType`. Heuristic
    /// only — covers the common cases (`String`, `str`, `Vec<u8>`,
    /// `Bytes`, integers up to 128 bits, floats, `bool`), optionally
    /// path-qualified and optionally behind one or more `&` / `&'a` /
    /// `&mut` layers. Anything else lands in `Other(_)` (holding the
    /// whitespace-stripped input) and L014's type-validation skips.
    #[must_use]
    pub fn from_rust_token(s: &str) -> Self {
        let normalised: String = s.chars().filter(|c| !c.is_whitespace()).collect();
        // First pass: the whole token text, so every spelling that was
        // recognised before reference-stripping existed still is.
        if let Some(found) = Self::classify(&normalised) {
            return found;
        }
        // Second pass: look through leading references. This must work
        // on the *unnormalised* text — once whitespace is gone,
        // `& 'static str` becomes `&'staticstr` and the lifetime can no
        // longer be told apart from the type name.
        let referent: String = Self::strip_reference_prefix(s)
            .chars()
            .filter(|c| !c.is_whitespace())
            .collect();
        Self::classify(&referent).unwrap_or(Self::Other(normalised))
    }

    /// Classify a whitespace-free type spelling; `None` when the
    /// spelling is not one of the recognised scalars.
    fn classify(ty: &str) -> Option<Self> {
        // `leaf` either is the whole spelling or is its final path segment.
        let names = |leaf: &str| {
            ty == leaf
                || ty
                    .strip_suffix(leaf)
                    .is_some_and(|path| path.ends_with("::"))
        };
        if ty == "&str" || names("String") || names("str") {
            return Some(Self::String);
        }
        if names("Vec<u8>") || names("Bytes") {
            return Some(Self::Bytes);
        }
        match ty {
            "bool" => Some(Self::Bool),
            "i8" | "i16" | "i32" | "i64" | "i128" | "isize" => Some(Self::SignedInteger),
            "u8" | "u16" | "u32" | "u64" | "u128" | "usize" => Some(Self::UnsignedInteger),
            "f32" | "f64" => Some(Self::Float),
            _ => None,
        }
    }

    /// Drop every leading `&`, together with an optional lifetime and an
    /// optional `mut`, returning the referent's spelling.
    fn strip_reference_prefix(s: &str) -> &str {
        let mut rest = s.trim();
        while let Some(after_amp) = rest.strip_prefix('&') {
            rest = after_amp.trim_start();
            if let Some(after_tick) = rest.strip_prefix('\'') {
                let ident_len = after_tick
                    .find(|c: char| !(c.is_alphanumeric() || c == '_'))
                    .unwrap_or(after_tick.len());
                rest = after_tick[ident_len..].trim_start();
            }
            if let Some(after_mut) = rest.strip_prefix("mut") {
                // Only a keyword when followed by whitespace; otherwise it
                // is the start of an identifier such as `mutex::Guard`.
                if after_mut.starts_with(char::is_whitespace) {
                    rest = after_mut.trim_start();
                }
            }
        }
        rest
    }

    /// Whether the type is any numeric scalar.
    #[must_use]
    pub fn is_numeric(&self) -> bool {
        matches!(
            self,
            Self::SignedInteger | Self::UnsignedInteger | Self::Float | Self::Numeric
        )
    }

    /// Human-readable scalar family used in lint diagnostics. Every
    /// label is a literal, so the result does not borrow from `self`.
    #[must_use]
    pub fn label(&self) -> &'static str {
        match self {
            Self::String => "string",
            Self::Bytes => "bytes",
            Self::SignedInteger => "signed integer",
            Self::UnsignedInteger => "unsigned integer",
            Self::Float => "float",
            Self::Numeric => "numeric",
            Self::Bool => "bool",
            Self::Other(_) => "unknown",
        }
    }
}
104
105/// One field's worth of lint input.
106#[derive(Debug, Clone)]
107pub struct LintField {
108    /// Field name as authored (`String` for proto, the Rust ident for
109    /// the derive path).
110    pub name: String,
111    /// Effective field kind (defaulted to `Attribute` when absent).
112    pub kind: FieldKind,
113    /// Effective cardinality.
114    pub cardinality: Cardinality,
115    /// Effective classification.
116    pub classification: Classification,
117    /// `true` when a `Measurement`-kind field declares a metric kind
118    /// (counter / gauge / histogram). The lint pass only needs the
119    /// presence bit to fire L004; the actual kind/unit/bounds are
120    /// consumed downstream by the codegen.
121    pub has_metric: bool,
122    /// Proto/Rust type the field declares. `None` for callers that
123    /// cannot infer it; type-aware lints skip in that case.
124    pub proto_type: Option<LintProtoType>,
125}
126
127/// Input for the shared lint pass — one event's worth.
128#[derive(Debug, Clone)]
129pub struct LintInput {
130    /// Display name used in lint messages (e.g. `"ObsRequestStarted"`).
131    pub event_name: String,
132    /// Effective tier.
133    pub tier: Tier,
134    /// Workspace event prefix for L011 (default `"Obs"`).
135    pub event_prefix: String,
136    /// Fields in proto declaration order.
137    pub fields: Vec<LintField>,
138}
139
/// A single lint violation.
#[derive(Debug, Clone)]
pub struct LintError {
    /// Stable identifier of the lint that fired (`"L001"`..`"L014"`).
    pub code: &'static str,
    /// Multi-line, human-readable text that can be pasted verbatim
    /// into a `panic!("…")` call; embedded `\n`s are kept as-is.
    pub message: String,
}

impl LintError {
    /// Pair a lint code with its already-rendered message.
    fn new(code: &'static str, message: String) -> Self {
        LintError { code, message }
    }
}
155
156/// Run every lint in the catalogue against one event. Returns one
157/// `LintError` per violation, in stable code order so generated output
158/// is deterministic. Spec 95 § 2.1 (D8-1).
159#[must_use]
160pub fn emit_lints(input: &LintInput) -> Vec<LintError> {
161    let mut out: Vec<LintError> = Vec::new();
162    check_l011(input, &mut out);
163    check_l009(input, &mut out);
164    for f in &input.fields {
165        check_per_field(input, f, &mut out);
166    }
167    out
168}
169
170/// Cross-event lints — currently L013 (schema_hash uniqueness within
171/// a codegen unit). The codegen path passes every event's `(full_name,
172/// schema_hash)`; the derive path runs once per event so it cannot
173/// detect collisions and skips L013.
174#[must_use]
175pub fn emit_cross_event_lints(events: &[(String, u64)]) -> Vec<LintError> {
176    let mut out = Vec::new();
177    for (i, a) in events.iter().enumerate() {
178        for b in events.iter().skip(i + 1) {
179            if a.1 == b.1 {
180                let msg = format!(
181                    "obs L013: schema_hash collision: `{a}` and `{b}` both hash to \
182                     {hash:#018x}.\nhelp: rename one event so the canonical descriptor differs \
183                     (any field rename / reorder will do).",
184                    a = a.0,
185                    b = b.0,
186                    hash = a.1,
187                );
188                out.push(LintError::new("L013", msg));
189            }
190        }
191    }
192    out
193}
194
195fn check_l011(input: &LintInput, out: &mut Vec<LintError>) {
196    if !input.event_name.starts_with(&input.event_prefix) {
197        let msg = format!(
198            "obs L011: event type name `{name}` must start with `{prefix}`\nnote: the `{prefix}` \
199             prefix gives every event type a unique visual identity at call sites.\nhelp: rename \
200             to `{prefix}{name}`.",
201            name = input.event_name,
202            prefix = input.event_prefix,
203        );
204        out.push(LintError::new("L011", msg));
205    }
206}
207
208fn check_l009(input: &LintInput, out: &mut Vec<LintError>) {
209    if input.fields.is_empty() {
210        let msg = format!(
211            "obs L009: event `{name}` has no fields\nnote: empty events make analytics joins \
212             meaningless and indicate an unfinished schema.\nhelp: declare at least one field or \
213             rethink whether the event should exist.",
214            name = input.event_name,
215        );
216        out.push(LintError::new("L009", msg));
217    }
218}
219
220fn check_per_field(input: &LintInput, f: &LintField, out: &mut Vec<LintError>) {
221    // L001: LABEL must be Low or Medium cardinality.
222    if matches!(f.kind, FieldKind::Label) && !f.cardinality.is_label_compatible() {
223        let msg = format!(
224            "obs L001: field `{name}` is LABEL but cardinality is not label-compatible\nnote: \
225             LABEL fields must be Low or Medium cardinality. High and Unbounded are illegal \
226             because they would explode the metric attribute set.\nhelp: change `kind: LABEL` to \
227             `kind: ATTRIBUTE` if the value is high-cardinality (an ATTRIBUTE is logged but never \
228             becomes a metric dim).",
229            name = f.name,
230        );
231        out.push(LintError::new("L001", msg));
232    }
233
234    // L002: PII fields must not be LABEL.
235    if matches!(f.kind, FieldKind::Label) && matches!(f.classification, Classification::Pii) {
236        let msg = format!(
237            "obs L002: field `{name}` is LABEL with classification PII\nnote: PII fields cannot \
238             be LABEL because labels become metric attributes that are kept indefinitely and leak \
239             into vendor backends.\nhelp: change kind to ATTRIBUTE so the value is logged + \
240             analytics-only, and the redactor can scrub it on the durable path.",
241            name = f.name,
242        );
243        out.push(LintError::new("L002", msg));
244    }
245
246    // L003: SECRET on a LOG/AUDIT tier event.
247    if matches!(f.classification, Classification::Secret)
248        && matches!(input.tier, Tier::Log | Tier::Audit)
249    {
250        let msg = format!(
251            "obs L003: field `{name}` is SECRET on a `{tier}` tier event\nnote: SECRET fields are \
252             forbidden on LOG/AUDIT tiers because those tiers persist payloads to long-retained \
253             sinks.\nhelp: move the field to a non-secret column, or move the event to \
254             TRACE/METRIC tier (which do not persist payload bytes).",
255            name = f.name,
256            tier = input.tier.as_str(),
257        );
258        out.push(LintError::new("L003", msg));
259    }
260
261    // L004: MEASUREMENT requires a metric kind.
262    if matches!(f.kind, FieldKind::Measurement) && !f.has_metric {
263        let msg = format!(
264            "obs L004: field `{name}` is MEASUREMENT without a metric kind\nnote: MEASUREMENT \
265             fields must declare a metric kind (counter / gauge / histogram) so the OTLP metric \
266             sink can dispatch correctly.\nhelp: annotate the proto field with a metric option \
267             such as kind=METRIC_KIND_COUNTER and a unit string.",
268            name = f.name,
269        );
270        out.push(LintError::new("L004", msg));
271    }
272
273    // L006: AUDIT tier forbids any PII / SECRET on any field.
274    if matches!(input.tier, Tier::Audit)
275        && matches!(
276            f.classification,
277            Classification::Pii | Classification::Secret
278        )
279    {
280        let cls = match f.classification {
281            Classification::Pii => "PII",
282            Classification::Secret => "SECRET",
283            _ => "classified",
284        };
285        let msg = format!(
286            "obs L006: AUDIT-tier event must not carry `{cls}` field `{name}`\nnote: AUDIT events \
287             ship to long-retained immutable sinks; classified data must be redacted at the \
288             source.\nhelp: drop the field or move the event to a non-AUDIT tier.",
289            name = f.name,
290        );
291        out.push(LintError::new("L006", msg));
292    }
293
294    // L007: snake_case field names.
295    if !is_snake_case(&f.name) {
296        let msg = format!(
297            "obs L007: field `{name}` is not snake_case\nnote: every obs field name maps 1:1 to a \
298             proto field, OTLP attribute, and analytics column; snake_case is required so the \
299             projection round-trips deterministically.\nhelp: rename to `{suggest}`.",
300            name = f.name,
301            suggest = to_snake_case(&f.name),
302        );
303        out.push(LintError::new("L007", msg));
304    }
305
306    // L012: field name must not shadow envelope-reserved name. Skip
307    // TRACE_ID / SPAN_ID / PARENT_SPAN_ID — those are *meant* to
308    // project onto envelope slots of the same name.
309    const RESERVED: &[&str] = &[
310        "ts_ns",
311        "service",
312        "instance",
313        "schema_hash",
314        "callsite_id",
315        "sev",
316        "tier",
317        "labels",
318        "payload",
319        "sampling_reason",
320    ];
321    if !matches!(
322        f.kind,
323        FieldKind::TraceId | FieldKind::SpanId | FieldKind::ParentSpanId
324    ) && RESERVED.contains(&f.name.as_str())
325    {
326        let msg = format!(
327            "obs L012: field `{name}` shadows envelope-reserved name\nnote: `{name}` is one of \
328             the obs envelope's first-class fields. A payload field by the same name would clash \
329             on the analytics surface.\nhelp: rename the field; if the intent was to project onto \
330             the envelope slot, set the appropriate kind (e.g. `kind: TRACE_ID`).",
331            name = f.name,
332        );
333        out.push(LintError::new("L012", msg));
334    }
335
336    // L014: TRACE_ID / SPAN_ID / PARENT_SPAN_ID kind fields must be
337    // named with the matching envelope slot AND have proto type
338    // `string`. Spec 95 § 2.2.
339    if let Some(expected) = expected_correlation_name(f.kind) {
340        if f.name != expected {
341            let msg = format!(
342                "obs L014: field `{name}` declares `kind` as a correlation slot but is not named \
343                 `{expected}`\nnote: codegen projects fields whose kind is TRACE_ID / SPAN_ID / \
344                 PARENT_SPAN_ID into the envelope slot of the same name; renaming keeps the \
345                 analytics column predictable.\nhelp: rename the field to `{expected}` or change \
346                 the `kind` to ATTRIBUTE.",
347                name = f.name,
348            );
349            out.push(LintError::new("L014", msg));
350        }
351        if let Some(t) = &f.proto_type
352            && !matches!(t, LintProtoType::String | LintProtoType::Other(_))
353        {
354            let actual = t.label();
355            let msg = format!(
356                "obs L014: field `{name}` has kind {kind} but proto type is {actual}; expected \
357                 string\nnote: correlation slots are projected into \
358                 `env.trace_id`/`env.span_id`/`env.parent_span_id` which are typed `string`; a \
359                 non-string proto type would require a runtime cast.\nhelp: change the field's \
360                 proto type to `string`.",
361                name = f.name,
362                kind = correlation_kind_label(f.kind),
363            );
364            out.push(LintError::new("L014", msg));
365        }
366    }
367}
368
369fn expected_correlation_name(k: FieldKind) -> Option<&'static str> {
370    match k {
371        FieldKind::TraceId => Some("trace_id"),
372        FieldKind::SpanId => Some("span_id"),
373        FieldKind::ParentSpanId => Some("parent_span_id"),
374        _ => None,
375    }
376}
377
378fn correlation_kind_label(k: FieldKind) -> &'static str {
379    match k {
380        FieldKind::TraceId => "TRACE_ID",
381        FieldKind::SpanId => "SPAN_ID",
382        FieldKind::ParentSpanId => "PARENT_SPAN_ID",
383        _ => "",
384    }
385}
386
/// `true` when `s` is non-empty, consists solely of ASCII lowercase
/// letters, ASCII digits and `_`, neither starts nor ends with `_`,
/// and never has two `_` in a row.
fn is_snake_case(s: &str) -> bool {
    // Reject the structural problems first ...
    if s.is_empty() || s.starts_with('_') || s.ends_with('_') || s.contains("__") {
        return false;
    }
    // ... then make sure nothing outside the permitted alphabet appears.
    s.bytes()
        .all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9' | b'_'))
}
395
396fn to_snake_case(s: &str) -> String {
397    use heck::ToSnakeCase;
398    s.to_snake_case()
399}
400
401/// Severity helper: an enum's `as_str` impl (`Severity::as_str` etc.)
402/// already exists for `Tier`. The lint module re-exports a small marker
403/// trait here so callers don't need to depend on `obs_types`'s `as_str`
404/// directly. (Implementation note — both `Tier` and `Severity` already
405/// expose `as_str` in `obs_types`, so the explicit `_` parameter
406/// silences the unused-import lint.)
407#[doc(hidden)]
408pub fn _ensure_severity_link(_: Severity) {}
409
#[cfg(test)]
mod tests {
    use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Tier};

    use super::*;

    /// Assemble a [`LintInput`] from its four parts.
    fn input(prefix: &str, name: &str, tier: Tier, fields: Vec<LintField>) -> LintInput {
        LintInput {
            fields,
            tier,
            event_name: name.to_owned(),
            event_prefix: prefix.to_owned(),
        }
    }

    /// A low-cardinality, internal, string-typed field with no metric.
    fn field(name: &str, kind: FieldKind) -> LintField {
        LintField {
            kind,
            name: name.to_owned(),
            cardinality: Cardinality::Low,
            classification: Classification::Internal,
            has_metric: false,
            proto_type: Some(LintProtoType::String),
        }
    }

    /// LOG-tier event `ObsX` under the `Obs` prefix holding `fields`.
    fn obs_x(fields: Vec<LintField>) -> LintInput {
        input("Obs", "ObsX", Tier::Log, fields)
    }

    /// Codes of every lint that fires for `event`, in emission order.
    fn fired(event: &LintInput) -> Vec<&'static str> {
        emit_lints(event).into_iter().map(|e| e.code).collect()
    }

    #[test]
    fn test_should_flag_l011_when_prefix_missing() {
        let event = input(
            "Obs",
            "RequestStarted",
            Tier::Log,
            vec![field("a", FieldKind::Attribute)],
        );
        assert!(fired(&event).contains(&"L011"));
    }

    #[test]
    fn test_should_flag_l009_when_no_fields() {
        let event = obs_x(Vec::new());
        assert!(fired(&event).contains(&"L009"));
    }

    #[test]
    fn test_should_flag_l001_when_label_high_cardinality() {
        let label = LintField {
            cardinality: Cardinality::High,
            ..field("user_id", FieldKind::Label)
        };
        assert!(fired(&obs_x(vec![label])).contains(&"L001"));
    }

    #[test]
    fn test_should_flag_l003_secret_on_log() {
        let secret = LintField {
            classification: Classification::Secret,
            ..field("token", FieldKind::Attribute)
        };
        assert!(fired(&obs_x(vec![secret])).contains(&"L003"));
    }

    #[test]
    fn test_should_flag_l014_when_wrong_name() {
        let misnamed = field("trc_id", FieldKind::TraceId);
        assert!(fired(&obs_x(vec![misnamed])).contains(&"L014"));
    }

    #[test]
    fn test_should_flag_l014_when_wrong_proto_type() {
        let mistyped = LintField {
            proto_type: Some(LintProtoType::Bytes),
            ..field("trace_id", FieldKind::TraceId)
        };
        assert!(fired(&obs_x(vec![mistyped])).contains(&"L014"));
    }

    #[test]
    fn test_should_pass_when_correlation_field_correct() {
        let well_formed = field("trace_id", FieldKind::TraceId);
        assert!(!fired(&obs_x(vec![well_formed])).contains(&"L014"));
    }

    #[test]
    fn test_should_detect_l013_collision() {
        let pairs = [("a.v1.X".to_owned(), 1u64), ("a.v1.Y".to_owned(), 1u64)];
        let errs = emit_cross_event_lints(&pairs);
        assert_eq!(errs.len(), 1);
        assert_eq!(errs[0].code, "L013");
    }

    #[test]
    fn test_should_skip_l013_when_unique() {
        let pairs = [("a.v1.X".to_owned(), 1u64), ("a.v1.Y".to_owned(), 2u64)];
        assert!(emit_cross_event_lints(&pairs).is_empty());
    }

    #[test]
    fn test_should_recognize_string_rust_token() {
        let cases = [
            ("String", LintProtoType::String),
            ("::std::string::String", LintProtoType::String),
            ("Vec < u8 >", LintProtoType::Bytes),
        ];
        for (token, expected) in cases {
            assert_eq!(LintProtoType::from_rust_token(token), expected);
        }
    }
}
523            LintProtoType::from_rust_token("Vec < u8 >"),
524            LintProtoType::Bytes
525        ));
526    }
527}