Skip to main content

obs_build/
lints.rs

//! Shared lint emission for the proto-first (`obs-build::codegen`) and
//! Rust-first (`obs-macros::derive_event`) authoring paths.
//!
//! Decision D8-1 (spec 95 § 5): both paths build a [`LintInput`] and
//! call [`emit_lints`]. Each [`LintError`] carries a stable code
//! (`L001`..`L014`) and the same human-readable message regardless of
//! which path produced it; the consumer (codegen.rs / derive_event.rs)
//! formats the errors into either Rust source text or `proc_macro2`
//! tokens.
//!
//! This module has no `proc_macro2`/`quote`/`syn` dep so it can be
//! linked from both an ordinary library crate (obs-build's codegen
//! path) and a proc-macro crate (obs-macros's derive path) without
//! pulling each other's heavy transitive deps.
15
16use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Severity, Tier};
17
/// Wire-level type category of a field, as seen by the type-aware lints.
///
/// The proto-first path fills this from `FieldDescriptorProto::r#type`; the
/// derive path fills it from the field's Rust type text via
/// [`LintProtoType::from_rust_token`]. A type that cannot be classified is
/// carried as `Other(_)`, for which the type-checking portion of L014 does
/// not fire.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LintProtoType {
    /// `string` / `&str` (with or without a lifetime) / `String`.
    String,
    /// `bytes` / `Vec<u8>` / `Bytes`.
    Bytes,
    /// Any integer or float scalar.
    Numeric,
    /// `bool`.
    Bool,
    /// Unrecognised type; holds the whitespace-stripped type text.
    /// Type-aware lints skip it.
    Other(String),
}

impl LintProtoType {
    /// Classify a Rust type from its textual (`ToTokens`) form.
    ///
    /// This is a heuristic that only covers the common cases:
    ///
    /// * `String`, any path ending in `::String` or `::str`, and `&str`
    ///   with an optional lifetime (`&'static str`, `&'a str`) map to
    ///   [`LintProtoType::String`];
    /// * `Vec<u8>` / `Bytes`, bare or path-qualified, map to
    ///   [`LintProtoType::Bytes`];
    /// * `bool` maps to [`LintProtoType::Bool`];
    /// * the fixed-width integers up to 64 bits, `usize`, `isize`, `f32`
    ///   and `f64` map to [`LintProtoType::Numeric`].
    ///
    /// Everything else is returned as [`LintProtoType::Other`] holding the
    /// input with all whitespace removed.
    #[must_use]
    pub fn from_rust_token(s: &str) -> Self {
        let normalised: String = s.chars().filter(|c| !c.is_whitespace()).collect();

        // The lifetime form has to be checked on the raw text: once the
        // whitespace is stripped, `&'a str` collapses into `&'astr` and the
        // lifetime can no longer be told apart from the type name.
        let is_string = normalised == "String"
            || normalised.ends_with("::String")
            || normalised == "&str"
            || normalised.ends_with("::str")
            || Self::is_lifetime_str_ref(s);
        if is_string {
            return Self::String;
        }

        let is_bytes = normalised == "Vec<u8>"
            || normalised.ends_with("::Vec<u8>")
            || normalised == "Bytes"
            || normalised.ends_with("::Bytes");
        if is_bytes {
            return Self::Bytes;
        }

        match normalised.as_str() {
            "bool" => return Self::Bool,
            "i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64"
            | "usize" | "isize" => return Self::Numeric,
            _ => {}
        }
        Self::Other(normalised)
    }

    /// Returns `true` when `raw` is a shared `str` reference that names a
    /// lifetime, e.g. `&'static str` or the token-stream spelling
    /// `& 'a str`. `&'a mut str` is deliberately not matched.
    fn is_lifetime_str_ref(raw: &str) -> bool {
        let Some(after_amp) = raw.trim().strip_prefix('&') else {
            return false;
        };
        let Some(after_tick) = after_amp.trim_start().strip_prefix('\'') else {
            return false;
        };
        let after_lifetime =
            after_tick.trim_start_matches(|c: char| c.is_alphanumeric() || c == '_');
        // Require a non-empty lifetime name, then nothing but `str`.
        after_lifetime.len() < after_tick.len() && after_lifetime.trim() == "str"
    }
}
84
85/// One field's worth of lint input.
86#[derive(Debug, Clone)]
87pub struct LintField {
88    /// Field name as authored (`String` for proto, the Rust ident for
89    /// the derive path).
90    pub name: String,
91    /// Effective field kind (defaulted to `Attribute` when absent).
92    pub kind: FieldKind,
93    /// Effective cardinality.
94    pub cardinality: Cardinality,
95    /// Effective classification.
96    pub classification: Classification,
97    /// `true` when a `Measurement`-kind field declares a metric kind
98    /// (counter / gauge / histogram). The lint pass only needs the
99    /// presence bit to fire L004; the actual kind/unit/bounds are
100    /// consumed downstream by the codegen.
101    pub has_metric: bool,
102    /// Proto/Rust type the field declares. `None` for callers that
103    /// cannot infer it; type-aware lints skip in that case.
104    pub proto_type: Option<LintProtoType>,
105}
106
107/// Input for the shared lint pass — one event's worth.
108#[derive(Debug, Clone)]
109pub struct LintInput {
110    /// Display name used in lint messages (e.g. `"ObsRequestStarted"`).
111    pub event_name: String,
112    /// Effective tier.
113    pub tier: Tier,
114    /// Workspace event prefix for L011 (default `"Obs"`).
115    pub event_prefix: String,
116    /// Fields in proto declaration order.
117    pub fields: Vec<LintField>,
118}
119
/// A single lint violation.
#[derive(Debug, Clone)]
pub struct LintError {
    /// Stable lint code (`"L001"`..`"L014"`).
    pub code: &'static str,
    /// Human-readable, multi-line message. It can be embedded verbatim in
    /// a `panic!("…")` call; embedded `\n` line breaks are kept as-is.
    pub message: String,
}

impl LintError {
    /// Pair a lint code with its already-formatted message.
    fn new(code: &'static str, message: String) -> Self {
        LintError { code, message }
    }
}
135
136/// Run every lint in the catalogue against one event. Returns one
137/// `LintError` per violation, in stable code order so generated output
138/// is deterministic. Spec 95 § 2.1 (D8-1).
139#[must_use]
140pub fn emit_lints(input: &LintInput) -> Vec<LintError> {
141    let mut out: Vec<LintError> = Vec::new();
142    check_l011(input, &mut out);
143    check_l009(input, &mut out);
144    for f in &input.fields {
145        check_per_field(input, f, &mut out);
146    }
147    out
148}
149
150/// Cross-event lints — currently L013 (schema_hash uniqueness within
151/// a codegen unit). The codegen path passes every event's `(full_name,
152/// schema_hash)`; the derive path runs once per event so it cannot
153/// detect collisions and skips L013.
154#[must_use]
155pub fn emit_cross_event_lints(events: &[(String, u64)]) -> Vec<LintError> {
156    let mut out = Vec::new();
157    for (i, a) in events.iter().enumerate() {
158        for b in events.iter().skip(i + 1) {
159            if a.1 == b.1 {
160                let msg = format!(
161                    "obs L013: schema_hash collision: `{a}` and `{b}` both hash to \
162                     {hash:#018x}.\nhelp: rename one event so the canonical descriptor differs \
163                     (any field rename / reorder will do).",
164                    a = a.0,
165                    b = b.0,
166                    hash = a.1,
167                );
168                out.push(LintError::new("L013", msg));
169            }
170        }
171    }
172    out
173}
174
175fn check_l011(input: &LintInput, out: &mut Vec<LintError>) {
176    if !input.event_name.starts_with(&input.event_prefix) {
177        let msg = format!(
178            "obs L011: event type name `{name}` must start with `{prefix}`\nnote: the `{prefix}` \
179             prefix gives every event type a unique visual identity at call sites.\nhelp: rename \
180             to `{prefix}{name}`.",
181            name = input.event_name,
182            prefix = input.event_prefix,
183        );
184        out.push(LintError::new("L011", msg));
185    }
186}
187
188fn check_l009(input: &LintInput, out: &mut Vec<LintError>) {
189    if input.fields.is_empty() {
190        let msg = format!(
191            "obs L009: event `{name}` has no fields\nnote: empty events make analytics joins \
192             meaningless and indicate an unfinished schema.\nhelp: declare at least one field or \
193             rethink whether the event should exist.",
194            name = input.event_name,
195        );
196        out.push(LintError::new("L009", msg));
197    }
198}
199
200fn check_per_field(input: &LintInput, f: &LintField, out: &mut Vec<LintError>) {
201    // L001: LABEL must be Low or Medium cardinality.
202    if matches!(f.kind, FieldKind::Label) && !f.cardinality.is_label_compatible() {
203        let msg = format!(
204            "obs L001: field `{name}` is LABEL but cardinality is not label-compatible\nnote: \
205             LABEL fields must be Low or Medium cardinality. High and Unbounded are illegal \
206             because they would explode the metric attribute set.\nhelp: change `kind: LABEL` to \
207             `kind: ATTRIBUTE` if the value is high-cardinality (an ATTRIBUTE is logged but never \
208             becomes a metric dim).",
209            name = f.name,
210        );
211        out.push(LintError::new("L001", msg));
212    }
213
214    // L002: PII fields must not be LABEL.
215    if matches!(f.kind, FieldKind::Label) && matches!(f.classification, Classification::Pii) {
216        let msg = format!(
217            "obs L002: field `{name}` is LABEL with classification PII\nnote: PII fields cannot \
218             be LABEL because labels become metric attributes that are kept indefinitely and leak \
219             into vendor backends.\nhelp: change kind to ATTRIBUTE so the value is logged + \
220             analytics-only, and the redactor can scrub it on the durable path.",
221            name = f.name,
222        );
223        out.push(LintError::new("L002", msg));
224    }
225
226    // L003: SECRET on a LOG/AUDIT tier event.
227    if matches!(f.classification, Classification::Secret)
228        && matches!(input.tier, Tier::Log | Tier::Audit)
229    {
230        let msg = format!(
231            "obs L003: field `{name}` is SECRET on a `{tier}` tier event\nnote: SECRET fields are \
232             forbidden on LOG/AUDIT tiers because those tiers persist payloads to long-retained \
233             sinks.\nhelp: move the field to a non-secret column, or move the event to \
234             TRACE/METRIC tier (which do not persist payload bytes).",
235            name = f.name,
236            tier = input.tier.as_str(),
237        );
238        out.push(LintError::new("L003", msg));
239    }
240
241    // L004: MEASUREMENT requires a metric kind.
242    if matches!(f.kind, FieldKind::Measurement) && !f.has_metric {
243        let msg = format!(
244            "obs L004: field `{name}` is MEASUREMENT without a metric kind\nnote: MEASUREMENT \
245             fields must declare a metric kind (counter / gauge / histogram) so the OTLP metric \
246             sink can dispatch correctly.\nhelp: annotate the proto field with a metric option \
247             such as kind=METRIC_KIND_COUNTER and a unit string.",
248            name = f.name,
249        );
250        out.push(LintError::new("L004", msg));
251    }
252
253    // L006: AUDIT tier forbids any PII / SECRET on any field.
254    if matches!(input.tier, Tier::Audit)
255        && matches!(
256            f.classification,
257            Classification::Pii | Classification::Secret
258        )
259    {
260        let cls = match f.classification {
261            Classification::Pii => "PII",
262            Classification::Secret => "SECRET",
263            _ => "classified",
264        };
265        let msg = format!(
266            "obs L006: AUDIT-tier event must not carry `{cls}` field `{name}`\nnote: AUDIT events \
267             ship to long-retained immutable sinks; classified data must be redacted at the \
268             source.\nhelp: drop the field or move the event to a non-AUDIT tier.",
269            name = f.name,
270        );
271        out.push(LintError::new("L006", msg));
272    }
273
274    // L007: snake_case field names.
275    if !is_snake_case(&f.name) {
276        let msg = format!(
277            "obs L007: field `{name}` is not snake_case\nnote: every obs field name maps 1:1 to a \
278             proto field, OTLP attribute, and analytics column; snake_case is required so the \
279             projection round-trips deterministically.\nhelp: rename to `{suggest}`.",
280            name = f.name,
281            suggest = to_snake_case(&f.name),
282        );
283        out.push(LintError::new("L007", msg));
284    }
285
286    // L012: field name must not shadow envelope-reserved name. Skip
287    // TRACE_ID / SPAN_ID / PARENT_SPAN_ID — those are *meant* to
288    // project onto envelope slots of the same name.
289    const RESERVED: &[&str] = &[
290        "ts_ns",
291        "service",
292        "instance",
293        "schema_hash",
294        "callsite_id",
295        "sev",
296        "tier",
297        "labels",
298        "payload",
299        "sampling_reason",
300    ];
301    if !matches!(
302        f.kind,
303        FieldKind::TraceId | FieldKind::SpanId | FieldKind::ParentSpanId
304    ) && RESERVED.contains(&f.name.as_str())
305    {
306        let msg = format!(
307            "obs L012: field `{name}` shadows envelope-reserved name\nnote: `{name}` is one of \
308             the obs envelope's first-class fields. A payload field by the same name would clash \
309             on the analytics surface.\nhelp: rename the field; if the intent was to project onto \
310             the envelope slot, set the appropriate kind (e.g. `kind: TRACE_ID`).",
311            name = f.name,
312        );
313        out.push(LintError::new("L012", msg));
314    }
315
316    // L014: TRACE_ID / SPAN_ID / PARENT_SPAN_ID kind fields must be
317    // named with the matching envelope slot AND have proto type
318    // `string`. Spec 95 § 2.2.
319    if let Some(expected) = expected_correlation_name(f.kind) {
320        if f.name != expected {
321            let msg = format!(
322                "obs L014: field `{name}` declares `kind` as a correlation slot but is not named \
323                 `{expected}`\nnote: codegen projects fields whose kind is TRACE_ID / SPAN_ID / \
324                 PARENT_SPAN_ID into the envelope slot of the same name; renaming keeps the \
325                 analytics column predictable.\nhelp: rename the field to `{expected}` or change \
326                 the `kind` to ATTRIBUTE.",
327                name = f.name,
328            );
329            out.push(LintError::new("L014", msg));
330        }
331        if let Some(t) = &f.proto_type
332            && !matches!(t, LintProtoType::String | LintProtoType::Other(_))
333        {
334            let actual = match t {
335                LintProtoType::Bytes => "bytes",
336                LintProtoType::Numeric => "numeric",
337                LintProtoType::Bool => "bool",
338                _ => "unknown",
339            };
340            let msg = format!(
341                "obs L014: field `{name}` has kind {kind} but proto type is {actual}; expected \
342                 string\nnote: correlation slots are projected into \
343                 `env.trace_id`/`env.span_id`/`env.parent_span_id` which are typed `string`; a \
344                 non-string proto type would require a runtime cast.\nhelp: change the field's \
345                 proto type to `string`.",
346                name = f.name,
347                kind = correlation_kind_label(f.kind),
348            );
349            out.push(LintError::new("L014", msg));
350        }
351    }
352}
353
354fn expected_correlation_name(k: FieldKind) -> Option<&'static str> {
355    match k {
356        FieldKind::TraceId => Some("trace_id"),
357        FieldKind::SpanId => Some("span_id"),
358        FieldKind::ParentSpanId => Some("parent_span_id"),
359        _ => None,
360    }
361}
362
363fn correlation_kind_label(k: FieldKind) -> &'static str {
364    match k {
365        FieldKind::TraceId => "TRACE_ID",
366        FieldKind::SpanId => "SPAN_ID",
367        FieldKind::ParentSpanId => "PARENT_SPAN_ID",
368        _ => "",
369    }
370}
371
/// Returns `true` when `s` is made of one or more non-empty segments of
/// ASCII lowercase letters / digits joined by single underscores.
///
/// That rules out the empty string, a leading or trailing `_`, a doubled
/// `__`, and any uppercase or non-ASCII character.
fn is_snake_case(s: &str) -> bool {
    // An empty string splits into one empty segment, and a leading,
    // trailing or doubled underscore also yields an empty segment.
    s.split('_').all(|segment| {
        !segment.is_empty()
            && segment
                .bytes()
                .all(|byte| matches!(byte, b'a'..=b'z' | b'0'..=b'9'))
    })
}
380
381fn to_snake_case(s: &str) -> String {
382    use heck::ToSnakeCase;
383    s.to_snake_case()
384}
385
386/// Severity helper: an enum's `as_str` impl (`Severity::as_str` etc.)
387/// already exists for `Tier`. The lint module re-exports a small marker
388/// trait here so callers don't need to depend on `obs_types`'s `as_str`
389/// directly. (Implementation note — both `Tier` and `Severity` already
390/// expose `as_str` in `obs_types`, so the explicit `_` parameter
391/// silences the unused-import lint.)
392#[doc(hidden)]
393pub fn _ensure_severity_link(_: Severity) {}
394
#[cfg(test)]
mod tests {
    use obs_proto::obs::v1::{Cardinality, Classification, FieldKind, Tier};

    use super::*;

    /// Build a `LintInput` for one event.
    fn input(prefix: &str, name: &str, tier: Tier, fields: Vec<LintField>) -> LintInput {
        LintInput {
            event_name: name.to_string(),
            tier,
            event_prefix: prefix.to_string(),
            fields,
        }
    }

    /// Build a field that violates nothing on its own: low cardinality,
    /// internal classification, no metric, `string` type.
    fn field(name: &str, kind: FieldKind) -> LintField {
        LintField {
            name: name.to_string(),
            kind,
            cardinality: Cardinality::Low,
            classification: Classification::Internal,
            has_metric: false,
            proto_type: Some(LintProtoType::String),
        }
    }

    /// `true` when any error in `errs` carries `code`.
    fn has_code(errs: &[LintError], code: &str) -> bool {
        errs.iter().any(|e| e.code == code)
    }

    #[test]
    fn test_should_emit_nothing_for_clean_event() {
        let i = input(
            "Obs",
            "ObsX",
            Tier::Log,
            vec![field("a", FieldKind::Attribute)],
        );
        assert!(emit_lints(&i).is_empty());
    }

    #[test]
    fn test_should_flag_l011_when_prefix_missing() {
        let i = input(
            "Obs",
            "RequestStarted",
            Tier::Log,
            vec![field("a", FieldKind::Attribute)],
        );
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L011"));
    }

    #[test]
    fn test_should_flag_l009_when_no_fields() {
        let i = input("Obs", "ObsX", Tier::Log, vec![]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L009"));
    }

    #[test]
    fn test_should_flag_l001_when_label_high_cardinality() {
        let mut f = field("user_id", FieldKind::Label);
        f.cardinality = Cardinality::High;
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L001"));
    }

    #[test]
    fn test_should_flag_l002_when_label_is_pii() {
        let mut f = field("email", FieldKind::Label);
        f.classification = Classification::Pii;
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L002"));
    }

    #[test]
    fn test_should_flag_l003_secret_on_log() {
        let mut f = field("token", FieldKind::Attribute);
        f.classification = Classification::Secret;
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L003"));
    }

    #[test]
    fn test_should_flag_l004_when_measurement_has_no_metric() {
        let f = field("latency_ms", FieldKind::Measurement);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L004"));
    }

    #[test]
    fn test_should_skip_l004_when_measurement_has_metric() {
        let mut f = field("latency_ms", FieldKind::Measurement);
        f.has_metric = true;
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(!has_code(&errs, "L004"));
    }

    #[test]
    fn test_should_flag_l006_when_audit_carries_pii() {
        let mut f = field("email", FieldKind::Attribute);
        f.classification = Classification::Pii;
        let i = input("Obs", "ObsX", Tier::Audit, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L006"));
    }

    #[test]
    fn test_should_flag_l007_when_not_snake_case() {
        let f = field("userId", FieldKind::Attribute);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L007"));
    }

    #[test]
    fn test_should_flag_l012_when_name_is_reserved() {
        let f = field("service", FieldKind::Attribute);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L012"));
    }

    #[test]
    fn test_should_flag_l014_when_wrong_name() {
        let f = field("trc_id", FieldKind::TraceId);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L014"));
    }

    #[test]
    fn test_should_flag_l014_when_wrong_proto_type() {
        let mut f = field("trace_id", FieldKind::TraceId);
        f.proto_type = Some(LintProtoType::Bytes);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(has_code(&errs, "L014"));
    }

    #[test]
    fn test_should_pass_when_correlation_field_correct() {
        let f = field("trace_id", FieldKind::TraceId);
        let i = input("Obs", "ObsX", Tier::Log, vec![f]);
        let errs = emit_lints(&i);
        assert!(!has_code(&errs, "L014"));
    }

    #[test]
    fn test_should_detect_l013_collision() {
        let pairs = vec![("a.v1.X".to_string(), 1u64), ("a.v1.Y".to_string(), 1u64)];
        let errs = emit_cross_event_lints(&pairs);
        assert_eq!(errs.len(), 1);
        assert_eq!(errs[0].code, "L013");
    }

    #[test]
    fn test_should_report_l013_once_per_colliding_pair() {
        let pairs = vec![
            ("a.v1.X".to_string(), 7u64),
            ("a.v1.Y".to_string(), 7u64),
            ("a.v1.Z".to_string(), 7u64),
        ];
        let errs = emit_cross_event_lints(&pairs);
        assert_eq!(errs.len(), 3);
        assert!(errs.iter().all(|e| e.code == "L013"));
    }

    #[test]
    fn test_should_skip_l013_when_unique() {
        let pairs = vec![("a.v1.X".to_string(), 1u64), ("a.v1.Y".to_string(), 2u64)];
        assert!(emit_cross_event_lints(&pairs).is_empty());
    }

    #[test]
    fn test_should_recognize_string_rust_token() {
        assert_eq!(
            LintProtoType::from_rust_token("String"),
            LintProtoType::String
        );
        assert_eq!(
            LintProtoType::from_rust_token("::std::string::String"),
            LintProtoType::String
        );
        assert!(matches!(
            LintProtoType::from_rust_token("Vec < u8 >"),
            LintProtoType::Bytes
        ));
    }

    #[test]
    fn test_should_recognize_scalar_and_unknown_rust_tokens() {
        assert_eq!(LintProtoType::from_rust_token("bool"), LintProtoType::Bool);
        assert_eq!(
            LintProtoType::from_rust_token("u64"),
            LintProtoType::Numeric
        );
        assert_eq!(
            LintProtoType::from_rust_token("Option < String >"),
            LintProtoType::Other("Option<String>".to_string())
        );
    }
}
508            LintProtoType::from_rust_token("Vec < u8 >"),
509            LintProtoType::Bytes
510        ));
511    }
512}