Skip to main content

bext_plugin_api/
tracer.rs

//! Tracer capability trait for distributed-tracing exporters.
//!
//! A [`TracerPlugin`] turns span events produced by the bext runtime (and
//! other plugins) into whatever wire format its backend expects — OTLP,
//! Datadog, Honeycomb, line-oriented stdout for development, and so on.
//! See `plan/ecosystem/02-capabilities.md` for the design rationale and
//! the list of reference implementations landing in E1.
//!
//! # Design rules
//!
//! - **No `opentelemetry` crate types in the trait.** Trace IDs, span IDs,
//!   attribute values, kinds, and status codes are all plain data. A non-otel
//!   implementation (like `@bext/tracer-stdout`) must be able to satisfy
//!   the trait without pulling in `opentelemetry` as a transitive dep. An
//!   OTLP-based implementation maps these types 1:1 into the otel SDK.
//! - **Aligned with OpenTelemetry semantic conventions.** IDs are the same
//!   widths as the OTel wire format (16 bytes trace id, 8 bytes span id),
//!   status codes match OTel's `Unset`/`Ok`/`Error`, span kinds match
//!   OTel's `Internal`/`Server`/`Client`/`Producer`/`Consumer`, and
//!   attribute value types cover the OTel semantic-convention allowed set
//!   (string / bool / int / float / arrays). Plugin authors are expected
//!   to use the OTel semconv attribute keys (e.g. `http.request.method`,
//!   `http.response.status_code`); the trait does not enforce this.
//! - **Explicit parent context, not thread-local.** A span is started with
//!   an explicit `Option<SpanHandle>` parent, so callers control propagation
//!   and the trait stays async-runtime-agnostic. Thread-local propagation,
//!   if wanted, is a concern for a helper layer above this trait, not the
//!   trait itself.
//! - **Infallible recording, fallible flush.** `start_span` / `set_attribute`
//!   / `set_status` / `add_event` / `end_span` never return errors — a
//!   tracing failure must never break the caller. Only `flush` (which does
//!   real I/O) returns `Result`. Implementations that hit transient errors
//!   record them internally and surface them at flush time or via their
//!   own metrics.
//! - **Sync and object-safe.** Matches the rest of the plugin API: all
//!   methods are synchronous, there are no generic parameters on trait
//!   methods, and `SpanHandle` is a plain `Copy` value — so
//!   `dyn TracerPlugin` works across the WASM ABI and the in-process
//!   host-function table alike.
40
/// Opaque identifier for a span, handed out by [`TracerPlugin::start_span`].
///
/// The runtime passes the handle back to the tracer that issued it in order
/// to attach attributes, events, and a status to the span. It is a small
/// `Copy` value consisting of nothing more than the trace-id / span-id pair
/// the tracer already emits on the wire.
///
/// A handle is meaningful only to the tracer that created it. Handing it to
/// a different tracer is a programmer error; implementations should ignore
/// handles they do not recognise instead of panicking.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct SpanHandle {
    /// Trace id (W3C trace-context `trace-id`, 16 bytes big-endian).
    pub trace_id: [u8; 16],
    /// Span id (W3C trace-context `parent-id` / `span-id`, 8 bytes).
    pub span_id: [u8; 8],
}

impl SpanHandle {
    /// The all-zero "invalid / not sampled" sentinel.
    ///
    /// A tracer returns this from `start_span` when its sampling decision
    /// rejects the span. The runtime treats the handle as a no-op yet still
    /// passes it along as the parent of child spans, so calling code has the
    /// same shape whether or not a span was sampled.
    pub const INVALID: Self = Self {
        trace_id: [0; 16],
        span_id: [0; 8],
    };

    /// Returns `true` when this handle is exactly the [`Self::INVALID`]
    /// sentinel, i.e. both ids are all zeros.
    ///
    /// This is a sentinel comparison: a handle with only one of its two ids
    /// zeroed is *not* reported as invalid.
    pub fn is_invalid(&self) -> bool {
        *self == Self::INVALID
    }
}
75
76/// OpenTelemetry-aligned span kind. Mirrors `opentelemetry::trace::SpanKind`
77/// without depending on it. Used by backends to colour spans in the UI
78/// and to apply kind-specific semantic conventions.
79#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
80#[serde(rename_all = "snake_case")]
81pub enum SpanKind {
82    /// Default. Internal operation within an application.
83    Internal,
84    /// An incoming request the application is handling.
85    Server,
86    /// An outgoing request to another service.
87    Client,
88    /// Async producer (e.g. enqueue a message).
89    Producer,
90    /// Async consumer (e.g. process a message from a queue).
91    Consumer,
92}
93
94impl Default for SpanKind {
95    fn default() -> Self {
96        SpanKind::Internal
97    }
98}
99
100/// OpenTelemetry-aligned span status. Mirrors `opentelemetry::trace::Status`
101/// without depending on it.
102#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
103#[serde(tag = "code", rename_all = "snake_case")]
104pub enum SpanStatus {
105    /// The span has not yet been given an explicit status.
106    Unset,
107    /// The operation the span represents completed successfully.
108    Ok,
109    /// The operation failed. `description` is a human-readable explanation
110    /// (per OTel, this field is only set when the status is `Error`).
111    Error { description: String },
112}
113
114impl Default for SpanStatus {
115    fn default() -> Self {
116        SpanStatus::Unset
117    }
118}
119
120/// Attribute value for spans and events. Matches the value types allowed
121/// by the OpenTelemetry semantic-convention specification: string, bool,
122/// signed 64-bit int, 64-bit float, and homogeneous arrays of each.
123///
124/// Kept as a plain enum (no `opentelemetry::Value` indirection) so that
125/// non-otel backends can encode it directly without a mapping layer.
126#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
127#[serde(untagged)]
128pub enum AttrValue {
129    String(String),
130    Bool(bool),
131    I64(i64),
132    F64(f64),
133    StringArray(Vec<String>),
134    BoolArray(Vec<bool>),
135    I64Array(Vec<i64>),
136    F64Array(Vec<f64>),
137}
138
139impl From<&str> for AttrValue {
140    fn from(s: &str) -> Self {
141        AttrValue::String(s.to_owned())
142    }
143}
144
145impl From<String> for AttrValue {
146    fn from(s: String) -> Self {
147        AttrValue::String(s)
148    }
149}
150
151impl From<bool> for AttrValue {
152    fn from(b: bool) -> Self {
153        AttrValue::Bool(b)
154    }
155}
156
157impl From<i64> for AttrValue {
158    fn from(i: i64) -> Self {
159        AttrValue::I64(i)
160    }
161}
162
163impl From<u32> for AttrValue {
164    fn from(i: u32) -> Self {
165        AttrValue::I64(i as i64)
166    }
167}
168
169impl From<f64> for AttrValue {
170    fn from(f: f64) -> Self {
171        AttrValue::F64(f)
172    }
173}
174
175/// Arguments for [`TracerPlugin::start_span`]. A struct rather than a long
176/// positional parameter list so the trait stays forward-compatible: new
177/// optional fields can be added without breaking existing implementations
178/// that use `..Default::default()`.
179#[derive(Debug, Clone, Default)]
180pub struct SpanStart<'a> {
181    /// Span name. Should follow OTel semantic conventions (e.g., for a
182    /// server span handling `GET /users/:id`, use `"GET /users/:id"`).
183    pub name: &'a str,
184    /// Span kind. Defaults to [`SpanKind::Internal`].
185    pub kind: SpanKind,
186    /// Parent span handle, or `None` for a root span. Non-`None` parents
187    /// belonging to a different tracer are treated as unknown and the new
188    /// span is rooted.
189    pub parent: Option<SpanHandle>,
190    /// Initial attributes set atomically with span creation. Equivalent
191    /// to calling [`TracerPlugin::set_attribute`] for each pair after
192    /// `start_span`, but gives implementations a chance to include them
193    /// in the initial emission without a buffer flush.
194    pub attributes: Vec<(String, AttrValue)>,
195    /// Start time override, in nanoseconds since the Unix epoch. `None`
196    /// means "use the tracer's current clock" — this is the common case.
197    /// Set this when recording historical or externally-sourced spans.
198    pub start_time_unix_nanos: Option<u64>,
199}
200
201/// A point-in-time event recorded inside a span. Events do not have
202/// durations; they are the OTel equivalent of a structured log line
203/// scoped to a span.
204#[derive(Debug, Clone)]
205pub struct SpanEvent<'a> {
206    /// Event name (e.g. `"exception"`, `"cache.miss"`).
207    pub name: &'a str,
208    /// Event attributes. Semantic-convention keys apply here too; for
209    /// example, `exception.message` / `exception.type` on an `"exception"`
210    /// event.
211    pub attributes: Vec<(String, AttrValue)>,
212    /// Event time override, nanoseconds since the Unix epoch. `None` means
213    /// "use the tracer's current clock".
214    pub time_unix_nanos: Option<u64>,
215}
216
217/// A distributed-tracing exporter plugin.
218///
219/// Bext's observability hooks (and any plugin that wants to emit custom
220/// spans) call into the active `TracerPlugin` through host functions.
221/// There is at most one active `TracerPlugin` per site; multiple backends
222/// are achieved by chaining (a fan-out tracer that forwards to several
223/// children) rather than by running several implementations side-by-side.
224///
225/// Implementations must be cheap on the hot path. `start_span`,
226/// `set_attribute`, `set_status`, `add_event`, and `end_span` run inline
227/// with the operation they describe; they should buffer rather than
228/// perform network I/O. Actual export happens during [`TracerPlugin::flush`],
229/// which the runtime calls on an interval and at shutdown.
230pub trait TracerPlugin: Send + Sync {
231    /// Unique plugin identifier (e.g. `"tracer-otlp"`, `"tracer-stdout"`).
232    fn name(&self) -> &str;
233
234    /// Begin a new span and return a handle the caller can use to record
235    /// further state on it. A handle equal to [`SpanHandle::INVALID`]
236    /// means the span was not sampled; the caller should still thread it
237    /// through as the parent of any children so that sampled/non-sampled
238    /// codepaths stay structurally identical.
239    fn start_span(&self, span: SpanStart<'_>) -> SpanHandle;
240
241    /// Attach or overwrite a single attribute on a live span. Keys should
242    /// follow the OTel semantic conventions (e.g. `"http.request.method"`,
243    /// `"db.system"`). Calls against an unknown or invalid handle are a
244    /// no-op.
245    fn set_attribute(&self, span: SpanHandle, key: &str, value: AttrValue);
246
247    /// Set the span's final status. Most backends treat the last call
248    /// before [`TracerPlugin::end_span`] as authoritative. Calls against an
249    /// unknown or invalid handle are a no-op.
250    fn set_status(&self, span: SpanHandle, status: SpanStatus);
251
252    /// Record a point-in-time event on a live span. Calls against an
253    /// unknown or invalid handle are a no-op.
254    fn add_event(&self, span: SpanHandle, event: SpanEvent<'_>);
255
256    /// End a live span. `end_time_unix_nanos` is an optional override —
257    /// pass `None` to use the tracer's current clock. After this call
258    /// the handle must not be used again; implementations are free to
259    /// recycle its id space.
260    fn end_span(&self, span: SpanHandle, end_time_unix_nanos: Option<u64>);
261
262    /// Flush any buffered spans to the backend. The runtime calls this
263    /// on a periodic timer and once during graceful shutdown. A flush
264    /// that cannot reach its backend should return an error string so
265    /// the host can surface it through the observability layer; buffered
266    /// spans remain the plugin's responsibility (drop, retry, or spill
267    /// to disk, at the plugin's discretion).
268    fn flush(&self) -> Result<(), String>;
269}