bext_plugin_api/tracer.rs
1//! Tracer capability trait for distributed-tracing exporters.
2//!
3//! A `Tracer` plugin turns span events produced by the bext runtime (and
4//! other plugins) into whatever wire format its backend expects — OTLP,
5//! Datadog, Honeycomb, line-oriented stdout for development, and so on.
6//! See `plan/ecosystem/02-capabilities.md` for the design rationale and
7//! the list of reference implementations landing in E1.
8//!
9//! # Design rules
10//!
11//! - **No `opentelemetry` crate types in the trait.** Trace IDs, span IDs,
12//! attribute values, kinds, and status codes are all plain data. A non-otel
13//! implementation (like `@bext/tracer-stdout`) must be able to satisfy
14//! the trait without pulling in `opentelemetry` as a transitive dep. An
15//! OTLP-based implementation maps these types 1:1 into the otel SDK.
16//! - **Aligned with OpenTelemetry semantic conventions.** IDs are the same
17//! widths as the OTel wire format (16 bytes trace id, 8 bytes span id),
18//! status codes match OTel's `Unset`/`Ok`/`Error`, span kinds match
19//! OTel's `Internal`/`Server`/`Client`/`Producer`/`Consumer`, and
20//! attribute value types cover the OTel semantic-convention allowed set
21//! (string / bool / int / float / arrays). Plugin authors are expected
22//! to use the OTel semconv attribute keys (e.g. `http.request.method`,
23//! `http.response.status_code`); the trait does not enforce this.
24//! - **Explicit parent context, not thread-local.** A span is started with
25//! an explicit `Option<SpanHandle>` parent, so callers control propagation
26//! and the trait stays async-runtime-agnostic. Thread-local propagation,
27//! if wanted, is a concern for a helper layer above this trait, not the
28//! trait itself.
29//! - **Infallible recording, fallible flush.** `start_span` / `set_attribute`
30//! / `set_status` / `add_event` / `end_span` never return errors — a
31//! tracing failure must never break the caller. Only `flush` (which does
32//! real I/O) returns `Result`. Implementations that hit transient errors
33//! record them internally and surface them at flush time or via their
34//! own metrics.
//! - **Sync and object-safe.** Matches the rest of the plugin API: all
//!   methods are synchronous, there are no generic parameters on trait
//!   methods, and `SpanHandle` is a plain `Copy` value — so
//!   `dyn TracerPlugin` works across the WASM ABI and the in-process
//!   host-function table alike.
40
/// Opaque identifier for a span, handed out by [`TracerPlugin::start_span`].
///
/// The runtime passes the handle back to the tracer that issued it in order
/// to attach attributes, events, and a status to the still-active span. It is
/// a cheap `Copy` value: nothing more than the trace-id / span-id pair the
/// tracer already emits on the wire.
///
/// A handle is meaningful only to the tracer that created it. Handing it to a
/// different tracer is a programmer error, and implementations should treat
/// such unknown handles as a no-op rather than panicking.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct SpanHandle {
    /// Trace id: the W3C trace-context `trace-id`, 16 bytes, big-endian.
    pub trace_id: [u8; 16],
    /// Span id: the W3C trace-context `parent-id` / `span-id`, 8 bytes.
    pub span_id: [u8; 8],
}
58
59impl SpanHandle {
60 /// Invalid / "not sampled" handle. Tracer implementations return this
61 /// from `start_span` when a sampling decision rejects the span; the
62 /// runtime treats it as a no-op but still threads it through as the
63 /// parent of any child spans so the shape of the caller's code does
64 /// not change between sampled and non-sampled runs.
65 pub const INVALID: SpanHandle = SpanHandle {
66 trace_id: [0u8; 16],
67 span_id: [0u8; 8],
68 };
69
70 /// Returns `true` if the handle is the invalid/no-op sentinel.
71 pub fn is_invalid(&self) -> bool {
72 self.trace_id == [0u8; 16] && self.span_id == [0u8; 8]
73 }
74}
75
76/// OpenTelemetry-aligned span kind. Mirrors `opentelemetry::trace::SpanKind`
77/// without depending on it. Used by backends to colour spans in the UI
78/// and to apply kind-specific semantic conventions.
79#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
80#[serde(rename_all = "snake_case")]
81pub enum SpanKind {
82 /// Default. Internal operation within an application.
83 Internal,
84 /// An incoming request the application is handling.
85 Server,
86 /// An outgoing request to another service.
87 Client,
88 /// Async producer (e.g. enqueue a message).
89 Producer,
90 /// Async consumer (e.g. process a message from a queue).
91 Consumer,
92}
93
94impl Default for SpanKind {
95 fn default() -> Self {
96 SpanKind::Internal
97 }
98}
99
100/// OpenTelemetry-aligned span status. Mirrors `opentelemetry::trace::Status`
101/// without depending on it.
102#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
103#[serde(tag = "code", rename_all = "snake_case")]
104pub enum SpanStatus {
105 /// The span has not yet been given an explicit status.
106 Unset,
107 /// The operation the span represents completed successfully.
108 Ok,
109 /// The operation failed. `description` is a human-readable explanation
110 /// (per OTel, this field is only set when the status is `Error`).
111 Error { description: String },
112}
113
114impl Default for SpanStatus {
115 fn default() -> Self {
116 SpanStatus::Unset
117 }
118}
119
120/// Attribute value for spans and events. Matches the value types allowed
121/// by the OpenTelemetry semantic-convention specification: string, bool,
122/// signed 64-bit int, 64-bit float, and homogeneous arrays of each.
123///
124/// Kept as a plain enum (no `opentelemetry::Value` indirection) so that
125/// non-otel backends can encode it directly without a mapping layer.
126#[derive(Debug, Clone, PartialEq, serde::Serialize, serde::Deserialize)]
127#[serde(untagged)]
128pub enum AttrValue {
129 String(String),
130 Bool(bool),
131 I64(i64),
132 F64(f64),
133 StringArray(Vec<String>),
134 BoolArray(Vec<bool>),
135 I64Array(Vec<i64>),
136 F64Array(Vec<f64>),
137}
138
139impl From<&str> for AttrValue {
140 fn from(s: &str) -> Self {
141 AttrValue::String(s.to_owned())
142 }
143}
144
145impl From<String> for AttrValue {
146 fn from(s: String) -> Self {
147 AttrValue::String(s)
148 }
149}
150
151impl From<bool> for AttrValue {
152 fn from(b: bool) -> Self {
153 AttrValue::Bool(b)
154 }
155}
156
157impl From<i64> for AttrValue {
158 fn from(i: i64) -> Self {
159 AttrValue::I64(i)
160 }
161}
162
163impl From<u32> for AttrValue {
164 fn from(i: u32) -> Self {
165 AttrValue::I64(i as i64)
166 }
167}
168
169impl From<f64> for AttrValue {
170 fn from(f: f64) -> Self {
171 AttrValue::F64(f)
172 }
173}
174
175/// Arguments for [`TracerPlugin::start_span`]. A struct rather than a long
176/// positional parameter list so the trait stays forward-compatible: new
177/// optional fields can be added without breaking existing implementations
178/// that use `..Default::default()`.
179#[derive(Debug, Clone, Default)]
180pub struct SpanStart<'a> {
181 /// Span name. Should follow OTel semantic conventions (e.g., for a
182 /// server span handling `GET /users/:id`, use `"GET /users/:id"`).
183 pub name: &'a str,
184 /// Span kind. Defaults to [`SpanKind::Internal`].
185 pub kind: SpanKind,
186 /// Parent span handle, or `None` for a root span. Non-`None` parents
187 /// belonging to a different tracer are treated as unknown and the new
188 /// span is rooted.
189 pub parent: Option<SpanHandle>,
190 /// Initial attributes set atomically with span creation. Equivalent
191 /// to calling [`TracerPlugin::set_attribute`] for each pair after
192 /// `start_span`, but gives implementations a chance to include them
193 /// in the initial emission without a buffer flush.
194 pub attributes: Vec<(String, AttrValue)>,
195 /// Start time override, in nanoseconds since the Unix epoch. `None`
196 /// means "use the tracer's current clock" — this is the common case.
197 /// Set this when recording historical or externally-sourced spans.
198 pub start_time_unix_nanos: Option<u64>,
199}
200
201/// A point-in-time event recorded inside a span. Events do not have
202/// durations; they are the OTel equivalent of a structured log line
203/// scoped to a span.
204#[derive(Debug, Clone)]
205pub struct SpanEvent<'a> {
206 /// Event name (e.g. `"exception"`, `"cache.miss"`).
207 pub name: &'a str,
208 /// Event attributes. Semantic-convention keys apply here too; for
209 /// example, `exception.message` / `exception.type` on an `"exception"`
210 /// event.
211 pub attributes: Vec<(String, AttrValue)>,
212 /// Event time override, nanoseconds since the Unix epoch. `None` means
213 /// "use the tracer's current clock".
214 pub time_unix_nanos: Option<u64>,
215}
216
217/// A distributed-tracing exporter plugin.
218///
219/// Bext's observability hooks (and any plugin that wants to emit custom
220/// spans) call into the active `TracerPlugin` through host functions.
221/// There is at most one active `TracerPlugin` per site; multiple backends
222/// are achieved by chaining (a fan-out tracer that forwards to several
223/// children) rather than by running several implementations side-by-side.
224///
225/// Implementations must be cheap on the hot path. `start_span`,
226/// `set_attribute`, `set_status`, `add_event`, and `end_span` run inline
227/// with the operation they describe; they should buffer rather than
228/// perform network I/O. Actual export happens during [`TracerPlugin::flush`],
229/// which the runtime calls on an interval and at shutdown.
230pub trait TracerPlugin: Send + Sync {
231 /// Unique plugin identifier (e.g. `"tracer-otlp"`, `"tracer-stdout"`).
232 fn name(&self) -> &str;
233
234 /// Begin a new span and return a handle the caller can use to record
235 /// further state on it. A handle equal to [`SpanHandle::INVALID`]
236 /// means the span was not sampled; the caller should still thread it
237 /// through as the parent of any children so that sampled/non-sampled
238 /// codepaths stay structurally identical.
239 fn start_span(&self, span: SpanStart<'_>) -> SpanHandle;
240
241 /// Attach or overwrite a single attribute on a live span. Keys should
242 /// follow the OTel semantic conventions (e.g. `"http.request.method"`,
243 /// `"db.system"`). Calls against an unknown or invalid handle are a
244 /// no-op.
245 fn set_attribute(&self, span: SpanHandle, key: &str, value: AttrValue);
246
247 /// Set the span's final status. Most backends treat the last call
248 /// before [`TracerPlugin::end_span`] as authoritative. Calls against an
249 /// unknown or invalid handle are a no-op.
250 fn set_status(&self, span: SpanHandle, status: SpanStatus);
251
252 /// Record a point-in-time event on a live span. Calls against an
253 /// unknown or invalid handle are a no-op.
254 fn add_event(&self, span: SpanHandle, event: SpanEvent<'_>);
255
256 /// End a live span. `end_time_unix_nanos` is an optional override —
257 /// pass `None` to use the tracer's current clock. After this call
258 /// the handle must not be used again; implementations are free to
259 /// recycle its id space.
260 fn end_span(&self, span: SpanHandle, end_time_unix_nanos: Option<u64>);
261
262 /// Flush any buffered spans to the backend. The runtime calls this
263 /// on a periodic timer and once during graceful shutdown. A flush
264 /// that cannot reach its backend should return an error string so
265 /// the host can surface it through the observability layer; buffered
266 /// spans remain the plugin's responsibility (drop, retry, or spill
267 /// to disk, at the plugin's discretion).
268 fn flush(&self) -> Result<(), String>;
269}