Skip to main content

trace_weft_core/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::collections::HashMap;
3
4pub mod redactor;
5
/// Attribute keys shared across the TraceWeft crates.
///
/// Kept in one place so the exporter (`trace-weft-otel`) and the OTLP ingest
/// adapter (`trace-weft-ingest`) reference the same constants rather than each
/// spelling out the string literals.
pub mod semconv {
    // --- OpenTelemetry GenAI semantic-convention keys ---

    /// Key for the GenAI provider name.
    pub const GEN_AI_PROVIDER_NAME: &str = "gen_ai.provider.name";

    /// Key for the requested model.
    pub const GEN_AI_REQUEST_MODEL: &str = "gen_ai.request.model";

    /// Key for the tool name.
    pub const GEN_AI_TOOL_NAME: &str = "gen_ai.tool.name";

    /// Key for the input token count.
    pub const GEN_AI_USAGE_INPUT_TOKENS: &str = "gen_ai.usage.input_tokens";

    /// Key for the output token count.
    pub const GEN_AI_USAGE_OUTPUT_TOKENS: &str = "gen_ai.usage.output_tokens";

    /// Key for the reasoning token count.
    ///
    /// NOTE(review): confirm this key against the current GenAI semantic
    /// conventions; it may be a TraceWeft extension rather than a standard key.
    pub const GEN_AI_USAGE_REASONING_TOKENS: &str = "gen_ai.usage.reasoning_tokens";

    // --- TraceWeft-specific keys ---

    /// TraceWeft span kind, serialized as the Rust variant name (e.g. `LlmCall`).
    ///
    /// Note that this spelling differs from the serde form of
    /// `TraceWeftSpanKind`, which is snake_case (e.g. `llm_call`).
    pub const TRACE_WEFT_SPAN_KIND: &str = "trace_weft.span.kind";
}
22
23#[cfg(any(test, feature = "test-util"))]
24pub mod test_util;
25
26// --- IDs ---
27
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
29#[serde(transparent)]
30pub struct TraceId(pub uuid::Uuid);
31
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
33#[serde(transparent)]
34pub struct SpanId(pub uuid::Uuid);
35
36#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
37#[serde(transparent)]
38pub struct RunId(pub uuid::Uuid);
39
40#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
41#[serde(transparent)]
42pub struct SessionId(pub uuid::Uuid);
43
44#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
45#[serde(transparent)]
46pub struct EventId(pub uuid::Uuid);
47
48#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
49#[serde(transparent)]
50pub struct BlobHash(pub String);
51
52/// Generate `new()` (fresh UUIDv7, time-ordered) and `Default` for a UUID
53/// newtype, so integrators don't have to depend on `uuid` directly.
54macro_rules! uuid_id {
55    ($($t:ident),+ $(,)?) => {
56        $(
57            impl $t {
58                /// Create a fresh, time-ordered identifier.
59                pub fn new() -> Self {
60                    Self(uuid::Uuid::now_v7())
61                }
62            }
63            impl Default for $t {
64                fn default() -> Self {
65                    Self::new()
66                }
67            }
68        )+
69    };
70}
71
72uuid_id!(TraceId, SpanId, RunId, SessionId, EventId);
73
74// --- Enums ---
75
76#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
77#[serde(rename_all = "snake_case")]
78pub enum TraceWeftSpanKind {
79    Workflow,
80    Agent,
81    LlmCall,
82    Embedding,
83    Retrieval,
84    Rerank,
85    Tool,
86    Memory,
87    State,
88    Planner,
89    Router,
90    Guardrail,
91    Evaluator,
92    Handoff,
93    Checkpoint,
94    Replay,
95    Error,
96}
97
98#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
99#[serde(rename_all = "snake_case")]
100pub enum SpanStatus {
101    Ok,
102    Error,
103    InProgress,
104    Skipped,
105    Cancelled,
106    PendingApproval,
107}
108
109#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
110#[serde(rename_all = "snake_case")]
111pub enum CapturePolicy {
112    MetadataOnly,
113    RedactedPreview,
114    FullContentLocalOnly,
115    FullContentExportable,
116}
117
118#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
119#[serde(rename_all = "snake_case")]
120pub enum RedactionStatus {
121    Unredacted,
122    Redacted,
123    RedactionFailed,
124}
125
126#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
127#[serde(rename_all = "snake_case")]
128pub enum ReplayMode {
129    Cached,
130    Reexecute,
131    Mocked,
132    Skipped,
133    BlockedSideEffect,
134}
135
136#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
137#[serde(rename_all = "snake_case")]
138pub enum SideEffectPolicy {
139    None,
140    ReadOnly,
141    IdempotentWrite,
142    ExternalWrite,
143    PaymentOrSensitiveAction,
144    Unknown,
145}
146
147/// Kind of an intra-span event — a point-in-time occurrence within a span's
148/// lifetime (a retry, a budget check, a guardrail trip, an REPL step), as
149/// opposed to a span, which has a duration.
150#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
151#[serde(rename_all = "snake_case")]
152pub enum EventKind {
153    LlmCall,
154    ToolCall,
155    ReplExec,
156    Rpc,
157    Budget,
158    Guardrail,
159    Retry,
160    Termination,
161    Log,
162    Custom,
163}
164
165// --- Structs ---
166
167#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
168pub struct TokenUsage {
169    pub input: u64,
170    pub output: u64,
171    pub reasoning: Option<u64>,
172    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
173    pub breakdown: HashMap<String, u64>,
174}
175
176#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
177pub struct CostEstimate {
178    pub currency: String,
179    pub amount: f64,
180}
181
182#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
183pub struct BlobRef {
184    pub hash: BlobHash,
185    pub content_type: String,
186    pub size_bytes: u64,
187    pub created_at_timestamp: u64, // Unix timestamp in ms
188    pub redaction_status: RedactionStatus,
189    pub encryption_status: String,
190    pub storage_backend: String,
191    pub preview_text_redacted: Option<String>,
192}
193
194#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
195pub struct SpanRecord {
196    pub trace_id: TraceId,
197    pub span_id: SpanId,
198    pub parent_span_id: Option<SpanId>,
199    pub run_id: RunId,
200    pub session_id: Option<SessionId>,
201    pub user_id_hash: Option<String>,
202    /// Tenant the span belongs to. Set server-side from the authenticated API
203    /// key at ingest (clients cannot assert it); `None` for local-first
204    /// single-tenant recording. Used to scope trace queries per project.
205    #[serde(default, skip_serializing_if = "Option::is_none")]
206    pub project_id: Option<String>,
207    pub span_kind: TraceWeftSpanKind,
208    pub name: String,
209    pub start_time: u64,       // ms timestamp
210    pub end_time: Option<u64>, // ms timestamp
211    pub status: SpanStatus,
212    pub status_message: Option<String>,
213    pub error_type: Option<String>,
214    pub error_message_redacted: Option<String>,
215
216    // Core Attributes
217    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
218    pub attributes: HashMap<String, serde_json::Value>,
219    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
220    pub otel_attributes: HashMap<String, serde_json::Value>,
221    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
222    pub openinference_attributes: HashMap<String, serde_json::Value>,
223
224    pub memory_state: Option<serde_json::Value>,
225
226    // LLM / Tool specific
227    pub input_ref: Option<BlobRef>,
228    pub output_ref: Option<BlobRef>,
229    pub prompt_template_id: Option<String>,
230    pub prompt_version: Option<String>,
231    pub model_provider: Option<String>,
232    pub model_name: Option<String>,
233    pub tool_name: Option<String>,
234    pub tool_schema_hash: Option<String>,
235
236    // Retrieval specific
237    pub retrieval_query_hash: Option<String>,
238    #[serde(default, skip_serializing_if = "Vec::is_empty")]
239    pub retrieved_document_refs: Vec<BlobRef>,
240
241    // Usage/Performance
242    pub token_usage: Option<TokenUsage>,
243    pub cost_estimate: Option<CostEstimate>,
244    pub latency_ms: Option<u64>,
245    pub retry_count: Option<u32>,
246    pub cache_hit: Option<bool>,
247
248    // Metadata
249    pub redaction_policy: CapturePolicy,
250    pub schema_version: String,
251}
252
253#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
254pub struct TraceRecord {
255    pub trace_id: TraceId,
256    pub run_id: RunId,
257    #[serde(default, skip_serializing_if = "Vec::is_empty")]
258    pub spans: Vec<SpanRecord>,
259}
260
261/// A point-in-time event recorded within a span. Events carry their parent
262/// span and an ordering `seq` so an event stream (retries, budget checks,
263/// guardrail trips, REPL steps) survives without collapsing into many tiny
264/// spans.
265#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
266pub struct EventRecord {
267    pub event_id: EventId,
268    pub trace_id: TraceId,
269    pub run_id: RunId,
270    pub parent_span_id: Option<SpanId>,
271    /// Monotonic ordering hint within the process/trace.
272    pub seq: u64,
273    pub event_kind: EventKind,
274    pub name: String,
275    pub timestamp: u64, // ms
276    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
277    pub attributes: HashMap<String, serde_json::Value>,
278    pub schema_version: String,
279}
280
281#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
282pub struct CheckpointRecord {
283    pub id: uuid::Uuid,
284    pub trace_id: TraceId,
285    pub span_id: SpanId,
286    pub sequence: u64,
287    pub state_hash: String,
288    pub input_hash: BlobHash,
289    pub output_hash: BlobHash,
290    pub side_effect_policy: SideEffectPolicy,
291    pub replay_mode: ReplayMode,
292    pub created_at: u64,
293}
294
295// --- Traits ---
296
297pub struct RedactionResult {
298    pub redacted_text: String,
299    pub status: RedactionStatus,
300}
301
302pub trait Redactor: Send + Sync {
303    fn redact(&self, input: &str) -> RedactionResult;
304}
305
306#[async_trait::async_trait]
307pub trait BlobStore: Send + Sync {
308    async fn put_blob(
309        &self,
310        hash: &BlobHash,
311        content_type: &str,
312        content: &[u8],
313    ) -> anyhow::Result<()>;
314    async fn get_blob(&self, hash: &BlobHash) -> anyhow::Result<Option<Vec<u8>>>;
315}
316
#[cfg(test)]
mod tests {
    use super::test_util::{
        sample_blob_ref, sample_checkpoint, sample_event, sample_event_minimal, sample_span_full,
        sample_span_minimal,
    };
    use super::*;
    use serde_json::json;

    /// Encode `value` as a JSON string and decode it straight back.
    fn roundtrip<T>(value: &T) -> T
    where
        T: serde::Serialize + serde::de::DeserializeOwned,
    {
        let encoded = serde_json::to_string(value).unwrap();
        serde_json::from_str(&encoded).unwrap()
    }

    /// Assert that the JSON object `wire` contains none of `keys`.
    fn assert_keys_absent(wire: &serde_json::Value, keys: &[&str]) {
        let object = wire.as_object().unwrap();
        for key in keys {
            assert!(
                !object.contains_key(*key),
                "{key} should be omitted when empty"
            );
        }
    }

    // --- Roundtrips of whole records ---

    #[test]
    fn span_record_full_roundtrip() {
        let original = sample_span_full();
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    #[test]
    fn span_record_minimal_roundtrip() {
        let original = sample_span_minimal();
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    #[test]
    fn trace_record_roundtrip() {
        let original = TraceRecord {
            trace_id: sample_span_full().trace_id,
            run_id: sample_span_full().run_id,
            spans: vec![sample_span_full(), sample_span_minimal()],
        };
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    #[test]
    fn checkpoint_record_roundtrip() {
        let original = sample_checkpoint();
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    #[test]
    fn event_record_roundtrip() {
        let original = sample_event();
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    #[test]
    fn event_record_minimal_roundtrip() {
        let original = sample_event_minimal();
        assert_eq!(roundtrip(&original), original);

        // Empty attributes are omitted from the wire format.
        let wire = serde_json::to_value(&original).unwrap();
        assert_keys_absent(&wire, &["attributes"]);
    }

    #[test]
    fn event_kind_uses_snake_case_wire_format() {
        let cases = [
            (EventKind::LlmCall, "llm_call"),
            (EventKind::ReplExec, "repl_exec"),
        ];
        for (kind, expected) in cases {
            assert_eq!(serde_json::to_value(kind).unwrap(), json!(expected));
        }
    }

    #[test]
    fn blob_ref_roundtrip() {
        let original = sample_blob_ref(7);
        let decoded = roundtrip(&original);
        assert_eq!(decoded, original);
    }

    // --- Newtype wire formats and constructors ---

    #[test]
    fn ids_serialize_as_plain_uuid_strings() {
        let wire = serde_json::to_value(TraceId(uuid::Uuid::from_u128(1))).unwrap();
        assert_eq!(wire, json!("00000000-0000-0000-0000-000000000001"));
    }

    #[test]
    fn id_constructors_make_distinct_v7_uuids() {
        let first = TraceId::new();
        let second = TraceId::new();
        assert_ne!(first, second);
        assert_eq!(first.0.get_version_num(), 7);

        // Default delegates to new(), so two defaults are distinct as well.
        assert_ne!(SpanId::default(), SpanId::default());
    }

    #[test]
    fn blob_hash_serializes_as_plain_string() {
        let wire = serde_json::to_value(&BlobHash("sha256:abc".into())).unwrap();
        assert_eq!(wire, json!("sha256:abc"));
    }

    // --- Enum wire formats ---

    #[test]
    fn enums_use_snake_case_wire_format() {
        // One multi-word variant per enum, paired with its expected wire name.
        let cases = [
            (serde_json::to_value(TraceWeftSpanKind::LlmCall), "llm_call"),
            (
                serde_json::to_value(SpanStatus::PendingApproval),
                "pending_approval",
            ),
            (
                serde_json::to_value(CapturePolicy::FullContentLocalOnly),
                "full_content_local_only",
            ),
            (
                serde_json::to_value(RedactionStatus::RedactionFailed),
                "redaction_failed",
            ),
            (
                serde_json::to_value(ReplayMode::BlockedSideEffect),
                "blocked_side_effect",
            ),
            (
                serde_json::to_value(SideEffectPolicy::PaymentOrSensitiveAction),
                "payment_or_sensitive_action",
            ),
        ];
        for (encoded, expected) in cases {
            assert_eq!(encoded.unwrap(), json!(expected));
        }
    }

    #[test]
    fn all_span_kinds_roundtrip() {
        use TraceWeftSpanKind::*;
        let kinds = [
            Workflow, Agent, LlmCall, Embedding, Retrieval, Rerank, Tool, Memory, State, Planner,
            Router, Guardrail, Evaluator, Handoff, Checkpoint, Replay, Error,
        ];
        kinds
            .iter()
            .for_each(|kind| assert_eq!(roundtrip(kind), *kind));
    }

    // --- Omission of empty collections and tolerance of missing fields ---

    #[test]
    fn empty_collections_are_omitted_from_json() {
        let wire = serde_json::to_value(sample_span_minimal()).unwrap();
        assert_keys_absent(
            &wire,
            &[
                "attributes",
                "otel_attributes",
                "openinference_attributes",
                "retrieved_document_refs",
            ],
        );
    }

    #[test]
    fn token_usage_empty_breakdown_is_omitted() {
        let original = TokenUsage {
            input: 1,
            output: 2,
            reasoning: None,
            breakdown: HashMap::new(),
        };
        let wire = serde_json::to_value(&original).unwrap();
        assert_keys_absent(&wire, &["breakdown"]);
        assert_eq!(roundtrip(&original), original);
    }

    #[test]
    fn span_record_deserializes_from_minimal_wire_payload() {
        // A producer that omits every optional field must still parse.
        let minimal = json!({
            "trace_id": "00000000-0000-0000-0000-000000000001",
            "span_id": "00000000-0000-0000-0000-000000000002",
            "run_id": "00000000-0000-0000-0000-000000000004",
            "span_kind": "tool",
            "name": "kb_search",
            "start_time": 1_715_000_000_000u64,
            "status": "in_progress",
            "redaction_policy": "metadata_only",
            "schema_version": "1.0"
        });
        let span: SpanRecord = serde_json::from_value(minimal).unwrap();

        // Required fields carry the values from the payload.
        assert_eq!(span.span_kind, TraceWeftSpanKind::Tool);
        assert_eq!(span.status, SpanStatus::InProgress);

        // Everything that was left out falls back to its empty form.
        assert!(span.parent_span_id.is_none());
        assert!(span.attributes.is_empty());
        assert!(span.retrieved_document_refs.is_empty());
        assert!(span.token_usage.is_none());
    }
}