Skip to main content

oxios_kernel/
observability.rs

1//! Observability — oxi-sdk 0.26.2 tracing, cost tracking, and audit.
2//!
3//! Provides global instances of oxi-sdk's `Tracer`, `CostTracker`, and `AuditLog`
4//! for use across the kernel. These complement the existing `metrics` module
5//! (Prometheus counters/gauges) with distributed tracing, per-agent cost
6//! accounting, and structured audit logging.
7//!
8//! # Architecture
9//!
10//! ```text
11//! Global instances (OnceLock):
12//!   tracer()     → Tracer      (distributed spans: AgentSpan, ToolSpan, etc.)
13//!   cost_tracker() → CostTracker (per-agent token/cost tracking)
14//!   audit_log()  → AuditLog    (structured security audit entries)
15//! ```
16//!
17//! # Usage
18//!
19//! ```no_run
20//! use oxios_kernel::observability;
21//!
22//! // Start a span for an agent execution
23//! let _span = observability::tracer().start("seed-execution", observability::SpanKind::Agent);
24//!
25//! // Log audit entry
26//! observability::audit_log()
27//!     .log(observability::AuditEntry::tool_execution(
28//!         "agent-1".into(),
29//!         "exec".into(),
30//!         "ls -la".into(),
31//!         true,
32//!         42,
33//!     ));
34//! ```
35
36use oxi_sdk::ModelRegistry;
37// Re-exports grouped by concern. All names are part of the kernel's public
38// surface (re-exported via `lib.rs`) — do not remove or rename without
39// auditing downstream consumers.
40//
41// `audit_trail::*` types (AuditTrail, AuditAction, HashDigest, ...) are
42// intentionally NOT re-exported here: they live in the dormant `audit_trail`
43// module of oxi-sdk and will be activated in Phase F (RFC-014).
44pub use oxi_sdk::{
45    // ── Audit (in-memory) ──────────────────────────────────────────────
    // Simple structured audit log. To be replaced by `audit_trail`
    // (blake3 chain) in Phase F.
48    AuditEntry,
49    AuditFilter,
50    AuditLog,
51    // ── Cost ───────────────────────────────────────────────────────────
52    // Per-agent token usage and cost accounting.
53    CostBreakdown,
54    CostSnapshot,
55    CostTracker,
56    CostTrackerConfig,
57    GlobalCostSnapshot,
58    // ── Tracing ────────────────────────────────────────────────────────
59    // Distributed spans for agent/tool/kernel operations.
60    Span,
61    SpanContext,
62    SpanGuard,
63    SpanId,
64    SpanKind,
65    SpanStatus,
66    TokenUsage,
67    TraceId,
68    Tracer,
69};
70use std::sync::Arc;
71
72/// Global Tracer instance.
73static TRACER: std::sync::OnceLock<Tracer> = std::sync::OnceLock::new();
74
75/// Global CostTracker instance.
76static COST_TRACKER: std::sync::OnceLock<CostTracker> = std::sync::OnceLock::new();
77
78/// Global AuditLog instance.
79static AUDIT_LOG: std::sync::OnceLock<AuditLog> = std::sync::OnceLock::new();
80
81/// Get the global Tracer.
82///
83/// The tracer is lazily initialized on first access.
84/// Used for distributed tracing of agent executions, tool calls, and kernel operations.
85pub fn tracer() -> &'static Tracer {
86    TRACER.get_or_init(Tracer::new)
87}
88
89/// Get the global CostTracker.
90///
91/// The cost tracker uses a minimal ModelRegistry for token cost estimation.
92/// Record per-agent token usage after each LLM call.
93pub fn cost_tracker() -> &'static CostTracker {
94    COST_TRACKER.get_or_init(|| {
95        let registry = Arc::new(ModelRegistry::from_static());
96        CostTracker::new(registry, CostTrackerConfig::default())
97    })
98}
99
100/// Get the global AuditLog.
101///
102/// The audit log stores structured security events (tool calls, access decisions,
103/// lifecycle events). Entries can be queried by agent, action type, or time range.
104pub fn audit_log() -> &'static AuditLog {
105    AUDIT_LOG.get_or_init(|| AuditLog::new(1024))
106}
107
108/// Initialize all observability instances.
109///
110/// Call during kernel startup to ensure all instances are warm.
111/// Non-blocking — just triggers lazy initialization.
112pub fn init() {
113    let _ = tracer();
114    let _ = cost_tracker();
115    let _ = audit_log();
116}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// A span can be opened on the global tracer and closed by dropping its guard.
    #[test]
    fn test_tracer_smoke() {
        let guard = tracer().start("test-span", SpanKind::Agent);
        // The span stays open until the guard is dropped.
        drop(guard);
    }

    /// Recording usage for an agent makes a snapshot for that agent available.
    #[test]
    fn test_cost_tracker_smoke() {
        let model = oxi_sdk::Model::new(
            "test/model",
            "Test",
            oxi_sdk::Api::OpenAiCompletions,
            "test",
            "https://test.com",
        );
        let usage = TokenUsage {
            input: 100,
            output: 50,
            cache_read: 0,
            cache_write: 0,
        };

        let tracker = cost_tracker();
        tracker.record("test-agent", &model, usage);

        assert!(tracker.snapshot("test-agent").is_some());
    }

    /// A logged lifecycle entry is returned when querying by its agent id.
    #[test]
    fn test_audit_log_smoke() {
        let log = audit_log();
        log.log(AuditEntry::lifecycle("test-agent".into(), "started".into()));

        let by_agent = AuditFilter {
            agent_id: Some("test-agent".to_string()),
            entry_type: None,
            after_ms: None,
        };
        let matches = log.query(by_agent);

        assert!(!matches.is_empty());
    }

    /// Calling `init` more than once must not panic.
    #[test]
    fn test_init_idempotent() {
        for _ in 0..2 {
            init();
        }
    }
}
171}