use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::LazyLock;
use tokio::sync::RwLock;
#[derive(Debug, Clone, Copy)]
pub enum TokenFormat {
OpenAi,
Anthropic,
Google,
Cohere,
}
#[derive(Debug, Clone)]
pub struct LlmProvider {
pub name: &'static str,
pub token_format: TokenFormat,
}
pub struct LlmDomainRegistry {
domains: HashMap<String, LlmProvider>,
}
impl LlmDomainRegistry {
pub fn default_registry() -> Self {
let mut domains = HashMap::new();
let providers: &[(&str, &str, TokenFormat)] = &[
("api.openai.com", "openai", TokenFormat::OpenAi),
("api.anthropic.com", "anthropic", TokenFormat::Anthropic),
(
"generativelanguage.googleapis.com",
"google",
TokenFormat::Google,
),
("api.deepseek.com", "deepseek", TokenFormat::OpenAi),
("api.groq.com", "groq", TokenFormat::OpenAi),
("api.mistral.ai", "mistral", TokenFormat::OpenAi),
("api.cohere.com", "cohere", TokenFormat::Cohere),
("api.together.xyz", "together", TokenFormat::OpenAi),
("api.fireworks.ai", "fireworks", TokenFormat::OpenAi),
];
for &(domain, name, format) in providers {
domains.insert(
domain.to_string(),
LlmProvider {
name,
token_format: format,
},
);
}
Self { domains }
}
pub fn empty() -> Self {
Self {
domains: HashMap::new(),
}
}
pub fn with_custom_domains(mut self, extras: &[String]) -> Self {
for domain in extras {
self.domains.insert(
domain.clone(),
LlmProvider {
name: "custom",
token_format: TokenFormat::OpenAi,
},
);
}
self
}
pub fn lookup(&self, host: &str) -> Option<&LlmProvider> {
let host_only = host.split(':').next().unwrap_or(host);
self.domains.get(host_only)
}
#[allow(dead_code)]
pub fn is_empty(&self) -> bool {
self.domains.is_empty()
}
}
#[derive(Debug, Clone, Serialize)]
pub struct LlmEvent {
pub timestamp: String,
pub sandbox: String,
pub provider: String,
pub host: String,
pub method: String,
pub path: String,
pub model: Option<String>,
pub status: Option<u16>,
pub latency_ms: Option<u64>,
pub input_tokens: Option<u64>,
pub output_tokens: Option<u64>,
pub total_tokens: Option<u64>,
pub streaming: bool,
pub secret_injected: bool,
pub key_source: String,
}
pub fn extract_model_from_request(body: &[u8]) -> Option<String> {
#[derive(Deserialize)]
struct Partial {
model: Option<String>,
}
let cap = body.len().min(8192);
serde_json::from_slice::<Partial>(&body[..cap]).ok()?.model
}
pub fn extract_streaming_from_request(body: &[u8]) -> bool {
#[derive(Deserialize)]
struct Partial {
stream: Option<bool>,
}
let cap = body.len().min(8192);
serde_json::from_slice::<Partial>(&body[..cap])
.ok()
.and_then(|p| p.stream)
.unwrap_or(false)
}
#[derive(Debug, Clone, Default, Serialize)]
pub struct TokenUsage {
pub input_tokens: Option<u64>,
pub output_tokens: Option<u64>,
pub total_tokens: Option<u64>,
}
pub fn extract_token_usage(body: &[u8], format: &TokenFormat) -> TokenUsage {
let val: serde_json::Value = match serde_json::from_slice(body) {
Ok(v) => v,
Err(_) => return TokenUsage::default(),
};
match format {
TokenFormat::OpenAi => {
let usage = &val["usage"];
TokenUsage {
input_tokens: usage["prompt_tokens"].as_u64(),
output_tokens: usage["completion_tokens"].as_u64(),
total_tokens: usage["total_tokens"].as_u64(),
}
}
TokenFormat::Anthropic => {
let usage = &val["usage"];
let input = usage["input_tokens"].as_u64();
let output = usage["output_tokens"].as_u64();
TokenUsage {
input_tokens: input,
output_tokens: output,
total_tokens: match (input, output) {
(Some(i), Some(o)) => Some(i + o),
_ => None,
},
}
}
TokenFormat::Google => {
let meta = &val["usageMetadata"];
TokenUsage {
input_tokens: meta["promptTokenCount"].as_u64(),
output_tokens: meta["candidatesTokenCount"].as_u64(),
total_tokens: meta["totalTokenCount"].as_u64(),
}
}
TokenFormat::Cohere => {
let tokens = &val["meta"]["tokens"];
let input = tokens["input_tokens"].as_u64();
let output = tokens["output_tokens"].as_u64();
TokenUsage {
input_tokens: input,
output_tokens: output,
total_tokens: match (input, output) {
(Some(i), Some(o)) => Some(i + o),
_ => None,
},
}
}
}
}
pub static LLM_USAGE: LazyLock<RwLock<LlmUsageStore>> =
LazyLock::new(|| RwLock::new(LlmUsageStore::default()));
#[derive(Debug, Default, Clone, Serialize)]
pub struct LlmUsageStore {
pub by_sandbox: HashMap<String, Vec<LlmUsageEntry>>,
}
#[derive(Debug, Clone, Serialize)]
pub struct LlmUsageEntry {
pub provider: String,
pub model: String,
pub request_count: u64,
pub streaming_count: u64,
pub total_input_tokens: u64,
pub total_output_tokens: u64,
pub total_tokens: u64,
pub last_request: String,
}
impl LlmUsageStore {
pub fn record(&mut self, event: &LlmEvent) {
let model = event.model.clone().unwrap_or_else(|| "unknown".to_string());
let entries = self.by_sandbox.entry(event.sandbox.clone()).or_default();
if let Some(entry) = entries
.iter_mut()
.find(|e| e.provider == event.provider && e.model == model)
{
entry.request_count += 1;
if event.streaming {
entry.streaming_count += 1;
}
entry.total_input_tokens += event.input_tokens.unwrap_or(0);
entry.total_output_tokens += event.output_tokens.unwrap_or(0);
entry.total_tokens += event.total_tokens.unwrap_or(0);
entry.last_request.clone_from(&event.timestamp);
} else {
entries.push(LlmUsageEntry {
provider: event.provider.clone(),
model,
request_count: 1,
streaming_count: if event.streaming { 1 } else { 0 },
total_input_tokens: event.input_tokens.unwrap_or(0),
total_output_tokens: event.output_tokens.unwrap_or(0),
total_tokens: event.total_tokens.unwrap_or(0),
last_request: event.timestamp.clone(),
});
}
}
pub fn usage_for_sandbox(&self, sandbox: &str) -> Vec<LlmUsageEntry> {
self.by_sandbox.get(sandbox).cloned().unwrap_or_default()
}
pub fn all_usage(&self) -> &HashMap<String, Vec<LlmUsageEntry>> {
&self.by_sandbox
}
pub fn clear_sandbox(&mut self, sandbox: &str) {
self.by_sandbox.remove(sandbox);
}
}
pub async fn record_llm_event(event: &LlmEvent) {
LLM_USAGE.write().await.record(event);
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_default_registry_has_known_providers() {
let reg = LlmDomainRegistry::default_registry();
assert!(reg.lookup("api.openai.com").is_some());
assert!(reg.lookup("api.anthropic.com").is_some());
assert!(reg.lookup("generativelanguage.googleapis.com").is_some());
assert!(reg.lookup("api.deepseek.com").is_some());
assert!(reg.lookup("api.groq.com").is_some());
assert!(reg.lookup("api.mistral.ai").is_some());
assert!(reg.lookup("api.cohere.com").is_some());
assert!(reg.lookup("example.com").is_none());
}
#[test]
fn test_lookup_strips_port() {
let reg = LlmDomainRegistry::default_registry();
assert!(reg.lookup("api.openai.com:443").is_some());
}
#[test]
fn test_custom_domains() {
let reg = LlmDomainRegistry::default_registry()
.with_custom_domains(&["my-llm.internal.com".to_string()]);
assert!(reg.lookup("my-llm.internal.com").is_some());
assert_eq!(reg.lookup("my-llm.internal.com").unwrap().name, "custom");
}
#[test]
fn test_empty_registry() {
let reg = LlmDomainRegistry::empty();
assert!(reg.is_empty());
assert!(reg.lookup("api.openai.com").is_none());
}
#[test]
fn test_extract_model_openai() {
let body = br#"{"model":"gpt-4","messages":[{"role":"user","content":"hi"}]}"#;
assert_eq!(extract_model_from_request(body), Some("gpt-4".to_string()));
}
#[test]
fn test_extract_model_anthropic() {
let body = br#"{"model":"claude-3-opus-20240229","max_tokens":1024}"#;
assert_eq!(
extract_model_from_request(body),
Some("claude-3-opus-20240229".to_string())
);
}
#[test]
fn test_extract_model_missing() {
let body = br#"{"prompt":"hello"}"#;
assert_eq!(extract_model_from_request(body), None);
}
#[test]
fn test_extract_model_invalid_json() {
assert_eq!(extract_model_from_request(b"not json"), None);
}
#[test]
fn test_extract_streaming_true() {
let body = br#"{"model":"gpt-4","stream":true}"#;
assert!(extract_streaming_from_request(body));
}
#[test]
fn test_extract_streaming_false() {
let body = br#"{"model":"gpt-4","stream":false}"#;
assert!(!extract_streaming_from_request(body));
}
#[test]
fn test_extract_streaming_missing() {
let body = br#"{"model":"gpt-4"}"#;
assert!(!extract_streaming_from_request(body));
}
#[test]
fn test_extract_tokens_openai() {
let body = br#"{"usage":{"prompt_tokens":10,"completion_tokens":20,"total_tokens":30}}"#;
let usage = extract_token_usage(body, &TokenFormat::OpenAi);
assert_eq!(usage.input_tokens, Some(10));
assert_eq!(usage.output_tokens, Some(20));
assert_eq!(usage.total_tokens, Some(30));
}
#[test]
fn test_extract_tokens_anthropic() {
let body = br#"{"usage":{"input_tokens":5,"output_tokens":15}}"#;
let usage = extract_token_usage(body, &TokenFormat::Anthropic);
assert_eq!(usage.input_tokens, Some(5));
assert_eq!(usage.output_tokens, Some(15));
assert_eq!(usage.total_tokens, Some(20));
}
#[test]
fn test_extract_tokens_google() {
let body = br#"{"usageMetadata":{"promptTokenCount":8,"candidatesTokenCount":12,"totalTokenCount":20}}"#;
let usage = extract_token_usage(body, &TokenFormat::Google);
assert_eq!(usage.input_tokens, Some(8));
assert_eq!(usage.output_tokens, Some(12));
assert_eq!(usage.total_tokens, Some(20));
}
#[test]
fn test_extract_tokens_cohere() {
let body = br#"{"meta":{"tokens":{"input_tokens":3,"output_tokens":7}}}"#;
let usage = extract_token_usage(body, &TokenFormat::Cohere);
assert_eq!(usage.input_tokens, Some(3));
assert_eq!(usage.output_tokens, Some(7));
assert_eq!(usage.total_tokens, Some(10));
}
#[test]
fn test_extract_tokens_invalid_json() {
let usage = extract_token_usage(b"not json", &TokenFormat::OpenAi);
assert_eq!(usage.input_tokens, None);
assert_eq!(usage.output_tokens, None);
assert_eq!(usage.total_tokens, None);
}
#[test]
fn test_extract_tokens_missing_usage() {
let body = br#"{"id":"chatcmpl-123","choices":[]}"#;
let usage = extract_token_usage(body, &TokenFormat::OpenAi);
assert_eq!(usage.input_tokens, None);
}
#[test]
fn test_usage_store_record_and_query() {
let mut store = LlmUsageStore::default();
let event = LlmEvent {
timestamp: "2026-01-01T00:00:00Z".into(),
sandbox: "test-sb".into(),
provider: "openai".into(),
host: "api.openai.com".into(),
method: "POST".into(),
path: "/v1/chat/completions".into(),
model: Some("gpt-4".into()),
status: Some(200),
latency_ms: Some(500),
input_tokens: Some(10),
output_tokens: Some(20),
total_tokens: Some(30),
streaming: false,
secret_injected: true,
key_source: "sandbox".into(),
};
store.record(&event);
store.record(&event);
let entries = store.usage_for_sandbox("test-sb");
assert_eq!(entries.len(), 1);
assert_eq!(entries[0].request_count, 2);
assert_eq!(entries[0].total_input_tokens, 20);
assert_eq!(entries[0].total_output_tokens, 40);
assert_eq!(entries[0].total_tokens, 60);
assert_eq!(entries[0].streaming_count, 0);
}
#[test]
fn test_usage_store_streaming_count() {
let mut store = LlmUsageStore::default();
let event = LlmEvent {
timestamp: "2026-01-01T00:00:00Z".into(),
sandbox: "test-sb".into(),
provider: "openai".into(),
host: "api.openai.com".into(),
method: "POST".into(),
path: "/v1/chat/completions".into(),
model: Some("gpt-4".into()),
status: Some(200),
latency_ms: Some(500),
input_tokens: None,
output_tokens: None,
total_tokens: None,
streaming: true,
secret_injected: true,
key_source: "sandbox".into(),
};
store.record(&event);
let entries = store.usage_for_sandbox("test-sb");
assert_eq!(entries[0].streaming_count, 1);
}
#[test]
fn test_usage_store_multiple_models() {
let mut store = LlmUsageStore::default();
let base = LlmEvent {
timestamp: "2026-01-01T00:00:00Z".into(),
sandbox: "test-sb".into(),
provider: "openai".into(),
host: "api.openai.com".into(),
method: "POST".into(),
path: "/v1/chat/completions".into(),
model: Some("gpt-4".into()),
status: Some(200),
latency_ms: Some(500),
input_tokens: Some(10),
output_tokens: Some(20),
total_tokens: Some(30),
streaming: false,
secret_injected: true,
key_source: "sandbox".into(),
};
store.record(&base);
let mut turbo = base.clone();
turbo.model = Some("gpt-4-turbo".into());
store.record(&turbo);
let entries = store.usage_for_sandbox("test-sb");
assert_eq!(entries.len(), 2);
}
#[test]
fn test_usage_store_clear() {
let mut store = LlmUsageStore::default();
let event = LlmEvent {
timestamp: "2026-01-01T00:00:00Z".into(),
sandbox: "test-sb".into(),
provider: "openai".into(),
host: "api.openai.com".into(),
method: "POST".into(),
path: "/v1/chat/completions".into(),
model: Some("gpt-4".into()),
status: Some(200),
latency_ms: Some(500),
input_tokens: Some(10),
output_tokens: Some(20),
total_tokens: Some(30),
streaming: false,
secret_injected: true,
key_source: "sandbox".into(),
};
store.record(&event);
store.clear_sandbox("test-sb");
assert!(store.usage_for_sandbox("test-sb").is_empty());
}
#[test]
fn test_usage_store_unknown_sandbox() {
let store = LlmUsageStore::default();
assert!(store.usage_for_sandbox("nonexistent").is_empty());
}
}