/// Candidate attribute keys for the prompt/input token count, in priority
/// order — when combined via `coalesce_extract*`, the first key present in
/// the span's attributes wins. Covers `gen_ai.*` and `llm.*` style keys plus
/// bare fallbacks.
pub const INPUT_TOKEN_KEYS: &[&str] = &[
"gen_ai.usage.input_tokens",
"gen_ai.usage.prompt_tokens",
"llm.usage.prompt_tokens",
"llm.token_count.prompt",
"prompt_tokens",
"input_tokens",
];
/// Candidate attribute keys for the completion/output token count, in
/// priority order (see [`INPUT_TOKEN_KEYS`] for the lookup semantics).
pub const OUTPUT_TOKEN_KEYS: &[&str] = &[
"gen_ai.usage.output_tokens",
"gen_ai.usage.completion_tokens",
"llm.usage.completion_tokens",
"llm.token_count.completion",
"completion_tokens",
"output_tokens",
];
/// Candidate attribute keys for tokens written to a prompt cache, in
/// priority order.
pub const CACHE_CREATION_TOKEN_KEYS: &[&str] = &[
"gen_ai.usage.cache_creation.input_tokens",
"gen_ai.usage.cache_creation_input_tokens",
"cache_creation_input_tokens",
"cache_creation_tokens",
];
/// Candidate attribute keys for tokens served from a prompt cache, in
/// priority order.
pub const CACHE_READ_TOKEN_KEYS: &[&str] = &[
"gen_ai.usage.cache_read.input_tokens",
"gen_ai.usage.cache_read_input_tokens",
"cache_read_input_tokens",
"cache_read_tokens",
];
/// Candidate attribute keys for the model identifier, in priority order.
/// The request model is preferred over the response model when both exist.
pub const MODEL_KEYS: &[&str] = &[
"gen_ai.request.model",
"gen_ai.response.model",
"llm.request.model",
"llm.model_name",
"model",
];
/// Candidate attribute keys for the provider/system name (e.g. the value of
/// `gen_ai.system`), in priority order.
pub const SYSTEM_KEYS: &[&str] = &[
"gen_ai.provider.name",
"gen_ai.system",
"llm.system",
"llm.vendor",
];
/// Keys whose mere presence marks a span as an LLM span; used by
/// [`llm_span_guard`] to build the `IS NOT NULL` filter clauses.
pub const LLM_SPAN_MARKER_KEYS: &[&str] = &[
"gen_ai.system",
"gen_ai.provider.name",
"llm.system",
"llm.vendor",
"llm.request.model",
];
/// `openinference.span.kind` values that count as LLM spans for
/// [`llm_span_guard`].
pub const OPENINFERENCE_LLM_KINDS: &[&str] = &["LLM", "EMBEDDING"];
/// Builds a SQL expression returning the first non-NULL value extracted from
/// the JSON column `attributes_col`, trying `keys` in order.
///
/// With two or more keys the expression is `COALESCE(json_extract(...), ...)`;
/// with exactly one key the bare `json_extract` call is emitted, because
/// SQLite's `COALESCE()` requires at least two arguments and would otherwise
/// fail at statement-prepare time.
///
/// # Panics
/// Panics if `keys` is empty, or if any key contains `'` or `"` (either
/// would corrupt the generated JSON path / SQL string literal).
pub fn coalesce_extract(attributes_col: &str, keys: &[&str]) -> String {
    coalesce_inner(attributes_col, keys, None)
}

/// Like [`coalesce_extract`], but wraps each extraction in
/// `CAST(... AS {cast})` so the coalesced result has a uniform SQL type.
///
/// # Panics
/// Same conditions as [`coalesce_extract`].
pub fn coalesce_extract_cast(attributes_col: &str, keys: &[&str], cast: &str) -> String {
    coalesce_inner(attributes_col, keys, Some(cast))
}

/// Shared implementation for [`coalesce_extract`] and
/// [`coalesce_extract_cast`].
fn coalesce_inner(attributes_col: &str, keys: &[&str], cast: Option<&str>) -> String {
    assert!(!keys.is_empty(), "coalesce over empty key list");
    let parts: Vec<String> = keys
        .iter()
        .map(|k| {
            // A quote character inside a key would escape the JSON path
            // segment (or the surrounding SQL string literal); reject it up
            // front rather than emit malformed SQL.
            assert!(
                !k.contains('"') && !k.contains('\''),
                "attribute key {:?} contains a quote character",
                k
            );
            let extract = format!("json_extract({}, '$.\"{}\"')", attributes_col, k);
            match cast {
                Some(c) => format!("CAST({} AS {})", extract, c),
                None => extract,
            }
        })
        .collect();
    if parts.len() == 1 {
        // SQLite's COALESCE() requires at least two arguments; with a single
        // key the wrapper would be invalid SQL (and is unnecessary anyway).
        parts.into_iter().next().expect("checked non-empty")
    } else {
        format!("COALESCE({})", parts.join(", "))
    }
}
/// Builds a parenthesized SQL predicate that is true when a span looks like
/// an LLM span: any [`LLM_SPAN_MARKER_KEYS`] attribute is present, OR the
/// `openinference.span.kind` attribute is one of
/// [`OPENINFERENCE_LLM_KINDS`]. Clauses are joined with `OR`.
pub fn llm_span_guard(attributes_col: &str) -> String {
    // Quoted, comma-separated list of accepted OpenInference span kinds.
    let kind_list = OPENINFERENCE_LLM_KINDS
        .iter()
        .map(|kind| format!("'{}'", kind))
        .collect::<Vec<_>>()
        .join(", ");
    let kind_clause = format!(
        "json_extract({}, '$.\"openinference.span.kind\"') IN ({})",
        attributes_col, kind_list
    );
    // One presence check per marker key, then the span-kind check last.
    let predicate = LLM_SPAN_MARKER_KEYS
        .iter()
        .map(|key| format!("json_extract({}, '$.\"{}\"') IS NOT NULL", attributes_col, key))
        .chain(std::iter::once(kind_clause))
        .collect::<Vec<_>>()
        .join(" OR ");
    format!("({})", predicate)
}
#[cfg(test)]
mod tests {
    use super::*;
    // Keys must be quoted as JSON path segments ('$."key"') so keys that
    // contain dots (like "a.b") are treated as a single literal key rather
    // than a nested path.
    #[test]
    fn coalesce_extract_formats_keys() {
        let sql = coalesce_extract("attributes", &["a.b", "c"]);
        assert_eq!(
            sql,
            "COALESCE(json_extract(attributes, '$.\"a.b\"'), json_extract(attributes, '$.\"c\"'))"
        );
    }
    // The CAST variant must wrap every individual json_extract call (not the
    // whole COALESCE), so each candidate value is cast before coalescing.
    #[test]
    fn coalesce_extract_cast_wraps_each_clause() {
        let sql = coalesce_extract_cast("attributes", INPUT_TOKEN_KEYS, "INTEGER");
        assert!(sql.starts_with("COALESCE(CAST(json_extract(attributes, "));
        assert!(sql.contains("gen_ai.usage.input_tokens"));
        assert!(sql.contains("input_tokens"));
        assert!(sql.contains("AS INTEGER"));
    }
    // The guard must include both the marker-key presence checks and the
    // openinference.span.kind IN (...) clause, wrapped in one paren group so
    // it composes safely with surrounding AND conditions.
    #[test]
    fn llm_span_guard_includes_openinference_kinds() {
        let sql = llm_span_guard("attributes");
        assert!(sql.contains("gen_ai.system"));
        assert!(sql.contains("llm.request.model"));
        assert!(sql.contains("openinference.span.kind"));
        assert!(sql.contains("'LLM'"));
        assert!(sql.contains("'EMBEDDING'"));
        assert!(sql.starts_with('(') && sql.ends_with(')'));
    }
}