forge-guardrails 0.1.2

Foundation types for an LLM-agent workflow framework
Documentation
use std::sync::{Arc, Mutex};

use ::anyllm_proxy::backend::RateLimitHeaders as AnyLlmRateLimitHeaders;
use reqwest::header::HeaderMap;

use crate::clients::base::{LLMCallInfo, LLMRateLimitInfo};

/// Stores `info` in the shared cell, overwriting any previously recorded value.
///
/// If locking the mutex fails (`lock()` returns `Err`), the error is ignored
/// and `info` is dropped without being recorded.
pub(super) fn record_call_info_cell(cell: &Arc<Mutex<Option<LLMCallInfo>>>, info: LLMCallInfo) {
    match cell.lock() {
        Ok(mut slot) => {
            *slot = Some(info);
        }
        // Lock could not be acquired: nothing is recorded.
        Err(_) => {}
    }
}

/// Assembles an [`LLMCallInfo`] from the metadata of an in-process runtime call.
///
/// * `metadata` supplies the requested/mapped model names, the selected
///   backend, the backend kind (stored as its `Debug` rendering), the provider
///   id and the `used_responses_api` flag.
/// * `rate_limits` is converted via `rate_limit_info_from_anyllm`.
/// * `warnings` is stored as whatever `as_header_value()` returns.
/// * `response_model` is stored unchanged.
/// * `usage`, together with the mapped model name, is passed to
///   `estimate_cost_usd` to fill `estimated_cost_usd`.
///
/// `cache_status` is always `None` on this path.
pub(super) fn runtime_call_info(
    metadata: &::anyllm_proxy::runtime::ChatCompletionMetadata,
    rate_limits: &AnyLlmRateLimitHeaders,
    warnings: &anyllm_translate::TranslationWarnings,
    response_model: Option<String>,
    usage: Option<&anyllm_translate::openai::ChatUsage>,
) -> LLMCallInfo {
    let requested_model = Some(metadata.requested_model.clone());
    let selected_backend = Some(metadata.selected_backend.clone());
    let mapped_model = Some(metadata.mapped_model.clone());
    let backend_kind = Some(format!("{:?}", metadata.backend_kind));
    let provider_id = metadata.provider_id.clone();
    let degradation_warnings = warnings.as_header_value();
    let rate_limits = rate_limit_info_from_anyllm(rate_limits);
    // Cost is priced against the mapped model name.
    let estimated_cost_usd = estimate_cost_usd(Some(&metadata.mapped_model), usage);

    LLMCallInfo {
        requested_model,
        response_model,
        selected_backend,
        mapped_model,
        backend_kind,
        provider_id,
        used_responses_api: metadata.used_responses_api,
        degradation_warnings,
        cache_status: None,
        rate_limits,
        estimated_cost_usd,
    }
}

/// Assembles an [`LLMCallInfo`] from the response headers of a sidecar call.
///
/// * `requested_model` is recorded as the requested model.
/// * `headers` is read for `x-anyllm-cost-usd`, `x-anyllm-degradation`,
///   `x-anyllm-cache` and the rate-limit headers handled by
///   `rate_limit_info_from_sidecar`.
/// * `response_model` is stored unchanged.
/// * `usage` is only consulted when the cost header is unusable.
///
/// Cost resolution: a cost reported in `x-anyllm-cost-usd` wins when it parses
/// to a finite, non-negative number. Otherwise the cost is estimated with
/// `estimate_cost_usd`, priced against `response_model` when present and
/// `requested_model` otherwise.
///
/// The backend-related fields (`selected_backend`, `mapped_model`,
/// `backend_kind`, `provider_id`) are `None` and `used_responses_api` is
/// `false` on this path.
pub(super) fn sidecar_call_info(
    requested_model: &str,
    headers: &HeaderMap,
    response_model: Option<String>,
    usage: Option<&anyllm_translate::openai::ChatUsage>,
) -> LLMCallInfo {
    // `str::parse::<f64>` accepts "NaN", "inf" and negative numbers; none of
    // those is a meaningful cost, so discard them and fall back to the
    // usage-based estimate instead of propagating a nonsensical value.
    let header_cost = header_value(headers, "x-anyllm-cost-usd")
        .and_then(|raw| raw.parse::<f64>().ok())
        .filter(|cost| cost.is_finite() && *cost >= 0.0);
    let cost_model = response_model.as_deref().or(Some(requested_model));
    let estimated_cost_usd = header_cost.or_else(|| estimate_cost_usd(cost_model, usage));
    LLMCallInfo {
        requested_model: Some(requested_model.to_string()),
        response_model,
        selected_backend: None,
        mapped_model: None,
        backend_kind: None,
        provider_id: None,
        used_responses_api: false,
        degradation_warnings: header_value(headers, "x-anyllm-degradation"),
        cache_status: header_value(headers, "x-anyllm-cache"),
        rate_limits: rate_limit_info_from_sidecar(headers),
        estimated_cost_usd,
    }
}

/// Converts the proxy's rate-limit header struct into an [`LLMRateLimitInfo`]
/// by cloning each field into the field of the same name.
fn rate_limit_info_from_anyllm(rate_limits: &AnyLlmRateLimitHeaders) -> LLMRateLimitInfo {
    let requests_limit = rate_limits.requests_limit.clone();
    let requests_remaining = rate_limits.requests_remaining.clone();
    let requests_reset = rate_limits.requests_reset.clone();
    let tokens_limit = rate_limits.tokens_limit.clone();
    let tokens_remaining = rate_limits.tokens_remaining.clone();
    let tokens_reset = rate_limits.tokens_reset.clone();
    let retry_after = rate_limits.retry_after.clone();
    let organization_id = rate_limits.organization_id.clone();

    LLMRateLimitInfo {
        requests_limit,
        requests_remaining,
        requests_reset,
        tokens_limit,
        tokens_remaining,
        tokens_reset,
        retry_after,
        organization_id,
    }
}

/// Builds an [`LLMRateLimitInfo`] from raw response headers.
///
/// Each field is looked up with `header_value`, so a header that is missing,
/// not representable as a string, or blank after trimming yields `None`.
/// The request/token limits and the organization id come from the
/// `anthropic-*` headers; `retry_after` comes from `retry-after`.
fn rate_limit_info_from_sidecar(headers: &HeaderMap) -> LLMRateLimitInfo {
    let read = |name: &str| header_value(headers, name);

    let requests_limit = read("anthropic-ratelimit-requests-limit");
    let requests_remaining = read("anthropic-ratelimit-requests-remaining");
    let requests_reset = read("anthropic-ratelimit-requests-reset");
    let tokens_limit = read("anthropic-ratelimit-tokens-limit");
    let tokens_remaining = read("anthropic-ratelimit-tokens-remaining");
    let tokens_reset = read("anthropic-ratelimit-tokens-reset");
    let retry_after = read("retry-after");
    let organization_id = read("anthropic-organization-id");

    LLMRateLimitInfo {
        requests_limit,
        requests_remaining,
        requests_reset,
        tokens_limit,
        tokens_remaining,
        tokens_reset,
        retry_after,
        organization_id,
    }
}

/// Returns the trimmed value of header `name` as an owned `String`.
///
/// Yields `None` when the header is absent, when `to_str()` rejects its
/// value, or when the value is empty after trimming whitespace.
fn header_value(headers: &HeaderMap, name: &str) -> Option<String> {
    let raw = headers.get(name)?.to_str().ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}

/// Estimates the USD cost of a call from its token usage.
///
/// Returns `None` when `model` or `usage` is missing, or when the pricing
/// table has no entry for `model` (`price_for_model` returns `None`).
/// Otherwise returns the result of `cost_for_usage` for the prompt and
/// completion token counts.
pub(super) fn estimate_cost_usd(
    model: Option<&str>,
    usage: Option<&anyllm_translate::openai::ChatUsage>,
) -> Option<f64> {
    let (model_name, token_usage) = match (model, usage) {
        (Some(m), Some(u)) => (m, u),
        _ => return None,
    };
    let table = ::anyllm_proxy::cost::pricing();
    // Only produce a figure for models the pricing table actually knows.
    table.price_for_model(model_name).map(|_| {
        table.cost_for_usage(
            model_name,
            token_usage.prompt_tokens as u64,
            token_usage.completion_tokens as u64,
        )
    })
}

/// Locks the shared cell and forwards to `observe_stream_call_info_value`
/// with the same `response_model`, `cost_model` and `usage`.
///
/// If locking the mutex fails (`lock()` returns `Err`), the error is ignored
/// and nothing is updated.
pub(super) fn observe_stream_call_info(
    cell: &Arc<Mutex<Option<LLMCallInfo>>>,
    response_model: &str,
    cost_model: &str,
    usage: Option<&anyllm_translate::openai::ChatUsage>,
) {
    match cell.lock() {
        Ok(mut slot) => {
            observe_stream_call_info_value(&mut slot, response_model, cost_model, usage);
        }
        // Lock could not be acquired: leave the cell as it is.
        Err(_) => {}
    }
}

/// Fills in the response model and estimated cost on `info` if they are not
/// already set.
///
/// When `info` is `None` it is first initialised with `LLMCallInfo::default`.
/// Fields that already hold a value are left untouched: `response_model` is
/// only set when absent, and `estimated_cost_usd` is only computed (via
/// `estimate_cost_usd` with `cost_model` and `usage`) when absent.
pub(super) fn observe_stream_call_info_value(
    info: &mut Option<LLMCallInfo>,
    response_model: &str,
    cost_model: &str,
    usage: Option<&anyllm_translate::openai::ChatUsage>,
) {
    let entry = info.get_or_insert_with(LLMCallInfo::default);

    // Keep an existing response model; otherwise record the observed one.
    entry.response_model = entry
        .response_model
        .take()
        .or_else(|| Some(response_model.to_string()));

    // Keep an existing cost; otherwise try to estimate one from usage.
    entry.estimated_cost_usd = entry
        .estimated_cost_usd
        .or_else(|| estimate_cost_usd(Some(cost_model), usage));
}