use std::collections::HashMap as StdHashMap;
use std::sync::{Mutex as StdMutex, OnceLock as StdOnceLock};
use super::auth::apply_auth_headers;
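/// Cache key: the resolved base URL paired with the model name.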
type ContextWindowKey = (String, String);
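/// Process-wide cache of lookups; `None` results are cached too, so a failed
/// probe is not retried for the lifetime of the process.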
type ContextWindowCache = StdMutex<StdHashMap<ContextWindowKey, Option<usize>>>;
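/// Returns the global cache, initializing it on first use (`OnceLock` makes
/// the initialization thread-safe).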
fn context_window_cache() -> &'static ContextWindowCache {
    static CACHE: StdOnceLock<ContextWindowCache> = StdOnceLock::new();
    CACHE.get_or_init(|| StdMutex::new(StdHashMap::new()))
}
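/// Returns the maximum context window (in tokens) for `model` on `provider`,
/// consulting the process-wide cache before probing the provider. Results,
/// including failures, are cached per (base URL, model) pair.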
pub async fn fetch_provider_max_context(
    provider: &str,
    model: &str,
    api_key: &str,
) -> Option<usize> {
    let pdef = crate::llm_config::provider_config(provider);
    let base_url = pdef
        .as_ref()
        .map(crate::llm_config::resolve_base_url)
        .unwrap_or_else(|| "https://api.openai.com/v1".to_string());
    let cache_key = (base_url.clone(), model.to_string());
    // Fast path: return any cached result, including cached lookup failures.
    if let Ok(cache) = context_window_cache().lock() {
        if let Some(value) = cache.get(&cache_key) {
            return *value;
        }
    }
    let fetched = fetch_provider_max_context_uncached(provider, model, api_key, &base_url).await;
    // A poisoned lock is treated as a cache miss rather than a panic.
    if let Ok(mut cache) = context_window_cache().lock() {
        cache.insert(cache_key, fetched);
    }
    fetched
}
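/// Static context-window table for well-known model families. Checks are
/// ordered most-specific-first so that e.g. "gpt-4-turbo" (128k) matches
/// before the generic "gpt-4" fallback (8k).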
fn known_model_context_window(model: &str) -> Option<usize> {
    if model.starts_with("claude-") {
        return Some(200_000);
    }
    if model.starts_with("gpt-4o")
        || model.starts_with("gpt-4.1")
        || model.starts_with("chatgpt-")
        || model.starts_with("gpt-4-turbo")
        || model == "gpt-4-0125-preview"
        || model == "gpt-4-1106-preview"
    {
        return Some(128_000);
    }
    if model.starts_with("gpt-4") {
        return Some(8_192);
    }
    if model.starts_with("gpt-3.5-turbo") {
        return Some(16_385);
    }
    if model.starts_with("o1") || model.starts_with("o3") || model.starts_with("o4") {
        return Some(200_000);
    }
    if model.contains("gemini-2") || model.contains("gemini-1.5") {
        return Some(1_000_000);
    }
    if model.contains("gemini") {
        return Some(128_000);
    }
    None
}
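/// Queries Ollama's `/api/show` endpoint for the model's context length,
/// falling back to the locally configured `num_ctx` when the response does
/// not expose one.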
async fn fetch_ollama_context_window(model: &str, base_url: &str) -> Option<usize> {
    let client = crate::llm::shared_utility_client();
    let url = format!("{}/api/show", base_url.trim_end_matches('/'));
    let body = serde_json::json!({"name": model});
    let response = client
        .post(&url)
        .json(&body)
        .timeout(std::time::Duration::from_secs(5))
        .send()
        .await
        .ok()?;
    if !response.status().is_success() {
        return None;
    }
    let json: serde_json::Value = response.json().await.ok()?;
    if let Some(n) = json
        .pointer("/model_info/general.context_length")
        .or_else(|| json.pointer("/model_info/context_length"))
        .and_then(|v| v.as_u64())
    {
        return Some(n as usize);
    }
    Some(super::ollama::ollama_runtime_settings_from_env().num_ctx as usize)
}
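/// Probes an OpenAI-compatible models listing for the context window.
/// Different servers expose it under different keys (`max_model_len`,
/// `context_length`, `max_context_length`, `n_ctx`, or a nested
/// `top_provider.context_length`), so each is tried in turn.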
async fn fetch_openai_compatible_context_window(
    provider: &str,
    model: &str,
    api_key: &str,
    base_url: &str,
) -> Option<usize> {
    let pdef = crate::llm_config::provider_config(provider);
    let client = crate::llm::shared_utility_client();
    let url = pdef
        .as_ref()
        .and_then(|def| super::readiness::build_models_url(def).ok())
        .unwrap_or_else(|| format!("{}/models", base_url.trim_end_matches('/')));
    let req = client
        .get(&url)
        .header("Content-Type", "application/json")
        .timeout(std::time::Duration::from_secs(10));
    let req = apply_auth_headers(req, api_key, pdef.as_ref());
    let response = req.send().await.ok()?;
    if !response.status().is_success() {
        return None;
    }
    let json: serde_json::Value = response.json().await.ok()?;
    let data = json.get("data").and_then(|d| d.as_array())?;
    for entry in data {
        let id = entry.get("id").and_then(|v| v.as_str()).unwrap_or("");
        if id != model {
            continue;
        }
        // Servers disagree on the field name; probe the common variants in order.
        for field in ["max_model_len", "context_length", "max_context_length", "n_ctx"] {
            if let Some(n) = entry.get(field).and_then(|v| v.as_u64()) {
                return Some(n as usize);
            }
        }
        // OpenRouter-style responses nest the window under `top_provider`.
        if let Some(n) = entry
            .get("top_provider")
            .and_then(|tp| tp.get("context_length"))
            .and_then(|v| v.as_u64())
        {
            return Some(n as usize);
        }
        break;
    }
    None
}
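/// Uncached lookup: static table first, then a provider-specific probe
/// (Ollama's show API, or the OpenAI-compatible models listing).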
async fn fetch_provider_max_context_uncached(
    provider: &str,
    model: &str,
    api_key: &str,
    base_url: &str,
) -> Option<usize> {
    if let Some(n) = known_model_context_window(model) {
        return Some(n);
    }
    if provider == "ollama" {
        return fetch_ollama_context_window(model, base_url).await;
    }
    let is_openai_compatible = matches!(
        provider,
        "local"
            | "openai"
            | "mlx"
            | "vllm"
            | "groq"
            | "together"
            | "openrouter"
            | "deepinfra"
            | "fireworks"
            | "huggingface"
    );
    if is_openai_compatible {
        return fetch_openai_compatible_context_window(provider, model, api_key, base_url).await;
    }
    None
}
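/// 75% of the context window, with the window floored at 4,096 tokens:
/// 128_000 -> 96_000; anything at or below 4_096 yields 3_072.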
pub(crate) fn effective_threshold_from_max_context(max_context: usize) -> usize {
    let bounded = max_context.max(4_096);
    (bounded * 3) / 4
}
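/// Adapts auto-compact limits to the provider's actual context window. When
/// the lookup fails, the config is left untouched. User-specified values are
/// only ever lowered, never raised; an unspecified hard limit defaults to 75%
/// of the window, and an unspecified threshold is capped at 65% of it.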
pub(crate) async fn adapt_auto_compact_to_provider(
    ac: &mut crate::orchestration::AutoCompactConfig,
    user_specified_threshold: bool,
    user_specified_hard_limit: bool,
    provider: &str,
    model: &str,
    api_key: &str,
) {
    let Some(max_ctx) = fetch_provider_max_context(provider, model, api_key).await else {
        return;
    };
    let effective = effective_threshold_from_max_context(max_ctx);
    if !user_specified_hard_limit {
        // No user override: default the hard limit to the 75% ceiling.
        ac.hard_limit_tokens = Some(effective);
    } else if let Some(ref mut hl) = ac.hard_limit_tokens {
        // User override: only clamp it down if it exceeds the ceiling.
        if *hl > effective {
            *hl = effective;
        }
    }
    if user_specified_threshold {
        // Respect the user's threshold, but never let it exceed the ceiling.
        if ac.token_threshold > effective {
            ac.token_threshold = effective;
        }
    } else {
        // Cap the default threshold at 65% of the window, below the 75% hard ceiling.
        let tier1_from_context = (max_ctx * 13) / 20;
        if ac.token_threshold > tier1_from_context {
            ac.token_threshold = tier1_from_context;
        }
    }
}
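
// A minimal sanity check for the pure helpers above; the async probe paths
// need a live endpoint and are not covered here.
#[cfg(test)]
mod tests {
    use super::{effective_threshold_from_max_context, known_model_context_window};

    #[test]
    fn static_table_prefers_most_specific_prefix() {
        assert_eq!(known_model_context_window("gpt-4-turbo"), Some(128_000));
        assert_eq!(known_model_context_window("gpt-4"), Some(8_192));
        assert_eq!(known_model_context_window("claude-3-5-sonnet"), Some(200_000));
        assert_eq!(known_model_context_window("unknown-model"), None);
    }

    #[test]
    fn threshold_is_three_quarters_with_floor() {
        assert_eq!(effective_threshold_from_max_context(128_000), 96_000);
        // Windows below the 4_096 floor are bounded up before taking 75%.
        assert_eq!(effective_threshold_from_max_context(1_000), 3_072);
    }
}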