use anyhow::{Context, Result, bail};
use base64::Engine;
use chrono::{Local, TimeZone};
use clap::{Args, Parser, Subcommand};
use dirs::home_dir;
use reqwest::blocking::Client;
use serde::{Deserialize, Serialize};
use std::borrow::Cow;
use std::cmp::Reverse;
use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeMap, BTreeSet, VecDeque};
use std::env;
use std::ffi::OsString;
use std::fs;
use std::hash::{Hash, Hasher};
use std::io::{self, Cursor, Read, Write};
use std::net::{SocketAddr, TcpStream, ToSocketAddrs};
use std::path::{Path, PathBuf};
use std::process::{Command, ExitStatus, Stdio};
use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering};
use std::sync::mpsc::{self, Receiver, RecvTimeoutError, SyncSender, TrySendError};
use std::sync::{Arc, Condvar, Mutex, OnceLock};
use std::thread;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
use tiny_http::{
Header as TinyHeader, ReadWrite as TinyReadWrite, Response as TinyResponse,
Server as TinyServer, StatusCode as TinyStatusCode,
};
use tokio::runtime::{Builder as TokioRuntimeBuilder, Runtime as TokioRuntime};
use tungstenite::client::IntoClientRequest;
use tungstenite::error::UrlError as WsUrlError;
use tungstenite::handshake::derive_accept_key;
use tungstenite::http::{HeaderName as WsHeaderName, HeaderValue as WsHeaderValue};
use tungstenite::protocol::Role as WsRole;
use tungstenite::stream::MaybeTlsStream;
use tungstenite::{
Error as WsError, HandshakeError as WsHandshakeError, Message as WsMessage,
WebSocket as WsSocket, client_tls_with_config,
};
mod audit_log;
mod housekeeping;
mod profile_commands;
mod profile_identity;
mod quota_support;
#[path = "runtime_tuning.rs"]
mod runtime_config;
mod runtime_doctor;
mod runtime_metrics;
mod runtime_policy;
mod runtime_store;
mod secret_store;
mod shared_codex_fs;
#[path = "cli_render.rs"]
mod terminal_ui;
mod update_notice;
use audit_log::*;
use housekeeping::*;
use profile_commands::*;
use profile_identity::*;
use quota_support::*;
use runtime_config::*;
use runtime_doctor::*;
use runtime_policy::*;
use runtime_store::*;
use shared_codex_fs::*;
use terminal_ui::*;
use update_notice::*;
// ---- Filesystem and upstream endpoint defaults ----
const DEFAULT_PRODEX_DIR: &str = ".prodex";
const DEFAULT_CODEX_DIR: &str = ".codex";
const DEFAULT_CHATGPT_BASE_URL: &str = "https://chatgpt.com/backend-api";
// ---- Runtime proxy mount points (local) and upstream paths ----
const RUNTIME_PROXY_OPENAI_UPSTREAM_PATH: &str = "/backend-api/codex";
const RUNTIME_PROXY_OPENAI_MOUNT_PATH: &str = "/backend-api/prodex";
const RUNTIME_PROXY_ANTHROPIC_MESSAGES_PATH: &str = "/v1/messages";
const RUNTIME_PROXY_ANTHROPIC_MODELS_PATH: &str = "/v1/models";
const RUNTIME_PROXY_ANTHROPIC_HEALTH_PATH: &str = "/health";
// Older clients mounted versioned paths under this prefix; kept for compatibility.
const LEGACY_RUNTIME_PROXY_OPENAI_MOUNT_PATH_PREFIX: &str = "/backend-api/prodex/v";
// ---- Claude Code integration ----
// Placeholder API key handed to Claude Code; the local proxy does the real auth.
const PRODEX_CLAUDE_PROXY_API_KEY: &str = "prodex-runtime-proxy";
const PRODEX_INTERNAL_REQUEST_ORIGIN_HEADER: &str = "X-Prodex-Internal-Request-Origin";
const PRODEX_INTERNAL_REQUEST_ORIGIN_ANTHROPIC_MESSAGES: &str = "anthropic_messages";
const DEFAULT_PRODEX_CLAUDE_MODEL: &str = "gpt-5";
const PRODEX_CLAUDE_CONFIG_DIR_NAME: &str = ".claude-code";
const PRODEX_SHARED_CLAUDE_DIR_NAME: &str = "claude";
const DEFAULT_CLAUDE_CONFIG_DIR_NAME: &str = ".claude";
const DEFAULT_CLAUDE_CONFIG_FILE_NAME: &str = ".claude.json";
const PRODEX_CLAUDE_LEGACY_IMPORT_MARKER_NAME: &str = ".prodex-legacy-imported";
// Static `created_at` reported for synthetic models on the Anthropic-compatible route.
const RUNTIME_PROXY_ANTHROPIC_MODEL_CREATED_AT: &str = "2026-01-01T00:00:00Z";
// ---- Watch / run-selection tuning (BPS presumably basis points — TODO confirm) ----
const DEFAULT_WATCH_INTERVAL_SECONDS: u64 = 5;
const RUN_SELECTION_NEAR_OPTIMAL_BPS: i64 = 1_000;
const RUN_SELECTION_HYSTERESIS_BPS: i64 = 500;
const RUN_SELECTION_COOLDOWN_SECONDS: i64 = 15 * 60;
// ---- Caps on in-memory binding maps (entry counts) ----
const RESPONSE_PROFILE_BINDING_LIMIT: usize = 65_536;
const TURN_STATE_PROFILE_BINDING_LIMIT: usize = 4_096;
const SESSION_ID_PROFILE_BINDING_LIMIT: usize = 4_096;
// ---- Retention windows; tests use short values so housekeeping runs quickly ----
const APP_STATE_LAST_RUN_RETENTION_SECONDS: i64 = if cfg!(test) { 60 } else { 90 * 24 * 60 * 60 };
const APP_STATE_SESSION_BINDING_RETENTION_SECONDS: i64 =
    if cfg!(test) { 60 } else { 30 * 24 * 60 * 60 };
const RUNTIME_SCORE_RETENTION_SECONDS: i64 = if cfg!(test) { 120 } else { 14 * 24 * 60 * 60 };
const RUNTIME_USAGE_SNAPSHOT_RETENTION_SECONDS: i64 =
    if cfg!(test) { 120 } else { 7 * 24 * 60 * 60 };
const PROD_EX_TMP_LOGIN_RETENTION_SECONDS: i64 = if cfg!(test) { 60 } else { 24 * 60 * 60 };
const ORPHAN_MANAGED_PROFILE_AUDIT_RETENTION_SECONDS: i64 =
    if cfg!(test) { 60 } else { 7 * 24 * 60 * 60 };
const RUNTIME_PROXY_LOG_RETENTION_SECONDS: i64 = if cfg!(test) { 120 } else { 7 * 24 * 60 * 60 };
const RUNTIME_PROXY_LOG_RETENTION_COUNT: usize = if cfg!(test) { 4 } else { 40 };
// Backoff schedule for retrying `previous_response` lookups.
const RUNTIME_PREVIOUS_RESPONSE_RETRY_DELAYS_MS: [u64; 3] = [75, 200, 500];
/// Claude model-tier alias a descriptor can be advertised under on the
/// Anthropic-compatible route.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeProxyClaudeModelAlias {
    Opus,
    Sonnet,
    Haiku,
}
/// Static description of a Responses model exposed by the runtime proxy,
/// including its optional Claude-facing aliasing.
#[derive(Debug, Clone, Copy)]
struct RuntimeProxyResponsesModelDescriptor {
    /// Canonical model id.
    id: &'static str,
    /// Human-readable name shown in model listings.
    display_name: &'static str,
    description: &'static str,
    /// Claude tier this model is aliased to, if any.
    claude_alias: Option<RuntimeProxyClaudeModelAlias>,
    /// Model id shown in the Claude model picker, if different.
    claude_picker_model: Option<&'static str>,
    /// Whether the model accepts the "xhigh" reasoning effort.
    supports_xhigh: bool,
}
// ---- Precommit attempt/budget tuning (test values keep unit tests fast) ----
const RUNTIME_PROXY_PRECOMMIT_ATTEMPT_LIMIT: usize = if cfg!(test) { 4 } else { 12 };
const RUNTIME_PROXY_PRECOMMIT_BUDGET_MS: u64 = if cfg!(test) { 500 } else { 3_000 };
// Continuations get a wider window than fresh precommits.
const RUNTIME_PROXY_PRECOMMIT_CONTINUATION_ATTEMPT_LIMIT: usize =
    RUNTIME_PROXY_PRECOMMIT_ATTEMPT_LIMIT * 2;
const RUNTIME_PROXY_PRECOMMIT_CONTINUATION_BUDGET_MS: u64 = RUNTIME_PROXY_PRECOMMIT_BUDGET_MS * 4;
// ---- Per-profile backoff windows ----
const RUNTIME_PROFILE_RETRY_BACKOFF_SECONDS: i64 = if cfg!(test) { 2 } else { 20 };
const RUNTIME_PROFILE_TRANSPORT_BACKOFF_SECONDS: i64 = if cfg!(test) { 2 } else { 15 };
const RUNTIME_PROFILE_TRANSPORT_BACKOFF_MAX_SECONDS: i64 = if cfg!(test) { 8 } else { 120 };
const RUNTIME_PROXY_LOCAL_OVERLOAD_BACKOFF_SECONDS: i64 = if cfg!(test) { 1 } else { 3 };
// ---- Admission-control wait budgets; "pressure" variants apply under queue pressure ----
const RUNTIME_PROXY_ADMISSION_WAIT_BUDGET_MS: u64 = if cfg!(test) { 80 } else { 750 };
const RUNTIME_PROXY_LONG_LIVED_QUEUE_WAIT_BUDGET_MS: u64 = if cfg!(test) { 80 } else { 750 };
const RUNTIME_PROXY_PRESSURE_ADMISSION_WAIT_BUDGET_MS: u64 = if cfg!(test) { 25 } else { 200 };
const RUNTIME_PROXY_PRESSURE_LONG_LIVED_QUEUE_WAIT_BUDGET_MS: u64 =
    if cfg!(test) { 25 } else { 200 };
// Interactive requests are granted a longer wait than background ones.
const RUNTIME_PROXY_INTERACTIVE_WAIT_MULTIPLIER: u64 = 2;
const RUNTIME_PROXY_PRESSURE_PRECOMMIT_BUDGET_MS: u64 = if cfg!(test) { 150 } else { 800 };
#[allow(dead_code)]
const RUNTIME_PROXY_PRESSURE_PRECOMMIT_CONTINUATION_BUDGET_MS: u64 =
    if cfg!(test) { 250 } else { 1_500 };
const RUNTIME_PROXY_PRESSURE_PRECOMMIT_ATTEMPT_LIMIT: usize = if cfg!(test) { 2 } else { 6 };
#[allow(dead_code)]
const RUNTIME_PROXY_PRESSURE_PRECOMMIT_CONTINUATION_ATTEMPT_LIMIT: usize =
    if cfg!(test) { 4 } else { 8 };
const RUNTIME_PROXY_COMPACT_OWNER_RETRY_DELAY_MS: u64 = if cfg!(test) { 5 } else { 150 };
// ---- Per-profile in-flight limits and health scoring ----
const RUNTIME_PROFILE_INFLIGHT_SOFT_LIMIT: usize = if cfg!(test) { 1 } else { 4 };
const RUNTIME_PROFILE_INFLIGHT_HARD_LIMIT: usize = if cfg!(test) { 2 } else { 8 };
const RUNTIME_PROFILE_HEALTH_DECAY_SECONDS: i64 = if cfg!(test) { 2 } else { 60 };
const RUNTIME_PROFILE_USAGE_CACHE_FRESH_SECONDS: i64 = if cfg!(test) { 30 } else { 300 };
// ---- Self-update check cache and timeouts ----
const UPDATE_CHECK_CACHE_TTL_SECONDS: i64 = if cfg!(test) { 5 } else { 21_600 };
const UPDATE_CHECK_STALE_CURRENT_TTL_SECONDS: i64 = if cfg!(test) { 1 } else { 300 };
const UPDATE_CHECK_HTTP_CONNECT_TIMEOUT_MS: u64 = if cfg!(test) { 200 } else { 800 };
const UPDATE_CHECK_HTTP_READ_TIMEOUT_MS: u64 = if cfg!(test) { 400 } else { 1200 };
const RUNTIME_PROFILE_USAGE_CACHE_STALE_GRACE_SECONDS: i64 = if cfg!(test) { 300 } else { 1800 };
const RUNTIME_PROFILE_QUOTA_QUARANTINE_FALLBACK_SECONDS: i64 = if cfg!(test) { 10 } else { 300 };
const RUNTIME_PROFILE_SYNC_PROBE_FALLBACK_LIMIT: usize = 3;
const RUNTIME_STARTUP_PROBE_WARM_LIMIT: usize = 3;
// Tests warm synchronously up to the async limit; production warms one profile.
const RUNTIME_STARTUP_SYNC_PROBE_WARM_LIMIT: usize = if cfg!(test) {
    RUNTIME_STARTUP_PROBE_WARM_LIMIT
} else {
    1
};
// ---- Background save-queue debounce and pressure thresholds ----
const RUNTIME_STATE_SAVE_DEBOUNCE_MS: u64 = if cfg!(test) { 5 } else { 150 };
const RUNTIME_STATE_SAVE_QUEUE_PRESSURE_THRESHOLD: usize = 8;
const RUNTIME_CONTINUATION_JOURNAL_QUEUE_PRESSURE_THRESHOLD: usize = 8;
const RUNTIME_PROBE_REFRESH_QUEUE_PRESSURE_THRESHOLD: usize = 16;
// ---- Auth-failure and pairing penalty scoring ----
const RUNTIME_PROFILE_AUTH_FAILURE_DECAY_SECONDS: i64 = if cfg!(test) { 5 } else { 300 };
const RUNTIME_PROFILE_AUTH_FAILURE_401_SCORE: u32 = if cfg!(test) { 3 } else { 12 };
const RUNTIME_PROFILE_AUTH_FAILURE_403_SCORE: u32 = if cfg!(test) { 1 } else { 2 };
const RUNTIME_BINDING_TOUCH_PERSIST_INTERVAL_SECONDS: i64 = if cfg!(test) { 1 } else { 60 };
const RUNTIME_PROFILE_BAD_PAIRING_DECAY_SECONDS: i64 = if cfg!(test) { 4 } else { 180 };
const RUNTIME_PROFILE_SUCCESS_STREAK_DECAY_SECONDS: i64 = if cfg!(test) { 8 } else { 300 };
const RUNTIME_PROFILE_PERFORMANCE_DECAY_SECONDS: i64 = if cfg!(test) { 8 } else { 300 };
const RUNTIME_PROFILE_TRANSPORT_FAILURE_HEALTH_PENALTY: u32 = 4;
const RUNTIME_PROFILE_CONNECT_FAILURE_HEALTH_PENALTY: u32 = 5;
const RUNTIME_PROFILE_OVERLOAD_HEALTH_PENALTY: u32 = 2;
const RUNTIME_PROFILE_LATENCY_PENALTY_MAX: u32 = 12;
const RUNTIME_PROFILE_HEALTH_SUCCESS_RECOVERY_SCORE: u32 = 2;
const RUNTIME_PROFILE_BAD_PAIRING_PENALTY: u32 = 2;
const RUNTIME_PROFILE_HEALTH_MAX_SCORE: u32 = 16;
const RUNTIME_PROFILE_SUCCESS_STREAK_MAX: u32 = 3;
// ---- Quota probe HTTP timeouts ----
const QUOTA_HTTP_CONNECT_TIMEOUT_MS: u64 = if cfg!(test) { 250 } else { 5_000 };
const QUOTA_HTTP_READ_TIMEOUT_MS: u64 = if cfg!(test) { 500 } else { 10_000 };
// Match Codex's default Responses stream idle timeout so the local proxy stays transport-transparent.
const RUNTIME_PROXY_STREAM_IDLE_TIMEOUT_MS: u64 = if cfg!(test) { 250 } else { 300_000 };
const RUNTIME_PROXY_HTTP_CONNECT_TIMEOUT_MS: u64 = if cfg!(test) { 250 } else { 5_000 };
const RUNTIME_PROXY_WEBSOCKET_CONNECT_TIMEOUT_MS: u64 = if cfg!(test) { 250 } else { 15_000 };
const RUNTIME_PROXY_WEBSOCKET_HAPPY_EYEBALLS_DELAY_MS: u64 = if cfg!(test) { 10 } else { 200 };
const RUNTIME_PROXY_WEBSOCKET_PRECOMMIT_PROGRESS_TIMEOUT_MS: u64 =
    if cfg!(test) { 120 } else { 8_000 };
// NOTE(review): both branches are 60_000 — either the cfg split is vestigial or one
// side was meant to differ; confirm intent before simplifying.
const RUNTIME_PROXY_WEBSOCKET_PREVIOUS_RESPONSE_REUSE_STALE_MS: u64 =
    if cfg!(test) { 60_000 } else { 60_000 };
// ---- SSE lookahead / prefetch buffering ----
const RUNTIME_PROXY_SSE_LOOKAHEAD_TIMEOUT_MS: u64 = if cfg!(test) { 50 } else { 1_000 };
const RUNTIME_PROXY_SSE_LOOKAHEAD_BYTES: usize = 8 * 1024;
const RUNTIME_PROXY_PREFETCH_QUEUE_CAPACITY: usize = 2;
const RUNTIME_PROXY_PREFETCH_MAX_CHUNK_BYTES: usize = 512 * 1024;
const RUNTIME_PROXY_PREFETCH_MAX_BUFFERED_BYTES: usize = 768 * 1024;
const RUNTIME_PROXY_PREFETCH_BACKPRESSURE_RETRY_MS: u64 = if cfg!(test) { 2 } else { 10 };
const RUNTIME_PROXY_PREFETCH_BACKPRESSURE_TIMEOUT_MS: u64 = if cfg!(test) { 40 } else { 1_000 };
const RUNTIME_PROXY_BUFFERED_RESPONSE_MAX_BYTES: usize = 4 * 1024 * 1024;
// ---- Log file naming and secret-store env switches ----
const RUNTIME_PROXY_LOG_FILE_PREFIX: &str = "prodex-runtime";
const RUNTIME_PROXY_LATEST_LOG_POINTER: &str = "prodex-runtime-latest.path";
const RUNTIME_PROXY_DOCTOR_TAIL_BYTES: usize = 128 * 1024;
const PRODEX_SECRET_BACKEND_ENV: &str = "PRODEX_SECRET_BACKEND";
const PRODEX_SECRET_KEYRING_SERVICE_ENV: &str = "PRODEX_SECRET_KEYRING_SERVICE";
// ---- `info` command windows ----
const INFO_RUNTIME_LOG_TAIL_BYTES: usize = if cfg!(test) { 64 * 1024 } else { 512 * 1024 };
const INFO_FORECAST_LOOKBACK_SECONDS: i64 = if cfg!(test) { 3_600 } else { 3 * 60 * 60 };
const INFO_FORECAST_MIN_SPAN_SECONDS: i64 = if cfg!(test) { 60 } else { 5 * 60 };
const INFO_RECENT_LOAD_WINDOW_SECONDS: i64 = if cfg!(test) { 600 } else { 30 * 60 };
const LAST_GOOD_FILE_SUFFIX: &str = ".last-good";
// ---- previous_response negative cache and continuation lifecycle scoring ----
const RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_SECONDS: i64 = if cfg!(test) { 5 } else { 180 };
const RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_FAILURE_THRESHOLD: u32 = 2;
const RUNTIME_CONTINUATION_SUSPECT_GRACE_SECONDS: i64 = if cfg!(test) { 5 } else { 120 };
const RUNTIME_CONTINUATION_DEAD_GRACE_SECONDS: i64 = if cfg!(test) { 5 } else { 900 };
const RUNTIME_CONTINUATION_VERIFIED_STALE_SECONDS: i64 = if cfg!(test) { 10 } else { 1_800 };
const RUNTIME_CONTINUATION_SUSPECT_NOT_FOUND_STREAK_LIMIT: u32 = 2;
const RUNTIME_CONTINUATION_CONFIDENCE_MAX: u32 = 8;
const RUNTIME_CONTINUATION_VERIFIED_CONFIDENCE_BONUS: u32 = 2;
const RUNTIME_CONTINUATION_TOUCH_CONFIDENCE_BONUS: u32 = 1;
const RUNTIME_CONTINUATION_SUSPECT_CONFIDENCE_PENALTY: u32 = 1;
const RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT: usize = if cfg!(test) { 3 } else { 6 };
// ---- Broker readiness/health polling ----
const RUNTIME_BROKER_READY_TIMEOUT_MS: u64 = if cfg!(test) { 3_000 } else { 15_000 };
const RUNTIME_BROKER_HEALTH_CONNECT_TIMEOUT_MS: u64 = if cfg!(test) { 250 } else { 750 };
const RUNTIME_BROKER_HEALTH_READ_TIMEOUT_MS: u64 = if cfg!(test) { 400 } else { 1_500 };
const RUNTIME_BROKER_POLL_INTERVAL_MS: u64 = if cfg!(test) { 25 } else { 100 };
const RUNTIME_BROKER_LEASE_SCAN_INTERVAL_MS: u64 = if cfg!(test) { 125 } else { 1_000 };
const RUNTIME_BROKER_IDLE_GRACE_SECONDS: i64 = if cfg!(test) { 1 } else { 5 };
// ---- Terminal rendering widths (columns) ----
const CLI_WIDTH: usize = 110;
const CLI_MIN_WIDTH: usize = 60;
const CLI_LABEL_WIDTH: usize = 16;
const CLI_MIN_LABEL_WIDTH: usize = 10;
const CLI_MAX_LABEL_WIDTH: usize = 24;
const CLI_TABLE_GAP: &str = " ";
// clap `after_help` text for each subcommand. These strings are user-facing
// output — keep the exact wording and layout stable.
const CLI_TOP_LEVEL_AFTER_HELP: &str = "\
Tips:
Bare `prodex` invocation defaults to `prodex run`.
Use `prodex quota --all --detail` for the clearest quota view across profiles.
Use `prodex <command> -h` to see every parameter for that command.
Examples:
prodex
prodex exec \"review this repo\"
prodex profile list
prodex quota --all --detail
prodex run --profile main";
// Help footer for `prodex profile`.
const CLI_PROFILE_AFTER_HELP: &str = "\
Examples:
prodex profile list
prodex profile add main --activate
prodex profile export
prodex profile export backup.json
prodex profile import backup.json
prodex profile import-current main
prodex profile remove main";
// Help footer for `prodex login`.
const CLI_LOGIN_AFTER_HELP: &str = "\
Examples:
prodex login
prodex login --profile main
prodex login --device-auth";
// Help footer for `prodex quota`.
const CLI_QUOTA_AFTER_HELP: &str = "\
Best practice:
Use `prodex quota --all --detail` for the clearest live quota view across profiles.
Examples:
prodex quota
prodex quota --profile main --detail
prodex quota --all --detail
prodex quota --all --once
prodex quota --raw --profile main";
// Help footer for `prodex run` (the default subcommand).
const CLI_RUN_AFTER_HELP: &str = "\
Examples:
prodex
prodex run
prodex exec \"review this repo\"
prodex run --profile main
prodex run exec \"review this repo\"
prodex run 019c9e3d-45a0-7ad0-a6ee-b194ac2d44f9
Notes:
Auto-rotate is enabled by default.
Bare `prodex <args>` is treated as `prodex run <args>`.
A lone session id is forwarded as `codex resume <session-id>`.";
// Help footer for `prodex claude`.
const CLI_CLAUDE_AFTER_HELP: &str = "\
Examples:
prodex claude --print \"summarize this repo\"
prodex claude --profile main --print \"review the latest changes\"
prodex claude --skip-quota-check -- --help
Notes:
Prodex injects a local Anthropic-compatible proxy via `ANTHROPIC_BASE_URL`.
Use `PRODEX_CLAUDE_BIN` to point prodex at a specific Claude Code binary.
Claude defaults to the current Codex model from `config.toml` when available.
Use `PRODEX_CLAUDE_MODEL` to override the upstream Responses model mapping.
Use `PRODEX_CLAUDE_REASONING_EFFORT` to force the upstream Responses reasoning effort.";
// Help footer for `prodex doctor`.
const CLI_DOCTOR_AFTER_HELP: &str = "\
Examples:
prodex doctor
prodex doctor --quota
prodex doctor --runtime
prodex doctor --runtime --json";
// Help footer for `prodex audit`.
const CLI_AUDIT_AFTER_HELP: &str = "\
Examples:
prodex audit
prodex audit --tail 50
prodex audit --component profile --action use
prodex audit --json";
// Help footer for `prodex cleanup`.
const CLI_CLEANUP_AFTER_HELP: &str = "\
Examples:
prodex cleanup";
// Codex home entries that prodex shares across managed profiles.
const SHARED_CODEX_DIR_NAMES: &[&str] = &[
    "sessions",
    "archived_sessions",
    "shell_snapshots",
    "memories",
    "rules",
    "skills",
];
const SHARED_CODEX_FILE_NAMES: &[&str] = &["history.jsonl", "config.toml"];
// SQLite files are matched by prefix + suffix (including WAL/SHM side files).
const SHARED_CODEX_SQLITE_PREFIXES: &[&str] = &["state_", "logs_"];
const SHARED_CODEX_SQLITE_SUFFIXES: &[&str] = &[".sqlite", ".sqlite-shm", ".sqlite-wal"];
// Process-wide monotonic counters used for revisioning and unique file names.
static STATE_SAVE_SEQUENCE: AtomicU64 = AtomicU64::new(0);
static RUNTIME_PROXY_LOG_SEQUENCE: AtomicU64 = AtomicU64::new(0);
// Lazily-initialized global singletons (created on first use via `get_or_init`).
static RUNTIME_STATE_SAVE_QUEUE: OnceLock<Arc<RuntimeStateSaveQueue>> = OnceLock::new();
static RUNTIME_CONTINUATION_JOURNAL_SAVE_QUEUE: OnceLock<Arc<RuntimeContinuationJournalSaveQueue>> =
    OnceLock::new();
static RUNTIME_PROBE_REFRESH_QUEUE: OnceLock<Arc<RuntimeProbeRefreshQueue>> = OnceLock::new();
// Per-log-path bookkeeping: whether this process may persist state, and broker metadata.
static RUNTIME_PERSISTENCE_MODE_BY_LOG_PATH: OnceLock<Mutex<BTreeMap<PathBuf, bool>>> =
    OnceLock::new();
static RUNTIME_BROKER_METADATA_BY_LOG_PATH: OnceLock<
    Mutex<BTreeMap<PathBuf, RuntimeBrokerMetadata>>,
> = OnceLock::new();
/// Resolve the directory runtime proxy logs are written to.
///
/// Precedence: a non-empty `PRODEX_RUNTIME_LOG_DIR` environment variable,
/// then the runtime policy's `log_dir`, then the OS temp directory.
fn runtime_proxy_log_dir() -> PathBuf {
    if let Some(raw) = env::var_os("PRODEX_RUNTIME_LOG_DIR") {
        if !raw.is_empty() {
            return PathBuf::from(raw);
        }
    }
    match runtime_policy_runtime().and_then(|policy| policy.log_dir) {
        Some(dir) => dir,
        None => env::temp_dir(),
    }
}
/// Resolve the runtime log format: `PRODEX_RUNTIME_LOG_FORMAT` env var first,
/// then the runtime policy's `log_format`, defaulting to plain text.
fn runtime_proxy_log_format() -> RuntimeLogFormat {
    let from_env = env::var("PRODEX_RUNTIME_LOG_FORMAT")
        .ok()
        .and_then(|value| RuntimeLogFormat::parse(&value));
    match from_env {
        Some(format) => format,
        None => runtime_policy_runtime()
            .and_then(|policy| policy.log_format)
            .unwrap_or(RuntimeLogFormat::Text),
    }
}
/// Build a unique log file path of the form
/// `<dir>/<prefix>-<pid>-<millis>-<sequence>.log`.
///
/// Uniqueness comes from the process id, a wall-clock millisecond stamp, and
/// a process-wide monotonically increasing sequence counter.
fn create_runtime_proxy_log_path() -> PathBuf {
    let dir = runtime_proxy_log_dir();
    // Best-effort: logging must never fail hard on directory creation.
    let _ = fs::create_dir_all(&dir);
    let millis = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default()
        .as_millis();
    let sequence = RUNTIME_PROXY_LOG_SEQUENCE.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    let file_name = format!("{RUNTIME_PROXY_LOG_FILE_PREFIX}-{pid}-{millis}-{sequence}.log");
    dir.join(file_name)
}
/// Path of the pointer file that records the most recent runtime log path.
fn runtime_proxy_latest_log_pointer_path() -> PathBuf {
    let mut pointer = runtime_proxy_log_dir();
    pointer.push(RUNTIME_PROXY_LATEST_LOG_POINTER);
    pointer
}
/// Create this process's runtime proxy log, update the "latest" pointer file,
/// and write the initial banner line. Returns the new log path.
fn initialize_runtime_proxy_log_path() -> PathBuf {
    // Prune old logs before creating a new one.
    cleanup_runtime_proxy_log_housekeeping();
    let log_path = create_runtime_proxy_log_path();
    // Best-effort pointer update so tooling can locate the newest log.
    let pointer = runtime_proxy_latest_log_pointer_path();
    let _ = fs::write(&pointer, format!("{}\n", log_path.display()));
    let cwd = std::env::current_dir()
        .ok()
        .map(|path| path.display().to_string())
        .unwrap_or_else(|| "<unknown>".to_string());
    let banner = format!(
        "runtime proxy log initialized pid={} cwd={}",
        std::process::id(),
        cwd
    );
    runtime_proxy_log_to_path(&log_path, &banner);
    log_path
}
/// Size of the general proxy worker pool.
///
/// Default is four workers per available core, bounded to [8, 32]; the env
/// var / policy override is applied on top, and the result is capped at 64.
fn runtime_proxy_worker_count() -> usize {
    let parallelism = thread::available_parallelism().map_or(4, |count| count.get());
    let default_count = parallelism.saturating_mul(4).clamp(8, 32);
    let policy_override = runtime_policy_proxy().and_then(|policy| policy.worker_count);
    usize_override_with_policy(
        "PRODEX_RUNTIME_PROXY_WORKER_COUNT",
        policy_override,
        default_count,
    )
    .clamp(1, 64)
}
/// Default long-lived worker pool size for a given CPU parallelism:
/// eight workers per core, bounded to the [32, 128] range.
fn runtime_proxy_long_lived_worker_count_default(parallelism: usize) -> usize {
    let scaled = parallelism.saturating_mul(8);
    scaled.clamp(32, 128)
}
/// Long-lived (streaming) worker pool size, honoring env var and policy
/// overrides before falling back to the parallelism-based default.
fn runtime_proxy_long_lived_worker_count() -> usize {
    let parallelism = thread::available_parallelism().map_or(4, |count| count.get());
    let policy_override =
        runtime_policy_proxy().and_then(|policy| policy.long_lived_worker_count);
    let resolved = usize_override_with_policy(
        "PRODEX_RUNTIME_PROXY_LONG_LIVED_WORKER_COUNT",
        policy_override,
        runtime_proxy_long_lived_worker_count_default(parallelism),
    );
    resolved.clamp(1, 256)
}
/// Worker count for background quota-probe refreshes: defaults to the CPU
/// parallelism clamped to [2, 4], overridable via env var / policy, capped at 8.
fn runtime_probe_refresh_worker_count() -> usize {
    let parallelism = thread::available_parallelism().map_or(4, |count| count.get());
    let policy_override =
        runtime_policy_proxy().and_then(|policy| policy.probe_refresh_worker_count);
    usize_override_with_policy(
        "PRODEX_RUNTIME_PROBE_REFRESH_WORKER_COUNT",
        policy_override,
        parallelism.clamp(2, 4),
    )
    .clamp(1, 8)
}
/// Default async worker count: two per available core, bounded to [2, 8].
fn runtime_proxy_async_worker_count_default(parallelism: usize) -> usize {
    let doubled = parallelism.saturating_mul(2);
    doubled.clamp(2, 8)
}
/// Async worker pool size, honoring env var and policy overrides before
/// falling back to the parallelism-based default; always within [2, 8].
fn runtime_proxy_async_worker_count() -> usize {
    let parallelism = thread::available_parallelism().map_or(4, |count| count.get());
    let policy_override = runtime_policy_proxy().and_then(|policy| policy.async_worker_count);
    usize_override_with_policy(
        "PRODEX_RUNTIME_PROXY_ASYNC_WORKER_COUNT",
        policy_override,
        runtime_proxy_async_worker_count_default(parallelism),
    )
    .clamp(2, 8)
}
/// Capacity of the long-lived request queue: eight slots per long-lived
/// worker, bounded to [128, 1024] by default, overridable, never below 1.
fn runtime_proxy_long_lived_queue_capacity(worker_count: usize) -> usize {
    let default_capacity = worker_count.saturating_mul(8).clamp(128, 1024);
    let policy_override =
        runtime_policy_proxy().and_then(|policy| policy.long_lived_queue_capacity);
    let capacity = usize_override_with_policy(
        "PRODEX_RUNTIME_PROXY_LONG_LIVED_QUEUE_CAPACITY",
        policy_override,
        default_capacity,
    );
    capacity.max(1)
}
/// Default global active-request cap: one slot per general worker plus three
/// per long-lived worker, bounded to the [64, 512] range.
fn runtime_proxy_active_request_limit_default(
    worker_count: usize,
    long_lived_worker_count: usize,
) -> usize {
    let combined = long_lived_worker_count
        .saturating_mul(3)
        .saturating_add(worker_count);
    combined.clamp(64, 512)
}
/// Global active-request cap with env var / policy overrides applied on top
/// of the worker-derived default; never below 1.
fn runtime_proxy_active_request_limit(
    worker_count: usize,
    long_lived_worker_count: usize,
) -> usize {
    let default_limit =
        runtime_proxy_active_request_limit_default(worker_count, long_lived_worker_count);
    let policy_override = runtime_policy_proxy().and_then(|policy| policy.active_request_limit);
    usize_override_with_policy(
        "PRODEX_RUNTIME_PROXY_ACTIVE_REQUEST_LIMIT",
        policy_override,
        default_limit,
    )
    .max(1)
}
/// Per-lane concurrency caps for the runtime proxy's admission control.
#[derive(Debug, Clone, Copy)]
struct RuntimeProxyLaneLimits {
    responses: usize,
    compact: usize,
    websocket: usize,
    standard: usize,
}
/// Shared admission-control state: one active-request counter per route lane,
/// a condvar pair for waiters, and the configured per-lane limits.
#[derive(Debug, Clone)]
struct RuntimeProxyLaneAdmission {
    responses_active: Arc<AtomicUsize>,
    compact_active: Arc<AtomicUsize>,
    websocket_active: Arc<AtomicUsize>,
    standard_active: Arc<AtomicUsize>,
    /// Mutex/condvar pair that blocked admitters wait on until a slot frees.
    wait: Arc<(Mutex<()>, Condvar)>,
    /// Bumped whenever an in-flight request releases its slot, so waiters can
    /// detect progress.
    inflight_release_revision: Arc<AtomicU64>,
    limits: RuntimeProxyLaneLimits,
}
impl RuntimeProxyLaneAdmission {
    /// Build a fresh admission tracker with every lane counter at zero.
    fn new(limits: RuntimeProxyLaneLimits) -> Self {
        let zero_counter = || Arc::new(AtomicUsize::new(0));
        Self {
            responses_active: zero_counter(),
            compact_active: zero_counter(),
            websocket_active: zero_counter(),
            standard_active: zero_counter(),
            wait: Arc::new((Mutex::new(()), Condvar::new())),
            inflight_release_revision: Arc::new(AtomicU64::new(0)),
            limits,
        }
    }
    /// Shared handle to the active-request counter for `lane`.
    fn active_counter(&self, lane: RuntimeRouteKind) -> Arc<AtomicUsize> {
        let counter = match lane {
            RuntimeRouteKind::Responses => &self.responses_active,
            RuntimeRouteKind::Compact => &self.compact_active,
            RuntimeRouteKind::Websocket => &self.websocket_active,
            RuntimeRouteKind::Standard => &self.standard_active,
        };
        Arc::clone(counter)
    }
    /// Configured concurrency cap for `lane`.
    fn limit(&self, lane: RuntimeRouteKind) -> usize {
        match lane {
            RuntimeRouteKind::Responses => self.limits.responses,
            RuntimeRouteKind::Compact => self.limits.compact,
            RuntimeRouteKind::Websocket => self.limits.websocket,
            RuntimeRouteKind::Standard => self.limits.standard,
        }
    }
}
/// Derive per-lane concurrency caps from the global active-request limit.
///
/// Each lane takes an env var / policy override, then is clamped so no lane
/// ever exceeds `global_limit` and never drops below 1. The exact ordering of
/// `clamp`/`min`/`max` here is load-bearing — overrides are bounded after
/// being read, not before.
fn runtime_proxy_lane_limits(
    global_limit: usize,
    worker_count: usize,
    long_lived_worker_count: usize,
) -> RuntimeProxyLaneLimits {
    let global_limit = global_limit.max(1);
    RuntimeProxyLaneLimits {
        // Responses lane gets the lion's share: 3/4 of the global budget.
        responses: usize_override_with_policy(
            "PRODEX_RUNTIME_PROXY_RESPONSES_ACTIVE_LIMIT",
            runtime_policy_proxy().and_then(|policy| policy.responses_active_limit),
            (global_limit.saturating_mul(3) / 4).clamp(4, global_limit),
        )
        .min(global_limit)
        .max(1),
        // Compact lane is intentionally narrow (at most 6 by default).
        compact: usize_override_with_policy(
            "PRODEX_RUNTIME_PROXY_COMPACT_ACTIVE_LIMIT",
            runtime_policy_proxy().and_then(|policy| policy.compact_active_limit),
            (global_limit / 4).clamp(2, 6).min(global_limit),
        )
        .min(global_limit)
        .max(1),
        // Websocket lane defaults to the long-lived worker pool size.
        websocket: usize_override_with_policy(
            "PRODEX_RUNTIME_PROXY_WEBSOCKET_ACTIVE_LIMIT",
            runtime_policy_proxy().and_then(|policy| policy.websocket_active_limit),
            long_lived_worker_count.clamp(2, global_limit),
        )
        .min(global_limit)
        .max(1),
        // Standard lane defaults to half the general worker pool, capped at 8.
        standard: usize_override_with_policy(
            "PRODEX_RUNTIME_PROXY_STANDARD_ACTIVE_LIMIT",
            runtime_policy_proxy().and_then(|policy| policy.standard_active_limit),
            (worker_count / 2).clamp(2, 8).min(global_limit),
        )
        .min(global_limit)
        .max(1),
    }
}
/// Parse `key=value` tokens out of a whitespace-separated log message.
///
/// Tokens without `=`, with an empty key, or with an empty value are skipped;
/// surrounding double quotes are stripped from values. Later duplicates of a
/// key overwrite earlier ones.
fn runtime_proxy_log_fields(message: &str) -> BTreeMap<String, String> {
    message
        .split_whitespace()
        .filter_map(|token| token.split_once('='))
        .filter(|(key, value)| !key.is_empty() && !value.is_empty())
        .map(|(key, value)| (key.to_string(), value.trim_matches('"').to_string()))
        .collect()
}
/// Extract the event name from a log message: the first whitespace-separated
/// token that is not a `key=value` field.
///
/// Returns `None` when the message is empty/blank or every token carries an
/// `=`.
fn runtime_proxy_log_event(message: &str) -> Option<&str> {
    // `split_whitespace` never yields empty tokens, so the extra
    // `!token.is_empty()` filter the original carried was dead code.
    message.split_whitespace().find(|token| !token.contains('='))
}
/// Append one log line to `log_path`, formatted per the configured log format.
///
/// Text mode emits `[timestamp] message`; JSON mode emits an object with
/// `timestamp`, `pid`, `message`, plus an optional `event` token and parsed
/// `key=value` `fields`. All I/O failures are deliberately swallowed — logging
/// must never take the proxy down.
fn runtime_proxy_log_to_path(log_path: &Path, message: &str) {
    let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S%.3f %:z");
    // Keep each record on a single line regardless of caller input.
    let sanitized = message.replace(['\r', '\n'], " ");
    let line = match runtime_proxy_log_format() {
        RuntimeLogFormat::Text => format!("[{timestamp}] {sanitized}\n"),
        RuntimeLogFormat::Json => {
            let mut value = serde_json::Map::new();
            value.insert(
                "timestamp".to_string(),
                serde_json::Value::String(timestamp.to_string()),
            );
            value.insert(
                "pid".to_string(),
                serde_json::Value::Number(std::process::id().into()),
            );
            value.insert(
                "message".to_string(),
                serde_json::Value::String(sanitized.clone()),
            );
            // First non-`key=value` token is treated as the event name.
            if let Some(event) = runtime_proxy_log_event(&sanitized) {
                value.insert(
                    "event".to_string(),
                    serde_json::Value::String(event.to_string()),
                );
            }
            let fields = runtime_proxy_log_fields(&sanitized);
            if !fields.is_empty() {
                value.insert(
                    "fields".to_string(),
                    serde_json::Value::Object(
                        fields
                            .into_iter()
                            .map(|(key, value)| (key, serde_json::Value::String(value)))
                            .collect(),
                    ),
                );
            }
            // Fall back to the plain-text shape if serialization fails.
            match serde_json::to_string(&serde_json::Value::Object(value)) {
                Ok(serialized) => format!("{serialized}\n"),
                Err(_) => format!("[{timestamp}] {sanitized}\n"),
            }
        }
    };
    if let Ok(mut file) = fs::OpenOptions::new()
        .create(true)
        .append(true)
        .open(log_path)
    {
        let _ = file.write_all(line.as_bytes());
        let _ = file.flush();
    }
}
/// Lazily-initialized global map: log path → whether persistence is enabled.
fn runtime_persistence_mode_by_log_path() -> &'static Mutex<BTreeMap<PathBuf, bool>> {
    RUNTIME_PERSISTENCE_MODE_BY_LOG_PATH.get_or_init(Default::default)
}
/// Record whether the process owning `log_path` is allowed to persist state.
fn register_runtime_proxy_persistence_mode(log_path: &Path, enabled: bool) {
    runtime_persistence_mode_by_log_path()
        .lock()
        // A poisoned lock still carries valid data; recover and proceed.
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .insert(log_path.to_path_buf(), enabled);
}
/// Drop the persistence-mode entry for `log_path`, if any.
fn unregister_runtime_proxy_persistence_mode(log_path: &Path) {
    runtime_persistence_mode_by_log_path()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .remove(log_path);
}
/// Whether the process owning `log_path` may persist state.
/// Unregistered paths default to `true`.
fn runtime_proxy_persistence_enabled_for_log_path(log_path: &Path) -> bool {
    let modes = runtime_persistence_mode_by_log_path()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    match modes.get(log_path) {
        Some(enabled) => *enabled,
        None => true,
    }
}
/// Convenience wrapper: persistence-enabled check keyed by the shared
/// proxy state's log path.
fn runtime_proxy_persistence_enabled(shared: &RuntimeRotationProxyShared) -> bool {
    let log_path = shared.log_path.as_path();
    runtime_proxy_persistence_enabled_for_log_path(log_path)
}
/// Lazily-initialized global map: log path → broker metadata.
fn runtime_broker_metadata_by_log_path() -> &'static Mutex<BTreeMap<PathBuf, RuntimeBrokerMetadata>>
{
    RUNTIME_BROKER_METADATA_BY_LOG_PATH.get_or_init(Default::default)
}
/// Store broker metadata for the process owning `log_path`.
fn register_runtime_broker_metadata(log_path: &Path, metadata: RuntimeBrokerMetadata) {
    runtime_broker_metadata_by_log_path()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .insert(log_path.to_path_buf(), metadata);
}
/// Drop the broker metadata entry for `log_path`, if any.
fn unregister_runtime_broker_metadata(log_path: &Path) {
    runtime_broker_metadata_by_log_path()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner())
        .remove(log_path);
}
/// Look up a clone of the broker metadata registered for `log_path`.
#[allow(dead_code)]
fn runtime_broker_metadata_for_log_path(log_path: &Path) -> Option<RuntimeBrokerMetadata> {
    let metadata_by_path = runtime_broker_metadata_by_log_path()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    metadata_by_path.get(log_path).cloned()
}
/// Queue a full runtime-state snapshot for persistence (or save it inline
/// when compiled for tests).
///
/// Followers — processes whose persistence mode is disabled for this log
/// path — never write state. Each save carries a monotonically increasing
/// revision so the background worker can drop snapshots that are already
/// stale by the time they run. Under `cfg!(test)` the save happens
/// synchronously so tests can observe the file immediately.
fn schedule_runtime_state_save(
    shared: &RuntimeRotationProxyShared,
    state: AppState,
    continuations: RuntimeContinuationStore,
    profile_scores: BTreeMap<String, RuntimeProfileHealth>,
    usage_snapshots: BTreeMap<String, RuntimeProfileUsageSnapshot>,
    backoffs: RuntimeProfileBackoffs,
    paths: AppPaths,
    reason: &str,
) {
    if !runtime_proxy_persistence_enabled(shared) {
        runtime_proxy_log(
            shared,
            format!(
                "state_save_suppressed role=follower reason={reason} path={}",
                paths.state_file.display()
            ),
        );
        return;
    }
    // Claim the next revision up front; later saves supersede this one.
    let revision = shared.state_save_revision.fetch_add(1, Ordering::SeqCst) + 1;
    let queued_at = Instant::now();
    let ready_at = queued_at + runtime_state_save_debounce(reason);
    let state_profiles = state.profiles.clone();
    // Only certain save reasons also flush the continuation journal.
    let journal_continuations = runtime_state_save_reason_requires_continuation_journal(reason)
        .then(|| continuations.clone());
    if cfg!(test) {
        // Inline path: write synchronously, still honoring the revision check.
        runtime_proxy_log(
            shared,
            format!(
                "state_save_inline revision={} reason={} ready_in_ms={}",
                revision,
                reason,
                ready_at.saturating_duration_since(queued_at).as_millis()
            ),
        );
        match save_runtime_state_snapshot_if_latest(
            &paths,
            &state,
            &continuations,
            &profile_scores,
            &usage_snapshots,
            &backoffs,
            revision,
            &shared.state_save_revision,
        ) {
            Ok(true) => runtime_proxy_log(
                shared,
                format!(
                    "state_save_ok revision={} reason={} lag_ms=0",
                    revision, reason
                ),
            ),
            // `Ok(false)`: a newer revision superseded this snapshot.
            Ok(false) => runtime_proxy_log(
                shared,
                format!(
                    "state_save_skipped revision={} reason={} lag_ms=0",
                    revision, reason
                ),
            ),
            Err(err) => runtime_proxy_log(
                shared,
                format!(
                    "state_save_error revision={} reason={} lag_ms=0 stage=write error={err:#}",
                    revision, reason
                ),
            ),
        }
        if let Some(continuations) = journal_continuations {
            schedule_runtime_continuation_journal_save(
                shared,
                continuations,
                state.profiles.clone(),
                paths,
                reason,
            );
        }
        return;
    }
    // Async path: coalesce by state-file path — only the newest job per
    // target file survives in the pending map.
    let queue = runtime_state_save_queue();
    let mut pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    pending.insert(
        paths.state_file.clone(),
        RuntimeStateSaveJob {
            payload: RuntimeStateSavePayload::Snapshot(RuntimeStateSaveSnapshot {
                paths: paths.clone(),
                state,
                continuations,
                profile_scores,
                usage_snapshots,
                backoffs,
            }),
            revision,
            latest_revision: Arc::clone(&shared.state_save_revision),
            log_path: shared.log_path.clone(),
            reason: reason.to_string(),
            queued_at,
            ready_at,
        },
    );
    let backlog = pending.len().saturating_sub(1);
    // Release the lock before notifying/logging.
    drop(pending);
    queue.wake.notify_one();
    runtime_proxy_log(
        shared,
        format!(
            "state_save_queued revision={} reason={} backlog={} ready_in_ms={}",
            revision,
            reason,
            backlog,
            ready_at.saturating_duration_since(queued_at).as_millis()
        ),
    );
    if runtime_proxy_queue_pressure_active(backlog, 0, 0) {
        runtime_proxy_log(
            shared,
            format!(
                "state_save_queue_backpressure revision={} reason={} backlog={backlog}",
                revision, reason
            ),
        );
    }
    if let Some(continuations) = journal_continuations {
        schedule_runtime_continuation_journal_save(
            shared,
            continuations,
            state_profiles,
            paths,
            reason,
        );
    }
}
/// Capture everything the persistence worker needs from a live runtime state
/// into an owned, self-contained snapshot.
fn runtime_state_save_snapshot_from_runtime(
    runtime: &RuntimeRotationState,
) -> RuntimeStateSaveSnapshot {
    let paths = runtime.paths.clone();
    let state = runtime.state.clone();
    let continuations = runtime_continuation_store_snapshot(runtime);
    let profile_scores = runtime.profile_health.clone();
    let usage_snapshots = runtime.profile_usage_snapshots.clone();
    let backoffs = runtime_profile_backoffs_snapshot(runtime);
    RuntimeStateSaveSnapshot {
        paths,
        state,
        continuations,
        profile_scores,
        usage_snapshots,
        backoffs,
    }
}
/// Snapshot the runtime state behind the shared handle's mutex.
///
/// Fails only if the mutex is poisoned (a writer panicked mid-update).
fn runtime_state_save_snapshot_from_shared(
    shared: &RuntimeRotationProxyShared,
) -> Result<RuntimeStateSaveSnapshot> {
    match shared.runtime.lock() {
        Ok(runtime) => Ok(runtime_state_save_snapshot_from_runtime(&runtime)),
        Err(_) => Err(anyhow::anyhow!("runtime auto-rotate state is poisoned")),
    }
}
/// Queue a state save from an already-locked runtime reference.
///
/// Unlike `schedule_runtime_state_save`, the queued payload is `Live` — the
/// worker re-snapshots the shared state when it actually runs, so the save
/// reflects the freshest data rather than this call's view. Under
/// `cfg!(test)` it delegates to the snapshot-based inline path instead.
fn schedule_runtime_state_save_from_runtime(
    shared: &RuntimeRotationProxyShared,
    runtime: &RuntimeRotationState,
    reason: &str,
) {
    // Followers never persist; silently skip (no suppression log here).
    if !runtime_proxy_persistence_enabled(shared) {
        return;
    }
    if cfg!(test) {
        schedule_runtime_state_save(
            shared,
            runtime.state.clone(),
            runtime_continuation_store_snapshot(runtime),
            runtime.profile_health.clone(),
            runtime.profile_usage_snapshots.clone(),
            runtime_profile_backoffs_snapshot(runtime),
            runtime.paths.clone(),
            reason,
        );
        return;
    }
    // Claim the next revision; the worker drops superseded jobs.
    let revision = shared.state_save_revision.fetch_add(1, Ordering::SeqCst) + 1;
    let queued_at = Instant::now();
    let ready_at = queued_at + runtime_state_save_debounce(reason);
    let queue = runtime_state_save_queue();
    let mut pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Coalesce by state-file path: newest job per target file wins.
    pending.insert(
        runtime.paths.state_file.clone(),
        RuntimeStateSaveJob {
            payload: RuntimeStateSavePayload::Live(shared.clone()),
            revision,
            latest_revision: Arc::clone(&shared.state_save_revision),
            log_path: shared.log_path.clone(),
            reason: reason.to_string(),
            queued_at,
            ready_at,
        },
    );
    let backlog = pending.len().saturating_sub(1);
    drop(pending);
    queue.wake.notify_one();
    runtime_proxy_log(
        shared,
        format!(
            "state_save_queued revision={} reason={} backlog={} ready_in_ms={}",
            revision,
            reason,
            backlog,
            ready_at.saturating_duration_since(queued_at).as_millis()
        ),
    );
    if runtime_proxy_queue_pressure_active(backlog, 0, 0) {
        runtime_proxy_log(
            shared,
            format!(
                "state_save_queue_backpressure revision={} reason={} backlog={backlog}",
                revision, reason
            ),
        );
    }
    if runtime_state_save_reason_requires_continuation_journal(reason) {
        schedule_runtime_continuation_journal_save_from_runtime(shared, runtime, reason);
    }
}
/// Queues a (possibly debounced) write of the continuation journal, keyed by
/// the journal file path so repeated requests coalesce.
///
/// Under `cfg!(test)` the write is delegated synchronously to
/// `schedule_runtime_continuation_journal_save` with explicit snapshots;
/// otherwise a `Live` payload is enqueued and the worker snapshots the
/// runtime at write time.
fn schedule_runtime_continuation_journal_save_from_runtime(
    shared: &RuntimeRotationProxyShared,
    runtime: &RuntimeRotationState,
    reason: &str,
) {
    // Persistence-disabled instances never write.
    if !runtime_proxy_persistence_enabled(shared) {
        return;
    }
    if cfg!(test) {
        // Deterministic inline path for tests.
        schedule_runtime_continuation_journal_save(
            shared,
            runtime_continuation_store_snapshot(runtime),
            runtime.state.profiles.clone(),
            runtime.paths.clone(),
            reason,
        );
        return;
    }
    let queue = runtime_continuation_journal_save_queue();
    let journal_path = runtime_continuation_journal_file_path(&runtime.paths);
    let queued_at = Instant::now();
    // Hot continuation reasons are debounced (see
    // runtime_continuation_journal_save_debounce).
    let ready_at = queued_at + runtime_continuation_journal_save_debounce(reason);
    let mut pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Insert replaces any older pending job for the same journal file.
    pending.insert(
        journal_path,
        RuntimeContinuationJournalSaveJob {
            payload: RuntimeContinuationJournalSavePayload::Live(shared.clone()),
            log_path: shared.log_path.clone(),
            reason: reason.to_string(),
            saved_at: Local::now().timestamp(),
            queued_at,
            ready_at,
        },
    );
    let backlog = pending.len().saturating_sub(1);
    drop(pending);
    queue.wake.notify_one();
    runtime_proxy_log(
        shared,
        format!(
            "continuation_journal_save_queued reason={} backlog={} ready_in_ms={}",
            reason,
            backlog,
            ready_at.saturating_duration_since(queued_at).as_millis()
        ),
    );
    if runtime_proxy_queue_pressure_active(0, backlog, 0) {
        runtime_proxy_log(
            shared,
            format!(
                "continuation_journal_queue_backpressure reason={} backlog={backlog}",
                reason
            ),
        );
    }
}
/// Returns the process-global state-save queue, lazily constructing it and
/// spawning its single worker thread on first use.
fn runtime_state_save_queue() -> Arc<RuntimeStateSaveQueue> {
    let queue = RUNTIME_STATE_SAVE_QUEUE.get_or_init(|| {
        let created = Arc::new(RuntimeStateSaveQueue {
            pending: Mutex::new(BTreeMap::new()),
            wake: Condvar::new(),
            active: Arc::new(AtomicUsize::new(0)),
        });
        let for_worker = created.clone();
        thread::spawn(move || runtime_state_save_worker_loop(for_worker));
        created
    });
    Arc::clone(queue)
}
/// Returns the process-global continuation-journal save queue, lazily
/// constructing it and spawning its single worker thread on first use.
fn runtime_continuation_journal_save_queue() -> Arc<RuntimeContinuationJournalSaveQueue> {
    let queue = RUNTIME_CONTINUATION_JOURNAL_SAVE_QUEUE.get_or_init(|| {
        let created = Arc::new(RuntimeContinuationJournalSaveQueue {
            pending: Mutex::new(BTreeMap::new()),
            wake: Condvar::new(),
            active: Arc::new(AtomicUsize::new(0)),
        });
        let for_worker = created.clone();
        thread::spawn(move || runtime_continuation_journal_save_worker_loop(for_worker));
        created
    });
    Arc::clone(queue)
}
/// Returns the process-global probe-refresh queue, lazily constructing it and
/// spawning its worker pool (size from `runtime_probe_refresh_worker_count`)
/// on first use.
fn runtime_probe_refresh_queue() -> Arc<RuntimeProbeRefreshQueue> {
    let queue = RUNTIME_PROBE_REFRESH_QUEUE.get_or_init(|| {
        let created = Arc::new(RuntimeProbeRefreshQueue {
            pending: Mutex::new(BTreeMap::new()),
            wake: Condvar::new(),
            active: Arc::new(AtomicUsize::new(0)),
        });
        for _ in 0..runtime_probe_refresh_worker_count() {
            let for_worker = created.clone();
            thread::spawn(move || runtime_probe_refresh_worker_loop(for_worker));
        }
        created
    });
    Arc::clone(queue)
}
/// Number of queued state-save jobs not yet claimed by the worker.
fn runtime_state_save_queue_backlog() -> usize {
    let queue = runtime_state_save_queue();
    let pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    pending.len()
}
/// Number of state-save jobs currently being executed by the worker.
#[allow(dead_code)]
fn runtime_state_save_queue_active() -> usize {
    let queue = runtime_state_save_queue();
    queue.active.load(Ordering::SeqCst)
}
/// Number of queued continuation-journal jobs not yet claimed by the worker.
fn runtime_continuation_journal_queue_backlog() -> usize {
    let queue = runtime_continuation_journal_save_queue();
    let pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    pending.len()
}
/// Number of continuation-journal jobs currently being executed.
#[allow(dead_code)]
fn runtime_continuation_journal_queue_active() -> usize {
    let queue = runtime_continuation_journal_save_queue();
    queue.active.load(Ordering::SeqCst)
}
/// Number of queued probe-refresh jobs not yet claimed by a worker.
fn runtime_probe_refresh_queue_backlog() -> usize {
    let queue = runtime_probe_refresh_queue();
    let pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    pending.len()
}
/// Number of probe-refresh jobs currently being executed by workers.
#[allow(dead_code)]
fn runtime_probe_refresh_queue_active() -> usize {
    let queue = runtime_probe_refresh_queue();
    queue.active.load(Ordering::SeqCst)
}
/// True when any of the three background queues has reached its configured
/// backpressure threshold.
fn runtime_proxy_queue_pressure_active(
    state_save_backlog: usize,
    continuation_journal_backlog: usize,
    probe_refresh_backlog: usize,
) -> bool {
    let checks = [
        (state_save_backlog, RUNTIME_STATE_SAVE_QUEUE_PRESSURE_THRESHOLD),
        (
            continuation_journal_backlog,
            RUNTIME_CONTINUATION_JOURNAL_QUEUE_PRESSURE_THRESHOLD,
        ),
        (probe_refresh_backlog, RUNTIME_PROBE_REFRESH_QUEUE_PRESSURE_THRESHOLD),
    ];
    checks
        .into_iter()
        .any(|(backlog, threshold)| backlog >= threshold)
}
/// Queues a continuation-journal write from explicit snapshots.
///
/// Unlike the `_from_runtime` variant, this takes owned copies of the
/// continuation store and profile map, so the queued job does not need to
/// lock the runtime at write time. When persistence is disabled the request
/// is logged as suppressed; under `cfg!(test)` the write happens inline.
fn schedule_runtime_continuation_journal_save(
    shared: &RuntimeRotationProxyShared,
    continuations: RuntimeContinuationStore,
    profiles: BTreeMap<String, ProfileEntry>,
    paths: AppPaths,
    reason: &str,
) {
    if !runtime_proxy_persistence_enabled(shared) {
        // Followers log the suppressed write instead of silently dropping it.
        runtime_proxy_log(
            shared,
            format!(
                "continuation_journal_save_suppressed role=follower reason={reason} path={}",
                runtime_continuation_journal_file_path(&paths).display()
            ),
        );
        return;
    }
    if cfg!(test) {
        // Inline synchronous write so tests observe the journal immediately.
        runtime_proxy_log(
            shared,
            format!("continuation_journal_save_inline reason={reason} backlog=0"),
        );
        let saved_at = Local::now().timestamp();
        match save_runtime_continuation_journal_for_profiles(
            &paths,
            &continuations,
            &profiles,
            saved_at,
        ) {
            Ok(()) => runtime_proxy_log(
                shared,
                format!(
                    "continuation_journal_save_ok saved_at={} reason={} lag_ms=0",
                    saved_at, reason
                ),
            ),
            Err(err) => runtime_proxy_log(
                shared,
                format!(
                    "continuation_journal_save_error saved_at={} reason={} lag_ms=0 stage=write error={err:#}",
                    saved_at, reason
                ),
            ),
        }
        return;
    }
    let queue = runtime_continuation_journal_save_queue();
    let journal_path = runtime_continuation_journal_file_path(&paths);
    let queued_at = Instant::now();
    // Hot continuation reasons are debounced; others are written immediately.
    let ready_at = queued_at + runtime_continuation_journal_save_debounce(reason);
    let mut pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Insert replaces any older pending job keyed by the same journal file;
    // the payload is a frozen snapshot rather than a live handle.
    pending.insert(
        journal_path,
        RuntimeContinuationJournalSaveJob {
            payload: RuntimeContinuationJournalSavePayload::Snapshot(
                RuntimeContinuationJournalSnapshot {
                    paths,
                    continuations,
                    profiles,
                },
            ),
            log_path: shared.log_path.clone(),
            reason: reason.to_string(),
            saved_at: Local::now().timestamp(),
            queued_at,
            ready_at,
        },
    );
    let backlog = pending.len().saturating_sub(1);
    drop(pending);
    queue.wake.notify_one();
    runtime_proxy_log(
        shared,
        format!(
            "continuation_journal_save_queued reason={} backlog={} ready_in_ms={}",
            reason,
            backlog,
            ready_at.saturating_duration_since(queued_at).as_millis()
        ),
    );
    if runtime_proxy_queue_pressure_active(0, backlog, 0) {
        runtime_proxy_log(
            shared,
            format!(
                "continuation_journal_queue_backpressure reason={} backlog={backlog}",
                reason
            ),
        );
    }
}
/// Freezes the continuation-journal inputs (paths, continuation store,
/// profile map) out of a locked runtime into an owned snapshot.
fn runtime_continuation_journal_snapshot_from_runtime(
    runtime: &RuntimeRotationState,
) -> RuntimeContinuationJournalSnapshot {
    let paths = runtime.paths.clone();
    let continuations = runtime_continuation_store_snapshot(runtime);
    let profiles = runtime.state.profiles.clone();
    RuntimeContinuationJournalSnapshot {
        paths,
        continuations,
        profiles,
    }
}
/// Locks the shared runtime and freezes a continuation-journal snapshot.
///
/// Returns an error only when the runtime mutex is poisoned.
fn runtime_continuation_journal_snapshot_from_shared(
    shared: &RuntimeRotationProxyShared,
) -> Result<RuntimeContinuationJournalSnapshot> {
    match shared.runtime.lock() {
        Ok(runtime) => Ok(runtime_continuation_journal_snapshot_from_runtime(&runtime)),
        Err(_) => Err(anyhow::anyhow!("runtime auto-rotate state is poisoned")),
    }
}
/// Background worker for the state-save queue: sleeps until jobs exist,
/// waits out per-job debounce deadlines, then writes each due snapshot.
///
/// NOTE(review): the due-job selection here is written inline, while the
/// continuation-journal worker uses `runtime_take_due_scheduled_jobs` for the
/// same shape of logic — consider unifying if that helper is generic enough.
fn runtime_state_save_worker_loop(queue: Arc<RuntimeStateSaveQueue>) {
    loop {
        let jobs = {
            let mut pending = queue
                .pending
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            // Block until at least one job is queued.
            while pending.is_empty() {
                pending = queue
                    .wake
                    .wait(pending)
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
            }
            // Wait for the earliest ready_at, then drain every job that is
            // due at that moment; a wake during the timed wait re-evaluates.
            loop {
                let now = Instant::now();
                let next_ready_at = pending.values().map(|job| job.ready_at).min();
                let Some(next_ready_at) = next_ready_at else {
                    break BTreeMap::new();
                };
                if next_ready_at <= now {
                    let due_keys = pending
                        .iter()
                        .filter_map(|(key, job)| (job.ready_at <= now).then_some(key.clone()))
                        .collect::<Vec<_>>();
                    let mut due = BTreeMap::new();
                    for key in due_keys {
                        if let Some(job) = pending.remove(&key) {
                            due.insert(key, job);
                        }
                    }
                    break due;
                }
                let wait_for = next_ready_at.saturating_duration_since(now);
                let (next_pending, _) = queue
                    .wake
                    .wait_timeout(pending, wait_for)
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                pending = next_pending;
            }
        };
        // Lock released here; execute the drained jobs without holding it.
        for (_, job) in jobs {
            queue.active.fetch_add(1, Ordering::SeqCst);
            // Live payloads snapshot the runtime now; Snapshot payloads were
            // frozen at enqueue time.
            let snapshot = match &job.payload {
                RuntimeStateSavePayload::Snapshot(snapshot) => Ok(snapshot.clone()),
                RuntimeStateSavePayload::Live(shared) => {
                    runtime_state_save_snapshot_from_shared(shared)
                }
            };
            // `_if_latest` compares job.revision against the shared counter so
            // superseded writes are skipped (logged as state_save_skipped).
            match snapshot.and_then(|snapshot| {
                save_runtime_state_snapshot_if_latest(
                    &snapshot.paths,
                    &snapshot.state,
                    &snapshot.continuations,
                    &snapshot.profile_scores,
                    &snapshot.usage_snapshots,
                    &snapshot.backoffs,
                    job.revision,
                    &job.latest_revision,
                )
            }) {
                Ok(true) => runtime_proxy_log_to_path(
                    &job.log_path,
                    &format!(
                        "state_save_ok revision={} reason={} lag_ms={}",
                        job.revision,
                        job.reason,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
                Ok(false) => runtime_proxy_log_to_path(
                    &job.log_path,
                    &format!(
                        "state_save_skipped revision={} reason={} lag_ms={}",
                        job.revision,
                        job.reason,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
                Err(err) => runtime_proxy_log_to_path(
                    &job.log_path,
                    &format!(
                        "state_save_error revision={} reason={} lag_ms={} stage=write error={err:#}",
                        job.revision,
                        job.reason,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
            }
            queue.active.fetch_sub(1, Ordering::SeqCst);
        }
    }
}
/// Background worker for the continuation-journal queue: sleeps until jobs
/// exist, honors per-job debounce deadlines via
/// `runtime_take_due_scheduled_jobs`, then writes each due journal snapshot.
fn runtime_continuation_journal_save_worker_loop(queue: Arc<RuntimeContinuationJournalSaveQueue>) {
    loop {
        let jobs = {
            let mut pending = queue
                .pending
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            // Block until at least one job is queued.
            while pending.is_empty() {
                pending = queue
                    .wake
                    .wait(pending)
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
            }
            // Either drain the due jobs or do a timed wait until the
            // earliest deadline; a wake during the wait re-evaluates.
            loop {
                match runtime_take_due_scheduled_jobs(&mut pending, Instant::now()) {
                    RuntimeDueJobs::Due(jobs) => break jobs,
                    RuntimeDueJobs::Wait(wait_for) => {
                        let (next_pending, _) = queue
                            .wake
                            .wait_timeout(pending, wait_for)
                            .unwrap_or_else(|poisoned| poisoned.into_inner());
                        pending = next_pending;
                    }
                }
            }
        };
        // Lock released here; execute the drained jobs without holding it.
        for (_, job) in jobs {
            queue.active.fetch_add(1, Ordering::SeqCst);
            // Live payloads snapshot the runtime now; Snapshot payloads were
            // frozen at enqueue time.
            let snapshot = match &job.payload {
                RuntimeContinuationJournalSavePayload::Snapshot(snapshot) => Ok(snapshot.clone()),
                RuntimeContinuationJournalSavePayload::Live(shared) => {
                    runtime_continuation_journal_snapshot_from_shared(shared)
                }
            };
            match snapshot.and_then(|snapshot| {
                save_runtime_continuation_journal_for_profiles(
                    &snapshot.paths,
                    &snapshot.continuations,
                    &snapshot.profiles,
                    job.saved_at,
                )
            }) {
                Ok(()) => runtime_proxy_log_to_path(
                    &job.log_path,
                    &format!(
                        "continuation_journal_save_ok saved_at={} reason={} lag_ms={}",
                        job.saved_at,
                        job.reason,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
                Err(err) => runtime_proxy_log_to_path(
                    &job.log_path,
                    &format!(
                        "continuation_journal_save_error saved_at={} reason={} lag_ms={} stage=write error={err:#}",
                        job.saved_at,
                        job.reason,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
            }
            queue.active.fetch_sub(1, Ordering::SeqCst);
        }
    }
}
/// True when a state-save `reason` denotes a continuation-affecting change
/// that must also be written to the continuation journal.
fn runtime_state_save_reason_requires_continuation_journal(reason: &str) -> bool {
    const JOURNAL_REASON_PREFIXES: [&str; 5] = [
        "response_ids:",
        "turn_state:",
        "session_id:",
        "compact_lineage:",
        "compact_lineage_release:",
    ];
    JOURNAL_REASON_PREFIXES
        .iter()
        .any(|prefix| reason.starts_with(prefix))
}
/// True when `reason` denotes a high-frequency ("hot") continuation-state
/// update; such saves are debounced rather than written immediately.
fn runtime_hot_continuation_state_reason(reason: &str) -> bool {
    const HOT_REASON_PREFIXES: [&str; 11] = [
        "response_ids:",
        "previous_response_owner:",
        "response_touch:",
        "turn_state:",
        "turn_state_touch:",
        "session_id:",
        "session_touch:",
        "compact_lineage:",
        "compact_lineage_release:",
        "compact_session_touch:",
        "compact_turn_state_touch:",
    ];
    HOT_REASON_PREFIXES
        .iter()
        .any(|prefix| reason.starts_with(prefix))
}
/// Debounce delay for a state save: hot continuation reasons wait the
/// configured window; everything else is written immediately.
fn runtime_state_save_debounce(reason: &str) -> Duration {
    match runtime_hot_continuation_state_reason(reason) {
        true => Duration::from_millis(RUNTIME_STATE_SAVE_DEBOUNCE_MS),
        false => Duration::ZERO,
    }
}
/// Debounce delay for a continuation-journal save; mirrors the state-save
/// policy (same window, same hot-reason classification).
fn runtime_continuation_journal_save_debounce(reason: &str) -> Duration {
    match runtime_hot_continuation_state_reason(reason) {
        true => Duration::from_millis(RUNTIME_STATE_SAVE_DEBOUNCE_MS),
        false => Duration::ZERO,
    }
}
/// Queues a background usage-probe refresh for `profile_name`, deduplicating
/// against an already-pending job for the same (state file, profile) pair.
///
/// Silently does nothing when the runtime mutex is poisoned or when a job for
/// this pair is already queued.
fn schedule_runtime_probe_refresh(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    codex_home: &Path,
) {
    // Snapshot what we need while holding the runtime lock briefly.
    let (state_file, upstream_base_url) = match shared.runtime.lock() {
        Ok(runtime) => (
            runtime.paths.state_file.clone(),
            runtime.upstream_base_url.clone(),
        ),
        Err(_) => return,
    };
    let queue = runtime_probe_refresh_queue();
    let mut pending = queue
        .pending
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Build the dedup key once and use the entry API so the map is probed a
    // single time (the original did contains_key + insert, constructing the
    // key twice).
    let key = (state_file, profile_name.to_string());
    let queued_at = Instant::now();
    match pending.entry(key) {
        // Already queued for this (state file, profile): do not re-queue,
        // wake, or log — matches the original early return.
        std::collections::btree_map::Entry::Occupied(_) => return,
        std::collections::btree_map::Entry::Vacant(slot) => {
            slot.insert(RuntimeProbeRefreshJob {
                shared: shared.clone(),
                profile_name: profile_name.to_string(),
                codex_home: codex_home.to_path_buf(),
                upstream_base_url,
                queued_at,
            });
        }
    }
    let backlog = pending.len().saturating_sub(1);
    drop(pending);
    queue.wake.notify_one();
    runtime_proxy_log(
        shared,
        format!(
            "profile_probe_refresh_queued profile={profile_name} reason=queued backlog={backlog}"
        ),
    );
    if runtime_proxy_queue_pressure_active(0, 0, backlog) {
        runtime_proxy_log(
            shared,
            format!("profile_probe_refresh_backpressure profile={profile_name} backlog={backlog}"),
        );
    }
}
/// Selects, in rotation order, up to `RUNTIME_STARTUP_PROBE_WARM_LIMIT`
/// profiles whose probe cache is not fresh AND whose usage snapshot is not
/// usable at `now` — i.e. the profiles worth probing at startup.
fn runtime_profiles_needing_startup_probe_refresh(
    state: &AppState,
    current_profile: &str,
    profile_probe_cache: &BTreeMap<String, RuntimeProfileProbeCacheEntry>,
    profile_usage_snapshots: &BTreeMap<String, RuntimeProfileUsageSnapshot>,
    now: i64,
) -> Vec<String> {
    let mut needing_refresh = Vec::new();
    for profile_name in active_profile_selection_order(state, current_profile) {
        // Stop once the warm-up budget is filled.
        if needing_refresh.len() >= RUNTIME_STARTUP_PROBE_WARM_LIMIT {
            break;
        }
        let probe_fresh = profile_probe_cache.get(&profile_name).is_some_and(|entry| {
            runtime_profile_probe_cache_freshness(entry, now) == RuntimeProbeCacheFreshness::Fresh
        });
        let snapshot_usable = profile_usage_snapshots
            .get(&profile_name)
            .is_some_and(|snapshot| runtime_usage_snapshot_is_usable(snapshot, now));
        if !probe_fresh && !snapshot_usable {
            needing_refresh.push(profile_name);
        }
    }
    needing_refresh
}
/// Runs a batch of usage probes synchronously (in parallel via
/// `map_parallel`), applies each result to the runtime cache, and logs one
/// `{context}_ok` / `{context}_error` line per profile.
fn run_runtime_probe_jobs_inline(
    shared: &RuntimeRotationProxyShared,
    jobs: Vec<(String, PathBuf)>,
    context: &str,
) {
    if jobs.is_empty() {
        return;
    }
    // Bail out silently if the runtime mutex is poisoned.
    let upstream_base_url = match shared.runtime.lock() {
        Ok(runtime) => runtime.upstream_base_url.clone(),
        Err(_) => return,
    };
    let probe_reports = map_parallel(jobs, |(profile_name, codex_home)| {
        let auth = read_auth_summary(&codex_home);
        let result = if auth.quota_compatible {
            fetch_usage(&codex_home, Some(upstream_base_url.as_str()))
                .map_err(|err| err.to_string())
        } else {
            Err("auth mode is not quota-compatible".to_string())
        };
        (profile_name, auth, result)
    });
    for (profile_name, auth, result) in probe_reports {
        // Apply even failed probes so the probe cache records the attempt.
        let apply_result =
            apply_runtime_profile_probe_result(shared, &profile_name, auth, result.clone());
        match result {
            Ok(_) => runtime_proxy_log(
                shared,
                if let Err(err) = apply_result {
                    format!(
                        "{context}_error profile={} error=state_update:{err:#}",
                        profile_name
                    )
                } else {
                    format!("{context}_ok profile={profile_name}")
                },
            ),
            // NOTE(review): in this arm a state-update failure from
            // apply_runtime_profile_probe_result is dropped without logging;
            // only the probe error itself is reported.
            Err(err) => runtime_proxy_log(
                shared,
                format!("{context}_error profile={} error={err}", profile_name),
            ),
        }
    }
}
/// Warms the usage-probe cache at startup: probes a small number of profiles
/// synchronously (bounded by `runtime_startup_sync_probe_warm_limit`) and
/// queues the remainder as background refresh jobs.
///
/// Skipped entirely under local queue pressure; under `cfg!(test)` all
/// selected profiles are probed synchronously and nothing is queued.
fn schedule_runtime_startup_probe_warmup(shared: &RuntimeRotationProxyShared) {
    if runtime_proxy_pressure_mode_active(shared) {
        runtime_proxy_log(
            shared,
            "startup_probe_warmup deferred reason=local_pressure",
        );
        return;
    }
    // Copy what we need out of the runtime lock; bail if poisoned.
    let (state, current_profile, profile_probe_cache, profile_usage_snapshots) =
        match shared.runtime.lock() {
            Ok(runtime) => (
                runtime.state.clone(),
                runtime.current_profile.clone(),
                runtime.profile_probe_cache.clone(),
                runtime.profile_usage_snapshots.clone(),
            ),
            Err(_) => return,
        };
    let refresh_profiles = runtime_profiles_needing_startup_probe_refresh(
        &state,
        &current_profile,
        &profile_probe_cache,
        &profile_usage_snapshots,
        Local::now().timestamp(),
    );
    if refresh_profiles.is_empty() {
        return;
    }
    // Keep only profiles that still exist and whose auth mode supports
    // quota probing; pair each with its codex home.
    let refresh_jobs = refresh_profiles
        .into_iter()
        .filter_map(|profile_name| {
            let profile = state.profiles.get(&profile_name)?;
            read_auth_summary(&profile.codex_home)
                .quota_compatible
                .then(|| (profile_name, profile.codex_home.clone()))
        })
        .collect::<Vec<_>>();
    if refresh_jobs.is_empty() {
        return;
    }
    let sync_limit = runtime_startup_sync_probe_warm_limit();
    // Tests probe everything inline for determinism.
    let sync_count = if cfg!(test) {
        refresh_jobs.len()
    } else {
        sync_limit.min(refresh_jobs.len())
    };
    if sync_count > 0 {
        let sync_jobs = refresh_jobs
            .iter()
            .take(sync_count)
            .cloned()
            .collect::<Vec<_>>();
        runtime_proxy_log(
            shared,
            format!(
                "startup_probe_warmup sync={} profiles={}",
                sync_jobs.len(),
                sync_jobs
                    .iter()
                    .map(|(profile_name, _)| profile_name.as_str())
                    .collect::<Vec<_>>()
                    .join(",")
            ),
        );
        run_runtime_probe_jobs_inline(shared, sync_jobs, "startup_probe_warmup");
    }
    if cfg!(test) {
        return;
    }
    // Everything past the sync budget goes to the background queue.
    let async_jobs = refresh_jobs
        .into_iter()
        .skip(sync_count)
        .collect::<Vec<_>>();
    if async_jobs.is_empty() {
        return;
    }
    runtime_proxy_log(
        shared,
        format!(
            "startup_probe_warmup queued={} profiles={}",
            async_jobs.len(),
            async_jobs
                .iter()
                .map(|(profile_name, _)| profile_name.as_str())
                .collect::<Vec<_>>()
                .join(",")
        ),
    );
    for (profile_name, codex_home) in async_jobs {
        schedule_runtime_probe_refresh(shared, &profile_name, &codex_home);
    }
}
/// Records a probe outcome for `profile_name` into the runtime: updates the
/// probe cache, and on success also refreshes the usage snapshot, applies any
/// quota-driven quarantine backoff, and schedules a state save when something
/// persist-worthy changed.
///
/// Returns an error only when the runtime mutex is poisoned; probe failures
/// themselves are cached and reported as `Ok(())`.
fn apply_runtime_profile_probe_result(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    auth: AuthSummary,
    result: std::result::Result<UsageResponse, String>,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    // Cache the probe attempt (success or failure) with its timestamp.
    runtime.profile_probe_cache.insert(
        profile_name.to_string(),
        RuntimeProfileProbeCacheEntry {
            checked_at: now,
            auth,
            result: result.clone(),
        },
    );
    // Failed probes stop here: cache recorded, nothing else to derive.
    let Ok(usage) = result else {
        return Ok(());
    };
    let snapshot = runtime_profile_usage_snapshot_from_usage(&usage);
    // Remember prior values so we can detect material changes below.
    let previous_snapshot = runtime.profile_usage_snapshots.get(profile_name).cloned();
    let previous_retry_backoff = runtime
        .profile_retry_backoff_until
        .get(profile_name)
        .copied();
    let quota_summary =
        runtime_quota_summary_from_usage_snapshot(&snapshot, RuntimeRouteKind::Responses);
    // Only future reset times are meaningful as quarantine end points.
    let blocking_reset_at =
        runtime_quota_summary_blocking_reset_at(quota_summary, RuntimeRouteKind::Responses)
            .filter(|reset_at| *reset_at > now);
    // If the precommit guard would block this route, quarantine until the
    // blocking reset (or a fixed fallback window when none is known).
    let quarantine_until =
        runtime_quota_precommit_guard_reason(quota_summary, RuntimeRouteKind::Responses).map(
            |_| {
                blocking_reset_at.unwrap_or_else(|| {
                    now.saturating_add(RUNTIME_PROFILE_QUOTA_QUARANTINE_FALLBACK_SECONDS)
                })
            },
        );
    let mut quarantine_applied = None;
    if let Some(until) = quarantine_until {
        // Never shorten an existing backoff; keep the later deadline.
        let next_until = runtime
            .profile_retry_backoff_until
            .get(profile_name)
            .copied()
            .unwrap_or(until)
            .max(until);
        runtime
            .profile_retry_backoff_until
            .insert(profile_name.to_string(), next_until);
        quarantine_applied = Some(next_until);
    }
    let snapshot_should_persist =
        runtime_profile_usage_snapshot_should_persist(previous_snapshot.as_ref(), &snapshot, now);
    let retry_backoff_changed = runtime
        .profile_retry_backoff_until
        .get(profile_name)
        .copied()
        != previous_retry_backoff;
    runtime
        .profile_usage_snapshots
        .insert(profile_name.to_string(), snapshot);
    if quota_summary.route_band == RuntimeQuotaPressureBand::Exhausted {
        runtime_proxy_log(
            shared,
            format!(
                "quota_probe_exhausted profile={profile_name} reason=usage_snapshot_exhausted {}",
                runtime_quota_summary_log_fields(quota_summary)
            ),
        );
    }
    // Log quarantine only when the backoff deadline actually moved.
    if let Some(until) = quarantine_applied
        && retry_backoff_changed
    {
        runtime_proxy_log(
            shared,
            format!(
                "profile_quota_quarantine profile={profile_name} route={} until={} reset_at={} message=probe_snapshot",
                runtime_route_kind_label(RuntimeRouteKind::Responses),
                until,
                blocking_reset_at.unwrap_or(i64::MAX),
            ),
        );
        runtime_proxy_log(
            shared,
            format!("profile_retry_backoff profile={profile_name} until={until}"),
        );
    }
    if snapshot_should_persist || retry_backoff_changed {
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("usage_snapshot:{profile_name}"),
        );
    }
    Ok(())
}
/// True when two usage snapshots agree on every field that matters for
/// persistence (status, remaining percent, reset time for both windows);
/// `checked_at` is deliberately not compared.
fn runtime_profile_usage_snapshot_materially_matches(
    previous: &RuntimeProfileUsageSnapshot,
    next: &RuntimeProfileUsageSnapshot,
) -> bool {
    let five_hour_same = previous.five_hour_status == next.five_hour_status
        && previous.five_hour_remaining_percent == next.five_hour_remaining_percent
        && previous.five_hour_reset_at == next.five_hour_reset_at;
    let weekly_same = previous.weekly_status == next.weekly_status
        && previous.weekly_remaining_percent == next.weekly_remaining_percent
        && previous.weekly_reset_at == next.weekly_reset_at;
    five_hour_same && weekly_same
}
/// Decides whether a new usage snapshot is worth persisting: always when
/// there was no previous snapshot, otherwise when it materially differs or
/// the previous check is old enough to warrant a touch-persist.
fn runtime_profile_usage_snapshot_should_persist(
    previous: Option<&RuntimeProfileUsageSnapshot>,
    next: &RuntimeProfileUsageSnapshot,
    now: i64,
) -> bool {
    match previous {
        None => true,
        Some(previous) => {
            !runtime_profile_usage_snapshot_materially_matches(previous, next)
                || runtime_binding_touch_should_persist(previous.checked_at, now)
        }
    }
}
/// Background worker for the probe-refresh queue: blocks until jobs exist,
/// drains them all (no debounce on this queue), then fetches usage for each
/// profile and applies the result to the runtime cache.
fn runtime_probe_refresh_worker_loop(queue: Arc<RuntimeProbeRefreshQueue>) {
    loop {
        let jobs = {
            let mut pending = queue
                .pending
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            // Block until at least one job is queued.
            while pending.is_empty() {
                pending = queue
                    .wake
                    .wait(pending)
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
            }
            // Claim the whole map at once; lock is released right after.
            std::mem::take(&mut *pending)
        };
        for (_, job) in jobs {
            queue.active.fetch_add(1, Ordering::SeqCst);
            runtime_proxy_log(
                &job.shared,
                format!("profile_probe_refresh_start profile={}", job.profile_name),
            );
            let auth = read_auth_summary(&job.codex_home);
            let result = if auth.quota_compatible {
                fetch_usage(&job.codex_home, Some(job.upstream_base_url.as_str()))
                    .map_err(|err| err.to_string())
            } else {
                Err("auth mode is not quota-compatible".to_string())
            };
            // Apply even failed probes so the probe cache records the attempt.
            let apply_result = apply_runtime_profile_probe_result(
                &job.shared,
                &job.profile_name,
                auth,
                result.clone(),
            );
            match result {
                Ok(_) => runtime_proxy_log(
                    &job.shared,
                    if let Err(err) = apply_result {
                        format!(
                            "profile_probe_refresh_error profile={} lag_ms={} error=state_update:{err:#}",
                            job.profile_name,
                            job.queued_at.elapsed().as_millis()
                        )
                    } else {
                        format!(
                            "profile_probe_refresh_ok profile={} lag_ms={}",
                            job.profile_name,
                            job.queued_at.elapsed().as_millis()
                        )
                    },
                ),
                Err(err) => runtime_proxy_log(
                    &job.shared,
                    format!(
                        "profile_probe_refresh_error profile={} lag_ms={} error={err}",
                        job.profile_name,
                        job.queued_at.elapsed().as_millis()
                    ),
                ),
            }
            queue.active.fetch_sub(1, Ordering::SeqCst);
        }
    }
}
/// RAII guard over a locked JSON state file: the OS file lock is tied to the
/// open handle and is released when the guard is dropped.
#[derive(Debug)]
struct JsonFileLock {
    // Kept open for the guard's lifetime; dropping it would release the lock.
    file: fs::File,
}
impl Drop for JsonFileLock {
    fn drop(&mut self) {
        // Best-effort unlock; there is no meaningful way to report an error
        // from drop.
        let _ = self.file.unlock();
    }
}
/// Merges two last-run-selection maps, keeping the newest timestamp per
/// profile and dropping entries for profiles that no longer exist.
fn merge_last_run_selection(
    existing: &BTreeMap<String, i64>,
    incoming: &BTreeMap<String, i64>,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> BTreeMap<String, i64> {
    let mut merged = existing.clone();
    for (profile_name, &timestamp) in incoming {
        let slot = merged.entry(profile_name.clone()).or_insert(timestamp);
        // Keep whichever selection happened later.
        *slot = (*slot).max(timestamp);
    }
    merged.retain(|profile_name, _| profiles.contains_key(profile_name));
    merged
}
/// Drops last-run-selection entries that are older than the retention window
/// or whose profile no longer exists.
fn prune_last_run_selection(
    selections: &mut BTreeMap<String, i64>,
    profiles: &BTreeMap<String, ProfileEntry>,
    now: i64,
) {
    let oldest_allowed = now.saturating_sub(APP_STATE_LAST_RUN_RETENTION_SECONDS);
    selections.retain(|profile_name, &mut timestamp| {
        timestamp >= oldest_allowed && profiles.contains_key(profile_name)
    });
}
/// Merges two binding maps, preferring the binding with the later (or equal)
/// `bound_at` per key, then dropping bindings to nonexistent profiles.
fn merge_profile_bindings(
    existing: &BTreeMap<String, ResponseProfileBinding>,
    incoming: &BTreeMap<String, ResponseProfileBinding>,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> BTreeMap<String, ResponseProfileBinding> {
    let mut merged = existing.clone();
    for (response_id, binding) in incoming {
        match merged.get(response_id) {
            // Keep the existing binding only when it is strictly newer.
            Some(current) if current.bound_at > binding.bound_at => {}
            _ => {
                merged.insert(response_id.clone(), binding.clone());
            }
        }
    }
    merged.retain(|_, binding| profiles.contains_key(&binding.profile_name));
    merged
}
fn runtime_continuation_binding_lifecycle_rank(state: RuntimeContinuationBindingLifecycle) -> u8 {
match state {
RuntimeContinuationBindingLifecycle::Dead => 0,
RuntimeContinuationBindingLifecycle::Suspect => 1,
RuntimeContinuationBindingLifecycle::Warm => 2,
RuntimeContinuationBindingLifecycle::Verified => 3,
}
}
/// Builds a lexicographic sort key ordering continuation statuses by strength
/// of evidence: lifecycle rank, capped confidence, successes, inverted
/// not-found streak, presence of a verified route, then the three event
/// timestamps (missing timestamps sort lowest).
fn runtime_continuation_status_evidence_sort_key(
    status: &RuntimeContinuationBindingStatus,
) -> (u8, u32, u32, u32, u8, i64, i64, i64) {
    (
        runtime_continuation_binding_lifecycle_rank(status.state),
        status.confidence.min(RUNTIME_CONTINUATION_CONFIDENCE_MAX),
        status.success_count,
        // Inverted so that a longer not-found streak sorts lower.
        u32::MAX.saturating_sub(status.not_found_streak),
        // `u8::from(bool)` replaces the verbose `if … { 1 } else { 0 }`.
        u8::from(status.last_verified_route.is_some()),
        status.last_verified_at.unwrap_or(i64::MIN),
        status.last_touched_at.unwrap_or(i64::MIN),
        status.last_not_found_at.unwrap_or(i64::MIN),
    )
}
/// True when `candidate` carries strictly stronger evidence than `current`
/// under the evidence sort key.
fn runtime_continuation_status_is_more_evidenced(
    candidate: &RuntimeContinuationBindingStatus,
    current: &RuntimeContinuationBindingStatus,
) -> bool {
    let candidate_key = runtime_continuation_status_evidence_sort_key(candidate);
    let current_key = runtime_continuation_status_evidence_sort_key(current);
    candidate_key > current_key
}
/// Decides whether `candidate` should replace `current` when merging status
/// maps: most recent event wins, then terminal-ness, then evidence.
fn runtime_continuation_status_should_replace(
    candidate: &RuntimeContinuationBindingStatus,
    current: &RuntimeContinuationBindingStatus,
) -> bool {
    // 1. Most recent observed event wins. `Option`'s derived total order
    //    (`None < Some(_)`, `Some(a) < Some(b)` iff `a < b`) reproduces the
    //    original four-arm match exactly: a status with any event beats one
    //    with none, and equal timestamps fall through.
    let candidate_at = runtime_continuation_status_last_event_at(candidate);
    let current_at = runtime_continuation_status_last_event_at(current);
    if candidate_at != current_at {
        return candidate_at > current_at;
    }
    // 2. With identical recency, a terminal status outranks a non-terminal
    //    one (and vice versa); equal terminal-ness falls through.
    let candidate_terminal = runtime_continuation_status_is_terminal(candidate);
    let current_terminal = runtime_continuation_status_is_terminal(current);
    if candidate_terminal != current_terminal {
        return candidate_terminal;
    }
    // 3. Finally, compare accumulated evidence.
    runtime_continuation_status_is_more_evidenced(candidate, current)
}
/// Latest of the three event timestamps recorded on a status, or `None` when
/// the status has never observed any event.
fn runtime_continuation_status_last_event_at(
    status: &RuntimeContinuationBindingStatus,
) -> Option<i64> {
    let events = [
        status.last_not_found_at,
        status.last_verified_at,
        status.last_touched_at,
    ];
    events.iter().filter_map(|event| *event).max()
}
/// True when a status has reached a dead end: explicitly Dead, over the
/// not-found streak limit, or Suspect with zero confidence and at least one
/// recorded failure.
fn runtime_continuation_status_is_terminal(status: &RuntimeContinuationBindingStatus) -> bool {
    if status.state == RuntimeContinuationBindingLifecycle::Dead {
        return true;
    }
    if status.not_found_streak >= RUNTIME_CONTINUATION_SUSPECT_NOT_FOUND_STREAK_LIMIT {
        return true;
    }
    status.state == RuntimeContinuationBindingLifecycle::Suspect
        && status.confidence == 0
        && status.failure_count > 0
}
/// True when a Verified status has seen no event for at least the staleness
/// window and should be demoted.
fn runtime_continuation_status_is_stale_verified(
    status: &RuntimeContinuationBindingStatus,
    now: i64,
) -> bool {
    if status.state != RuntimeContinuationBindingLifecycle::Verified {
        return false;
    }
    match runtime_continuation_status_last_event_at(status) {
        Some(last) => now.saturating_sub(last) >= RUNTIME_CONTINUATION_VERIFIED_STALE_SECONDS,
        None => false,
    }
}
/// Demotes the status for `key` from Verified to Warm when it has gone stale;
/// returns true only when a demotion actually happened.
fn runtime_age_stale_verified_continuation_status(
    statuses: &mut RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    match runtime_continuation_status_map_mut(statuses, kind).get_mut(key) {
        Some(status) if runtime_continuation_status_is_stale_verified(status, now) => {
            status.state = RuntimeContinuationBindingLifecycle::Warm;
            true
        }
        _ => false,
    }
}
/// Retention rule for a status whose key still has a live binding: Dead is
/// never kept; Verified/Warm need any positive evidence or history; Suspect
/// must be under the streak limit, still confident, and within its grace
/// window since the last not-found.
fn runtime_continuation_status_should_retain_with_binding(
    status: &RuntimeContinuationBindingStatus,
    now: i64,
) -> bool {
    match status.state {
        RuntimeContinuationBindingLifecycle::Dead => false,
        RuntimeContinuationBindingLifecycle::Verified
        | RuntimeContinuationBindingLifecycle::Warm => {
            let has_evidence = status.confidence > 0 || status.success_count > 0;
            let has_history =
                status.last_verified_at.is_some() || status.last_touched_at.is_some();
            has_evidence || has_history
        }
        RuntimeContinuationBindingLifecycle::Suspect => {
            let within_grace = status.last_not_found_at.is_some_and(|last| {
                now.saturating_sub(last) < RUNTIME_CONTINUATION_SUSPECT_GRACE_SECONDS
            });
            status.not_found_streak < RUNTIME_CONTINUATION_SUSPECT_NOT_FOUND_STREAK_LIMIT
                && status.confidence > 0
                && within_grace
        }
    }
}
/// Retention rule for a status with no live binding: Dead entries linger only
/// within their grace window (so they can shadow re-creation); all other
/// states follow the with-binding rule.
fn runtime_continuation_status_should_retain_without_binding(
    status: &RuntimeContinuationBindingStatus,
    now: i64,
) -> bool {
    if status.state != RuntimeContinuationBindingLifecycle::Dead {
        return runtime_continuation_status_should_retain_with_binding(status, now);
    }
    status
        .last_not_found_at
        .or(status.last_touched_at)
        .is_some_and(|last| now.saturating_sub(last) < RUNTIME_CONTINUATION_DEAD_GRACE_SECONDS)
}
/// Timestamp at which a Dead status last saw an event (not-found preferred,
/// falling back to touch); `None` for non-Dead statuses.
fn runtime_continuation_status_dead_at(status: &RuntimeContinuationBindingStatus) -> Option<i64> {
    if status.state != RuntimeContinuationBindingLifecycle::Dead {
        return None;
    }
    status.last_not_found_at.or(status.last_touched_at)
}
/// True when a Dead status is outdated by a binding created after the status
/// went dead — the newer binding "shadows" the stale death record.
fn runtime_continuation_dead_status_shadowed_by_binding(
    binding: &ResponseProfileBinding,
    status: &RuntimeContinuationBindingStatus,
) -> bool {
    match runtime_continuation_status_dead_at(status) {
        Some(dead_at) => binding.bound_at > dead_at,
        None => false,
    }
}
/// Merges two status maps (incoming entries win per
/// `runtime_continuation_status_should_replace`), then drops entries that
/// have neither a live binding nor standalone retention value.
fn merge_runtime_continuation_status_map(
    existing: &BTreeMap<String, RuntimeContinuationBindingStatus>,
    incoming: &BTreeMap<String, RuntimeContinuationBindingStatus>,
    live_bindings: &BTreeMap<String, ResponseProfileBinding>,
) -> BTreeMap<String, RuntimeContinuationBindingStatus> {
    let now = Local::now().timestamp();
    let mut merged = existing.clone();
    for (key, status) in incoming {
        match merged.get(key) {
            // Keep the existing entry only when it should NOT be replaced.
            Some(current) if !runtime_continuation_status_should_replace(status, current) => {}
            _ => {
                merged.insert(key.clone(), status.clone());
            }
        }
    }
    merged.retain(|key, status| {
        live_bindings.contains_key(key)
            || runtime_continuation_status_should_retain_without_binding(status, now)
    });
    merged
}
/// Merges the three per-kind status maps (response, turn-state, session-id)
/// against their respective live binding maps.
fn merge_runtime_continuation_statuses(
    existing: &RuntimeContinuationStatuses,
    incoming: &RuntimeContinuationStatuses,
    response_bindings: &BTreeMap<String, ResponseProfileBinding>,
    turn_state_bindings: &BTreeMap<String, ResponseProfileBinding>,
    session_id_bindings: &BTreeMap<String, ResponseProfileBinding>,
) -> RuntimeContinuationStatuses {
    let response = merge_runtime_continuation_status_map(
        &existing.response,
        &incoming.response,
        response_bindings,
    );
    let turn_state = merge_runtime_continuation_status_map(
        &existing.turn_state,
        &incoming.turn_state,
        turn_state_bindings,
    );
    let session_id = merge_runtime_continuation_status_map(
        &existing.session_id,
        &incoming.session_id,
        session_id_bindings,
    );
    RuntimeContinuationStatuses {
        response,
        turn_state,
        session_id,
    }
}
fn compact_runtime_continuation_statuses(
statuses: RuntimeContinuationStatuses,
continuations: &RuntimeContinuationStore,
) -> RuntimeContinuationStatuses {
let now = Local::now().timestamp();
let mut merged = merge_runtime_continuation_statuses(
&RuntimeContinuationStatuses::default(),
&statuses,
&continuations.response_profile_bindings,
&continuations.turn_state_bindings,
&continuations.session_id_bindings,
);
merged.response.retain(|key, status| {
if let Some(binding) = continuations.response_profile_bindings.get(key) {
!runtime_continuation_dead_status_shadowed_by_binding(binding, status)
&& runtime_continuation_status_should_retain_with_binding(status, now)
} else {
runtime_continuation_status_should_retain_without_binding(status, now)
}
});
merged.turn_state.retain(|key, status| {
if let Some(binding) = continuations.turn_state_bindings.get(key) {
!runtime_continuation_dead_status_shadowed_by_binding(binding, status)
&& runtime_continuation_status_should_retain_with_binding(status, now)
} else {
runtime_continuation_status_should_retain_without_binding(status, now)
}
});
merged.session_id.retain(|key, status| {
if let Some(binding) = continuations.session_id_bindings.get(key) {
!runtime_continuation_dead_status_shadowed_by_binding(binding, status)
&& runtime_continuation_status_should_retain_with_binding(status, now)
} else {
runtime_continuation_status_should_retain_without_binding(status, now)
}
});
merged
}
/// Retention rule for a binding given its (optional) status: a binding newer
/// than a Dead status survives; terminal statuses kill the binding; other
/// statuses defer to the with-binding retention rule; statusless bindings are
/// kept unless their bound_at is in the future.
fn runtime_continuation_binding_should_retain(
    binding: &ResponseProfileBinding,
    status: Option<&RuntimeContinuationBindingStatus>,
    now: i64,
) -> bool {
    let Some(status) = status else {
        return binding.bound_at <= now;
    };
    if runtime_continuation_dead_status_shadowed_by_binding(binding, status) {
        return true;
    }
    if runtime_continuation_status_is_terminal(status) {
        return false;
    }
    runtime_continuation_status_should_retain_with_binding(status, now)
}
fn runtime_continuation_binding_retention_sort_key(
binding: &ResponseProfileBinding,
status: Option<&RuntimeContinuationBindingStatus>,
) -> (u8, u32, u32, u32, u8, i64, i64, i64, i64) {
let evidence = status
.map(runtime_continuation_status_evidence_sort_key)
.unwrap_or((0, 0, 0, 0, 0, i64::MIN, i64::MIN, i64::MIN));
(
evidence.0,
evidence.1,
evidence.2,
evidence.3,
evidence.4,
evidence.5,
evidence.6,
evidence.7,
binding.bound_at,
)
}
/// Caps the response-binding map at `max_entries` by evicting the bindings
/// with the weakest retention sort key (coldest evidence first).
fn prune_runtime_continuation_response_bindings(
    bindings: &mut BTreeMap<String, ResponseProfileBinding>,
    statuses: &BTreeMap<String, RuntimeContinuationBindingStatus>,
    max_entries: usize,
) {
    let excess = bindings.len().saturating_sub(max_entries);
    if excess == 0 {
        return;
    }
    // Rank every binding, then remove the `excess` weakest ones.
    let mut ranked: Vec<_> = bindings
        .iter()
        .map(|(response_id, binding)| {
            let retention =
                runtime_continuation_binding_retention_sort_key(binding, statuses.get(response_id));
            (response_id.clone(), retention)
        })
        .collect();
    ranked.sort_by_key(|(_, retention)| *retention);
    for (response_id, _) in ranked.into_iter().take(excess) {
        bindings.remove(&response_id);
    }
}
/// Drop bindings that reference deleted profiles or are older than the
/// retention window, then cap the table at `max_entries`.
fn prune_profile_bindings_for_housekeeping(
    bindings: &mut BTreeMap<String, ResponseProfileBinding>,
    profiles: &BTreeMap<String, ProfileEntry>,
    now: i64,
    retention_seconds: i64,
    max_entries: usize,
) {
    let cutoff = now.saturating_sub(retention_seconds);
    bindings.retain(|_, binding| {
        binding.bound_at >= cutoff && profiles.contains_key(&binding.profile_name)
    });
    prune_profile_bindings(bindings, max_entries);
}
/// Drop bindings whose owning profile no longer exists; no age cutoff and
/// no size cap are applied here.
fn prune_profile_bindings_for_housekeeping_without_retention(
    bindings: &mut BTreeMap<String, ResponseProfileBinding>,
    profiles: &BTreeMap<String, ProfileEntry>,
) {
    bindings.retain(|_, b| profiles.contains_key(&b.profile_name));
}
/// Normalize an `AppState` for persistence: clear a dangling active-profile
/// pointer and prune selection history and binding tables against the
/// current profile set.
fn compact_app_state(mut state: AppState, now: i64) -> AppState {
    // Drop an active-profile pointer that no longer names a real profile.
    let active_is_known = state
        .active_profile
        .as_ref()
        .map_or(false, |name| state.profiles.contains_key(name));
    if !active_is_known {
        state.active_profile = None;
    }
    prune_last_run_selection(&mut state.last_run_selected_at, &state.profiles, now);
    // Response bindings have their own size-capped pruning elsewhere, so only
    // the profile-existence check applies here.
    prune_profile_bindings_for_housekeeping_without_retention(
        &mut state.response_profile_bindings,
        &state.profiles,
    );
    prune_profile_bindings_for_housekeeping(
        &mut state.session_profile_bindings,
        &state.profiles,
        now,
        APP_STATE_SESSION_BINDING_RETENTION_SECONDS,
        SESSION_ID_PROFILE_BINDING_LIMIT,
    );
    state
}
/// Merge a runtime-produced state snapshot into the state loaded from disk.
///
/// The on-disk profile table wins; the snapshot's profiles only seed an
/// empty state. All derived tables are re-merged against the chosen profile
/// set and the result is compacted with the current timestamp.
fn merge_runtime_state_snapshot(existing: AppState, snapshot: &AppState) -> AppState {
    let profiles = if existing.profiles.is_empty() {
        snapshot.profiles.clone()
    } else {
        existing.profiles.clone()
    };
    // Prefer the snapshot's active profile, fall back to the existing one,
    // and drop it entirely if it no longer names a known profile.
    let active_profile = snapshot
        .active_profile
        .clone()
        .or_else(|| existing.active_profile.clone())
        .filter(|name| profiles.contains_key(name));
    let merged = AppState {
        active_profile,
        last_run_selected_at: merge_last_run_selection(
            &existing.last_run_selected_at,
            &snapshot.last_run_selected_at,
            &profiles,
        ),
        response_profile_bindings: merge_profile_bindings(
            &existing.response_profile_bindings,
            &snapshot.response_profile_bindings,
            &profiles,
        ),
        session_profile_bindings: merge_profile_bindings(
            &existing.session_profile_bindings,
            &snapshot.session_profile_bindings,
            &profiles,
        ),
        // Moved last so the merges above can still borrow `profiles`.
        profiles,
    };
    compact_app_state(merged, Local::now().timestamp())
}
fn read_auth_json_text(codex_home: &Path) -> Result<Option<String>> {
secret_store::SecretManager::new(secret_store::FileSecretBackend::new())
.read_text(&secret_store::auth_json_location(codex_home))
.map_err(anyhow::Error::new)
}
fn probe_auth_secret_revision(codex_home: &Path) -> Result<Option<secret_store::SecretRevision>> {
secret_store::SecretManager::new(secret_store::FileSecretBackend::new())
.probe_revision(&secret_store::auth_json_location(codex_home))
.map_err(anyhow::Error::new)
}
/// Build a usage-auth cache entry for one CODEX_HOME, capturing the secret
/// revision alongside the parsed auth so later validations can compare.
/// The revision is probed before the auth read, matching the original
/// error-precedence ordering.
fn load_runtime_profile_usage_auth_cache_entry(
    codex_home: &Path,
) -> Result<RuntimeProfileUsageAuthCacheEntry> {
    let location = secret_store::auth_json_location(codex_home);
    let revision = probe_auth_secret_revision(codex_home)?;
    read_usage_auth(codex_home).map(|auth| RuntimeProfileUsageAuthCacheEntry {
        auth,
        location,
        revision,
    })
}
/// Check whether a cached usage-auth entry is still current by re-probing
/// the secret revision at its recorded location.
fn runtime_profile_usage_auth_cache_entry_matches(
    entry: &RuntimeProfileUsageAuthCacheEntry,
) -> Result<bool> {
    let backend = secret_store::FileSecretBackend::new();
    let current = secret_store::SecretManager::new(backend)
        .probe_revision(&entry.location)
        .map_err(anyhow::Error::new)?;
    Ok(current == entry.revision)
}
/// Load a usage-auth cache entry for every profile, silently skipping
/// profiles whose auth cannot be read.
fn load_runtime_profile_usage_auth_cache(
    state: &AppState,
) -> BTreeMap<String, RuntimeProfileUsageAuthCacheEntry> {
    let mut cache = BTreeMap::new();
    for (name, profile) in &state.profiles {
        if let Ok(entry) = load_runtime_profile_usage_auth_cache_entry(&profile.codex_home) {
            cache.insert(name.clone(), entry);
        }
    }
    cache
}
/// Resolve the usage auth for `profile_name`, serving from the per-profile
/// cache when the underlying secret has not changed.
///
/// Lock discipline: the runtime mutex is held only while reading the cache
/// and profile table, and re-acquired briefly to store a refreshed entry;
/// it is never held across the secret-store probe or reload.
fn runtime_profile_usage_auth(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
) -> Result<UsageAuth> {
    let (cached_entry, codex_home) = {
        let runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        let profile = runtime
            .state
            .profiles
            .get(profile_name)
            .with_context(|| format!("profile '{}' is missing", profile_name))?;
        (
            runtime.profile_usage_auth.get(profile_name).cloned(),
            profile.codex_home.clone(),
        )
    };
    if let Some(entry) = cached_entry {
        match runtime_profile_usage_auth_cache_entry_matches(&entry) {
            // Revision unchanged: cached auth is still current.
            Ok(true) => return Ok(entry.auth),
            // Revision changed: fall through and reload from disk.
            Ok(false) => {}
            // Probe failed: prefer the possibly-stale cached auth over erroring.
            Err(_) => return Ok(entry.auth),
        }
    }
    let entry = load_runtime_profile_usage_auth_cache_entry(&codex_home)?;
    let auth = entry.auth.clone();
    // Best effort: a poisoned lock skips the cache update but still returns
    // the freshly loaded auth.
    if let Ok(mut runtime) = shared.runtime.lock() {
        runtime
            .profile_usage_auth
            .insert(profile_name.to_string(), entry);
    }
    Ok(auth)
}
/// Synthetic profile-health key under which auth failures are recorded.
fn runtime_profile_auth_failure_key(profile_name: &str) -> String {
    const PREFIX: &str = "__auth_failure__:";
    let mut key = String::with_capacity(PREFIX.len() + profile_name.len());
    key.push_str(PREFIX);
    key.push_str(profile_name);
    key
}
/// True while the profile's decaying auth-failure score is still positive.
fn runtime_profile_auth_failure_active_from_map(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    profile_name: &str,
    now: i64,
) -> bool {
    let key = runtime_profile_auth_failure_key(profile_name);
    let score = runtime_profile_effective_score_from_map(
        profile_health,
        &key,
        now,
        RUNTIME_PROFILE_AUTH_FAILURE_DECAY_SECONDS,
    );
    score > 0
}
/// Like the map-based check, but clears the backoff early when the profile's
/// cached credentials have changed since the failure was recorded.
fn runtime_profile_auth_failure_active_with_auth_cache(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    profile_usage_auth: &BTreeMap<String, RuntimeProfileUsageAuthCacheEntry>,
    profile_name: &str,
    now: i64,
) -> bool {
    if !runtime_profile_auth_failure_active_from_map(profile_health, profile_name, now) {
        return false;
    }
    match profile_usage_auth.get(profile_name) {
        // No cached auth to compare against: keep the backoff active.
        None => true,
        // Backoff stays active while the cached credentials are unchanged;
        // a probe error conservatively keeps the backoff too.
        Some(entry) => runtime_profile_usage_auth_cache_entry_matches(entry).unwrap_or(true),
    }
}
/// Convenience wrapper over the auth-cache-aware check using the runtime's
/// own health and auth-cache tables.
fn runtime_profile_auth_failure_active(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
) -> bool {
    let health = &runtime.profile_health;
    let auth_cache = &runtime.profile_usage_auth;
    runtime_profile_auth_failure_active_with_auth_cache(health, auth_cache, profile_name, now)
}
/// Score assigned to an auth failure: 401 gets its own weight; every other
/// status uses the 403 weight.
fn runtime_profile_auth_failure_score(status: u16) -> u32 {
    if status == 401 {
        RUNTIME_PROFILE_AUTH_FAILURE_401_SCORE
    } else {
        RUNTIME_PROFILE_AUTH_FAILURE_403_SCORE
    }
}
/// Record an auth failure (401/403) for a profile so rotation backs off it.
///
/// The failure is stored as a decaying score in `profile_health` under a
/// synthetic `__auth_failure__:` key; `max` ensures a new failure never
/// lowers a still-decaying existing score. Logging and save scheduling
/// happen while the runtime lock is held.
fn note_runtime_profile_auth_failure(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    status: u16,
) {
    // Best effort: a poisoned lock silently drops the observation.
    let mut runtime = match shared.runtime.lock() {
        Ok(runtime) => runtime,
        Err(_) => return,
    };
    let now = Local::now().timestamp();
    let next_score = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &runtime_profile_auth_failure_key(profile_name),
        now,
        RUNTIME_PROFILE_AUTH_FAILURE_DECAY_SECONDS,
    )
    .max(runtime_profile_auth_failure_score(status));
    runtime.profile_health.insert(
        runtime_profile_auth_failure_key(profile_name),
        RuntimeProfileHealth {
            score: next_score,
            updated_at: now,
        },
    );
    runtime_proxy_log(
        shared,
        format!(
            "profile_auth_backoff profile={profile_name} route={} status={} score={} seconds={}",
            runtime_route_kind_label(route_kind),
            status,
            next_score,
            RUNTIME_PROFILE_AUTH_FAILURE_DECAY_SECONDS
        ),
    );
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!("profile_auth_backoff:{profile_name}"),
    );
}
/// Merge the desired state over the existing one for persistence: the
/// desired profile table and active profile win outright, while the
/// selection history and binding tables are merged against the desired
/// profiles. The result is compacted with the current timestamp.
fn merge_app_state_for_save(existing: AppState, desired: &AppState) -> AppState {
    let active_profile = desired
        .active_profile
        .as_ref()
        .filter(|name| desired.profiles.contains_key(*name))
        .cloned();
    let merged = AppState {
        active_profile,
        profiles: desired.profiles.clone(),
        last_run_selected_at: merge_last_run_selection(
            &existing.last_run_selected_at,
            &desired.last_run_selected_at,
            &desired.profiles,
        ),
        response_profile_bindings: merge_profile_bindings(
            &existing.response_profile_bindings,
            &desired.response_profile_bindings,
            &desired.profiles,
        ),
        session_profile_bindings: merge_profile_bindings(
            &existing.session_profile_bindings,
            &desired.session_profile_bindings,
            &desired.profiles,
        ),
    };
    compact_app_state(merged, Local::now().timestamp())
}
/// Sidecar file holding the persisted continuation store.
fn runtime_continuations_file_path(paths: &AppPaths) -> PathBuf {
    let file_name = "runtime-continuations.json";
    paths.root.join(file_name)
}
/// Backup ("last good") sibling of the continuation sidecar.
fn runtime_continuations_last_good_file_path(paths: &AppPaths) -> PathBuf {
    let primary = runtime_continuations_file_path(paths);
    last_good_file_path(&primary)
}
/// Sidecar file holding the continuation journal.
fn runtime_continuation_journal_file_path(paths: &AppPaths) -> PathBuf {
    let file_name = "runtime-continuation-journal.json";
    paths.root.join(file_name)
}
/// Backup ("last good") sibling of the continuation journal.
fn runtime_continuation_journal_last_good_file_path(paths: &AppPaths) -> PathBuf {
    let primary = runtime_continuation_journal_file_path(paths);
    last_good_file_path(&primary)
}
/// Registry file for one broker, keyed by its broker key.
fn runtime_broker_registry_file_path(paths: &AppPaths, broker_key: &str) -> PathBuf {
    let file_name = format!("runtime-broker-{broker_key}.json");
    paths.root.join(file_name)
}
/// Backup ("last good") sibling of a broker registry file.
fn runtime_broker_registry_last_good_file_path(paths: &AppPaths, broker_key: &str) -> PathBuf {
    let primary = runtime_broker_registry_file_path(paths, broker_key);
    last_good_file_path(&primary)
}
/// Directory holding lease files for one broker.
fn runtime_broker_lease_dir(paths: &AppPaths, broker_key: &str) -> PathBuf {
    let dir_name = format!("runtime-broker-{broker_key}-leases");
    paths.root.join(dir_name)
}
/// Lock file used while ensuring a broker for the given key.
fn runtime_broker_ensure_lock_path(paths: &AppPaths, broker_key: &str) -> PathBuf {
    let file_name = format!("runtime-broker-{broker_key}-ensure");
    paths.root.join(file_name)
}
/// Enumerate broker keys by scanning the state root for
/// `runtime-broker-<key>.json` registry files. Unreadable directories yield
/// an empty list; the result is sorted and de-duplicated.
fn runtime_broker_registry_keys(paths: &AppPaths) -> Vec<String> {
    let entries = match fs::read_dir(&paths.root) {
        Ok(entries) => entries,
        Err(_) => return Vec::new(),
    };
    let mut keys: Vec<String> = entries
        .flatten()
        .filter_map(|entry| {
            let file_name = entry.file_name();
            let name = file_name.to_str()?;
            // Lease dirs and ensure locks lack the ".json" suffix, so only
            // registry files survive this double strip.
            let key = name
                .strip_prefix("runtime-broker-")?
                .strip_suffix(".json")?;
            Some(key.to_string())
        })
        .collect();
    keys.sort();
    keys.dedup();
    keys
}
/// Cache file for the periodic update check.
fn update_check_cache_file_path(paths: &AppPaths) -> PathBuf {
    let file_name = "update-check.json";
    paths.root.join(file_name)
}
/// Seed a continuation store from app state: response and session bindings
/// are copied, session-id bindings are derived from the session bindings,
/// and turn-state bindings and statuses start empty.
fn runtime_continuation_store_from_app_state(state: &AppState) -> RuntimeContinuationStore {
    let session_id_bindings =
        runtime_external_session_id_bindings(&state.session_profile_bindings);
    RuntimeContinuationStore {
        response_profile_bindings: state.response_profile_bindings.clone(),
        session_profile_bindings: state.session_profile_bindings.clone(),
        turn_state_bindings: BTreeMap::new(),
        session_id_bindings,
        statuses: RuntimeContinuationStatuses::default(),
    }
}
/// Prune and bound a continuation store against the known profile set.
///
/// Order matters: status tables are snapshotted first so retention decisions
/// use the pre-compaction status view; bindings for deleted profiles are
/// dropped; per-status retention is applied; each table is capped; finally
/// the status tables themselves are compacted against surviving bindings.
fn compact_runtime_continuation_store(
    mut continuations: RuntimeContinuationStore,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> RuntimeContinuationStore {
    let now = Local::now().timestamp();
    // Snapshot statuses before any pruning mutates the store.
    let response_statuses = continuations.statuses.response.clone();
    let turn_state_statuses = continuations.statuses.turn_state.clone();
    let session_id_statuses = continuations.statuses.session_id.clone();
    prune_profile_bindings_for_housekeeping_without_retention(
        &mut continuations.response_profile_bindings,
        profiles,
    );
    prune_profile_bindings_for_housekeeping_without_retention(
        &mut continuations.session_profile_bindings,
        profiles,
    );
    prune_profile_bindings_for_housekeeping_without_retention(
        &mut continuations.turn_state_bindings,
        profiles,
    );
    prune_profile_bindings_for_housekeeping_without_retention(
        &mut continuations.session_id_bindings,
        profiles,
    );
    continuations
        .response_profile_bindings
        .retain(|key, binding| {
            runtime_continuation_binding_should_retain(binding, response_statuses.get(key), now)
        });
    continuations.turn_state_bindings.retain(|key, binding| {
        runtime_continuation_binding_should_retain(binding, turn_state_statuses.get(key), now)
    });
    // NOTE(review): session_profile_bindings is checked against the
    // session_id status table — presumably both session key spaces share one
    // status table; confirm against the status writers.
    continuations
        .session_profile_bindings
        .retain(|key, binding| {
            runtime_continuation_binding_should_retain(binding, session_id_statuses.get(key), now)
        });
    continuations.session_id_bindings.retain(|key, binding| {
        runtime_continuation_binding_should_retain(binding, session_id_statuses.get(key), now)
    });
    // Response bindings are capped with status-aware eviction; the other
    // tables use the plain size cap.
    prune_runtime_continuation_response_bindings(
        &mut continuations.response_profile_bindings,
        &response_statuses,
        RESPONSE_PROFILE_BINDING_LIMIT,
    );
    prune_profile_bindings(
        &mut continuations.turn_state_bindings,
        TURN_STATE_PROFILE_BINDING_LIMIT,
    );
    prune_profile_bindings(
        &mut continuations.session_profile_bindings,
        SESSION_ID_PROFILE_BINDING_LIMIT,
    );
    prune_profile_bindings(
        &mut continuations.session_id_bindings,
        SESSION_ID_PROFILE_BINDING_LIMIT,
    );
    // Finally drop statuses that no longer correspond to surviving bindings.
    let statuses = continuations.statuses.clone();
    continuations.statuses = compact_runtime_continuation_statuses(statuses, &continuations);
    continuations
}
/// Merge two continuation stores and compact the result.
///
/// Each binding table is merged exactly once and the merged maps are reused
/// both as the store fields and as inputs to the status merge. (The previous
/// version recomputed the response/turn-state/session-id merges a second
/// time for the status step with identical arguments.)
fn merge_runtime_continuation_store(
    existing: &RuntimeContinuationStore,
    incoming: &RuntimeContinuationStore,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> RuntimeContinuationStore {
    let response_profile_bindings = merge_profile_bindings(
        &existing.response_profile_bindings,
        &incoming.response_profile_bindings,
        profiles,
    );
    let session_profile_bindings = merge_profile_bindings(
        &existing.session_profile_bindings,
        &incoming.session_profile_bindings,
        profiles,
    );
    let turn_state_bindings = merge_profile_bindings(
        &existing.turn_state_bindings,
        &incoming.turn_state_bindings,
        profiles,
    );
    let session_id_bindings = merge_profile_bindings(
        &existing.session_id_bindings,
        &incoming.session_id_bindings,
        profiles,
    );
    // Statuses are merged against the already-merged binding tables.
    let statuses = merge_runtime_continuation_statuses(
        &existing.statuses,
        &incoming.statuses,
        &response_profile_bindings,
        &turn_state_bindings,
        &session_id_bindings,
    );
    compact_runtime_continuation_store(
        RuntimeContinuationStore {
            response_profile_bindings,
            session_profile_bindings,
            turn_state_bindings,
            session_id_bindings,
            statuses,
        },
        profiles,
    )
}
/// Load the continuation sidecar, falling back to its backup on corruption,
/// and compact the result against the known profiles. A missing sidecar
/// (and backup) yields an empty store.
fn load_runtime_continuations_with_recovery(
    paths: &AppPaths,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> Result<RecoveredLoad<RuntimeContinuationStore>> {
    let path = runtime_continuations_file_path(paths);
    let backup_path = runtime_continuations_last_good_file_path(paths);
    if !path.exists() && !backup_path.exists() {
        // Nothing persisted yet.
        return Ok(RecoveredLoad {
            value: RuntimeContinuationStore::default(),
            recovered_from_backup: false,
        });
    }
    let loaded =
        read_versioned_json_file_with_backup::<RuntimeContinuationStore>(&path, &backup_path)?;
    // Remember the generation so stale concurrent saves can be detected.
    remember_runtime_sidecar_generation(&path, loaded.generation);
    Ok(RecoveredLoad {
        value: compact_runtime_continuation_store(loaded.value, profiles),
        recovered_from_backup: loaded.recovered_from_backup,
    })
}
/// Persist a compacted continuation store to its sidecar (and backup fence).
fn save_runtime_continuations_for_profiles(
    paths: &AppPaths,
    continuations: &RuntimeContinuationStore,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> Result<()> {
    // Test hook: simulate a one-shot save failure.
    if runtime_take_fault_injection("PRODEX_RUNTIME_FAULT_CONTINUATIONS_SAVE_ERROR_ONCE") {
        bail!("injected runtime continuations save failure");
    }
    let compacted = compact_runtime_continuation_store(continuations.clone(), profiles);
    save_versioned_json_file_with_fence(
        &runtime_continuations_file_path(paths),
        &runtime_continuations_last_good_file_path(paths),
        &compacted,
    )
}
/// Load the continuation journal, falling back to its backup on corruption,
/// and compact the contained store against the known profiles. A missing
/// journal (and backup) yields the default journal.
fn load_runtime_continuation_journal_with_recovery(
    paths: &AppPaths,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> Result<RecoveredLoad<RuntimeContinuationJournal>> {
    let path = runtime_continuation_journal_file_path(paths);
    let backup_path = runtime_continuation_journal_last_good_file_path(paths);
    if !path.exists() && !backup_path.exists() {
        return Ok(RecoveredLoad {
            value: RuntimeContinuationJournal::default(),
            recovered_from_backup: false,
        });
    }
    let loaded =
        read_versioned_json_file_with_backup::<RuntimeContinuationJournal>(&path, &backup_path)?;
    // Remember the generation so stale concurrent saves can be detected.
    remember_runtime_sidecar_generation(&path, loaded.generation);
    let value = RuntimeContinuationJournal {
        saved_at: loaded.value.saved_at,
        continuations: compact_runtime_continuation_store(loaded.value.continuations, profiles),
    };
    Ok(RecoveredLoad {
        value,
        recovered_from_backup: loaded.recovered_from_backup,
    })
}
#[cfg_attr(not(test), allow(dead_code))]
fn save_runtime_continuation_journal(
paths: &AppPaths,
continuations: &RuntimeContinuationStore,
saved_at: i64,
) -> Result<()> {
let profiles = AppState::load(paths)
.map(|state| state.profiles)
.unwrap_or_default();
save_runtime_continuation_journal_for_profiles(paths, continuations, &profiles, saved_at)
}
/// Merge the incoming continuations into the on-disk journal and persist it.
///
/// The read-merge-write cycle retries when the fenced save reports a stale
/// sidecar generation (another writer got in first), up to
/// `RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT` extra attempts; the journal's
/// `saved_at` never moves backwards thanks to the `max`.
fn save_runtime_continuation_journal_for_profiles(
    paths: &AppPaths,
    continuations: &RuntimeContinuationStore,
    profiles: &BTreeMap<String, ProfileEntry>,
    saved_at: i64,
) -> Result<()> {
    let incoming = compact_runtime_continuation_store(continuations.clone(), profiles);
    for attempt in 0..=RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT {
        // Re-load each attempt so a concurrent writer's changes are merged in.
        let existing = load_runtime_continuation_journal_with_recovery(paths, profiles)?;
        let journal = RuntimeContinuationJournal {
            saved_at: saved_at.max(existing.value.saved_at),
            continuations: merge_runtime_continuation_store(
                &existing.value.continuations,
                &incoming,
                profiles,
            ),
        };
        match save_versioned_json_file_with_fence(
            &runtime_continuation_journal_file_path(paths),
            &runtime_continuation_journal_last_good_file_path(paths),
            &journal,
        ) {
            Ok(()) => return Ok(()),
            // Stale generation: another writer won the race; retry the cycle.
            Err(err)
                if runtime_sidecar_generation_error_is_stale(&err)
                    && attempt < RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT =>
            {
                continue;
            }
            Err(err) => return Err(err),
        }
    }
    Ok(())
}
/// Split a `<prefix><route>:<profile>` key into `(route, profile)`.
///
/// Only the first ':' after the prefix splits, so profile names containing
/// ':' stay intact. Returns `None` when the prefix is absent or no
/// separator follows it.
fn runtime_profile_route_key_parts<'a>(key: &'a str, prefix: &str) -> Option<(&'a str, &'a str)> {
    key.strip_prefix(prefix)
        .and_then(|rest| rest.split_once(':'))
}
/// Health-map key for a per-route transport backoff entry.
fn runtime_profile_transport_backoff_key(
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__route_transport_backoff__:{route}:{profile_name}")
}
/// Split a transport-backoff key into `(route, profile)` parts.
///
/// Returns `None` for keys without the `__route_transport_backoff__:`
/// prefix (e.g. legacy bare profile names). The explicit lifetime of the
/// original was redundant (clippy `needless_lifetimes`): with a single
/// input reference, elision ties the output borrows to `key`.
fn runtime_profile_transport_backoff_key_parts(key: &str) -> Option<(&str, &str)> {
    runtime_profile_route_key_parts(key, "__route_transport_backoff__:")
}
/// Extract the profile name from a transport-backoff key; legacy keys are
/// the bare profile name and are returned unchanged.
fn runtime_profile_transport_backoff_profile_name(key: &str) -> &str {
    match runtime_profile_transport_backoff_key_parts(key) {
        Some((_, profile_name)) => profile_name,
        None => key,
    }
}
/// True if a transport-backoff key names a known route and profile; legacy
/// bare-profile keys are valid when the whole key is a known profile.
fn runtime_profile_transport_backoff_key_valid(
    key: &str,
    valid_profiles: &BTreeSet<String>,
) -> bool {
    match runtime_profile_transport_backoff_key_parts(key) {
        Some((route, profile_name)) => {
            runtime_route_kind_from_label(route).is_some()
                && valid_profiles.contains(profile_name)
        }
        None => valid_profiles.contains(key),
    }
}
/// Same validity check as above, but against the profile table instead of a
/// pre-built set of names.
fn runtime_profile_transport_backoff_key_matches_profiles(
    key: &str,
    profiles: &BTreeMap<String, ProfileEntry>,
) -> bool {
    match runtime_profile_transport_backoff_key_parts(key) {
        Some((route, profile_name)) => {
            runtime_route_kind_from_label(route).is_some()
                && profiles.contains_key(profile_name)
        }
        None => profiles.contains_key(key),
    }
}
/// Latest still-active backoff deadline for one profile/route pair,
/// considering both the route-scoped key and the legacy bare-profile key.
fn runtime_profile_transport_backoff_until_from_map(
    transport_backoff_until: &BTreeMap<String, i64>,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> Option<i64> {
    let route_key = runtime_profile_transport_backoff_key(profile_name, route_kind);
    let candidates = [
        transport_backoff_until.get(&route_key),
        transport_backoff_until.get(profile_name),
    ];
    candidates
        .into_iter()
        .flatten()
        .copied()
        .filter(|&until| until > now)
        .max()
}
/// Latest still-active backoff deadline for a profile across every route
/// (and the legacy bare-profile key).
fn runtime_profile_transport_backoff_max_until(
    transport_backoff_until: &BTreeMap<String, i64>,
    profile_name: &str,
    now: i64,
) -> Option<i64> {
    let mut max_until: Option<i64> = None;
    for (key, &until) in transport_backoff_until {
        if until > now && runtime_profile_transport_backoff_profile_name(key) == profile_name {
            max_until = max_until.max(Some(until));
        }
    }
    max_until
}
/// Persist a runtime state snapshot plus all sidecars, but only if it is
/// still the newest snapshot (`revision` matches `latest_revision`).
///
/// The revision is re-checked at three points — before the file lock, after
/// acquiring it, and just before writing — so a newer snapshot produced
/// while we merged cancels this save. Returns `Ok(false)` when superseded,
/// `Ok(true)` on a successful write. The whole cycle retries on stale
/// sidecar-generation errors.
fn save_runtime_state_snapshot_if_latest(
    paths: &AppPaths,
    snapshot: &AppState,
    continuations: &RuntimeContinuationStore,
    profile_scores: &BTreeMap<String, RuntimeProfileHealth>,
    usage_snapshots: &BTreeMap<String, RuntimeProfileUsageSnapshot>,
    backoffs: &RuntimeProfileBackoffs,
    revision: u64,
    latest_revision: &AtomicU64,
) -> Result<bool> {
    for attempt in 0..=RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT {
        // Cheap early-out before taking the cross-process file lock.
        if latest_revision.load(Ordering::SeqCst) != revision {
            return Ok(false);
        }
        let _lock = acquire_state_file_lock(paths)?;
        // Re-check: a newer snapshot may have appeared while we waited.
        if latest_revision.load(Ordering::SeqCst) != revision {
            return Ok(false);
        }
        let existing = AppState::load(paths)?;
        let merged = merge_runtime_state_snapshot(existing, snapshot);
        let existing_continuations =
            load_runtime_continuations_with_recovery(paths, &merged.profiles)?;
        let merged_continuations = merge_runtime_continuation_store(
            &existing_continuations.value,
            continuations,
            &merged.profiles,
        );
        // Mirror the merged continuation bindings back into the state file.
        let mut merged = merged;
        merged.response_profile_bindings = merged_continuations.response_profile_bindings.clone();
        merged.session_profile_bindings = merged_continuations.session_profile_bindings.clone();
        let json =
            serde_json::to_string_pretty(&merged).context("failed to serialize prodex state")?;
        let existing_scores = load_runtime_profile_scores(paths, &merged.profiles)?;
        let merged_scores =
            merge_runtime_profile_scores(&existing_scores, profile_scores, &merged.profiles);
        let existing_usage_snapshots = load_runtime_usage_snapshots(paths, &merged.profiles)?;
        let merged_usage_snapshots = merge_runtime_usage_snapshots(
            &existing_usage_snapshots,
            usage_snapshots,
            &merged.profiles,
        );
        let existing_backoffs = load_runtime_profile_backoffs(paths, &merged.profiles)?;
        let merged_backoffs = merge_runtime_profile_backoffs(
            &existing_backoffs,
            backoffs,
            &merged.profiles,
            Local::now().timestamp(),
        );
        // Final check after all merging, immediately before the writes.
        if latest_revision.load(Ordering::SeqCst) != revision {
            return Ok(false);
        }
        let save_result = (|| -> Result<()> {
            // Continuations are restored as the stronger source of truth on startup,
            // so persist them before the state snapshot to reduce crash windows where
            // a newer state file could be overwritten by an older continuation sidecar.
            save_runtime_continuations_for_profiles(
                paths,
                &merged_continuations,
                &merged.profiles,
            )?;
            write_state_json_atomic(paths, &json)?;
            save_runtime_profile_scores_for_profiles(paths, &merged_scores, &merged.profiles)?;
            save_runtime_usage_snapshots_for_profiles(
                paths,
                &merged_usage_snapshots,
                &merged.profiles,
            )?;
            save_runtime_profile_backoffs_for_profiles(paths, &merged_backoffs, &merged.profiles)?;
            Ok(())
        })();
        match save_result {
            Ok(()) => return Ok(true),
            // Stale sidecar generation: another writer raced us; retry.
            Err(err)
                if runtime_sidecar_generation_error_is_stale(&err)
                    && attempt < RUNTIME_SIDECAR_STALE_SAVE_RETRY_LIMIT =>
            {
                continue;
            }
            Err(err) => return Err(err),
        }
    }
    Ok(false)
}
// Top-level CLI definition parsed by clap; all behavior is dispatched on the
// single `Commands` subcommand. (Help text lives in the attribute strings —
// runtime-visible, so kept verbatim.)
#[derive(Parser, Debug)]
#[command(
    name = "prodex",
    version,
    about = "Manage multiple Codex profiles backed by isolated CODEX_HOME directories.",
    after_help = CLI_TOP_LEVEL_AFTER_HELP
)]
struct Cli {
    #[command(subcommand)]
    command: Commands,
}
// One variant per `prodex` subcommand. The `about`/`after_help` strings are
// clap help output at runtime and are kept verbatim.
#[derive(Subcommand, Debug)]
enum Commands {
    // Nested subcommand group for profile management.
    #[command(
        subcommand,
        about = "Add, inspect, remove, and activate managed profiles.",
        after_help = CLI_PROFILE_AFTER_HELP
    )]
    Profile(ProfileCommands),
    #[command(
        name = "use",
        about = "Set the active profile used by commands that omit --profile."
    )]
    UseProfile(ProfileSelector),
    #[command(about = "Show the active profile and its CODEX_HOME details.")]
    Current,
    #[command(
        name = "info",
        about = "Summarize version status, running processes, quota pool, and runway."
    )]
    Info(InfoArgs),
    #[command(
        about = "Inspect local state, Codex resolution, quota readiness, and runtime logs.",
        after_help = CLI_DOCTOR_AFTER_HELP
    )]
    Doctor(DoctorArgs),
    #[command(
        about = "Inspect structured enterprise audit events written to /tmp.",
        after_help = CLI_AUDIT_AFTER_HELP
    )]
    Audit(AuditArgs),
    #[command(
        about = "Remove stale local runtime logs, temp homes, dead broker artifacts, and orphaned managed homes.",
        after_help = CLI_CLEANUP_AFTER_HELP
    )]
    Cleanup,
    // trailing_var_arg lets everything after the flags flow to codex verbatim.
    #[command(
        trailing_var_arg = true,
        about = "Run codex login inside a selected or auto-created profile.",
        after_help = CLI_LOGIN_AFTER_HELP
    )]
    Login(CodexPassthroughArgs),
    #[command(about = "Run codex logout for the selected or active profile.")]
    Logout(LogoutArgs),
    #[command(
        about = "Inspect live quota for one profile or the whole profile pool.",
        after_help = CLI_QUOTA_AFTER_HELP
    )]
    Quota(QuotaArgs),
    #[command(
        trailing_var_arg = true,
        about = "Run codex through prodex with quota preflight and safe auto-rotate.",
        after_help = CLI_RUN_AFTER_HELP
    )]
    Run(RunArgs),
    #[command(
        trailing_var_arg = true,
        about = "Run Claude Code through prodex via an Anthropic-compatible runtime proxy.",
        after_help = CLI_CLAUDE_AFTER_HELP
    )]
    Claude(ClaudeArgs),
    // Hidden internal entry point spawned by prodex itself.
    #[command(name = "__runtime-broker", hide = true)]
    RuntimeBroker(RuntimeBrokerArgs),
}
// Subcommands under `prodex profile ...`. The `///` comments double as clap
// help text at runtime, so they are kept verbatim.
#[derive(Subcommand, Debug)]
enum ProfileCommands {
    /// Add a profile entry and optionally seed it from another CODEX_HOME.
    Add(AddProfileArgs),
    /// Export one or more profiles, including their auth.json access tokens.
    Export(ExportProfileArgs),
    /// Import profiles from a bundle created by `prodex profile export`.
    Import(ImportProfileArgs),
    /// Copy the current shared Prodex CODEX_HOME into a new managed profile and activate it.
    ImportCurrent(ImportCurrentArgs),
    /// List configured profiles and show which one is active.
    List,
    /// Remove a profile entry and optionally delete its managed home.
    Remove(RemoveProfileArgs),
    /// Set the active profile used by commands that omit --profile.
    Use(ProfileSelector),
}
// Arguments for `prodex profile add`. `codex_home`, `copy_from`, and
// `copy_current` offer three ways to seed/register the new profile's home.
#[derive(Args, Debug)]
struct AddProfileArgs {
    /// Name of the profile to create.
    name: String,
    /// Register an existing CODEX_HOME path instead of creating a managed profile home.
    #[arg(long, value_name = "PATH")]
    codex_home: Option<PathBuf>,
    /// Copy initial state from another CODEX_HOME path into the new managed profile.
    #[arg(long, value_name = "PATH")]
    copy_from: Option<PathBuf>,
    /// Seed the new managed profile from the default shared Prodex CODEX_HOME.
    #[arg(long)]
    copy_current: bool,
    /// Make the new profile active after creation.
    #[arg(long)]
    activate: bool,
}
// Arguments for `prodex profile export`; `password_protect` and
// `no_password` are mutually exclusive via clap's conflicts_with.
#[derive(Args, Debug)]
struct ExportProfileArgs {
    /// Export only the named profile. Repeat to export multiple profiles. Defaults to all profiles.
    #[arg(short, long, value_name = "NAME")]
    profile: Vec<String>,
    /// Write the export bundle to this path. Defaults to a timestamped JSON file in the current directory.
    #[arg(value_name = "PATH")]
    output: Option<PathBuf>,
    /// Protect the export bundle with a password.
    #[arg(long, conflicts_with = "no_password")]
    password_protect: bool,
    /// Export without password protection and skip the interactive prompt.
    #[arg(long)]
    no_password: bool,
}
// Arguments for `prodex profile import`.
#[derive(Args, Debug)]
struct ImportProfileArgs {
    /// Path to a profile export bundle created by `prodex profile export`.
    #[arg(value_name = "PATH")]
    path: PathBuf,
}
// Arguments for `prodex profile import-current`.
#[derive(Args, Debug)]
struct ImportCurrentArgs {
    /// Name of the managed profile to create from the current shared Prodex CODEX_HOME.
    #[arg(default_value = "default")]
    name: String,
}
// Arguments for `prodex profile remove`.
#[derive(Args, Debug)]
struct RemoveProfileArgs {
    /// Name of the profile to remove.
    name: String,
    /// Also delete the managed CODEX_HOME directory from disk.
    #[arg(long)]
    delete_home: bool,
}
// Shared `--profile NAME` selector reused by several subcommands.
#[derive(Args, Debug, Clone)]
struct ProfileSelector {
    /// Profile name. If omitted, prodex uses the active profile.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
}
// Arguments for `prodex logout`: accepts the profile either positionally or
// via `--profile`; clap rejects supplying both (conflicts_with).
#[derive(Args, Debug, Clone)]
struct LogoutArgs {
    /// Profile name. If omitted, prodex uses the active profile.
    #[arg(value_name = "NAME", conflicts_with = "profile")]
    profile_name: Option<String>,
    /// Profile name. If omitted, prodex uses the active profile.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
}
impl LogoutArgs {
    /// Resolve the profile chosen by the user, preferring `--profile` over
    /// the positional name. Returns `None` when neither was supplied.
    fn selected_profile(&self) -> Option<&str> {
        match self.profile.as_deref() {
            Some(name) => Some(name),
            None => self.profile_name.as_deref(),
        }
    }
}
// Arguments for `prodex login`: an optional profile plus raw arguments
// forwarded to `codex login` (hyphen values allowed so flags pass through).
#[derive(Args, Debug)]
struct CodexPassthroughArgs {
    /// Existing profile to log into. If omitted, prodex creates or reuses a profile by account email.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
    /// Extra arguments passed through to `codex login` unchanged.
    #[arg(value_name = "CODEX_ARG", allow_hyphen_values = true)]
    codex_args: Vec<OsString>,
}
// Arguments for `prodex quota`.
#[derive(Args, Debug)]
struct QuotaArgs {
    /// Inspect a single profile. If omitted, prodex uses the active profile.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
    /// Show every configured profile in one aggregated view.
    #[arg(long)]
    all: bool,
    /// Include exact reset timestamps and expanded window details.
    #[arg(long)]
    detail: bool,
    /// Print raw usage JSON for a single profile and disable the live refresh view.
    #[arg(long)]
    raw: bool,
    // Hidden flag; `--once` below conflicts with it.
    #[arg(long, hide = true)]
    watch: bool,
    /// Render one human-readable snapshot instead of refreshing every 5 seconds.
    #[arg(long, conflicts_with = "watch")]
    once: bool,
    /// Override the ChatGPT backend base URL used for quota requests.
    #[arg(long, value_name = "URL")]
    base_url: Option<String>,
}
// `prodex info` currently takes no options; kept as a struct so flags can be
// added without changing the subcommand shape.
#[derive(Args, Debug, Default)]
struct InfoArgs {}
// Arguments for `prodex doctor`.
#[derive(Args, Debug)]
struct DoctorArgs {
    /// Also probe each profile's quota endpoint.
    #[arg(long)]
    quota: bool,
    /// Also summarize runtime proxy state and recent logs from /tmp.
    #[arg(long)]
    runtime: bool,
    /// Emit machine-readable JSON output. Supported together with --runtime.
    #[arg(long)]
    json: bool,
}
// Arguments for `prodex audit`; the three filters are ANDed together.
#[derive(Args, Debug)]
struct AuditArgs {
    /// Show only the most recent matching events.
    #[arg(long, default_value_t = 50, value_name = "COUNT")]
    tail: usize,
    /// Emit machine-readable JSON output.
    #[arg(long)]
    json: bool,
    /// Filter by component, for example `profile` or `runtime`.
    #[arg(long, value_name = "NAME")]
    component: Option<String>,
    /// Filter by action, for example `use` or `broker_start`.
    #[arg(long, value_name = "NAME")]
    action: Option<String>,
    /// Filter by outcome, for example `success` or `failure`.
    #[arg(long, value_name = "NAME")]
    outcome: Option<String>,
}
// Arguments for `prodex run`. Auto-rotate is the default; `--auto-rotate`
// exists only as the explicit spelling and conflicts with `--no-auto-rotate`.
#[derive(Args, Debug)]
struct RunArgs {
    /// Starting profile for the run. If omitted, prodex uses the active profile.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
    /// Explicitly enable auto-rotate. This is the default behavior.
    #[arg(long, conflicts_with = "no_auto_rotate")]
    auto_rotate: bool,
    /// Keep the selected profile fixed and fail instead of rotating.
    #[arg(long)]
    no_auto_rotate: bool,
    /// Skip the preflight quota gate before launching codex.
    #[arg(long)]
    skip_quota_check: bool,
    /// Override the upstream ChatGPT base URL used for quota preflight and the runtime proxy.
    #[arg(long, value_name = "URL")]
    base_url: Option<String>,
    /// Arguments passed through to `codex`. A lone session id is normalized to `codex resume <session-id>`.
    #[arg(value_name = "CODEX_ARG", allow_hyphen_values = true)]
    codex_args: Vec<OsString>,
}
// Arguments for `prodex claude`; mirrors `RunArgs` but forwards to `claude`.
#[derive(Args, Debug)]
struct ClaudeArgs {
    /// Starting profile for the run. If omitted, prodex uses the active profile.
    #[arg(short, long, value_name = "NAME")]
    profile: Option<String>,
    /// Explicitly enable auto-rotate. This is the default behavior.
    #[arg(long, conflicts_with = "no_auto_rotate")]
    auto_rotate: bool,
    /// Keep the selected profile fixed and fail instead of rotating.
    #[arg(long)]
    no_auto_rotate: bool,
    /// Skip the preflight quota gate before launching Claude Code.
    #[arg(long)]
    skip_quota_check: bool,
    /// Override the upstream ChatGPT base URL used for quota preflight and the runtime proxy.
    #[arg(long, value_name = "URL")]
    base_url: Option<String>,
    /// Arguments passed through to `claude` unchanged.
    #[arg(value_name = "CLAUDE_ARG", allow_hyphen_values = true)]
    claude_args: Vec<OsString>,
}
// Arguments for the hidden `__runtime-broker` subcommand, spawned by prodex
// itself rather than typed by users (hence no help text on the fields).
#[derive(Args, Debug)]
struct RuntimeBrokerArgs {
    // Profile the broker starts with.
    #[arg(long)]
    current_profile: String,
    // Upstream ChatGPT base URL the broker proxies to.
    #[arg(long)]
    upstream_base_url: String,
    #[arg(long, default_value_t = false)]
    include_code_review: bool,
    // Key identifying this broker's registry/lease/lock files on disk.
    #[arg(long)]
    broker_key: String,
    // NOTE(review): tokens are presumably used to authenticate callers of
    // this broker instance — confirm against the broker server code.
    #[arg(long)]
    instance_token: String,
    #[arg(long)]
    admin_token: String,
    #[arg(long)]
    listen_addr: Option<String>,
}
/// Persisted application state (the main prodex state file).
///
/// All maps default to empty when absent from older state files.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct AppState {
    // Name of the currently active profile; must be a key of `profiles`
    // (compaction clears dangling pointers).
    active_profile: Option<String>,
    // Profile name -> entry describing its CODEX_HOME.
    #[serde(default)]
    profiles: BTreeMap<String, ProfileEntry>,
    // Profile name -> unix timestamp of its last `run` selection.
    #[serde(default)]
    last_run_selected_at: BTreeMap<String, i64>,
    // Response id -> profile binding, mirrored from the continuation store.
    #[serde(default)]
    response_profile_bindings: BTreeMap<String, ResponseProfileBinding>,
    // Session id -> profile binding, mirrored from the continuation store.
    #[serde(default)]
    session_profile_bindings: BTreeMap<String, ResponseProfileBinding>,
}
/// One configured profile: where its CODEX_HOME lives, whether prodex
/// manages (and may delete) that directory, and the account email if known.
#[derive(Debug, Clone, Serialize, Deserialize)]
struct ProfileEntry {
    codex_home: PathBuf,
    managed: bool,
    #[serde(default)]
    email: Option<String>,
}
/// Binds a response/session/turn-state id to the profile that produced it,
/// with the unix timestamp at which the binding was made.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct ResponseProfileBinding {
    profile_name: String,
    bound_at: i64,
}
/// Resolved filesystem layout for prodex state: the state root, the state
/// file itself, the managed-profiles directory, and the shared CODEX_HOME
/// roots (current and legacy).
#[derive(Debug, Clone)]
struct AppPaths {
    root: PathBuf,
    state_file: PathBuf,
    managed_profiles_root: PathBuf,
    shared_codex_root: PathBuf,
    legacy_shared_codex_root: PathBuf,
}
/// Payload of the ChatGPT usage/quota endpoint, deserialized leniently:
/// every field is optional and a JSON `null` rate-limit list becomes empty.
#[derive(Debug, Clone, Deserialize)]
struct UsageResponse {
    email: Option<String>,
    plan_type: Option<String>,
    rate_limit: Option<WindowPair>,
    code_review_rate_limit: Option<WindowPair>,
    #[serde(default, deserialize_with = "deserialize_null_default")]
    additional_rate_limits: Vec<AdditionalRateLimit>,
}
/// A rate limit expressed as a primary and a secondary usage window
/// (e.g. short-term and weekly); either may be absent.
#[derive(Debug, Clone, Deserialize)]
struct WindowPair {
    primary_window: Option<UsageWindow>,
    secondary_window: Option<UsageWindow>,
}
#[derive(Debug, Clone, Deserialize)]
struct AdditionalRateLimit {
limit_name: Option<String>,
metered_feature: Option<String>,
rate_limit: WindowPair,
}
#[derive(Debug, Clone, Deserialize)]
struct UsageWindow {
used_percent: Option<i64>,
reset_at: Option<i64>,
limit_window_seconds: Option<i64>,
}
#[derive(Debug, Clone, Deserialize)]
struct StoredAuth {
auth_mode: Option<String>,
tokens: Option<StoredTokens>,
#[serde(rename = "OPENAI_API_KEY")]
openai_api_key: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
struct StoredTokens {
access_token: Option<String>,
account_id: Option<String>,
id_token: Option<String>,
}
#[derive(Debug, Clone, Deserialize)]
struct IdTokenClaims {
#[serde(default)]
email: Option<String>,
#[serde(rename = "https://api.openai.com/profile", default)]
profile: Option<IdTokenProfileClaims>,
}
#[derive(Debug, Clone, Deserialize)]
struct IdTokenProfileClaims {
#[serde(default)]
email: Option<String>,
}
/// Provenance of a profile's quota numbers when building `info` output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum InfoQuotaSource {
    /// A fresh probe against the usage API succeeded.
    LiveProbe,
    /// Fell back to a persisted usage snapshot.
    PersistedSnapshot,
}
/// Cross-profile quota aggregate shown by the `info` panel.
#[derive(Debug, Clone)]
struct InfoQuotaAggregate {
    /// Profiles whose auth supports quota probing at all.
    quota_compatible_profiles: usize,
    live_profiles: usize,
    snapshot_profiles: usize,
    /// Quota-compatible profiles with neither live nor snapshot data.
    unavailable_profiles: usize,
    /// Sum of remaining 5h-window percentage across profiles with data.
    five_hour_pool_remaining: i64,
    /// Sum of remaining weekly-window percentage across profiles with data.
    weekly_pool_remaining: i64,
    earliest_five_hour_reset_at: Option<i64>,
    earliest_weekly_reset_at: Option<i64>,
}
impl InfoQuotaAggregate {
fn profiles_with_data(&self) -> usize {
self.live_profiles + self.snapshot_profiles
}
}
/// One row of the system process table: pid, executable name, argv.
#[derive(Debug, Clone)]
struct ProcessRow {
    pid: u32,
    command: String,
    args: Vec<String>,
}
/// A process classified as prodex; `runtime` marks runtime-proxy processes.
#[derive(Debug, Clone)]
struct ProdexProcessInfo {
    pid: u32,
    runtime: bool,
}
/// A quota reading parsed from a runtime log, used for burn-rate estimates.
#[derive(Debug, Clone)]
struct InfoRuntimeQuotaObservation {
    timestamp: i64,
    profile: String,
    five_hour_remaining: i64,
    weekly_remaining: i64,
}
/// Summary of recent runtime-log activity shown in the `info` panel.
#[derive(Debug, Clone, Default)]
struct InfoRuntimeLoadSummary {
    /// Number of runtime log files inspected.
    log_count: usize,
    observations: Vec<InfoRuntimeQuotaObservation>,
    active_inflight_units: usize,
    recent_selection_events: usize,
    recent_first_timestamp: Option<i64>,
    recent_last_timestamp: Option<i64>,
}
/// Projected quota exhaustion based on the observed burn rate.
#[derive(Debug, Clone, Copy)]
struct InfoRunwayEstimate {
    /// Pool percentage consumed per hour over the observed span.
    burn_per_hour: f64,
    observed_profiles: usize,
    observed_span_seconds: i64,
    /// Unix timestamp at which the pool is projected to reach zero.
    exhaust_at: i64,
}
/// Which main quota window an estimate or lookup refers to.
#[derive(Debug, Clone, Copy)]
enum InfoQuotaWindow {
    FiveHour,
    Weekly,
}
/// Work item for resolving a profile's account email from its codex home.
struct ProfileEmailLookupJob {
    name: String,
    codex_home: PathBuf,
}
/// Work item for probing one profile's quota; `order_index` lets results
/// be re-ordered to match the caller's original ordering.
#[derive(Debug)]
struct RunProfileProbeJob {
    name: String,
    order_index: usize,
    codex_home: PathBuf,
}
/// Outcome of one profile probe: auth summary plus either usage data or
/// an error message.
#[derive(Debug, Clone)]
struct RunProfileProbeReport {
    name: String,
    order_index: usize,
    auth: AuthSummary,
    result: std::result::Result<UsageResponse, String>,
}
/// A profile eligible for selection, with its usage data and provenance.
#[derive(Debug, Clone)]
struct ReadyProfileCandidate {
    name: String,
    usage: UsageResponse,
    order_index: usize,
    /// True when this candidate was explicitly preferred by the caller.
    preferred: bool,
    quota_source: RuntimeQuotaSource,
}
/// Normalized view of one main quota window used for ranking.
#[derive(Debug, Clone, Copy)]
struct MainWindowSnapshot {
    remaining_percent: i64,
    reset_at: i64,
    pressure_score: i64,
}
/// Scoring tuple used to rank ready profiles for selection.
#[derive(Debug, Clone, Copy)]
struct ReadyProfileScore {
    total_pressure: i64,
    weekly_pressure: i64,
    five_hour_pressure: i64,
    reserve_floor: i64,
    weekly_remaining: i64,
    five_hour_remaining: i64,
    weekly_reset_at: i64,
    five_hour_reset_at: i64,
}
/// Health classification of a single quota window (persisted with
/// usage snapshots, hence the serde derives).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
enum RuntimeQuotaWindowStatus {
    Ready,
    Thin,
    Critical,
    Exhausted,
    Unknown,
}
/// Status plus raw numbers for one quota window.
#[derive(Debug, Clone, Copy)]
struct RuntimeQuotaWindowSummary {
    status: RuntimeQuotaWindowStatus,
    remaining_percent: i64,
    reset_at: i64,
}
/// Combined 5h + weekly quota view along with the routing pressure band.
#[derive(Debug, Clone, Copy)]
struct RuntimeQuotaSummary {
    five_hour: RuntimeQuotaWindowSummary,
    weekly: RuntimeQuotaWindowSummary,
    route_band: RuntimeQuotaPressureBand,
}
/// Pressure band used for routing decisions. `Ord` follows declaration
/// order, so `Healthy` compares lowest and `Unknown` highest.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum RuntimeQuotaPressureBand {
    Healthy,
    Thin,
    Critical,
    Exhausted,
    Unknown,
}
/// Provenance of a runtime quota summary (mirrors [`InfoQuotaSource`]).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeQuotaSource {
    LiveProbe,
    PersistedSnapshot,
}
/// A fully buffered HTTP request captured for proxying upstream.
#[derive(Debug, Clone)]
struct RuntimeProxyRequest {
    method: String,
    path_and_query: String,
    headers: Vec<(String, String)>,
    body: Vec<u8>,
}
/// A value loaded from disk, noting whether the primary file was
/// unreadable and the backup copy had to be used instead.
#[derive(Debug, Clone)]
struct RecoveredLoad<T> {
    value: T,
    recovered_from_backup: bool,
}
/// Like [`RecoveredLoad`], for files wrapped in [`VersionedJson`]: also
/// carries the generation counter that was read.
#[derive(Debug, Clone)]
struct RecoveredVersionedLoad<T> {
    value: T,
    generation: u64,
    recovered_from_backup: bool,
}
/// On-disk JSON envelope pairing a payload with a generation counter
/// (missing counters default to 0 for older files).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct VersionedJson<T> {
    #[serde(default)]
    generation: u64,
    value: T,
}
/// State shared by all rotation-proxy worker threads. Cloning is cheap:
/// fields are `Arc`s, handles, or small values.
#[derive(Debug, Clone)]
struct RuntimeRotationProxyShared {
    async_client: reqwest::Client,
    async_runtime: Arc<TokioRuntime>,
    /// The mutable rotation state, guarded by a single mutex.
    runtime: Arc<Mutex<RuntimeRotationState>>,
    log_path: PathBuf,
    /// Monotonic id source for request logging.
    request_sequence: Arc<AtomicU64>,
    /// Revision counter used by the debounced state-save queue.
    state_save_revision: Arc<AtomicU64>,
    /// Deadline (as a u64 timestamp) for local overload backoff.
    local_overload_backoff_until: Arc<AtomicU64>,
    active_request_count: Arc<AtomicUsize>,
    active_request_limit: usize,
    lane_admission: RuntimeProxyLaneAdmission,
}
/// Debounced queue of pending state-file saves, keyed by target path.
#[derive(Debug)]
struct RuntimeStateSaveQueue {
    pending: Mutex<BTreeMap<PathBuf, RuntimeStateSaveJob>>,
    /// Signaled when a job is queued so the worker re-evaluates deadlines.
    wake: Condvar,
    /// Jobs currently being flushed (exposed as backlog/lag metrics).
    active: Arc<AtomicUsize>,
}
/// Same debounced-save shape as [`RuntimeStateSaveQueue`], but for the
/// continuation journal.
#[derive(Debug)]
struct RuntimeContinuationJournalSaveQueue {
    pending: Mutex<BTreeMap<PathBuf, RuntimeContinuationJournalSaveJob>>,
    wake: Condvar,
    active: Arc<AtomicUsize>,
}
/// Point-in-time copy of everything a state save needs to write.
#[derive(Debug, Clone)]
struct RuntimeStateSaveSnapshot {
    paths: AppPaths,
    state: AppState,
    continuations: RuntimeContinuationStore,
    profile_scores: BTreeMap<String, RuntimeProfileHealth>,
    usage_snapshots: BTreeMap<String, RuntimeProfileUsageSnapshot>,
    backoffs: RuntimeProfileBackoffs,
}
/// What a queued state save writes: a fixed snapshot, or the live shared
/// state re-read at flush time.
#[derive(Debug, Clone)]
enum RuntimeStateSavePayload {
    Snapshot(RuntimeStateSaveSnapshot),
    Live(RuntimeRotationProxyShared),
}
/// One queued state save with its debounce deadline and bookkeeping.
#[derive(Debug)]
struct RuntimeStateSaveJob {
    payload: RuntimeStateSavePayload,
    /// Revision this job represents; `latest_revision` is the shared
    /// counter, presumably used to skip superseded jobs — TODO confirm.
    revision: u64,
    latest_revision: Arc<AtomicU64>,
    log_path: PathBuf,
    /// Human-readable trigger, for logging.
    reason: String,
    queued_at: Instant,
    /// Earliest instant the job may be flushed (debounce window).
    ready_at: Instant,
}
/// Point-in-time copy of the data written to the continuation journal.
#[derive(Debug, Clone)]
struct RuntimeContinuationJournalSnapshot {
    paths: AppPaths,
    continuations: RuntimeContinuationStore,
    profiles: BTreeMap<String, ProfileEntry>,
}
/// Payload variants for continuation-journal saves (snapshot vs live).
#[derive(Debug, Clone)]
enum RuntimeContinuationJournalSavePayload {
    Snapshot(RuntimeContinuationJournalSnapshot),
    Live(RuntimeRotationProxyShared),
}
/// One queued continuation-journal save.
#[derive(Debug)]
struct RuntimeContinuationJournalSaveJob {
    payload: RuntimeContinuationJournalSavePayload,
    log_path: PathBuf,
    reason: String,
    /// Unix timestamp recorded into the journal as `saved_at`.
    saved_at: i64,
    queued_at: Instant,
    ready_at: Instant,
}
/// Common view over queued save jobs: each job exposes the instant at
/// which it becomes eligible to be flushed.
trait RuntimeScheduledSaveJob {
    fn ready_at(&self) -> Instant;
}
impl RuntimeScheduledSaveJob for RuntimeStateSaveJob {
    fn ready_at(&self) -> Instant {
        self.ready_at
    }
}
impl RuntimeScheduledSaveJob for RuntimeContinuationJournalSaveJob {
    fn ready_at(&self) -> Instant {
        self.ready_at
    }
}
/// Result of scanning a save queue: either the batch of jobs whose
/// deadline has passed, or how long to sleep until the next one is due.
enum RuntimeDueJobs<K, J> {
    Due(BTreeMap<K, J>),
    Wait(Duration),
}
/// Removes and returns every job in `pending` whose `ready_at` deadline is
/// at or before `now`. When nothing is due yet, returns how long to wait
/// until the earliest job becomes due.
///
/// # Panics
/// Panics if `pending` is empty — callers must only invoke this while at
/// least one job is queued.
fn runtime_take_due_scheduled_jobs<K, J>(
    pending: &mut BTreeMap<K, J>,
    now: Instant,
) -> RuntimeDueJobs<K, J>
where
    K: Ord + Clone,
    J: RuntimeScheduledSaveJob,
{
    let earliest = pending
        .values()
        .map(|job| job.ready_at())
        .min()
        .expect("scheduled save jobs should be present");
    if earliest > now {
        return RuntimeDueJobs::Wait(earliest.saturating_duration_since(now));
    }
    // Partition the queue: due jobs move into the returned batch, the rest
    // go back into `pending` untouched.
    let drained = std::mem::take(pending);
    let mut due = BTreeMap::new();
    for (key, job) in drained {
        if job.ready_at() <= now {
            due.insert(key, job);
        } else {
            pending.insert(key, job);
        }
    }
    RuntimeDueJobs::Due(due)
}
/// Debounced queue of profile quota re-probes, keyed by
/// (state path, profile name).
#[derive(Debug)]
struct RuntimeProbeRefreshQueue {
    pending: Mutex<BTreeMap<(PathBuf, String), RuntimeProbeRefreshJob>>,
    /// Signaled when a job is queued so the worker wakes up.
    wake: Condvar,
    /// Jobs currently executing (exposed as backlog/lag metrics).
    active: Arc<AtomicUsize>,
}
/// One queued quota re-probe for a single profile.
#[derive(Debug, Clone)]
struct RuntimeProbeRefreshJob {
    shared: RuntimeRotationProxyShared,
    profile_name: String,
    codex_home: PathBuf,
    upstream_base_url: String,
    queued_at: Instant,
}
/// RAII holder of a lock taken on the state file; unlocked on drop.
struct StateFileLock {
    file: fs::File,
}
impl Drop for StateFileLock {
    fn drop(&mut self) {
        // Best-effort unlock; errors during drop are deliberately ignored
        // (the OS releases the lock when the file handle closes anyway).
        let _ = self.file.unlock();
    }
}
/// Mutable core of the rotation proxy, guarded by the mutex in
/// [`RuntimeRotationProxyShared`].
#[derive(Debug, Clone)]
struct RuntimeRotationState {
    paths: AppPaths,
    state: AppState,
    upstream_base_url: String,
    include_code_review: bool,
    /// Profile currently used for new requests.
    current_profile: String,
    /// Cached usage-API auth per profile, with secret-store provenance.
    profile_usage_auth: BTreeMap<String, RuntimeProfileUsageAuthCacheEntry>,
    turn_state_bindings: BTreeMap<String, ResponseProfileBinding>,
    session_id_bindings: BTreeMap<String, ResponseProfileBinding>,
    continuation_statuses: RuntimeContinuationStatuses,
    /// Cached probe results so quota checks don't hit upstream every time.
    profile_probe_cache: BTreeMap<String, RuntimeProfileProbeCacheEntry>,
    profile_usage_snapshots: BTreeMap<String, RuntimeProfileUsageSnapshot>,
    // Per-profile backoff deadlines (unix seconds), split by failure class.
    profile_retry_backoff_until: BTreeMap<String, i64>,
    profile_transport_backoff_until: BTreeMap<String, i64>,
    profile_route_circuit_open_until: BTreeMap<String, i64>,
    /// Weighted in-flight request count per profile (see the in-flight
    /// guard's Drop impl for the release side).
    profile_inflight: BTreeMap<String, usize>,
    profile_health: BTreeMap<String, RuntimeProfileHealth>,
}
/// Cached usage auth for one profile plus where (and which revision) it
/// came from, so the cache can be invalidated when the secret changes.
#[derive(Debug, Clone)]
struct RuntimeProfileUsageAuthCacheEntry {
    auth: UsageAuth,
    location: secret_store::SecretLocation,
    revision: Option<secret_store::SecretRevision>,
}
/// Cached result of a quota probe, timestamped for freshness checks.
#[derive(Debug, Clone)]
struct RuntimeProfileProbeCacheEntry {
    checked_at: i64,
    auth: AuthSummary,
    result: std::result::Result<UsageResponse, String>,
}
/// Persisted, reduced quota reading for one profile (both main windows).
#[derive(Debug, Clone, Serialize, Deserialize)]
struct RuntimeProfileUsageSnapshot {
    checked_at: i64,
    five_hour_status: RuntimeQuotaWindowStatus,
    five_hour_remaining_percent: i64,
    five_hour_reset_at: i64,
    weekly_status: RuntimeQuotaWindowStatus,
    weekly_remaining_percent: i64,
    weekly_reset_at: i64,
}
/// Persisted per-profile backoff deadlines; JSON `null` maps decode as
/// empty maps.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct RuntimeProfileBackoffs {
    #[serde(default, deserialize_with = "deserialize_null_default")]
    retry_backoff_until: BTreeMap<String, i64>,
    #[serde(default, deserialize_with = "deserialize_null_default")]
    transport_backoff_until: BTreeMap<String, i64>,
    #[serde(default, deserialize_with = "deserialize_null_default")]
    route_circuit_open_until: BTreeMap<String, i64>,
}
/// On-disk journal wrapping the continuation store with a save timestamp.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeContinuationJournal {
    #[serde(default)]
    saved_at: i64,
    #[serde(default)]
    continuations: RuntimeContinuationStore,
}
/// All continuation bindings (id -> profile) plus their statuses.
/// Every field defaults so older/partial files still load.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeContinuationStore {
    #[serde(default)]
    response_profile_bindings: BTreeMap<String, ResponseProfileBinding>,
    #[serde(default)]
    session_profile_bindings: BTreeMap<String, ResponseProfileBinding>,
    #[serde(default)]
    turn_state_bindings: BTreeMap<String, ResponseProfileBinding>,
    #[serde(default)]
    session_id_bindings: BTreeMap<String, ResponseProfileBinding>,
    #[serde(default)]
    statuses: RuntimeContinuationStatuses,
}
/// Lifecycle status per binding id, grouped by binding kind.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeContinuationStatuses {
    #[serde(default)]
    response: BTreeMap<String, RuntimeContinuationBindingStatus>,
    #[serde(default)]
    turn_state: BTreeMap<String, RuntimeContinuationBindingStatus>,
    #[serde(default)]
    session_id: BTreeMap<String, RuntimeContinuationBindingStatus>,
}
/// Lifecycle state of a continuation binding. Defaults to `Warm`, which
/// also covers legacy entries persisted before statuses existed.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
enum RuntimeContinuationBindingLifecycle {
    #[default]
    Warm,
    Verified,
    Suspect,
    Dead,
}
/// Per-binding health counters backing the lifecycle classification.
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeContinuationBindingStatus {
    #[serde(default)]
    state: RuntimeContinuationBindingLifecycle,
    #[serde(default)]
    confidence: u32,
    #[serde(default)]
    last_touched_at: Option<i64>,
    #[serde(default)]
    last_verified_at: Option<i64>,
    #[serde(default)]
    last_verified_route: Option<String>,
    #[serde(default)]
    last_not_found_at: Option<i64>,
    /// Consecutive not-found results observed for this binding.
    #[serde(default)]
    not_found_streak: u32,
    #[serde(default)]
    success_count: u32,
    #[serde(default)]
    failure_count: u32,
}
/// Freshness of a cached probe result relative to its age limits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeProbeCacheFreshness {
    Fresh,
    /// Stale but still acceptable as a fallback.
    StaleUsable,
    Expired,
}
/// Persisted health score for a profile.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
struct RuntimeProfileHealth {
    score: u32,
    updated_at: i64,
}
/// Traffic lane/route classification used for admission and circuits.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeRouteKind {
    Responses,
    Compact,
    Websocket,
    Standard,
}
/// Everything `doctor` collects about runtime health, rendered into the
/// diagnosis report.
#[derive(Debug, Clone, Default)]
struct RuntimeDoctorSummary {
    // Log discovery.
    log_path: Option<PathBuf>,
    pointer_exists: bool,
    log_exists: bool,
    line_count: usize,
    // Marker/facet statistics parsed from the runtime log.
    marker_counts: BTreeMap<&'static str, usize>,
    last_marker_line: Option<String>,
    marker_last_fields: BTreeMap<&'static str, BTreeMap<String, String>>,
    facet_counts: BTreeMap<String, BTreeMap<String, usize>>,
    previous_response_not_found_by_route: BTreeMap<String, usize>,
    previous_response_not_found_by_transport: BTreeMap<String, usize>,
    first_timestamp: Option<String>,
    last_timestamp: Option<String>,
    // Qualitative pressure verdicts, pre-formatted for display.
    selection_pressure: String,
    transport_pressure: String,
    persistence_pressure: String,
    quota_freshness_pressure: String,
    startup_audit_pressure: String,
    // Counts read from the persisted runtime files.
    persisted_retry_backoffs: usize,
    persisted_transport_backoffs: usize,
    persisted_route_circuits: usize,
    persisted_usage_snapshots: usize,
    persisted_response_bindings: usize,
    persisted_session_bindings: usize,
    persisted_turn_state_bindings: usize,
    persisted_session_id_bindings: usize,
    persisted_verified_continuations: usize,
    persisted_warm_continuations: usize,
    persisted_suspect_continuations: usize,
    persisted_dead_continuations: usize,
    persisted_continuation_journal_response_bindings: usize,
    persisted_continuation_journal_session_bindings: usize,
    persisted_continuation_journal_turn_state_bindings: usize,
    persisted_continuation_journal_session_id_bindings: usize,
    // Save-queue backlog/lag metrics (None when unavailable).
    state_save_queue_backlog: Option<usize>,
    state_save_lag_ms: Option<u64>,
    continuation_journal_save_backlog: Option<usize>,
    continuation_journal_save_lag_ms: Option<u64>,
    profile_probe_refresh_backlog: Option<usize>,
    profile_probe_refresh_lag_ms: Option<u64>,
    continuation_journal_saved_at: Option<i64>,
    suspect_continuation_bindings: Vec<String>,
    failure_class_counts: BTreeMap<String, usize>,
    stale_persisted_usage_snapshots: usize,
    // Whether each persisted file had to be recovered from its backup.
    recovered_state_file: bool,
    recovered_scores_file: bool,
    recovered_usage_snapshots_file: bool,
    recovered_backoffs_file: bool,
    recovered_continuations_file: bool,
    recovered_continuation_journal_file: bool,
    last_good_backups_present: usize,
    degraded_routes: Vec<String>,
    orphan_managed_dirs: Vec<String>,
    profiles: Vec<RuntimeDoctorProfileSummary>,
    /// Final human-readable verdict line.
    diagnosis: String,
}
/// Key prefix marking compact-lane session lineage entries in binding maps.
const RUNTIME_COMPACT_SESSION_LINEAGE_PREFIX: &str = "__compact_session__:";
/// Key prefix marking compact-lane turn-state lineage entries.
const RUNTIME_COMPACT_TURN_STATE_LINEAGE_PREFIX: &str = "__compact_turn_state__:";
/// Per-profile section of the doctor report.
#[derive(Debug, Clone, Default)]
struct RuntimeDoctorProfileSummary {
    profile: String,
    quota_freshness: String,
    quota_age_seconds: i64,
    retry_backoff_until: Option<i64>,
    transport_backoff_until: Option<i64>,
    routes: Vec<RuntimeDoctorRouteSummary>,
}
/// Per-route line inside a profile's doctor section.
#[derive(Debug, Clone, Default)]
struct RuntimeDoctorRouteSummary {
    route: String,
    circuit_state: String,
    circuit_until: Option<i64>,
    transport_backoff_until: Option<i64>,
    health_score: u32,
    bad_pairing_score: u32,
    performance_score: u32,
    quota_band: String,
    five_hour_status: String,
    weekly_status: String,
}
/// Handle to the running rotation-proxy HTTP server and its workers.
struct RuntimeRotationProxy {
    server: Arc<TinyServer>,
    /// Set to true to request worker shutdown.
    shutdown: Arc<AtomicBool>,
    worker_threads: Vec<thread::JoinHandle<()>>,
    accept_worker_count: usize,
    listen_addr: std::net::SocketAddr,
    log_path: PathBuf,
    active_request_count: Arc<AtomicUsize>,
    /// Held while this process owns state persistence; `None` otherwise.
    owner_lock: Option<StateFileLock>,
}
/// Registry record written for a running broker so other prodex processes
/// can discover and talk to it.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerRegistry {
    pid: u32,
    listen_addr: String,
    started_at: i64,
    upstream_base_url: String,
    include_code_review: bool,
    current_profile: String,
    instance_token: String,
    admin_token: String,
    /// Mount path for OpenAI traffic; optional so older registry files
    /// (written before this field existed) still deserialize.
    #[serde(default)]
    openai_mount_path: Option<String>,
}
/// Broker health payload (serialized for the health endpoint).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerHealth {
    pid: u32,
    started_at: i64,
    current_profile: String,
    include_code_review: bool,
    active_requests: usize,
    instance_token: String,
    persistence_role: String,
}
/// Active/limit pair for one traffic lane.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerLaneMetrics {
    active: usize,
    limit: usize,
}
/// Lane metrics for each route kind (mirrors [`RuntimeRouteKind`]).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerTrafficMetrics {
    responses: RuntimeBrokerLaneMetrics,
    compact: RuntimeBrokerLaneMetrics,
    websocket: RuntimeBrokerLaneMetrics,
    standard: RuntimeBrokerLaneMetrics,
}
/// Continuation-binding counts grouped by kind and lifecycle state.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerContinuationMetrics {
    response_bindings: usize,
    turn_state_bindings: usize,
    session_id_bindings: usize,
    warm: usize,
    verified: usize,
    suspect: usize,
    dead: usize,
}
/// Full metrics payload exported by a broker.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
struct RuntimeBrokerMetrics {
    health: RuntimeBrokerHealth,
    active_request_limit: usize,
    local_overload_backoff_remaining_seconds: u64,
    traffic: RuntimeBrokerTrafficMetrics,
    profile_inflight: BTreeMap<String, usize>,
    retry_backoffs: usize,
    transport_backoffs: usize,
    route_circuits: usize,
    degraded_profiles: usize,
    degraded_routes: usize,
    continuations: RuntimeBrokerContinuationMetrics,
}
/// Broker metrics joined with the broker's identity (serialize-only;
/// produced when reporting observations to callers).
#[derive(Debug, Clone, Serialize)]
struct RuntimeBrokerObservation {
    broker_key: String,
    listen_addr: String,
    metrics: RuntimeBrokerMetrics,
}
/// Identity/connection info for a known broker. Some fields are currently
/// unread, hence the `dead_code` allowance.
#[allow(dead_code)]
#[derive(Debug, Clone)]
struct RuntimeBrokerMetadata {
    broker_key: String,
    listen_addr: String,
    started_at: i64,
    current_profile: String,
    include_code_review: bool,
    instance_token: String,
    admin_token: String,
}
/// Path of this broker's lease file.
#[derive(Debug)]
struct RuntimeBrokerLease {
    path: PathBuf,
}
/// Where a launched process should send OpenAI traffic: the local proxy
/// address, the mount path, and the broker lease being held.
#[derive(Debug)]
struct RuntimeProxyEndpoint {
    listen_addr: std::net::SocketAddr,
    openai_mount_path: String,
    lease_dir: PathBuf,
    /// Kept alive for ownership; never read (underscore-prefixed).
    _lease: Option<RuntimeBrokerLease>,
}
/// Caller intent for preparing a launch (used by `run`/`claude`).
struct RuntimeLaunchRequest<'a> {
    profile: Option<&'a str>,
    allow_auto_rotate: bool,
    skip_quota_check: bool,
    base_url: Option<&'a str>,
    include_code_review: bool,
    force_runtime_proxy: bool,
}
/// Resolved launch configuration produced from a [`RuntimeLaunchRequest`].
struct PreparedRuntimeLaunch {
    paths: AppPaths,
    codex_home: PathBuf,
    managed: bool,
    runtime_proxy: Option<RuntimeProxyEndpoint>,
}
/// Server-side websocket wrapping an accepted tiny_http connection.
type RuntimeLocalWebSocket = WsSocket<Box<dyn TinyReadWrite + Send>>;
/// Client-side websocket to the upstream service (plain or TLS stream).
type RuntimeUpstreamWebSocket = WsSocket<MaybeTlsStream<TcpStream>>;
/// Applies (or clears, with `None`) read/write timeouts on the TCP stream
/// underlying an upstream websocket.
///
/// Only the `Plain` and `Rustls` transport variants are handled; any other
/// variant of the (non-exhaustive) enum is left untouched, as before.
fn runtime_set_upstream_websocket_io_timeout(
    socket: &mut RuntimeUpstreamWebSocket,
    timeout: Option<Duration>,
) -> io::Result<()> {
    let tcp = match socket.get_mut() {
        MaybeTlsStream::Plain(stream) => stream,
        MaybeTlsStream::Rustls(stream) => &mut stream.sock,
        _ => return Ok(()),
    };
    tcp.set_read_timeout(timeout)?;
    tcp.set_write_timeout(timeout)?;
    Ok(())
}
/// Returns true when a websocket error is actually an I/O read/write
/// timeout (`TimedOut` or, on some platforms, `WouldBlock`).
fn runtime_websocket_timeout_error(err: &WsError) -> bool {
    if let WsError::Io(io_err) = err {
        matches!(
            io_err.kind(),
            io::ErrorKind::TimedOut | io::ErrorKind::WouldBlock
        )
    } else {
        false
    }
}
/// Outcome of one proxied `/responses` attempt against a single profile.
enum RuntimeResponsesAttempt {
    /// Upstream accepted; response is ready to relay to the client.
    Success {
        profile_name: String,
        response: RuntimeResponsesReply,
    },
    /// Upstream rejected for quota; the caller may rotate profiles.
    QuotaBlocked {
        profile_name: String,
        response: RuntimeResponsesReply,
    },
    /// Upstream no longer recognizes `previous_response_id`; `turn_state`
    /// (when present) may allow resuming elsewhere.
    PreviousResponseNotFound {
        profile_name: String,
        response: RuntimeResponsesReply,
        turn_state: Option<String>,
    },
    /// Refused locally before any upstream I/O; `reason` is logged.
    LocalSelectionBlocked {
        profile_name: String,
        reason: &'static str,
    },
}
/// Outcome of a proxied non-`/responses` ("standard") request.
enum RuntimeStandardAttempt {
    Success {
        profile_name: String,
        response: tiny_http::ResponseBox,
    },
    /// Failed in a way worth retrying; `overload` distinguishes
    /// overload-style failures from other retryable classes.
    RetryableFailure {
        profile_name: String,
        response: tiny_http::ResponseBox,
        overload: bool,
    },
    LocalSelectionBlocked {
        profile_name: String,
    },
}
/// Verdict after inspecting the head of an SSE stream before forwarding.
#[derive(Debug)]
enum RuntimeSseInspection {
    /// Safe to forward. `prelude` holds the bytes consumed during
    /// inspection; `response_ids` the ids seen so far (for binding).
    Commit {
        prelude: Vec<u8>,
        response_ids: Vec<String>,
    },
    /// Stream signalled a quota error; carries the buffered bytes.
    QuotaBlocked(Vec<u8>),
    /// Stream signalled a missing previous response; buffered bytes.
    PreviousResponseNotFound(Vec<u8>),
}
/// Incremental verdict while an SSE stream is still being inspected.
#[derive(Debug)]
enum RuntimeSseInspectionProgress {
    /// Keep buffering before deciding.
    Hold { response_ids: Vec<String> },
    Commit { response_ids: Vec<String> },
    QuotaBlocked,
    PreviousResponseNotFound,
}
/// Facts extracted from a single parsed SSE event.
#[derive(Default)]
struct RuntimeParsedSseEvent {
    quota_blocked: bool,
    previous_response_not_found: bool,
    response_ids: Vec<String>,
    event_type: Option<String>,
}
/// Line/data accumulator for tapping an SSE stream as it passes through.
#[derive(Default)]
struct RuntimeSseTapState {
    /// Current (possibly partial) line being assembled.
    line: Vec<u8>,
    /// `data:` lines of the event currently being assembled.
    data_lines: Vec<String>,
    /// Ids already recorded, to avoid duplicate binding work.
    remembered_response_ids: BTreeSet<String>,
}
/// Reply form for `/responses`: fully buffered or streamed through.
enum RuntimeResponsesReply {
    Buffered(RuntimeBufferedResponseParts),
    Streaming(RuntimeStreamingResponse),
}
/// A streaming upstream response being relayed to the client, plus the
/// bookkeeping needed for logging and resource release when it finishes.
struct RuntimeStreamingResponse {
    status: u16,
    headers: Vec<(String, String)>,
    body: Box<dyn Read + Send>,
    request_id: u64,
    profile_name: String,
    log_path: PathBuf,
    shared: RuntimeRotationProxyShared,
    /// Held only for its Drop effect: releases the profile's in-flight
    /// weight when the stream ends.
    _inflight_guard: Option<RuntimeProfileInFlightGuard>,
}
/// RAII guard for a profile's weighted in-flight counter; its Drop impl
/// subtracts `weight` and wakes lane-admission waiters.
struct RuntimeProfileInFlightGuard {
    shared: RuntimeRotationProxyShared,
    profile_name: String,
    /// Label recorded in the release log line.
    context: &'static str,
    weight: usize,
}
/// RAII guard for the global and per-lane active-request counters.
struct RuntimeProxyActiveRequestGuard {
    active_request_count: Arc<AtomicUsize>,
    lane_active_count: Arc<AtomicUsize>,
    /// Mutex/condvar pair used to wake admission waiters on release.
    wait: Arc<(Mutex<()>, Condvar)>,
}
impl Drop for RuntimeProxyActiveRequestGuard {
    /// Releases one slot from both the global and per-lane active-request
    /// counters, then wakes any threads waiting for admission.
    fn drop(&mut self) {
        let (mutex, condvar) = &*self.wait;
        // Decrement while holding the admission mutex so a waiter cannot
        // slip between its capacity check and its wait and miss the wakeup.
        // A poisoned mutex is recovered rather than panicking inside drop.
        let _guard = mutex
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        self.active_request_count.fetch_sub(1, Ordering::SeqCst);
        self.lane_active_count.fetch_sub(1, Ordering::SeqCst);
        condvar.notify_all();
    }
}
impl Drop for RuntimeProfileInFlightGuard {
    /// Subtracts this request's weight from the profile's in-flight count,
    /// logs the release, and wakes lane-admission waiters.
    fn drop(&mut self) {
        // If the state mutex is poisoned the release is skipped entirely:
        // losing a decrement is preferred over panicking inside drop.
        if let Ok(mut runtime) = self.shared.runtime.lock() {
            let remaining =
                if let Some(count) = runtime.profile_inflight.get_mut(&self.profile_name) {
                    // saturating_sub guards against double-release underflow.
                    *count = count.saturating_sub(self.weight);
                    let remaining = *count;
                    // Drop empty entries so the map only lists busy profiles.
                    if remaining == 0 {
                        runtime.profile_inflight.remove(&self.profile_name);
                    }
                    remaining
                } else {
                    0
                };
            // Release the state lock before logging / notifying.
            drop(runtime);
            runtime_proxy_log(
                &self.shared,
                format!(
                    "profile_inflight profile={} count={} weight={} context={} event=release",
                    self.profile_name, remaining, self.weight, self.context
                ),
            );
            // Bump the release revision, then notify under the admission
            // mutex so waiters re-check capacity without missing a wakeup.
            self.shared
                .lane_admission
                .inflight_release_revision
                .fetch_add(1, Ordering::SeqCst);
            let (mutex, condvar) = &*self.shared.lane_admission.wait;
            let _guard = mutex
                .lock()
                .unwrap_or_else(|poisoned| poisoned.into_inner());
            condvar.notify_all();
        }
    }
}
/// One chunk flowing through the prefetch channel.
enum RuntimePrefetchChunk {
    Data(Vec<u8>),
    /// Upstream finished cleanly.
    End,
    /// Upstream failed; carries the error kind and message to surface.
    Error(io::ErrorKind, String),
}
/// Result of pushing a chunk into the bounded prefetch channel.
#[derive(Debug, PartialEq, Eq)]
enum RuntimePrefetchSendOutcome {
    /// Delivered, with how long (ms) and how many retries it took.
    Sent { wait_ms: u128, retries: usize },
    /// Receiver side is gone; producer should stop.
    Disconnected,
    TimedOut { message: String },
}
/// State shared between the prefetch producer task and the reader.
#[derive(Default)]
struct RuntimePrefetchSharedState {
    /// First terminal error seen by the producer, if any.
    terminal_error: Mutex<Option<(io::ErrorKind, String)>>,
    /// Bytes currently queued in the channel.
    queued_bytes: AtomicUsize,
}
/// Prefetch pipeline in its pre-read state: the channel plus any chunks
/// already pulled ahead during inspection (`backlog`).
struct RuntimePrefetchStream {
    receiver: Option<Receiver<RuntimePrefetchChunk>>,
    shared: Arc<RuntimePrefetchSharedState>,
    backlog: VecDeque<RuntimePrefetchChunk>,
    /// Abort handle for the producer task, when one is running.
    worker_abort: Option<tokio::task::AbortHandle>,
}
/// Blocking `Read`-style adapter over the prefetch channel.
struct RuntimePrefetchReader {
    receiver: Receiver<RuntimePrefetchChunk>,
    shared: Arc<RuntimePrefetchSharedState>,
    /// Chunks to serve before touching the channel.
    backlog: VecDeque<RuntimePrefetchChunk>,
    /// Partially consumed current chunk.
    pending: Cursor<Vec<u8>>,
    finished: bool,
    worker_abort: tokio::task::AbortHandle,
}
/// Outcome of relaying one websocket exchange for a profile.
#[derive(Debug)]
enum RuntimeWebsocketAttempt {
    /// Frames were delivered to the client; nothing further to do.
    Delivered,
    QuotaBlocked {
        profile_name: String,
        payload: RuntimeWebsocketErrorPayload,
    },
    Overloaded {
        profile_name: String,
        payload: RuntimeWebsocketErrorPayload,
    },
    /// Refused locally before contacting upstream; `reason` is logged.
    LocalSelectionBlocked {
        profile_name: String,
        reason: &'static str,
    },
    PreviousResponseNotFound {
        profile_name: String,
        payload: RuntimeWebsocketErrorPayload,
        turn_state: Option<String>,
    },
    /// A reuse watchdog fired; `event` names the tripping event.
    ReuseWatchdogTripped {
        profile_name: String,
        event: &'static str,
    },
}
/// Upstream failure payload in either transport's native form.
enum RuntimeUpstreamFailureResponse {
    Http(RuntimeResponsesReply),
    Websocket(RuntimeWebsocketErrorPayload),
}
/// Result of dialing the upstream websocket.
#[derive(Debug)]
enum RuntimeWebsocketConnectResult {
    Connected {
        socket: RuntimeUpstreamWebSocket,
        turn_state: Option<String>,
    },
    QuotaBlocked(RuntimeWebsocketErrorPayload),
    Overloaded(RuntimeWebsocketErrorPayload),
}
/// Successful TCP connect, with stats about the address attempts made.
#[derive(Debug)]
struct RuntimeWebsocketTcpConnectSuccess {
    stream: TcpStream,
    selected_addr: SocketAddr,
    resolved_addrs: usize,
    attempted_addrs: usize,
}
/// Result of one TCP connect attempt to a single resolved address.
#[derive(Debug)]
struct RuntimeWebsocketTcpAttemptResult {
    addr: SocketAddr,
    result: io::Result<TcpStream>,
}
/// Error body to relay to the websocket client, keyed by frame type.
#[derive(Debug, Clone)]
enum RuntimeWebsocketErrorPayload {
    Text(String),
    Binary(Vec<u8>),
    Empty,
}
/// Retry-relevant classification of an inspected websocket frame.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeWebsocketRetryInspectionKind {
    QuotaBlocked,
    Overloaded,
    PreviousResponseNotFound,
}
/// Facts extracted from one upstream websocket text frame.
#[derive(Debug, Clone, Default)]
struct RuntimeInspectedWebsocketTextFrame {
    event_type: Option<String>,
    turn_state: Option<String>,
    response_ids: Vec<String>,
    retry_kind: Option<RuntimeWebsocketRetryInspectionKind>,
    /// Keep buffering frames before committing them to the client.
    precommit_hold: bool,
    /// This frame ends the logical exchange.
    terminal_event: bool,
}
/// A text frame buffered during pre-commit inspection.
#[derive(Debug)]
struct RuntimeBufferedWebsocketTextFrame {
    text: String,
    response_ids: Vec<String>,
}
/// Appends a message to the proxy's log file (delegates to
/// `runtime_proxy_log_to_path` with the shared log path).
fn runtime_proxy_log(shared: &RuntimeRotationProxyShared, message: impl AsRef<str>) {
    runtime_proxy_log_to_path(&shared.log_path, message.as_ref());
}
/// Returns the next unique request id for log correlation. Relaxed
/// ordering suffices: only uniqueness matters, not ordering with other
/// memory operations.
fn runtime_proxy_next_request_id(shared: &RuntimeRotationProxyShared) -> u64 {
    shared.request_sequence.fetch_add(1, Ordering::Relaxed)
}
/// Binary entry point: runs the CLI and converts any error into a printed
/// message plus exit code 1.
pub fn main_entry() {
    match run() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("Error: {err:#}");
            std::process::exit(1);
        }
    }
}
/// Top-level command dispatch: parses the CLI (defaulting bare invocations
/// to `run`), shows the update notice for user-facing commands, validates
/// the runtime policy, then hands off to the per-command handler.
fn run() -> Result<()> {
    let command = parse_cli_command_or_exit();
    // The hidden broker subcommand is machine-spawned; skip the (best-
    // effort, error-ignored) update notice for it.
    if !matches!(command, Commands::RuntimeBroker(_)) {
        let _ = show_update_notice_if_available(&command);
    }
    ensure_runtime_policy_valid()?;
    match command {
        Commands::Profile(command) => handle_profile_command(command),
        Commands::UseProfile(selector) => handle_set_active_profile(selector),
        Commands::Current => handle_current_profile(),
        Commands::Info(args) => handle_info(args),
        Commands::Doctor(args) => handle_doctor(args),
        Commands::Audit(args) => handle_audit(args),
        Commands::Cleanup => handle_cleanup(),
        Commands::Login(args) => handle_codex_login(args),
        Commands::Logout(args) => handle_codex_logout(args),
        Commands::Quota(args) => handle_quota(args),
        Commands::Run(args) => handle_run(args),
        Commands::Claude(args) => handle_claude(args),
        Commands::RuntimeBroker(args) => handle_runtime_broker(args),
    }
}
/// Parses the process's CLI arguments, exiting the process (with clap's
/// usage/error output) on failure.
fn parse_cli_command_or_exit() -> Commands {
    // `clap::Error::exit` never returns, so the closure diverges.
    parse_cli_command_from(env::args_os()).unwrap_or_else(|err| err.exit())
}
/// Parses CLI arguments from an arbitrary source (testable without
/// touching the process environment). When the first argument is not a
/// known subcommand or help/version flag, the invocation is rewritten to
/// an implicit `run` so `prodex <codex args…>` works directly.
fn parse_cli_command_from<I, T>(args: I) -> std::result::Result<Commands, clap::Error>
where
    I: IntoIterator<Item = T>,
    T: Into<OsString>,
{
    let raw_args: Vec<OsString> = args.into_iter().map(Into::into).collect();
    let effective_args = if should_default_cli_invocation_to_run(&raw_args) {
        rewrite_cli_args_as_run(&raw_args)
    } else {
        raw_args
    };
    Cli::try_parse_from(effective_args).map(|cli| cli.command)
}
/// Decides whether a raw invocation should be rewritten to `prodex run …`.
///
/// Returns true when the first user argument (index 1) is absent, is not
/// valid UTF-8, or is not one of the known subcommands / top-level flags.
fn should_default_cli_invocation_to_run(args: &[OsString]) -> bool {
    /// First arguments that must be parsed as-is rather than defaulted.
    const KNOWN_FIRST_ARGS: &[&str] = &[
        "-h",
        "--help",
        "-V",
        "--version",
        "profile",
        "use",
        "current",
        "info",
        "doctor",
        "audit",
        "cleanup",
        "login",
        "logout",
        "quota",
        "run",
        "claude",
        "help",
        "__runtime-broker",
    ];
    match args.get(1).and_then(|arg| arg.to_str()) {
        Some(first_arg) => !KNOWN_FIRST_ARGS.contains(&first_arg),
        // Missing or non-UTF-8 first argument: default to `run`.
        None => true,
    }
}
/// Rebuilds an argv with `run` inserted after the program name, forwarding
/// all remaining arguments unchanged. An empty argv gets the fallback
/// program name `prodex`.
fn rewrite_cli_args_as_run(args: &[OsString]) -> Vec<OsString> {
    let program = match args.first() {
        Some(name) => name.clone(),
        None => OsString::from("prodex"),
    };
    let mut rewritten = vec![program, OsString::from("run")];
    rewritten.reserve(args.len().saturating_sub(1));
    rewritten.extend(args.iter().skip(1).cloned());
    rewritten
}
/// Dispatches `prodex profile <subcommand>` to its handler.
fn handle_profile_command(command: ProfileCommands) -> Result<()> {
    match command {
        ProfileCommands::Add(args) => handle_add_profile(args),
        ProfileCommands::Export(args) => handle_export_profiles(args),
        ProfileCommands::Import(args) => handle_import_profiles(args),
        ProfileCommands::ImportCurrent(args) => handle_import_current_profile(args),
        ProfileCommands::List => handle_list_profiles(),
        ProfileCommands::Remove(args) => handle_remove_profile(args),
        // `profile use` shares its handler with the top-level `use` command.
        ProfileCommands::Use(selector) => handle_set_active_profile(selector),
    }
}
fn handle_info(_args: InfoArgs) -> Result<()> {
let paths = AppPaths::discover()?;
let state = AppState::load(&paths)?;
let policy_summary = runtime_policy_summary()?;
let runtime_metrics_targets = collect_runtime_broker_metrics_targets(&paths);
let now = Local::now().timestamp();
let version_summary = format_info_prodex_version(&paths)?;
let quota = collect_info_quota_aggregate(&paths, &state, now);
let processes = collect_prodex_processes();
let runtime_logs = collect_active_runtime_log_paths(&processes);
let runtime_load = collect_info_runtime_load_summary(&runtime_logs, now);
let runtime_process_count = processes.iter().filter(|process| process.runtime).count();
let five_hour_runway = estimate_info_runway(
&runtime_load.observations,
InfoQuotaWindow::FiveHour,
quota.five_hour_pool_remaining,
now,
);
let weekly_runway = estimate_info_runway(
&runtime_load.observations,
InfoQuotaWindow::Weekly,
quota.weekly_pool_remaining,
now,
);
let fields = vec![
("Profiles".to_string(), state.profiles.len().to_string()),
(
"Active profile".to_string(),
state.active_profile.as_deref().unwrap_or("-").to_string(),
),
(
"Runtime policy".to_string(),
format_runtime_policy_summary(policy_summary.as_ref()),
),
(
"Secret backend".to_string(),
format_secret_backend_summary(),
),
("Runtime logs".to_string(), format_runtime_logs_summary()),
("Audit logs".to_string(), format_audit_logs_summary()),
(
"Runtime metrics".to_string(),
format_runtime_broker_metrics_targets(&runtime_metrics_targets),
),
("Prodex version".to_string(), version_summary),
(
"Prodex processes".to_string(),
format_info_process_summary(&processes),
),
(
"Recent load".to_string(),
format_info_load_summary(&runtime_load, runtime_process_count),
),
(
"Quota data".to_string(),
format_info_quota_data_summary("a),
),
(
"5h remaining pool".to_string(),
format_info_pool_remaining(
quota.five_hour_pool_remaining,
quota.profiles_with_data(),
quota.earliest_five_hour_reset_at,
),
),
(
"Weekly remaining pool".to_string(),
format_info_pool_remaining(
quota.weekly_pool_remaining,
quota.profiles_with_data(),
quota.earliest_weekly_reset_at,
),
),
(
"5h runway".to_string(),
format_info_runway(
quota.profiles_with_data(),
quota.five_hour_pool_remaining,
quota.earliest_five_hour_reset_at,
five_hour_runway.as_ref(),
now,
),
),
(
"Weekly runway".to_string(),
format_info_runway(
quota.profiles_with_data(),
quota.weekly_pool_remaining,
quota.earliest_weekly_reset_at,
weekly_runway.as_ref(),
now,
),
),
];
print_panel("Info", &fields);
Ok(())
}
/// Builds the cross-profile quota aggregate for `info`: probes every
/// profile and lets [`build_info_quota_aggregate`] fall back to persisted
/// usage snapshots for probes that fail.
fn collect_info_quota_aggregate(
    paths: &AppPaths,
    state: &AppState,
    now: i64,
) -> InfoQuotaAggregate {
    // No profiles: return an all-zero aggregate without touching disk.
    if state.profiles.is_empty() {
        return InfoQuotaAggregate {
            quota_compatible_profiles: 0,
            live_profiles: 0,
            snapshot_profiles: 0,
            unavailable_profiles: 0,
            five_hour_pool_remaining: 0,
            weekly_pool_remaining: 0,
            earliest_five_hour_reset_at: None,
            earliest_weekly_reset_at: None,
        };
    }
    // Snapshot loading is best-effort; a load failure just means no
    // fallback data for failed probes.
    let persisted_usage_snapshots =
        load_runtime_usage_snapshots(paths, &state.profiles).unwrap_or_default();
    let reports =
        collect_run_profile_reports(state, state.profiles.keys().cloned().collect(), None);
    build_info_quota_aggregate(&reports, &persisted_usage_snapshots, now)
}
/// Folds per-profile probe reports (with persisted-snapshot fallback) into
/// the single [`InfoQuotaAggregate`] shown by `info`.
fn build_info_quota_aggregate(
    reports: &[RunProfileProbeReport],
    persisted_usage_snapshots: &BTreeMap<String, RuntimeProfileUsageSnapshot>,
    now: i64,
) -> InfoQuotaAggregate {
    let mut aggregate = InfoQuotaAggregate {
        quota_compatible_profiles: reports
            .iter()
            .filter(|report| report.auth.quota_compatible)
            .count(),
        live_profiles: 0,
        snapshot_profiles: 0,
        unavailable_profiles: 0,
        five_hour_pool_remaining: 0,
        weekly_pool_remaining: 0,
        earliest_five_hour_reset_at: None,
        earliest_weekly_reset_at: None,
    };
    for report in reports {
        // Profiles that cannot report quota are excluded entirely; they do
        // not count as "unavailable" either.
        if !report.auth.quota_compatible {
            continue;
        }
        // Prefer the live probe result; otherwise fall back to a persisted
        // snapshot, but only while it is still within its usability window.
        let usage = match &report.result {
            Ok(usage) => Some((usage.clone(), InfoQuotaSource::LiveProbe)),
            Err(_) => persisted_usage_snapshots
                .get(&report.name)
                .filter(|snapshot| runtime_usage_snapshot_is_usable(snapshot, now))
                .map(|snapshot| {
                    (
                        usage_from_runtime_usage_snapshot(snapshot),
                        InfoQuotaSource::PersistedSnapshot,
                    )
                }),
        };
        let Some((usage, source)) = usage else {
            aggregate.unavailable_profiles += 1;
            continue;
        };
        // Both main windows must be present for the profile to contribute.
        let Some((five_hour, weekly)) = info_main_window_snapshots(&usage) else {
            aggregate.unavailable_profiles += 1;
            continue;
        };
        match source {
            InfoQuotaSource::LiveProbe => aggregate.live_profiles += 1,
            InfoQuotaSource::PersistedSnapshot => aggregate.snapshot_profiles += 1,
        }
        aggregate.five_hour_pool_remaining += five_hour.remaining_percent;
        aggregate.weekly_pool_remaining += weekly.remaining_percent;
        // i64::MAX appears to be the "no reset known" sentinel; such
        // entries are excluded from the earliest-reset minimum.
        if five_hour.reset_at != i64::MAX {
            aggregate.earliest_five_hour_reset_at = Some(
                aggregate
                    .earliest_five_hour_reset_at
                    .map_or(five_hour.reset_at, |current| {
                        current.min(five_hour.reset_at)
                    }),
            );
        }
        if weekly.reset_at != i64::MAX {
            aggregate.earliest_weekly_reset_at = Some(
                aggregate
                    .earliest_weekly_reset_at
                    .map_or(weekly.reset_at, |current| current.min(weekly.reset_at)),
            );
        }
    }
    aggregate
}
/// Extracts both main quota windows from a usage response, or `None` when
/// either one is missing.
fn info_main_window_snapshots(
    usage: &UsageResponse,
) -> Option<(MainWindowSnapshot, MainWindowSnapshot)> {
    let five_hour = required_main_window_snapshot(usage, "5h")?;
    let weekly = required_main_window_snapshot(usage, "weekly")?;
    Some((five_hour, weekly))
}
/// Enumerates prodex-related processes on this host (excluding the current
/// process itself), sorted by pid.
fn collect_prodex_processes() -> Vec<ProdexProcessInfo> {
    let own_pid = std::process::id();
    // Basename of the running executable, so renamed binaries still match.
    let own_basename = std::env::current_exe().ok().and_then(|exe| {
        exe.file_name()
            .and_then(|name| name.to_str())
            .map(str::to_owned)
    });
    let mut found: Vec<ProdexProcessInfo> = Vec::new();
    for row in collect_process_rows() {
        if let Some(info) = classify_prodex_process_row(row, own_pid, own_basename.as_deref()) {
            found.push(info);
        }
    }
    found.sort_by_key(|info| info.pid);
    found
}
/// Lists processes via /proc when available, falling back to `ps`; an empty
/// list when both sources fail.
fn collect_process_rows() -> Vec<ProcessRow> {
    match collect_process_rows_from_proc() {
        Some(rows) => rows,
        None => collect_process_rows_from_ps().unwrap_or_default(),
    }
}
/// Reads process rows from /proc; `None` when /proc cannot be listed
/// (e.g. non-Linux hosts).
fn collect_process_rows_from_proc() -> Option<Vec<ProcessRow>> {
    let entries = fs::read_dir("/proc").ok()?;
    let mut rows = Vec::new();
    for entry in entries.flatten() {
        // Only numeric directory names are process ids.
        let pid = match entry.file_name().to_string_lossy().parse::<u32>() {
            Ok(pid) => pid,
            Err(_) => continue,
        };
        let proc_dir = entry.path();
        let command = match fs::read_to_string(proc_dir.join("comm")) {
            Ok(raw) => raw.trim().to_string(),
            Err(_) => continue,
        };
        let raw_cmdline = match fs::read(proc_dir.join("cmdline")) {
            Ok(bytes) => bytes,
            Err(_) => continue,
        };
        // cmdline is NUL-separated; drop empty chunks and non-UTF-8 args.
        let args: Vec<String> = raw_cmdline
            .split(|byte| *byte == 0)
            .filter(|chunk| !chunk.is_empty())
            .filter_map(|chunk| std::str::from_utf8(chunk).ok().map(str::to_owned))
            .collect();
        rows.push(ProcessRow { pid, command, args });
    }
    Some(rows)
}
/// Lists processes by shelling out to `ps` (portable fallback when /proc is
/// unavailable).
///
/// # Errors
/// Fails when `ps` cannot be spawned, exits non-zero, or prints non-UTF-8.
fn collect_process_rows_from_ps() -> Result<Vec<ProcessRow>> {
    let output = Command::new("ps")
        .args(["-Ao", "pid=,comm=,args="])
        .output()
        .context("failed to execute ps for prodex process listing")?;
    if !output.status.success() {
        bail!("ps returned exit status {}", output.status);
    }
    let stdout = String::from_utf8(output.stdout).context("ps output was not valid UTF-8")?;
    Ok(parse_ps_process_rows(&stdout))
}
/// Parses `ps -Ao pid=,comm=,args=` output into process rows; blank or
/// malformed lines are skipped.
fn parse_ps_process_rows(text: &str) -> Vec<ProcessRow> {
    text.lines()
        .filter_map(|line| {
            let mut tokens = line.split_whitespace();
            // A usable row needs at least a numeric pid and a command name.
            let pid = tokens.next()?.parse::<u32>().ok()?;
            let command = tokens.next()?.to_string();
            Some(ProcessRow {
                pid,
                command,
                args: tokens.map(str::to_string).collect(),
            })
        })
        .collect()
}
/// Classifies a raw process row, returning prodex process info when the row
/// belongs to another prodex invocation.
fn classify_prodex_process_row(
    row: ProcessRow,
    current_pid: u32,
    current_basename: Option<&str>,
) -> Option<ProdexProcessInfo> {
    // Skip ourselves and anything that does not look like prodex.
    if row.pid == current_pid {
        return None;
    }
    if !is_prodex_process_row(&row, current_basename) {
        return None;
    }
    let runtime = prodex_process_row_is_runtime(&row, current_basename);
    Some(ProdexProcessInfo {
        pid: row.pid,
        runtime,
    })
}
/// True when the row's command basename or any argv entry resolves to the
/// prodex binary.
fn is_prodex_process_row(row: &ProcessRow, current_basename: Option<&str>) -> bool {
    if process_basename_matches(process_basename(&row.command), current_basename) {
        return true;
    }
    prodex_process_row_argv_span(row, current_basename).is_some()
}
/// True when the prodex argv span indicates a runtime invocation
/// (the `run` or internal `__runtime-broker` subcommand).
fn prodex_process_row_is_runtime(row: &ProcessRow, current_basename: Option<&str>) -> bool {
    match prodex_process_row_argv_span(row, current_basename).and_then(|span| span.get(1)) {
        Some(subcommand) => subcommand == "run" || subcommand == "__runtime-broker",
        None => false,
    }
}
/// Returns the argv tail starting at the first token whose basename matches
/// the prodex binary, if any.
fn prodex_process_row_argv_span<'a>(
    row: &'a ProcessRow,
    current_basename: Option<&str>,
) -> Option<&'a [String]> {
    for (index, arg) in row.args.iter().enumerate() {
        if process_basename_matches(process_basename(arg), current_basename) {
            return Some(&row.args[index..]);
        }
    }
    None
}
/// True when `candidate` is the canonical binary name ("prodex") or equals
/// the currently running executable's basename.
fn process_basename_matches(candidate: &str, current_basename: Option<&str>) -> bool {
    if candidate == "prodex" {
        return true;
    }
    matches!(current_basename, Some(name) if candidate == name)
}
/// Extracts the final path component of `input`, falling back to the raw
/// string when it has no UTF-8 file name component.
fn process_basename(input: &str) -> &str {
    match Path::new(input).file_name().and_then(|name| name.to_str()) {
        Some(name) => name,
        None => input,
    }
}
/// Collects one runtime-proxy log path per live runtime pid; the last path
/// seen for a pid wins.
fn collect_active_runtime_log_paths(processes: &[ProdexProcessInfo]) -> Vec<PathBuf> {
    let runtime_pids: BTreeSet<u32> = processes
        .iter()
        .filter_map(|process| process.runtime.then_some(process.pid))
        .collect();
    if runtime_pids.is_empty() {
        return Vec::new();
    }
    let mut latest_logs: BTreeMap<u32, PathBuf> = BTreeMap::new();
    for path in prodex_runtime_log_paths_in_dir(&runtime_proxy_log_dir()) {
        if let Some(pid) = runtime_log_pid_from_path(&path) {
            if runtime_pids.contains(&pid) {
                latest_logs.insert(pid, path);
            }
        }
    }
    latest_logs.into_values().collect()
}
/// Extracts the runtime pid encoded in a runtime-proxy log file name of the
/// form `<prefix>-<pid>-...`.
fn runtime_log_pid_from_path(path: &Path) -> Option<u32> {
    let name = path.file_name()?.to_str()?;
    // Strip the prefix and separator directly instead of building
    // `format!("{prefix}-")`, avoiding a heap allocation per path.
    let rest = name
        .strip_prefix(RUNTIME_PROXY_LOG_FILE_PREFIX)?
        .strip_prefix('-')?;
    let (pid, _) = rest.split_once('-')?;
    pid.parse::<u32>().ok()
}
/// Aggregates runtime load information from the tails of the given runtime
/// log files into a single summary; unreadable logs are skipped (best-effort).
fn collect_info_runtime_load_summary(log_paths: &[PathBuf], now: i64) -> InfoRuntimeLoadSummary {
    let mut summary = InfoRuntimeLoadSummary::default();
    summary.log_count = log_paths.len();
    for path in log_paths {
        let Ok(tail) = read_runtime_log_tail(path, INFO_RUNTIME_LOG_TAIL_BYTES) else {
            continue;
        };
        let log_summary = collect_info_runtime_load_summary_from_text(
            &String::from_utf8_lossy(&tail),
            now,
            INFO_RECENT_LOAD_WINDOW_SECONDS,
            INFO_FORECAST_LOOKBACK_SECONDS,
        );
        summary.observations.extend(log_summary.observations);
        summary.active_inflight_units += log_summary.active_inflight_units;
        summary.recent_selection_events += log_summary.recent_selection_events;
        // Keep the earliest "first seen" timestamp across all logs.
        summary.recent_first_timestamp = match (
            summary.recent_first_timestamp,
            log_summary.recent_first_timestamp,
        ) {
            (Some(current), Some(candidate)) => Some(current.min(candidate)),
            (current, candidate) => current.or(candidate),
        };
        // Keep the latest "last seen" timestamp across all logs.
        summary.recent_last_timestamp = match (
            summary.recent_last_timestamp,
            log_summary.recent_last_timestamp,
        ) {
            (Some(current), Some(candidate)) => Some(current.max(candidate)),
            (current, candidate) => current.or(candidate),
        };
    }
    // Deterministic ordering for rendering: by time, then by profile name.
    summary
        .observations
        .sort_by_key(|observation| (observation.timestamp, observation.profile.clone()));
    summary
}
/// Parses one log tail into a load summary: selection observations within
/// `lookback_seconds`, recent-activity counters within
/// `recent_window_seconds`, and the latest per-profile inflight counts.
fn collect_info_runtime_load_summary_from_text(
    text: &str,
    now: i64,
    recent_window_seconds: i64,
    lookback_seconds: i64,
) -> InfoRuntimeLoadSummary {
    let recent_cutoff = now.saturating_sub(recent_window_seconds);
    let lookback_cutoff = now.saturating_sub(lookback_seconds);
    let mut summary = InfoRuntimeLoadSummary::default();
    // Later lines overwrite earlier ones, so only the newest inflight count
    // per profile survives.
    let mut latest_inflight_counts = BTreeMap::new();
    for line in text.lines() {
        if let Some((profile, count)) = info_runtime_inflight_from_line(line) {
            latest_inflight_counts.insert(profile, count);
        }
        let Some(observation) = info_runtime_selection_observation_from_line(line) else {
            continue;
        };
        if observation.timestamp >= lookback_cutoff {
            summary.observations.push(observation.clone());
        }
        // The (shorter) recent window drives the activity counters and the
        // first/last-seen bounds.
        if observation.timestamp >= recent_cutoff {
            summary.recent_selection_events += 1;
            summary.recent_first_timestamp = Some(
                summary
                    .recent_first_timestamp
                    .map_or(observation.timestamp, |current| {
                        current.min(observation.timestamp)
                    }),
            );
            summary.recent_last_timestamp = Some(
                summary
                    .recent_last_timestamp
                    .map_or(observation.timestamp, |current| {
                        current.max(observation.timestamp)
                    }),
            );
        }
    }
    summary.active_inflight_units = latest_inflight_counts.values().sum();
    summary
}
/// Parses a selection log line into a quota observation; `None` for lines
/// that are not selection events, target no profile, or lack the fields.
fn info_runtime_selection_observation_from_line(line: &str) -> Option<InfoRuntimeQuotaObservation> {
    let is_selection =
        line.contains("selection_pick") || line.contains("selection_keep_current");
    if !is_selection {
        return None;
    }
    let timestamp = runtime_log_timestamp_epoch(line)?;
    let fields = info_runtime_parse_fields(line);
    let profile = fields.get("profile")?;
    if profile == "none" {
        return None;
    }
    let five_hour_remaining = fields.get("five_hour_remaining")?.parse::<i64>().ok()?;
    let weekly_remaining = fields.get("weekly_remaining")?.parse::<i64>().ok()?;
    Some(InfoRuntimeQuotaObservation {
        timestamp,
        profile: profile.clone(),
        five_hour_remaining,
        weekly_remaining,
    })
}
/// Parses a `profile_inflight` log line into `(profile, count)`.
fn info_runtime_inflight_from_line(line: &str) -> Option<(String, usize)> {
    if !line.contains("profile_inflight ") {
        return None;
    }
    let fields = info_runtime_parse_fields(line);
    let profile = fields.get("profile")?.clone();
    let count = fields.get("count")?.parse::<usize>().ok()?;
    Some((profile, count))
}
/// Converts a runtime log line's bracketed timestamp into a Unix epoch,
/// accepting both fractional-second and whole-second formats.
fn runtime_log_timestamp_epoch(line: &str) -> Option<i64> {
    let timestamp = info_runtime_line_timestamp(line)?;
    // Fix: the argument had been mangled to `×tamp` (HTML-entity corruption
    // of `&timestamp`), which does not compile.
    chrono::DateTime::parse_from_str(&timestamp, "%Y-%m-%d %H:%M:%S%.f %:z")
        .or_else(|_| chrono::DateTime::parse_from_str(&timestamp, "%Y-%m-%d %H:%M:%S %:z"))
        .ok()
        .map(|datetime| datetime.timestamp())
}
/// Returns the `[...]` timestamp prefix of a log line with the brackets
/// stripped, or `None` when the line has no such prefix.
fn info_runtime_line_timestamp(line: &str) -> Option<String> {
    let close = line.find("] ")?;
    let body = line.strip_prefix('[')?;
    // `close` indexes the original line; after dropping '[' the timestamp
    // ends one byte earlier.
    let timestamp = body.get(..close.saturating_sub(1))?;
    Some(timestamp.to_string())
}
/// Splits a log line's message portion (after the `[...] ` prefix, if any)
/// into `key=value` fields; values lose surrounding quotes, and tokens with
/// an empty key or value are ignored.
fn info_runtime_parse_fields(line: &str) -> BTreeMap<String, String> {
    let message = match line.split_once("] ") {
        Some((_, rest)) => rest,
        None => line,
    }
    .trim();
    let mut fields = BTreeMap::new();
    for token in message.split_whitespace() {
        if let Some((key, value)) = token.split_once('=') {
            if !key.is_empty() && !value.is_empty() {
                fields.insert(key.to_string(), value.trim_matches('"').to_string());
            }
        }
    }
    fields
}
/// Estimates when the aggregated quota pool will be exhausted, based on
/// per-profile burn rates derived from `observations`.
///
/// Returns `None` when no profile shows measurable quota decay; returns an
/// immediate estimate (zero burn, exhaust now) when `current_remaining` is
/// already spent.
fn estimate_info_runway(
    observations: &[InfoRuntimeQuotaObservation],
    window: InfoQuotaWindow,
    current_remaining: i64,
    now: i64,
) -> Option<InfoRunwayEstimate> {
    if current_remaining <= 0 {
        return Some(InfoRunwayEstimate {
            burn_per_hour: 0.0,
            observed_profiles: 0,
            observed_span_seconds: 0,
            exhaust_at: now,
        });
    }
    // Group observations per profile so each profile's decay is measured
    // independently before summing the burn rates.
    let mut by_profile = BTreeMap::<String, Vec<&InfoRuntimeQuotaObservation>>::new();
    for observation in observations {
        by_profile
            .entry(observation.profile.clone())
            .or_default()
            .push(observation);
    }
    let mut burn_per_hour = 0.0;
    let mut observed_profiles = 0;
    let mut earliest = i64::MAX;
    let mut latest = i64::MIN;
    for profile_observations in by_profile.values_mut() {
        // Burn-rate computation requires time-sorted observations.
        profile_observations.sort_by_key(|observation| observation.timestamp);
        let Some((profile_burn_per_hour, start, end)) =
            info_profile_window_burn_rate(profile_observations, window)
        else {
            continue;
        };
        burn_per_hour += profile_burn_per_hour;
        observed_profiles += 1;
        earliest = earliest.min(start);
        latest = latest.max(end);
    }
    // No measurable decay anywhere -> no estimate.
    if burn_per_hour <= 0.0 || observed_profiles == 0 || earliest == i64::MAX || latest == i64::MIN
    {
        return None;
    }
    let seconds_until_exhaustion =
        ((current_remaining as f64 / burn_per_hour) * 3600.0).ceil() as i64;
    Some(InfoRunwayEstimate {
        burn_per_hour,
        observed_profiles,
        observed_span_seconds: latest.saturating_sub(earliest),
        exhaust_at: now.saturating_add(seconds_until_exhaustion.max(0)),
    })
}
/// Computes one profile's burn rate (remaining-percent per hour) for
/// `window` from its time-sorted observations, returning
/// `(rate, start_timestamp, end_timestamp)`.
///
/// Walks backwards from the newest observation and stops at the first point
/// where the remaining value was lower than a later one (i.e. a quota reset
/// occurred), so only the most recent monotonic decay stretch is measured.
fn info_profile_window_burn_rate(
    observations: &[&InfoRuntimeQuotaObservation],
    window: InfoQuotaWindow,
) -> Option<(f64, i64, i64)> {
    if observations.len() < 2 {
        return None;
    }
    let latest = observations.last()?;
    let mut earliest = *latest;
    let mut current_remaining = info_observation_window_remaining(latest, window);
    for observation in observations.iter().rev().skip(1) {
        let remaining = info_observation_window_remaining(observation, window);
        // Older value below a newer one means quota was refilled in between.
        if remaining < current_remaining {
            break;
        }
        earliest = *observation;
        current_remaining = remaining;
    }
    let earliest_remaining = info_observation_window_remaining(earliest, window);
    let latest_remaining = info_observation_window_remaining(latest, window);
    let burned = earliest_remaining.saturating_sub(latest_remaining);
    let span_seconds = latest.timestamp.saturating_sub(earliest.timestamp);
    // Require actual decay over a minimum span to avoid noisy estimates.
    if burned <= 0 || span_seconds < INFO_FORECAST_MIN_SPAN_SECONDS {
        return None;
    }
    Some((
        burned as f64 * 3600.0 / span_seconds as f64,
        earliest.timestamp,
        latest.timestamp,
    ))
}
fn info_observation_window_remaining(
observation: &InfoRuntimeQuotaObservation,
window: InfoQuotaWindow,
) -> i64 {
match window {
InfoQuotaWindow::FiveHour => observation.five_hour_remaining,
InfoQuotaWindow::Weekly => observation.weekly_remaining,
}
}
/// One-line summary of detected prodex processes: "No", or total/runtime
/// counts plus up to six pids.
fn format_info_process_summary(processes: &[ProdexProcessInfo]) -> String {
    if processes.is_empty() {
        return "No".to_string();
    }
    let runtime_count = processes.iter().filter(|process| process.runtime).count();
    let mut shown_pids = Vec::new();
    for process in processes.iter().take(6) {
        shown_pids.push(process.pid.to_string());
    }
    let hidden = processes.len().saturating_sub(6);
    let extra = match hidden {
        0 => String::new(),
        more => format!(" (+{more} more)"),
    };
    format!(
        "Yes ({} total, {} runtime; pids: {}{})",
        processes.len(),
        runtime_count,
        shown_pids.join(", "),
        extra
    )
}
/// Renders a one-line description of recent runtime load for the info panel,
/// degrading gracefully when no runtime process, no logs, or no recent
/// activity is present.
fn format_info_load_summary(
    summary: &InfoRuntimeLoadSummary,
    runtime_process_count: usize,
) -> String {
    if runtime_process_count == 0 {
        return "No active prodex runtime detected".to_string();
    }
    if summary.log_count == 0 {
        return "Runtime process detected, but no matching runtime log was found".to_string();
    }
    if summary.recent_selection_events == 0 {
        return format!(
            "{} active runtime log(s); no selection activity observed in the sampled window; inflight units {}",
            summary.log_count, summary.active_inflight_units
        );
    }
    // A single event has no span, so it gets a dedicated phrasing.
    if summary.recent_selection_events == 1 {
        return format!(
            "1 selection event observed in the sampled window; inflight units {}; {} active runtime log(s)",
            summary.active_inflight_units, summary.log_count
        );
    }
    // Span between first and last recent event; falls back to the window
    // size when the bounds are unavailable.
    let activity_span = summary
        .recent_first_timestamp
        .zip(summary.recent_last_timestamp)
        .map(|(start, end)| format_relative_duration(end.saturating_sub(start)))
        .unwrap_or_else(|| format!("{}m", INFO_RECENT_LOAD_WINDOW_SECONDS / 60));
    format!(
        "{} selection event(s) over {}; inflight units {}; {} active runtime log(s)",
        summary.recent_selection_events,
        activity_span,
        summary.active_inflight_units,
        summary.log_count
    )
}
/// Summarizes quota data availability across quota-compatible profiles.
fn format_info_quota_data_summary(aggregate: &InfoQuotaAggregate) -> String {
    match aggregate.quota_compatible_profiles {
        0 => "No quota-compatible profiles".to_string(),
        total => format!(
            "{} quota-compatible profile(s): live={}, snapshot={}, unavailable={}",
            total,
            aggregate.live_profiles,
            aggregate.snapshot_profiles,
            aggregate.unavailable_profiles
        ),
    }
}
/// Renders the runtime policy path and version, or "disabled" when absent.
fn format_runtime_policy_summary(summary: Option<&RuntimePolicySummary>) -> String {
    match summary {
        Some(summary) => format!("{} (v{})", summary.path.display(), summary.version),
        None => "disabled".to_string(),
    }
}
/// Renders the runtime log directory together with the configured format.
fn format_runtime_logs_summary() -> String {
    let directory = runtime_proxy_log_dir();
    let log_format = runtime_proxy_log_format();
    format!("{} ({})", directory.display(), log_format.as_str())
}
/// JSON form of the runtime policy summary; `null` when policy is disabled.
fn runtime_policy_json_value(summary: Option<&RuntimePolicySummary>) -> serde_json::Value {
    match summary {
        Some(summary) => serde_json::json!({
            "path": summary.path.display().to_string(),
            "version": summary.version,
        }),
        None => serde_json::Value::Null,
    }
}
fn runtime_logs_json_value() -> serde_json::Value {
serde_json::json!({
"directory": runtime_proxy_log_dir().display().to_string(),
"format": runtime_proxy_log_format().as_str(),
})
}
/// Resolves the secret backend selection. Precedence for both the backend
/// kind and the keyring service: environment variable, then runtime policy,
/// then the built-in default (file backend, no service).
///
/// # Errors
/// Fails when the env-provided backend name does not parse or the resulting
/// kind/keyring combination is rejected by the secret store.
fn configured_secret_backend_selection() -> Result<secret_store::SecretBackendSelection> {
    let policy = runtime_policy_secrets();
    let backend = env::var(PRODEX_SECRET_BACKEND_ENV)
        .ok()
        .map(|value| value.parse::<secret_store::SecretBackendKind>())
        .transpose()
        .map_err(anyhow::Error::new)?
        .or_else(|| policy.as_ref().and_then(|policy| policy.backend))
        .unwrap_or(secret_store::SecretBackendKind::File);
    // Blank env values are treated as unset.
    let keyring_service = env::var(PRODEX_SECRET_KEYRING_SERVICE_ENV)
        .ok()
        .map(|value| value.trim().to_string())
        .filter(|value| !value.is_empty())
        .or_else(|| {
            policy
                .as_ref()
                .and_then(|policy| policy.keyring_service.clone())
        });
    secret_store::SecretBackendSelection::from_kind(backend, keyring_service)
        .map_err(anyhow::Error::new)
}
/// Human-readable secret backend description, including the keyring service
/// when one is configured; "invalid (...)" when configuration fails.
fn format_secret_backend_summary() -> String {
    let selection = match configured_secret_backend_selection() {
        Ok(selection) => selection,
        Err(err) => return format!("invalid ({err})"),
    };
    match selection.keyring_service() {
        Some(service) => format!("{} ({service})", selection.kind()),
        None => selection.kind().to_string(),
    }
}
/// JSON description of the configured secret backend, or an error object
/// when the configuration is invalid.
fn secret_backend_json_value() -> serde_json::Value {
    match configured_secret_backend_selection() {
        Ok(selection) => {
            let backend = selection.kind().as_str();
            serde_json::json!({
                "backend": backend,
                "keyring_service": selection.keyring_service(),
            })
        }
        Err(err) => serde_json::json!({
            "invalid": true,
            "error": err.to_string(),
        }),
    }
}
/// Appends an audit event, deliberately ignoring failures: auditing must
/// never abort the primary operation it accompanies.
fn audit_log_event_best_effort(
    component: &str,
    action: &str,
    outcome: &str,
    details: serde_json::Value,
) {
    let _ = append_audit_event(component, action, outcome, details);
}
/// Formats an aggregated remaining-quota figure, appending the earliest
/// reset time when known; "Unavailable" when no profile contributed data.
fn format_info_pool_remaining(
    total_remaining: i64,
    profiles_with_data: usize,
    earliest_reset_at: Option<i64>,
) -> String {
    if profiles_with_data == 0 {
        return "Unavailable".to_string();
    }
    let mut rendered = format!("{total_remaining}% across {profiles_with_data} profile(s)");
    if let Some(reset_at) = earliest_reset_at {
        let reset = format_precise_reset_time(Some(reset_at));
        rendered.push_str(&format!("; earliest reset {reset}"));
    }
    rendered
}
/// Renders the exhaustion-forecast line for the info panel.
///
/// Precedence: no data -> "Unavailable"; pool already empty -> "Exhausted";
/// no decay estimate -> explanatory "Unavailable"; a reset arriving before
/// the forecast -> reset message; otherwise the no-reset runway estimate.
fn format_info_runway(
    profiles_with_data: usize,
    current_remaining: i64,
    earliest_reset_at: Option<i64>,
    estimate: Option<&InfoRunwayEstimate>,
    now: i64,
) -> String {
    if profiles_with_data == 0 {
        return "Unavailable".to_string();
    }
    if current_remaining <= 0 {
        return "Exhausted".to_string();
    }
    let Some(estimate) = estimate else {
        return "Unavailable (no recent quota decay observed in active runtime logs)".to_string();
    };
    let observed = format_relative_duration(estimate.observed_span_seconds);
    let burn = format!("{:.1}", estimate.burn_per_hour);
    // When a quota reset lands before the projected exhaustion, report the
    // reset rather than the raw runway.
    if let Some(reset_at) = earliest_reset_at
        && reset_at <= estimate.exhaust_at
    {
        return format!(
            "Earliest reset {} arrives before the no-reset runway (~{} at {} aggregated-%/h, {} profile(s), observed over {})",
            format_precise_reset_time(Some(reset_at)),
            format_relative_duration(estimate.exhaust_at.saturating_sub(now)),
            burn,
            estimate.observed_profiles,
            observed
        );
    }
    format!(
        "{} (~{}) at {} aggregated-%/h from {} profile(s), observed over {}, no-reset estimate",
        format_precise_reset_time(Some(estimate.exhaust_at)),
        format_relative_duration(estimate.exhaust_at.saturating_sub(now)),
        burn,
        estimate.observed_profiles,
        observed
    )
}
/// Renders a duration (negative values clamped to zero) as a coarse human
/// string: "now", "<1m", minutes, hours+minutes, or days+hours — at most
/// two units.
fn format_relative_duration(seconds: i64) -> String {
    let total = seconds.max(0);
    if total == 0 {
        return "now".to_string();
    }
    let days = total / 86_400;
    let hours = (total % 86_400) / 3_600;
    let minutes = (total % 3_600) / 60;
    match (days, hours, minutes) {
        (d, 0, _) if d > 0 => format!("{d}d"),
        (d, h, _) if d > 0 => format!("{d}d {h}h"),
        (0, h, 0) if h > 0 => format!("{h}h"),
        (0, h, m) if h > 0 => format!("{h}h {m}m"),
        (0, 0, m) if m > 0 => format!("{m}m"),
        _ => "<1m".to_string(),
    }
}
fn handle_doctor(args: DoctorArgs) -> Result<()> {
let paths = AppPaths::discover()?;
let state = AppState::load(&paths)?;
let codex_home = default_codex_home(&paths)?;
let policy_summary = runtime_policy_summary()?;
let runtime_metrics_targets = collect_runtime_broker_metrics_targets(&paths);
if args.runtime && args.json {
let summary = collect_runtime_doctor_summary();
let mut value = runtime_doctor_json_value(&summary);
if let Some(object) = value.as_object_mut() {
object.insert(
"runtime_policy".to_string(),
runtime_policy_json_value(policy_summary.as_ref()),
);
object.insert("secret_backend".to_string(), secret_backend_json_value());
object.insert("runtime_logs".to_string(), runtime_logs_json_value());
object.insert("audit_logs".to_string(), audit_logs_json_value());
object.insert(
"live_brokers".to_string(),
serde_json::to_value(collect_live_runtime_broker_observations(&paths))
.unwrap_or_else(|_| serde_json::Value::Array(Vec::new())),
);
object.insert(
"live_broker_metrics_targets".to_string(),
serde_json::to_value(&runtime_metrics_targets)
.unwrap_or_else(|_| serde_json::Value::Array(Vec::new())),
);
}
let json = serde_json::to_string_pretty(&value)
.context("failed to serialize runtime doctor summary")?;
println!("{json}");
return Ok(());
}
let summary_fields = vec![
("Prodex root".to_string(), paths.root.display().to_string()),
(
"State file".to_string(),
format!(
"{} ({})",
paths.state_file.display(),
if paths.state_file.exists() {
"exists"
} else {
"missing"
}
),
),
(
"Profiles root".to_string(),
paths.managed_profiles_root.display().to_string(),
),
(
"Default CODEX_HOME".to_string(),
format!(
"{} ({})",
codex_home.display(),
if codex_home.exists() {
"exists"
} else {
"missing"
}
),
),
(
"Codex binary".to_string(),
format_binary_resolution(&codex_bin()),
),
(
"Quota endpoint".to_string(),
usage_url("a_base_url(None)),
),
(
"Runtime policy".to_string(),
format_runtime_policy_summary(policy_summary.as_ref()),
),
(
"Secret backend".to_string(),
format_secret_backend_summary(),
),
("Runtime logs".to_string(), format_runtime_logs_summary()),
("Audit logs".to_string(), format_audit_logs_summary()),
(
"Runtime metrics".to_string(),
format_runtime_broker_metrics_targets(&runtime_metrics_targets),
),
("Profiles".to_string(), state.profiles.len().to_string()),
(
"Active profile".to_string(),
state.active_profile.as_deref().unwrap_or("-").to_string(),
),
];
print_panel("Doctor", &summary_fields);
if args.runtime {
println!();
print_panel("Runtime Proxy", &runtime_doctor_fields());
}
if state.profiles.is_empty() {
return Ok(());
}
for report in collect_doctor_profile_reports(&state, args.quota) {
let kind = if report.summary.managed {
"managed"
} else {
"external"
};
println!();
let mut fields = vec![
(
"Current".to_string(),
if report.summary.active {
"Yes".to_string()
} else {
"No".to_string()
},
),
("Kind".to_string(), kind.to_string()),
("Auth".to_string(), report.summary.auth.label),
(
"Email".to_string(),
report.summary.email.as_deref().unwrap_or("-").to_string(),
),
(
"Path".to_string(),
report.summary.codex_home.display().to_string(),
),
(
"Exists".to_string(),
if report.summary.codex_home.exists() {
"Yes".to_string()
} else {
"No".to_string()
},
),
];
if let Some(quota) = report.quota {
match quota {
Ok(usage) => {
let blocked = collect_blocked_limits(&usage, false);
fields.push((
"Quota".to_string(),
if blocked.is_empty() {
"Ready".to_string()
} else {
format!("Blocked ({})", format_blocked_limits(&blocked))
},
));
fields.push(("Main".to_string(), format_main_windows(&usage)));
}
Err(err) => {
fields.push((
"Quota".to_string(),
format!("Error ({})", first_line_of_error(&err.to_string())),
));
}
}
}
print_panel(&format!("Profile {}", report.summary.name), &fields);
}
Ok(())
}
/// `prodex audit` entry point: prints recent audit events — JSON or a
/// human-readable listing — after applying the requested filters.
fn handle_audit(args: AuditArgs) -> Result<()> {
    // Trim whitespace from filter values before querying.
    fn normalized(value: Option<&str>) -> Option<String> {
        value.map(|raw| raw.trim().to_string())
    }
    let query = AuditLogQuery {
        tail: args.tail,
        component: normalized(args.component.as_deref()),
        action: normalized(args.action.as_deref()),
        outcome: normalized(args.outcome.as_deref()),
    };
    let events = read_recent_audit_events(&query)?;
    if !args.json {
        println!(
            "{}",
            render_audit_events_human(&audit_log_path(), &query, &events)
        );
        return Ok(());
    }
    let payload = serde_json::json!({
        "audit_logs": audit_logs_json_value(),
        "filters": {
            "tail": query.tail,
            "component": query.component,
            "action": query.action,
            "outcome": query.outcome,
        },
        "events": events,
    });
    let json =
        serde_json::to_string_pretty(&payload).context("failed to serialize audit log output")?;
    println!("{json}");
    Ok(())
}
/// `prodex cleanup` entry point: removes stale runtime artifacts and prints
/// a panel summarizing what was deleted.
fn handle_cleanup() -> Result<()> {
    let paths = AppPaths::discover()?;
    let state = AppState::load(&paths)?;
    let runtime_log_dir = runtime_proxy_log_dir();
    let summary = perform_prodex_cleanup(&paths, &state)?;
    let fields = vec![
        ("Prodex root".to_string(), paths.root.display().to_string()),
        (
            "Runtime logs".to_string(),
            format!(
                "{} removed from {}",
                summary.runtime_logs_removed,
                runtime_log_dir.display()
            ),
        ),
        (
            "Runtime pointer".to_string(),
            if summary.stale_runtime_log_pointer_removed > 0 {
                "removed stale latest-pointer file".to_string()
            } else {
                "clean".to_string()
            },
        ),
        (
            "Temp login homes".to_string(),
            summary.stale_login_dirs_removed.to_string(),
        ),
        (
            "Orphan managed homes".to_string(),
            summary.orphan_managed_profile_dirs_removed.to_string(),
        ),
        (
            "Dead broker leases".to_string(),
            summary.dead_runtime_broker_leases_removed.to_string(),
        ),
        (
            "Dead broker registries".to_string(),
            summary.dead_runtime_broker_registries_removed.to_string(),
        ),
        (
            "Total removed".to_string(),
            summary.total_removed().to_string(),
        ),
    ];
    print_panel("Cleanup", &fields);
    Ok(())
}
/// Serde helper: deserializes the value as `Option<T>` and substitutes
/// `T::default()` for an explicit `null`, instead of failing.
fn deserialize_null_default<'de, D, T>(deserializer: D) -> std::result::Result<T, D::Error>
where
    D: serde::Deserializer<'de>,
    T: serde::Deserialize<'de> + Default,
{
    Ok(Option::<T>::deserialize(deserializer)?.unwrap_or_default())
}
/// `prodex quota` entry point: shows quota for one profile or all profiles,
/// with optional raw-JSON output and a watch mode.
fn handle_quota(args: QuotaArgs) -> Result<()> {
    let paths = AppPaths::discover()?;
    let state = AppState::load(&paths)?;
    // `--all` path: iterate every configured profile.
    if args.all {
        if state.profiles.is_empty() {
            bail!("no profiles configured");
        }
        if quota_watch_enabled(&args) {
            return watch_all_quotas(&paths, args.base_url.as_deref(), args.detail);
        }
        let reports = collect_quota_reports(&state, args.base_url.as_deref());
        print_quota_reports(&reports, args.detail);
        return Ok(());
    }
    // Single-profile path: resolve the requested (or active) profile first.
    let profile_name = resolve_profile_name(&state, args.profile.as_deref())?;
    let codex_home = state
        .profiles
        .get(&profile_name)
        .with_context(|| format!("profile '{}' is missing", profile_name))?
        .codex_home
        .clone();
    // `--raw` dumps the unprocessed usage JSON.
    if args.raw {
        let usage = fetch_usage_json(&codex_home, args.base_url.as_deref())?;
        println!(
            "{}",
            serde_json::to_string_pretty(&usage).context("failed to render usage JSON")?
        );
        return Ok(());
    }
    if quota_watch_enabled(&args) {
        return watch_quota(&profile_name, &codex_home, args.base_url.as_deref());
    }
    let usage = fetch_usage(&codex_home, args.base_url.as_deref())?;
    println!("{}", render_profile_quota(&profile_name, &usage));
    Ok(())
}
/// `prodex run` entry point: prepares a runtime launch (profile selection,
/// optional auto-rotate, optional runtime proxy) and runs Codex with the
/// resulting environment, mirroring its exit status.
fn handle_run(args: RunArgs) -> Result<()> {
    let codex_args = normalize_run_codex_args(&args.codex_args);
    let allow_auto_rotate = !args.no_auto_rotate;
    let include_code_review = is_review_invocation(&codex_args);
    let prepared = prepare_runtime_launch(RuntimeLaunchRequest {
        profile: args.profile.as_deref(),
        allow_auto_rotate,
        skip_quota_check: args.skip_quota_check,
        base_url: args.base_url.as_deref(),
        include_code_review,
        force_runtime_proxy: false,
    })?;
    let runtime_proxy = prepared.runtime_proxy;
    // When a proxy is active, rewrite the Codex args to route through it;
    // otherwise pass them through unchanged.
    let runtime_args = runtime_proxy
        .as_ref()
        .map(|proxy| {
            if proxy.openai_mount_path == RUNTIME_PROXY_OPENAI_MOUNT_PATH {
                runtime_proxy_codex_args(proxy.listen_addr, &codex_args)
            } else {
                runtime_proxy_codex_args_with_mount_path(
                    proxy.listen_addr,
                    &proxy.openai_mount_path,
                    &codex_args,
                )
            }
        })
        .unwrap_or(codex_args);
    let status = run_child(
        &codex_bin(),
        &runtime_args,
        &prepared.codex_home,
        &[],
        &[],
        runtime_proxy.as_ref(),
    )?;
    // `std::process::exit` does not run destructors, so release the broker lease
    // before mirroring Codex's exit status back to the caller.
    drop(runtime_proxy);
    exit_with_status(status)
}
/// `prodex claude` entry point: launches Claude Code against the local
/// runtime proxy using the prepared profile's CODEX_HOME, mirroring the
/// child's exit status.
fn handle_claude(args: ClaudeArgs) -> Result<()> {
    let prepared = prepare_runtime_launch(RuntimeLaunchRequest {
        profile: args.profile.as_deref(),
        allow_auto_rotate: !args.no_auto_rotate,
        skip_quota_check: args.skip_quota_check,
        base_url: args.base_url.as_deref(),
        include_code_review: false,
        // Claude Code always needs the proxy to translate its API calls.
        force_runtime_proxy: true,
    })?;
    let runtime_proxy = prepared
        .runtime_proxy
        .context("Claude Code launch requires a local runtime proxy")?;
    let claude_bin = claude_bin();
    let claude_config_dir = prepare_runtime_proxy_claude_config_dir(
        &prepared.paths,
        &prepared.codex_home,
        prepared.managed,
    )?;
    let current_dir =
        env::current_dir().context("failed to determine current directory for Claude Code")?;
    let claude_version = runtime_proxy_claude_binary_version(&claude_bin);
    // Fixed HTML-entity corruption (`¤t_dir`) back to `&current_dir`.
    ensure_runtime_proxy_claude_launch_config(
        &claude_config_dir,
        &current_dir,
        claude_version.as_deref(),
    )?;
    let extra_env = runtime_proxy_claude_launch_env(
        runtime_proxy.listen_addr,
        &claude_config_dir,
        &prepared.codex_home,
    );
    let status = run_child(
        &claude_bin,
        &args.claude_args,
        &prepared.codex_home,
        &extra_env,
        runtime_proxy_claude_removed_env(),
        Some(&runtime_proxy),
    )?;
    // Release the broker lease before the process-exiting status mirror.
    drop(runtime_proxy);
    exit_with_status(status)
}
/// Environment variables injected into a Claude Code launch so it talks to
/// the local runtime proxy instead of the upstream API directly.
fn runtime_proxy_claude_launch_env(
    listen_addr: std::net::SocketAddr,
    config_dir: &Path,
    codex_home: &Path,
) -> Vec<(&'static str, OsString)> {
    let target_model = runtime_proxy_claude_launch_model(codex_home);
    let base_url = format!("http://{listen_addr}");
    let mut env: Vec<(&'static str, OsString)> = Vec::new();
    env.push(("CLAUDE_CONFIG_DIR", OsString::from(config_dir.as_os_str())));
    env.push(("ANTHROPIC_BASE_URL", OsString::from(base_url.as_str())));
    env.push((
        "ANTHROPIC_AUTH_TOKEN",
        OsString::from(PRODEX_CLAUDE_PROXY_API_KEY),
    ));
    env.push((
        "ANTHROPIC_MODEL",
        OsString::from(runtime_proxy_claude_picker_model(&target_model)),
    ));
    if runtime_proxy_claude_use_foundry_compat() {
        // Foundry-compat mode also needs the foundry-specific endpoint/key.
        env.push(("CLAUDE_CODE_USE_FOUNDRY", OsString::from("1")));
        env.push(("ANTHROPIC_FOUNDRY_BASE_URL", OsString::from(base_url)));
        env.push((
            "ANTHROPIC_FOUNDRY_API_KEY",
            OsString::from(PRODEX_CLAUDE_PROXY_API_KEY),
        ));
    }
    env.extend(runtime_proxy_claude_pinned_alias_env());
    env.extend(runtime_proxy_claude_custom_model_option_env(&target_model));
    env
}
/// Environment variables scrubbed from Claude Code's environment before
/// launch, so stray cloud-provider routing or model-override settings cannot
/// bypass the local proxy configuration.
fn runtime_proxy_claude_removed_env() -> &'static [&'static str] {
    &[
        // Direct credentials and alternate auth paths.
        "ANTHROPIC_API_KEY",
        "CLAUDE_CODE_OAUTH_TOKEN",
        "CLAUDE_CODE_OAUTH_TOKEN_FILE_DESCRIPTOR",
        // Cloud-provider routing toggles.
        "CLAUDE_CODE_USE_BEDROCK",
        "CLAUDE_CODE_USE_VERTEX",
        "CLAUDE_CODE_USE_FOUNDRY",
        "CLAUDE_CODE_USE_ANTHROPIC_AWS",
        // Provider-specific endpoints, projects, and keys.
        "ANTHROPIC_BEDROCK_BASE_URL",
        "ANTHROPIC_VERTEX_BASE_URL",
        "ANTHROPIC_FOUNDRY_BASE_URL",
        "ANTHROPIC_AWS_BASE_URL",
        "ANTHROPIC_FOUNDRY_RESOURCE",
        "ANTHROPIC_VERTEX_PROJECT_ID",
        "ANTHROPIC_AWS_WORKSPACE_ID",
        "CLOUD_ML_REGION",
        "ANTHROPIC_FOUNDRY_API_KEY",
        "ANTHROPIC_AWS_API_KEY",
        "CLAUDE_CODE_SKIP_BEDROCK_AUTH",
        "CLAUDE_CODE_SKIP_VERTEX_AUTH",
        "CLAUDE_CODE_SKIP_FOUNDRY_AUTH",
        "CLAUDE_CODE_SKIP_ANTHROPIC_AWS_AUTH",
        // Model-alias and custom-model overrides.
        "ANTHROPIC_DEFAULT_OPUS_MODEL",
        "ANTHROPIC_DEFAULT_OPUS_MODEL_NAME",
        "ANTHROPIC_DEFAULT_OPUS_MODEL_DESCRIPTION",
        "ANTHROPIC_DEFAULT_OPUS_MODEL_SUPPORTED_CAPABILITIES",
        "ANTHROPIC_DEFAULT_SONNET_MODEL",
        "ANTHROPIC_DEFAULT_SONNET_MODEL_NAME",
        "ANTHROPIC_DEFAULT_SONNET_MODEL_DESCRIPTION",
        "ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES",
        "ANTHROPIC_DEFAULT_HAIKU_MODEL",
        "ANTHROPIC_DEFAULT_HAIKU_MODEL_NAME",
        "ANTHROPIC_DEFAULT_HAIKU_MODEL_DESCRIPTION",
        "ANTHROPIC_DEFAULT_HAIKU_MODEL_SUPPORTED_CAPABILITIES",
        "ANTHROPIC_CUSTOM_MODEL_OPTION",
        "ANTHROPIC_CUSTOM_MODEL_OPTION_NAME",
        "ANTHROPIC_CUSTOM_MODEL_OPTION_DESCRIPTION",
    ]
}
/// Model override from `PRODEX_CLAUDE_MODEL`; unset or blank values yield
/// `None`.
fn runtime_proxy_claude_model_override() -> Option<String> {
    let raw = env::var("PRODEX_CLAUDE_MODEL").ok()?;
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Scans `contents` line by line for a top-level `key = "value"` assignment
/// and returns the unescaped basic-string value of the first match.
///
/// Fix: the original used `?` on lines that merely *started with* `key`, so
/// a line like `model_provider = "oss"` aborted the whole scan when looking
/// up `model` (and also matched as the wrong key when it happened to parse).
/// Non-matching lines now `continue`, and the key must be followed by `=`
/// (after optional whitespace) to count as the assignment.
fn parse_toml_string_assignment(contents: &str, key: &str) -> Option<String> {
    for raw_line in contents.lines() {
        let line = raw_line.trim();
        // Skip blanks and comment lines.
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        let Some(rest) = line.strip_prefix(key) else {
            continue;
        };
        // Require `=` directly after the key (modulo whitespace) so longer
        // keys sharing the prefix (e.g. `model_provider` vs `model`) and
        // other non-assignment lines are skipped, not treated as matches.
        let Some(rest) = rest.trim_start().strip_prefix('=') else {
            continue;
        };
        // Only double-quoted basic strings are supported; anything else is
        // ignored, matching the original behavior.
        let Some(rest) = rest.trim_start().strip_prefix('"') else {
            continue;
        };
        // Unescape the basic-string body up to the closing quote.
        let mut value = String::new();
        let mut escaped = false;
        for ch in rest.chars() {
            if escaped {
                value.push(match ch {
                    'n' => '\n',
                    'r' => '\r',
                    't' => '\t',
                    '"' => '"',
                    '\\' => '\\',
                    other => other,
                });
                escaped = false;
                continue;
            }
            match ch {
                '\\' => escaped = true,
                '"' => return Some(value),
                other => value.push(other),
            }
        }
        // Unterminated string: fall through and keep scanning later lines.
    }
    None
}
/// Reads a top-level string value from the profile's `config.toml`,
/// discarding missing files and blank values.
fn runtime_proxy_claude_config_value(codex_home: &Path, key: &str) -> Option<String> {
    let config_path = codex_home.join("config.toml");
    let contents = fs::read_to_string(config_path).ok()?;
    parse_toml_string_assignment(&contents, key).filter(|value| !value.trim().is_empty())
}
/// Normalizes a reasoning-effort label (case/whitespace-insensitive) to the
/// canonical Responses-API value; `None` for unrecognized labels.
fn runtime_proxy_normalize_responses_reasoning_effort(effort: &str) -> Option<&'static str> {
    const MAPPING: [(&str, &str); 7] = [
        ("minimal", "minimal"),
        ("low", "low"),
        ("medium", "medium"),
        ("high", "high"),
        ("xhigh", "xhigh"),
        ("none", "none"),
        // Claude Code exposes `max`; treat it as the strongest explicit upstream tier.
        ("max", "xhigh"),
    ];
    let wanted = effort.trim().to_ascii_lowercase();
    MAPPING
        .iter()
        .find(|(label, _)| *label == wanted)
        .map(|(_, canonical)| *canonical)
}
/// Reasoning-effort override from `PRODEX_CLAUDE_REASONING_EFFORT`,
/// normalized to the canonical value; unset or invalid values yield `None`.
fn runtime_proxy_claude_reasoning_effort_override() -> Option<String> {
    let raw = env::var("PRODEX_CLAUDE_REASONING_EFFORT").ok()?;
    let canonical = runtime_proxy_normalize_responses_reasoning_effort(raw.trim())?;
    Some(canonical.to_string())
}
/// Resolves the Claude launch model: env override first, then the profile's
/// `config.toml` `model` entry, then the built-in default.
fn runtime_proxy_claude_launch_model(codex_home: &Path) -> String {
    if let Some(model) = runtime_proxy_claude_model_override() {
        return model;
    }
    if let Some(model) = runtime_proxy_claude_config_value(codex_home, "model") {
        return model;
    }
    DEFAULT_PRODEX_CLAUDE_MODEL.to_string()
}
/// Maps a Claude model alias to its four environment-variable keys, in the
/// order `(model, name, description, supported_capabilities)`.
fn runtime_proxy_claude_alias_env_keys(
    alias: RuntimeProxyClaudeModelAlias,
) -> (&'static str, &'static str, &'static str, &'static str) {
    match alias {
        RuntimeProxyClaudeModelAlias::Opus => (
            "ANTHROPIC_DEFAULT_OPUS_MODEL",
            "ANTHROPIC_DEFAULT_OPUS_MODEL_NAME",
            "ANTHROPIC_DEFAULT_OPUS_MODEL_DESCRIPTION",
            "ANTHROPIC_DEFAULT_OPUS_MODEL_SUPPORTED_CAPABILITIES",
        ),
        RuntimeProxyClaudeModelAlias::Sonnet => (
            "ANTHROPIC_DEFAULT_SONNET_MODEL",
            "ANTHROPIC_DEFAULT_SONNET_MODEL_NAME",
            "ANTHROPIC_DEFAULT_SONNET_MODEL_DESCRIPTION",
            "ANTHROPIC_DEFAULT_SONNET_MODEL_SUPPORTED_CAPABILITIES",
        ),
        RuntimeProxyClaudeModelAlias::Haiku => (
            "ANTHROPIC_DEFAULT_HAIKU_MODEL",
            "ANTHROPIC_DEFAULT_HAIKU_MODEL_NAME",
            "ANTHROPIC_DEFAULT_HAIKU_MODEL_DESCRIPTION",
            "ANTHROPIC_DEFAULT_HAIKU_MODEL_SUPPORTED_CAPABILITIES",
        ),
    }
}
/// Looks up the descriptor pinned to a Claude model alias.
///
/// Panics when the descriptor table has no entry for `alias`; the table is
/// expected to cover every alias.
fn runtime_proxy_claude_alias_model(
    alias: RuntimeProxyClaudeModelAlias,
) -> &'static RuntimeProxyResponsesModelDescriptor {
    for descriptor in runtime_proxy_responses_model_descriptors().iter() {
        if descriptor.claude_alias == Some(alias) {
            return descriptor;
        }
    }
    panic!("Claude alias model should exist")
}
/// Resolves a Claude model-picker value (model id, picker label, or alias
/// label, optionally suffixed with `[1m]` for the extended-context variant)
/// to its descriptor.
fn runtime_proxy_claude_picker_model_descriptor(
    picker_model: &str,
) -> Option<&'static RuntimeProxyResponsesModelDescriptor> {
    let trimmed = picker_model.trim();
    // `[1m]` marks extended context; it maps to the same underlying descriptor.
    let candidate = trimmed.strip_suffix("[1m]").unwrap_or(trimmed);
    if let Some(descriptor) = runtime_proxy_responses_model_descriptor(candidate) {
        return Some(descriptor);
    }
    runtime_proxy_responses_model_descriptors()
        .iter()
        .find(|descriptor| {
            let picker_match = descriptor
                .claude_picker_model
                .is_some_and(|value| value.eq_ignore_ascii_case(candidate));
            let alias_match = descriptor.claude_alias.is_some_and(|alias| {
                runtime_proxy_claude_alias_picker_value(alias).eq_ignore_ascii_case(candidate)
            });
            picker_match || alias_match
        })
}
/// Finds the descriptor whose id equals `model_id`, case-insensitively.
fn runtime_proxy_responses_model_descriptor(
    model_id: &str,
) -> Option<&'static RuntimeProxyResponsesModelDescriptor> {
    runtime_proxy_responses_model_descriptors()
        .iter()
        .find(|descriptor| model_id.eq_ignore_ascii_case(descriptor.id))
}
/// Capability list advertised for a model; models with an `xhigh` tier also
/// advertise `max_effort`.
fn runtime_proxy_responses_model_capabilities(model_id: &str) -> &'static str {
    match runtime_proxy_responses_model_supports_xhigh(model_id) {
        true => "effort,max_effort,thinking,adaptive_thinking,interleaved_thinking",
        false => "effort,thinking,adaptive_thinking,interleaved_thinking",
    }
}
/// Effort levels offered for a model; `max` is included only when the model
/// supports the `xhigh` tier.
fn runtime_proxy_responses_model_supported_effort_levels(
    model_id: &str,
) -> &'static [&'static str] {
    const BASE: &[&str] = &["low", "medium", "high"];
    const WITH_MAX: &[&str] = &["low", "medium", "high", "max"];
    if runtime_proxy_responses_model_supports_xhigh(model_id) {
        WITH_MAX
    } else {
        BASE
    }
}
/// Whether a model offers the `xhigh` effort tier. Known models answer from
/// their descriptor; unknown ids fall back to an id-prefix heuristic.
fn runtime_proxy_responses_model_supports_xhigh(model_id: &str) -> bool {
    if let Some(descriptor) = runtime_proxy_responses_model_descriptor(model_id) {
        return descriptor.supports_xhigh;
    }
    // Unknown model: the gpt-5.2+ families expose xhigh.
    let normalized = model_id.trim().to_ascii_lowercase();
    ["gpt-5.2", "gpt-5.3", "gpt-5.4"]
        .iter()
        .any(|prefix| normalized.starts_with(prefix))
}
/// Policy switch for Claude "foundry compatibility" mode; currently
/// hard-wired on. Kept as a function so call sites read as a policy query
/// and the flag can later become configurable without touching callers.
fn runtime_proxy_claude_use_foundry_compat() -> bool {
    true
}
fn runtime_proxy_claude_alias_picker_value(alias: RuntimeProxyClaudeModelAlias) -> &'static str {
match alias {
RuntimeProxyClaudeModelAlias::Opus => "opus",
RuntimeProxyClaudeModelAlias::Sonnet => "sonnet",
RuntimeProxyClaudeModelAlias::Haiku => "haiku",
}
}
/// Environment pinning every Claude alias (opus/sonnet/haiku) to its
/// descriptor: model id, display name, description, and capability list.
fn runtime_proxy_claude_pinned_alias_env() -> Vec<(&'static str, OsString)> {
    let aliases = [
        RuntimeProxyClaudeModelAlias::Opus,
        RuntimeProxyClaudeModelAlias::Sonnet,
        RuntimeProxyClaudeModelAlias::Haiku,
    ];
    aliases
        .into_iter()
        .flat_map(|alias| {
            let descriptor = runtime_proxy_claude_alias_model(alias);
            let (model_key, name_key, description_key, caps_key) =
                runtime_proxy_claude_alias_env_keys(alias);
            [
                (model_key, OsString::from(descriptor.id)),
                // Name intentionally mirrors the model id.
                (name_key, OsString::from(descriptor.id)),
                (description_key, OsString::from(descriptor.description)),
                (
                    caps_key,
                    OsString::from(runtime_proxy_responses_model_capabilities(descriptor.id)),
                ),
            ]
        })
        .collect()
}
fn runtime_proxy_claude_picker_model(target_model: &str) -> String {
runtime_proxy_responses_model_descriptor(target_model)
.map(|descriptor| {
if runtime_proxy_claude_use_foundry_compat() {
descriptor
.claude_alias
.map(runtime_proxy_claude_alias_picker_value)
.unwrap_or(descriptor.id)
} else {
descriptor.id
}
})
.unwrap_or(target_model)
.to_string()
}
fn runtime_proxy_claude_custom_model_option_env(
target_model: &str,
) -> Vec<(&'static str, OsString)> {
if runtime_proxy_responses_model_descriptor(target_model).is_some() {
return Vec::new();
}
let descriptor = runtime_proxy_responses_model_descriptor(target_model);
let display_name = descriptor
.map(|descriptor| descriptor.display_name)
.unwrap_or(target_model);
let description = descriptor
.map(|descriptor| descriptor.description.to_string())
.unwrap_or_else(|| format!("Custom OpenAI model routed through prodex ({target_model})"));
vec![
(
"ANTHROPIC_CUSTOM_MODEL_OPTION",
OsString::from(target_model),
),
(
"ANTHROPIC_CUSTOM_MODEL_OPTION_NAME",
OsString::from(display_name),
),
(
"ANTHROPIC_CUSTOM_MODEL_OPTION_DESCRIPTION",
OsString::from(description),
),
]
}
/// Model-option entries advertised to Claude Code beyond the alias slots.
/// Under foundry compat, alias-backed models are skipped because they are
/// already surfaced through their alias.
fn runtime_proxy_claude_additional_model_option_entries() -> Vec<serde_json::Value> {
    let foundry_compat = runtime_proxy_claude_use_foundry_compat();
    let mut entries = Vec::new();
    for descriptor in runtime_proxy_responses_model_descriptors().iter() {
        if foundry_compat && descriptor.claude_alias.is_some() {
            continue;
        }
        let supported_effort_levels =
            runtime_proxy_responses_model_supported_effort_levels(descriptor.id);
        entries.push(serde_json::json!({
            "value": descriptor.id,
            "label": descriptor.id,
            "description": descriptor.description,
            "supportsEffort": true,
            "supportedEffortLevels": supported_effort_levels,
        }));
    }
    entries
}
/// True when `value` resolves to a descriptor prodex manages (by id, picker
/// label, or alias label) — i.e. an entry we regenerate ourselves in the
/// model-option cache rather than preserving as user-added.
fn runtime_proxy_claude_managed_model_option_value(value: &str) -> bool {
    runtime_proxy_claude_picker_model_descriptor(value).is_some()
}
/// Per-profile Claude config directory inside the profile's CODEX_HOME.
fn runtime_proxy_claude_config_dir(codex_home: &Path) -> PathBuf {
    let mut dir = codex_home.to_path_buf();
    dir.push(PRODEX_CLAUDE_CONFIG_DIR_NAME);
    dir
}
/// Shared Claude config directory under the application root, used by
/// managed profiles.
fn runtime_proxy_shared_claude_config_dir(paths: &AppPaths) -> PathBuf {
    let mut dir = paths.root.clone();
    dir.push(PRODEX_SHARED_CLAUDE_DIR_NAME);
    dir
}
/// Path of the Claude config file inside a config directory.
fn runtime_proxy_claude_config_path(config_dir: &Path) -> PathBuf {
    let mut path = config_dir.to_path_buf();
    path.push(DEFAULT_CLAUDE_CONFIG_FILE_NAME);
    path
}
/// Marker file recording that the one-time legacy Claude import already ran
/// for this config directory.
fn runtime_proxy_claude_legacy_import_marker_path(config_dir: &Path) -> PathBuf {
    let mut path = config_dir.to_path_buf();
    path.push(PRODEX_CLAUDE_LEGACY_IMPORT_MARKER_NAME);
    path
}
/// Location of the legacy Claude state directory in the user's home.
fn legacy_default_claude_config_dir() -> Result<PathBuf> {
    let home = home_dir().context("failed to determine home directory")?;
    Ok(home.join(DEFAULT_CLAUDE_CONFIG_DIR_NAME))
}
/// Location of the legacy Claude config file in the user's home.
fn legacy_default_claude_config_path() -> Result<PathBuf> {
    let home = home_dir().context("failed to determine home directory")?;
    Ok(home.join(DEFAULT_CLAUDE_CONFIG_FILE_NAME))
}
/// Prepares the Claude config directory for a launch and returns the
/// per-profile path. Unmanaged profiles use their own directory; managed
/// profiles share one directory under the app root, with the per-profile
/// path migrated into it and replaced by a symlink.
fn prepare_runtime_proxy_claude_config_dir(
    paths: &AppPaths,
    codex_home: &Path,
    managed: bool,
) -> Result<PathBuf> {
    let profile_dir = runtime_proxy_claude_config_dir(codex_home);
    if managed {
        let shared_dir = runtime_proxy_shared_claude_config_dir(paths);
        prepare_runtime_proxy_claude_import_target(&shared_dir)?;
        migrate_runtime_proxy_claude_profile_dir_to_target(&profile_dir, &shared_dir)?;
        ensure_runtime_proxy_claude_profile_link(&profile_dir, &shared_dir)?;
    } else {
        prepare_runtime_proxy_claude_import_target(&profile_dir)?;
    }
    Ok(profile_dir)
}
/// Creates the target directory if needed and performs the one-time import
/// of legacy Claude state into it.
fn prepare_runtime_proxy_claude_import_target(target_dir: &Path) -> Result<()> {
    create_codex_home_if_missing(target_dir)?;
    maybe_import_runtime_proxy_claude_legacy_home(target_dir)?;
    Ok(())
}
/// One-time import of legacy Claude state (the `~/.claude`-style directory
/// and config file) into `target_dir`. A marker file records that the import
/// ran so it never repeats; home-resolution failures are silently skipped.
fn maybe_import_runtime_proxy_claude_legacy_home(target_dir: &Path) -> Result<()> {
    let marker_path = runtime_proxy_claude_legacy_import_marker_path(target_dir);
    if marker_path.exists() {
        return Ok(());
    }
    let mut imported = false;
    match legacy_default_claude_config_dir() {
        Ok(legacy_dir) if legacy_dir.is_dir() => {
            merge_runtime_proxy_claude_directory_contents(&legacy_dir, target_dir)?;
            imported = true;
        }
        _ => {}
    }
    match legacy_default_claude_config_path() {
        Ok(legacy_config_path) if legacy_config_path.is_file() => {
            merge_runtime_proxy_claude_file(
                &legacy_config_path,
                &runtime_proxy_claude_config_path(target_dir),
            )?;
            imported = true;
        }
        _ => {}
    }
    if !imported {
        return Ok(());
    }
    // Only write the marker once something was actually imported.
    fs::write(&marker_path, "imported\n").with_context(|| {
        format!(
            "failed to write Claude legacy import marker at {}",
            marker_path.display()
        )
    })
}
/// Moves per-profile Claude state into the shared `target_dir`.
///
/// Handles three shapes of `profile_dir`:
/// - missing: nothing to do;
/// - symlink: merge the link's target into `target_dir` (unless dangling or
///   already pointing at the target), then remove the link;
/// - real directory: merge its contents into `target_dir`, then delete it.
///
/// Anything else (e.g. a plain file) is an error.
fn migrate_runtime_proxy_claude_profile_dir_to_target(
    profile_dir: &Path,
    target_dir: &Path,
) -> Result<()> {
    if same_path(profile_dir, target_dir) {
        return Ok(());
    }
    // symlink_metadata: do not follow links — a link must be distinguished
    // from a real directory below.
    let metadata = match fs::symlink_metadata(profile_dir) {
        Ok(metadata) => metadata,
        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(()),
        Err(err) => {
            return Err(err)
                .with_context(|| format!("failed to inspect {}", profile_dir.display()));
        }
    };
    if metadata.file_type().is_symlink() {
        let source_dir = runtime_proxy_resolve_symlink_target(profile_dir)?;
        // Dangling link, or link already pointing at the target: leave as-is.
        if !source_dir.exists() || same_path(&source_dir, target_dir) {
            return Ok(());
        }
        if !source_dir.is_dir() {
            bail!(
                "expected {} to point to a Claude config directory",
                profile_dir.display()
            );
        }
        merge_runtime_proxy_claude_directory_contents(&source_dir, target_dir)?;
        runtime_proxy_remove_path(profile_dir)?;
        return Ok(());
    }
    if !metadata.is_dir() {
        bail!(
            "expected {} to be a Claude config directory",
            profile_dir.display()
        );
    }
    merge_runtime_proxy_claude_directory_contents(profile_dir, target_dir)?;
    fs::remove_dir_all(profile_dir)
        .with_context(|| format!("failed to remove {}", profile_dir.display()))?;
    Ok(())
}
/// Ensures `link_path` is a symlink pointing at `target_dir`, replacing any
/// pre-existing file, directory, or wrong-target link. No-op when the two
/// paths already resolve to the same location.
fn ensure_runtime_proxy_claude_profile_link(link_path: &Path, target_dir: &Path) -> Result<()> {
    if same_path(link_path, target_dir) {
        return Ok(());
    }
    match fs::symlink_metadata(link_path) {
        Ok(metadata) => {
            if metadata.file_type().is_symlink() {
                let existing_target = runtime_proxy_resolve_symlink_target(link_path)?;
                // Correct link already in place: nothing to do.
                if same_path(&existing_target, target_dir) {
                    return Ok(());
                }
            }
            // Whatever else occupies the path is replaced by the link below.
            runtime_proxy_remove_path(link_path)?;
        }
        Err(err) if err.kind() == io::ErrorKind::NotFound => {}
        Err(err) => {
            return Err(err).with_context(|| format!("failed to inspect {}", link_path.display()));
        }
    }
    runtime_proxy_create_directory_symlink(target_dir, link_path)
}
/// Recursively merges the contents of `source` into `destination`, keeping
/// state already present at the destination. Files are merged type-aware
/// (JSON/JSONL), symlinks are copied only when absent, and other entry kinds
/// are silently skipped.
fn merge_runtime_proxy_claude_directory_contents(source: &Path, destination: &Path) -> Result<()> {
    if same_path(source, destination) {
        return Ok(());
    }
    create_codex_home_if_missing(destination)?;
    let entries = fs::read_dir(source)
        .with_context(|| format!("failed to read directory {}", source.display()))?;
    for entry in entries {
        let entry =
            entry.with_context(|| format!("failed to read entry in {}", source.display()))?;
        let source_path = entry.path();
        let file_type = entry
            .file_type()
            .with_context(|| format!("failed to inspect {}", source_path.display()))?;
        let destination_path = destination.join(entry.file_name());
        if file_type.is_dir() {
            merge_runtime_proxy_claude_directory_contents(&source_path, &destination_path)?;
        } else if file_type.is_file() {
            merge_runtime_proxy_claude_file(&source_path, &destination_path)?;
        } else if file_type.is_symlink() {
            merge_runtime_proxy_claude_symlink(&source_path, &destination_path)?;
        }
    }
    Ok(())
}
/// Merges one Claude state file into `destination`.
///
/// - Missing destination: plain copy.
/// - Destination is a directory: error.
/// - The Claude config file: deep JSON merge (destination values win).
/// - `.jsonl` files: line-wise union.
/// - Any other existing file: destination left untouched.
fn merge_runtime_proxy_claude_file(source: &Path, destination: &Path) -> Result<()> {
    if same_path(source, destination) {
        return Ok(());
    }
    if let Some(parent) = destination.parent() {
        fs::create_dir_all(parent)
            .with_context(|| format!("failed to create {}", parent.display()))?;
    }
    if !destination.exists() {
        fs::copy(source, destination).with_context(|| {
            format!(
                "failed to copy Claude state file {} to {}",
                source.display(),
                destination.display()
            )
        })?;
        return Ok(());
    }
    if destination.is_dir() {
        bail!(
            "expected {} to be a file for Claude state",
            destination.display()
        );
    }
    // Dispatch on the *source* file's name/extension to pick a merge
    // strategy for files that already exist at the destination.
    let file_name = source.file_name().and_then(|name| name.to_str());
    if file_name == Some(DEFAULT_CLAUDE_CONFIG_FILE_NAME) {
        return merge_runtime_proxy_claude_json_file(source, destination);
    }
    if source
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| ext.eq_ignore_ascii_case("jsonl"))
    {
        return merge_runtime_proxy_claude_jsonl_file(source, destination);
    }
    // Any other file type: keep whatever the destination already has.
    Ok(())
}
/// Merges JSON from `source` into `destination`, treating the source as
/// defaults: existing destination keys win. Unparseable JSON on either side
/// turns the merge into a silent no-op.
fn merge_runtime_proxy_claude_json_file(source: &Path, destination: &Path) -> Result<()> {
    let source_raw = fs::read_to_string(source)
        .with_context(|| format!("failed to read {}", source.display()))?;
    let destination_raw = fs::read_to_string(destination)
        .with_context(|| format!("failed to read {}", destination.display()))?;
    let Ok(source_value) = serde_json::from_str::<serde_json::Value>(&source_raw) else {
        return Ok(());
    };
    let Ok(mut destination_value) = serde_json::from_str::<serde_json::Value>(&destination_raw)
    else {
        return Ok(());
    };
    runtime_proxy_merge_json_defaults(&mut destination_value, &source_value);
    let rendered = serde_json::to_string_pretty(&destination_value)
        .context("failed to render merged Claude config")?;
    fs::write(destination, rendered)
        .with_context(|| format!("failed to write {}", destination.display()))
}
/// Recursively fills `destination` with defaults from `source_defaults`:
/// null destinations are replaced wholesale, object keys present only in the
/// defaults are inserted, shared keys recurse. Non-null, non-object
/// destinations are left untouched.
fn runtime_proxy_merge_json_defaults(
    destination: &mut serde_json::Value,
    source_defaults: &serde_json::Value,
) {
    if destination.is_null() {
        *destination = source_defaults.clone();
        return;
    }
    let Some(destination_map) = destination.as_object_mut() else {
        return;
    };
    let Some(defaults_map) = source_defaults.as_object() else {
        return;
    };
    for (key, default_value) in defaults_map {
        match destination_map.get_mut(key) {
            Some(existing) => runtime_proxy_merge_json_defaults(existing, default_value),
            None => {
                destination_map.insert(key.clone(), default_value.clone());
            }
        }
    }
}
/// Unions two JSONL files into `destination`: destination lines come first,
/// then source lines not already present. Blank lines are dropped,
/// `\r` line endings are normalized, and the output has no trailing newline.
fn merge_runtime_proxy_claude_jsonl_file(source: &Path, destination: &Path) -> Result<()> {
    let mut seen = BTreeSet::new();
    let mut merged: Vec<String> = Vec::new();
    // Destination first so its ordering and entries take precedence.
    for path in [destination, source] {
        let content = fs::read_to_string(path)
            .with_context(|| format!("failed to read {}", path.display()))?;
        for raw_line in content.lines() {
            let line = raw_line.trim_end_matches('\r');
            if !line.is_empty() && seen.insert(line.to_string()) {
                merged.push(line.to_string());
            }
        }
    }
    fs::write(destination, merged.join("\n"))
        .with_context(|| format!("failed to write {}", destination.display()))
}
/// Copies a symlink from `source` to `destination` unless anything already
/// exists at the destination (including a broken symlink).
fn merge_runtime_proxy_claude_symlink(source: &Path, destination: &Path) -> Result<()> {
    // `symlink_metadata` succeeds for regular files, directories, and
    // symlinks — including dangling ones — so the previous additional
    // `destination.exists()` check was redundant: any path where `exists()`
    // is true also has readable symlink metadata.
    if fs::symlink_metadata(destination).is_ok() {
        return Ok(());
    }
    let target = fs::read_link(source)
        .with_context(|| format!("failed to read symlink {}", source.display()))?;
    runtime_proxy_create_symlink(&target, destination, true)
}
/// Resolves a symlink's target to a path; relative targets are interpreted
/// against the link's parent directory.
fn runtime_proxy_resolve_symlink_target(path: &Path) -> Result<PathBuf> {
    let target = fs::read_link(path)
        .with_context(|| format!("failed to read symlink {}", path.display()))?;
    if target.is_absolute() {
        return Ok(target);
    }
    let base = path.parent().unwrap_or_else(|| Path::new("."));
    Ok(base.join(target))
}
/// Removes a path of any type: symlink, directory tree, or regular file.
fn runtime_proxy_remove_path(path: &Path) -> Result<()> {
    let metadata = fs::symlink_metadata(path)
        .with_context(|| format!("failed to inspect {}", path.display()))?;
    let file_type = metadata.file_type();
    if file_type.is_symlink() {
        // Try the file form first, then the directory form — on some
        // platforms (notably Windows) a directory symlink must be removed
        // with remove_dir rather than remove_file.
        fs::remove_file(path)
            .or_else(|_| fs::remove_dir(path))
            .with_context(|| format!("failed to remove symbolic link {}", path.display()))?;
        return Ok(());
    }
    if metadata.is_dir() {
        fs::remove_dir_all(path).with_context(|| format!("failed to remove {}", path.display()))
    } else {
        fs::remove_file(path).with_context(|| format!("failed to remove {}", path.display()))
    }
}
/// Creates a directory symlink at `link` pointing at `target`; the
/// directory flag only changes behavior on Windows (see
/// `runtime_proxy_create_symlink`).
fn runtime_proxy_create_directory_symlink(target: &Path, link: &Path) -> Result<()> {
    runtime_proxy_create_symlink(target, link, true)
}
/// Creates a symlink at `link` pointing at `target`, creating parent
/// directories as needed. `is_dir` selects the Windows link flavor
/// (directory vs. file); Unix has a single symlink call so the flag is
/// ignored there. Unsupported platforms return an error.
fn runtime_proxy_create_symlink(target: &Path, link: &Path, is_dir: bool) -> Result<()> {
    if let Some(parent) = link.parent() {
        fs::create_dir_all(parent)
            .with_context(|| format!("failed to create {}", parent.display()))?;
    }
    #[cfg(unix)]
    {
        // Unix symlinks are untyped; the flag only matters on Windows.
        let _ = is_dir;
        std::os::unix::fs::symlink(target, link).with_context(|| {
            format!(
                "failed to link Claude state {} -> {}",
                link.display(),
                target.display()
            )
        })?;
    }
    #[cfg(windows)]
    {
        if is_dir {
            std::os::windows::fs::symlink_dir(target, link)
        } else {
            std::os::windows::fs::symlink_file(target, link)
        }
        .with_context(|| {
            format!(
                "failed to link Claude state {} -> {}",
                link.display(),
                target.display()
            )
        })?;
    }
    #[cfg(not(any(unix, windows)))]
    {
        let _ = is_dir;
        bail!("Claude state links are not supported on this platform");
    }
    Ok(())
}
fn runtime_proxy_claude_binary_version(binary: &OsString) -> Option<String> {
let output = Command::new(binary).arg("--version").output().ok()?;
if !output.status.success() {
return None;
}
parse_runtime_proxy_claude_version_text(&String::from_utf8_lossy(&output.stdout)).or_else(
|| parse_runtime_proxy_claude_version_text(&String::from_utf8_lossy(&output.stderr)),
)
}
/// Picks the first whitespace-separated token that begins with an ASCII
/// digit (e.g. `1.2.3` out of `claude 1.2.3 (build)`).
fn parse_runtime_proxy_claude_version_text(text: &str) -> Option<String> {
    text.split_whitespace()
        .find(|token| token.starts_with(|ch: char| ch.is_ascii_digit()))
        .map(ToOwned::to_owned)
}
/// Seeds the Claude Code config file inside `config_dir` so a prodex-managed
/// launch skips interactive onboarding: onboarding flags are forced on, the
/// model-option cache is refreshed, and the project at `cwd` is pre-trusted.
/// Existing config is preserved where it is well-formed; wrong-typed values
/// are coerced back to the expected shapes.
fn ensure_runtime_proxy_claude_launch_config(
    config_dir: &Path,
    cwd: &Path,
    claude_version: Option<&str>,
) -> Result<()> {
    fs::create_dir_all(config_dir).with_context(|| {
        format!(
            "failed to create Claude Code config dir at {}",
            config_dir.display()
        )
    })?;
    let config_path = runtime_proxy_claude_config_path(config_dir);
    // Unreadable or unparseable existing config degrades to a fresh object.
    let raw = fs::read_to_string(&config_path).ok();
    let mut config = raw
        .as_deref()
        .and_then(|value| serde_json::from_str::<serde_json::Value>(value).ok())
        .unwrap_or_else(|| serde_json::json!({}));
    if !config.is_object() {
        config = serde_json::json!({});
    }
    let object = config
        .as_object_mut()
        .expect("Claude Code config should be normalized to an object");
    // Force at least one recorded startup (presumably so Claude skips
    // first-run flows — TODO confirm against Claude Code's onboarding checks).
    let num_startups = object
        .get("numStartups")
        .and_then(serde_json::Value::as_u64)
        .unwrap_or(0)
        .max(1);
    object.insert("numStartups".to_string(), serde_json::json!(num_startups));
    object.insert(
        "hasCompletedOnboarding".to_string(),
        serde_json::json!(true),
    );
    if let Some(version) = claude_version {
        object.insert(
            "lastOnboardingVersion".to_string(),
            serde_json::json!(version),
        );
    }
    // Regenerate managed model options, preserving user-added custom entries
    // that prodex does not manage.
    let mut additional_model_options = runtime_proxy_claude_additional_model_option_entries();
    if let Some(existing) = object
        .get("additionalModelOptionsCache")
        .and_then(serde_json::Value::as_array)
    {
        for entry in existing {
            let existing_value = entry.get("value").and_then(serde_json::Value::as_str);
            if existing_value.is_some_and(runtime_proxy_claude_managed_model_option_value) {
                continue;
            }
            additional_model_options.push(entry.clone());
        }
    }
    object.insert(
        "additionalModelOptionsCache".to_string(),
        serde_json::Value::Array(additional_model_options),
    );
    // Normalize the per-project section keyed by the working directory.
    let projects = object
        .entry("projects".to_string())
        .or_insert_with(|| serde_json::json!({}));
    if !projects.is_object() {
        *projects = serde_json::json!({});
    }
    let projects = projects
        .as_object_mut()
        .expect("Claude Code projects config should be an object");
    let project_key = cwd.to_string_lossy().into_owned();
    let project = projects
        .entry(project_key)
        .or_insert_with(|| serde_json::json!({}));
    if !project.is_object() {
        *project = serde_json::json!({});
    }
    let project = project
        .as_object_mut()
        .expect("Claude Code project config should be an object");
    // Pre-accept the trust dialog for this project.
    project.insert(
        "hasTrustDialogAccepted".to_string(),
        serde_json::json!(true),
    );
    let project_onboarding_seen_count = project
        .get("projectOnboardingSeenCount")
        .and_then(serde_json::Value::as_u64)
        .unwrap_or(0)
        .max(1);
    project.insert(
        "projectOnboardingSeenCount".to_string(),
        serde_json::json!(project_onboarding_seen_count),
    );
    // Ensure list-valued keys exist and are arrays.
    for key in [
        "allowedTools",
        "mcpContextUris",
        "enabledMcpjsonServers",
        "disabledMcpjsonServers",
        "exampleFiles",
    ] {
        if !project.get(key).is_some_and(serde_json::Value::is_array) {
            project.insert(key.to_string(), serde_json::json!([]));
        }
    }
    if !project
        .get("mcpServers")
        .is_some_and(serde_json::Value::is_object)
    {
        project.insert("mcpServers".to_string(), serde_json::json!({}));
    }
    // Re-assert the external-includes booleans, defaulting missing or
    // wrong-typed values to false.
    project.insert(
        "hasClaudeMdExternalIncludesApproved".to_string(),
        serde_json::json!(
            project
                .get("hasClaudeMdExternalIncludesApproved")
                .and_then(serde_json::Value::as_bool)
                .unwrap_or(false)
        ),
    );
    project.insert(
        "hasClaudeMdExternalIncludesWarningShown".to_string(),
        serde_json::json!(
            project
                .get("hasClaudeMdExternalIncludesWarningShown")
                .and_then(serde_json::Value::as_bool)
                .unwrap_or(false)
        ),
    );
    let rendered =
        serde_json::to_string_pretty(&config).context("failed to render Claude Code config")?;
    fs::write(&config_path, rendered).with_context(|| {
        format!(
            "failed to write Claude Code config at {}",
            config_path.display()
        )
    })?;
    Ok(())
}
/// Resolves the launch profile (optionally auto-rotating to one with free
/// quota), persists the selection, prepares a managed CODEX_HOME, and starts
/// the runtime rotation proxy when enabled.
///
/// Exits the process with status 2 when quota preflight blocks the profile
/// and no ready alternative exists; preflight *failures* (as opposed to
/// blocked quotas) only warn and continue.
fn prepare_runtime_launch(request: RuntimeLaunchRequest<'_>) -> Result<PreparedRuntimeLaunch> {
    let paths = AppPaths::discover()?;
    let mut state = AppState::load(&paths)?;
    let profile_name = resolve_profile_name(&state, request.profile)?;
    let mut selected_profile_name = profile_name.clone();
    let explicit_profile_requested = request.profile.is_some();
    let allow_auto_rotate = request.allow_auto_rotate;
    let include_code_review = request.include_code_review;
    let mut codex_home = state
        .profiles
        .get(&profile_name)
        .with_context(|| format!("profile '{}' is missing", profile_name))?
        .codex_home
        .clone();
    if !request.skip_quota_check {
        // Two preflight modes: quota-pressure scoring across all profiles
        // (auto-rotate, no explicit profile, multiple profiles), or a simple
        // gate on just the resolved profile.
        if allow_auto_rotate && !explicit_profile_requested && state.profiles.len() > 1 {
            let current_report = probe_run_profile(&state, &profile_name, 0, request.base_url)?;
            if !run_profile_probe_is_ready(&current_report, include_code_review) {
                let persisted_usage_snapshots =
                    load_runtime_usage_snapshots(&paths, &state.profiles).unwrap_or_default();
                let reports = run_preflight_reports_with_current_first(
                    &state,
                    &profile_name,
                    current_report,
                    request.base_url,
                );
                let ready_candidates = ready_profile_candidates(
                    &reports,
                    include_code_review,
                    Some(&profile_name),
                    &state,
                    Some(&persisted_usage_snapshots),
                );
                let selected_report = reports.iter().find(|report| report.name == profile_name);
                if let Some(best_candidate) = ready_candidates.first() {
                    if best_candidate.name != profile_name {
                        print_wrapped_stderr(&section_header("Quota Preflight"));
                        // The message is refined below depending on why the
                        // current profile was passed over.
                        let mut selection_message = format!(
                            "Using profile '{}' ({})",
                            best_candidate.name,
                            format_main_windows_compact(&best_candidate.usage)
                        );
                        if let Some(report) = selected_report {
                            match &report.result {
                                Ok(usage) => {
                                    let blocked =
                                        collect_blocked_limits(usage, include_code_review);
                                    if !blocked.is_empty() {
                                        print_wrapped_stderr(&format!(
                                            "Quota preflight blocked profile '{}': {}",
                                            profile_name,
                                            format_blocked_limits(&blocked)
                                        ));
                                        selection_message = format!(
                                            "Auto-rotating to profile '{}' using quota-pressure scoring ({}).",
                                            best_candidate.name,
                                            format_main_windows_compact(&best_candidate.usage)
                                        );
                                    } else {
                                        selection_message = format!(
                                            "Auto-selecting profile '{}' over active profile '{}' using quota-pressure scoring ({}).",
                                            best_candidate.name,
                                            profile_name,
                                            format_main_windows_compact(&best_candidate.usage)
                                        );
                                    }
                                }
                                Err(err) => {
                                    print_wrapped_stderr(&format!(
                                        "Warning: quota preflight failed for '{}': {err}",
                                        profile_name
                                    ));
                                    selection_message = format!(
                                        "Using ready profile '{}' after quota preflight failed ({})",
                                        best_candidate.name,
                                        format_main_windows_compact(&best_candidate.usage)
                                    );
                                }
                            }
                        }
                        // Commit the rotation: switch home, remember the new
                        // active profile, and persist immediately.
                        codex_home = state
                            .profiles
                            .get(&best_candidate.name)
                            .with_context(|| {
                                format!("profile '{}' is missing", best_candidate.name)
                            })?
                            .codex_home
                            .clone();
                        selected_profile_name = best_candidate.name.clone();
                        state.active_profile = Some(best_candidate.name.clone());
                        state.save(&paths)?;
                        print_wrapped_stderr(&selection_message);
                    }
                } else if let Some(report) = selected_report {
                    // No ready candidate anywhere.
                    match &report.result {
                        Ok(usage) => {
                            let blocked = collect_blocked_limits(usage, include_code_review);
                            print_wrapped_stderr(&section_header("Quota Preflight"));
                            print_wrapped_stderr(&format!(
                                "Quota preflight blocked profile '{}': {}",
                                profile_name,
                                format_blocked_limits(&blocked)
                            ));
                            print_wrapped_stderr("No ready profile was found.");
                            print_wrapped_stderr(&format!(
                                "Inspect with `prodex quota --profile {}` or bypass with `prodex run --skip-quota-check`.",
                                profile_name
                            ));
                            // Hard stop: blocked with no fallback.
                            std::process::exit(2);
                        }
                        Err(err) => {
                            print_wrapped_stderr(&section_header("Quota Preflight"));
                            print_wrapped_stderr(&format!(
                                "Warning: quota preflight failed for '{}': {err:#}",
                                profile_name
                            ));
                            print_wrapped_stderr("Continuing without quota gate.");
                        }
                    }
                }
            }
        } else {
            // Simple gate: probe only the resolved profile.
            match fetch_usage(&codex_home, request.base_url) {
                Ok(usage) => {
                    let blocked = collect_blocked_limits(&usage, include_code_review);
                    if !blocked.is_empty() {
                        let alternatives = find_ready_profiles(
                            &state,
                            &profile_name,
                            request.base_url,
                            include_code_review,
                        );
                        print_wrapped_stderr(&section_header("Quota Preflight"));
                        print_wrapped_stderr(&format!(
                            "Quota preflight blocked profile '{}': {}",
                            profile_name,
                            format_blocked_limits(&blocked)
                        ));
                        if allow_auto_rotate {
                            if let Some(next_profile) = alternatives.first() {
                                let next_profile = next_profile.clone();
                                codex_home = state
                                    .profiles
                                    .get(&next_profile)
                                    .with_context(|| {
                                        format!("profile '{}' is missing", next_profile)
                                    })?
                                    .codex_home
                                    .clone();
                                selected_profile_name = next_profile.clone();
                                state.active_profile = Some(next_profile.clone());
                                state.save(&paths)?;
                                print_wrapped_stderr(&format!(
                                    "Auto-rotating to profile '{}'.",
                                    next_profile
                                ));
                            } else {
                                print_wrapped_stderr("No other ready profile was found.");
                                print_wrapped_stderr(&format!(
                                    "Inspect with `prodex quota --profile {}` or bypass with `prodex run --skip-quota-check`.",
                                    profile_name
                                ));
                                std::process::exit(2);
                            }
                        } else {
                            // Rotation disabled: report alternatives and stop.
                            if !alternatives.is_empty() {
                                print_wrapped_stderr(&format!(
                                    "Other profiles that look ready: {}",
                                    alternatives.join(", ")
                                ));
                                print_wrapped_stderr(
                                    "Rerun without `--no-auto-rotate` to allow fallback.",
                                );
                            }
                            print_wrapped_stderr(&format!(
                                "Inspect with `prodex quota --profile {}` or bypass with `prodex run --skip-quota-check`.",
                                profile_name
                            ));
                            std::process::exit(2);
                        }
                    }
                }
                Err(err) => {
                    print_wrapped_stderr(&section_header("Quota Preflight"));
                    print_wrapped_stderr(&format!(
                        "Warning: quota preflight failed for '{}': {err:#}",
                        profile_name
                    ));
                    print_wrapped_stderr("Continuing without quota gate.");
                }
            }
        }
    }
    record_run_selection(&mut state, &selected_profile_name);
    state.save(&paths)?;
    let managed = state
        .profiles
        .get(&selected_profile_name)
        .with_context(|| format!("profile '{}' is missing", selected_profile_name))?
        .managed;
    if managed {
        prepare_managed_codex_home(&paths, &codex_home)?;
    }
    let runtime_upstream_base_url = quota_base_url(request.base_url);
    // The rotation proxy is started when forced or when policy allows it.
    let runtime_proxy = if request.force_runtime_proxy
        || should_enable_runtime_rotation_proxy(&state, &selected_profile_name, allow_auto_rotate)
    {
        Some(ensure_runtime_rotation_proxy_endpoint(
            &paths,
            &selected_profile_name,
            runtime_upstream_base_url.as_str(),
            include_code_review,
        )?)
    } else {
        None
    };
    Ok(PreparedRuntimeLaunch {
        paths,
        codex_home,
        managed,
        runtime_proxy,
    })
}
/// Long-running broker process: starts the rotation proxy, registers its
/// metadata and registry entry, then polls until the broker has been idle
/// (no live leases and no in-flight requests) past the grace period, at
/// which point it shuts down and deregisters itself.
fn handle_runtime_broker(args: RuntimeBrokerArgs) -> Result<()> {
    let paths = AppPaths::discover()?;
    let state = AppState::load(&paths)?;
    let proxy = start_runtime_rotation_proxy_with_listen_addr(
        &paths,
        &state,
        &args.current_profile,
        args.upstream_base_url.clone(),
        args.include_code_review,
        args.listen_addr.as_deref(),
    )?;
    // Without the owner lock another broker instance is already serving;
    // this process has nothing to do.
    if proxy.owner_lock.is_none() {
        return Ok(());
    }
    let metadata = RuntimeBrokerMetadata {
        broker_key: runtime_broker_key(&args.upstream_base_url, args.include_code_review),
        listen_addr: proxy.listen_addr.to_string(),
        started_at: Local::now().timestamp(),
        current_profile: args.current_profile.clone(),
        include_code_review: args.include_code_review,
        instance_token: args.instance_token.clone(),
        admin_token: args.admin_token.clone(),
    };
    register_runtime_broker_metadata(&proxy.log_path, metadata.clone());
    let registry = RuntimeBrokerRegistry {
        pid: std::process::id(),
        listen_addr: proxy.listen_addr.to_string(),
        started_at: metadata.started_at,
        upstream_base_url: args.upstream_base_url.clone(),
        include_code_review: args.include_code_review,
        current_profile: args.current_profile.clone(),
        instance_token: args.instance_token.clone(),
        admin_token: args.admin_token.clone(),
        openai_mount_path: Some(RUNTIME_PROXY_OPENAI_MOUNT_PATH.to_string()),
    };
    save_runtime_broker_registry(&paths, &args.broker_key, &registry)?;
    runtime_proxy_log_to_path(
        &proxy.log_path,
        &format!(
            "runtime_broker_started listen_addr={} broker_key={} current_profile={} include_code_review={}",
            proxy.listen_addr, args.broker_key, args.current_profile, args.include_code_review
        ),
    );
    audit_log_event_best_effort(
        "runtime_broker",
        "start",
        "success",
        serde_json::json!({
            "broker_key": args.broker_key,
            "listen_addr": proxy.listen_addr.to_string(),
            "current_profile": args.current_profile,
            "include_code_review": args.include_code_review,
            "upstream_base_url": args.upstream_base_url,
        }),
    );
    // Never idle-exit before the startup grace window has elapsed.
    let startup_grace_until = metadata
        .started_at
        .saturating_add(runtime_broker_startup_grace_seconds());
    let poll_interval = Duration::from_millis(RUNTIME_BROKER_POLL_INTERVAL_MS);
    let lease_scan_interval = Duration::from_millis(
        RUNTIME_BROKER_LEASE_SCAN_INTERVAL_MS.max(RUNTIME_BROKER_POLL_INTERVAL_MS),
    );
    let mut idle_started_at = None::<i64>;
    let mut cached_live_leases = 0usize;
    // Backdate the last scan so the first loop iteration scans immediately.
    // NOTE(review): `Instant - Duration` panics if the result would precede
    // the platform's Instant epoch — presumably unreachable in practice,
    // but `checked_sub` would be safer; confirm.
    let mut last_lease_scan_at = Instant::now() - lease_scan_interval;
    loop {
        let active_requests = proxy.active_request_count.load(Ordering::SeqCst);
        // Lease files are only rescanned while quiescent and on an interval,
        // to keep the idle loop cheap.
        if active_requests == 0 && last_lease_scan_at.elapsed() >= lease_scan_interval {
            cached_live_leases = cleanup_runtime_broker_stale_leases(&paths, &args.broker_key);
            last_lease_scan_at = Instant::now();
        }
        if cached_live_leases > 0 || active_requests > 0 {
            idle_started_at = None;
        } else {
            let now = Local::now().timestamp();
            if now < startup_grace_until {
                idle_started_at = None;
                thread::sleep(poll_interval);
                continue;
            }
            let idle_since = idle_started_at.get_or_insert(now);
            if now.saturating_sub(*idle_since) >= RUNTIME_BROKER_IDLE_GRACE_SECONDS {
                runtime_proxy_log_to_path(
                    &proxy.log_path,
                    &format!(
                        "runtime_broker_idle_shutdown broker_key={} idle_seconds={}",
                        args.broker_key,
                        now.saturating_sub(*idle_since)
                    ),
                );
                break;
            }
        }
        thread::sleep(poll_interval);
    }
    // Dropping the proxy stops the server; the token check avoids removing a
    // registry entry that a newer broker instance has since overwritten.
    drop(proxy);
    remove_runtime_broker_registry_if_token_matches(&paths, &args.broker_key, &args.instance_token);
    Ok(())
}
/// If the first Codex argument looks like a bare session id, prepend
/// `resume` so `prodex run <uuid>` resumes that session; otherwise the
/// arguments pass through unchanged.
fn normalize_run_codex_args(codex_args: &[OsString]) -> Vec<OsString> {
    match codex_args.first().and_then(|arg| arg.to_str()) {
        Some(first) if looks_like_codex_session_id(first) => {
            let mut normalized = Vec::with_capacity(codex_args.len() + 1);
            normalized.push(OsString::from("resume"));
            normalized.extend_from_slice(codex_args);
            normalized
        }
        _ => codex_args.to_vec(),
    }
}
/// Heuristic UUID check: five hyphen-separated groups of hex digits with
/// lengths 8-4-4-4-12 (the textual layout Codex session ids use).
fn looks_like_codex_session_id(value: &str) -> bool {
    const GROUP_LENGTHS: [usize; 5] = [8, 4, 4, 4, 12];
    let groups: Vec<&str> = value.split('-').collect();
    groups.len() == GROUP_LENGTHS.len()
        && groups.iter().zip(GROUP_LENGTHS).all(|(group, expected)| {
            group.len() == expected && group.bytes().all(|byte| byte.is_ascii_hexdigit())
        })
}
/// Stamps `profile_name` with the current time in the run-selection history,
/// first pruning entries whose profiles no longer exist.
fn record_run_selection(state: &mut AppState, profile_name: &str) {
    let profiles = &state.profiles;
    state
        .last_run_selected_at
        .retain(|name, _| profiles.contains_key(name));
    let now = Local::now().timestamp();
    state
        .last_run_selected_at
        .insert(profile_name.to_string(), now);
}
/// Resolves which profile to operate on: an explicitly requested profile
/// must exist; otherwise the active profile must still exist; otherwise a
/// lone profile is implied; otherwise the caller must choose one.
fn resolve_profile_name(state: &AppState, requested: Option<&str>) -> Result<String> {
    if let Some(name) = requested {
        if !state.profiles.contains_key(name) {
            bail!("profile '{}' does not exist", name);
        }
        return Ok(name.to_string());
    }
    if let Some(active) = state.active_profile.as_deref() {
        if !state.profiles.contains_key(active) {
            bail!("active profile '{}' no longer exists", active);
        }
        return Ok(active.to_string());
    }
    if state.profiles.len() == 1 {
        let (name, _) = state
            .profiles
            .iter()
            .next()
            .context("single profile lookup failed unexpectedly")?;
        return Ok(name.clone());
    }
    bail!("no active profile selected; use `prodex use --profile <name>` or pass --profile")
}
/// Fails when `candidate` is already used as the CODEX_HOME of any existing
/// profile.
fn ensure_path_is_unique(state: &AppState, candidate: &Path) -> Result<()> {
    let conflict = state
        .profiles
        .iter()
        .find(|(_, profile)| same_path(&profile.codex_home, candidate));
    if let Some((name, _)) = conflict {
        bail!(
            "path {} is already used by profile '{}'",
            candidate.display(),
            name
        );
    }
    Ok(())
}
/// Validates a profile name: non-empty, no path separators, not `.`/`..`,
/// and limited to ASCII alphanumerics plus `.`, `_`, `-`.
fn validate_profile_name(name: &str) -> Result<()> {
    if name.is_empty() {
        bail!("profile name cannot be empty");
    }
    if name.contains(std::path::MAIN_SEPARATOR) {
        bail!("profile name cannot contain path separators");
    }
    if matches!(name, "." | "..") {
        bail!("profile name cannot be '.' or '..'");
    }
    let allowed = |ch: char| ch.is_ascii_alphanumeric() || matches!(ch, '-' | '_' | '.');
    if name.chars().any(|ch| !allowed(ch)) {
        bail!("profile name may only contain letters, numbers, '.', '_' or '-'");
    }
    Ok(())
}
/// Decides whether the runtime auto-rotation proxy should front this launch:
/// auto-rotate must be on, more than one profile must exist, the selected
/// profile's CODEX_HOME must still exist, and at least one profile must be
/// quota-compatible to rotate across.
fn should_enable_runtime_rotation_proxy(
    state: &AppState,
    selected_profile_name: &str,
    allow_auto_rotate: bool,
) -> bool {
    if !allow_auto_rotate || state.profiles.len() <= 1 {
        return false;
    }
    let Some(selected_profile) = state.profiles.get(selected_profile_name) else {
        return false;
    };
    if !selected_profile.codex_home.exists() {
        return false;
    }
    // `.filter(..).next().is_some()` (clippy::filter_next) said this less
    // directly; `any` expresses the existence check and short-circuits.
    state
        .profiles
        .values()
        .any(|profile| read_auth_summary(&profile.codex_home).quota_compatible)
}
/// Builds Codex CLI args routed through the proxy at `listen_addr`, using
/// the default OpenAI mount path.
fn runtime_proxy_codex_args(
    listen_addr: std::net::SocketAddr,
    user_args: &[OsString],
) -> Vec<OsString> {
    runtime_proxy_codex_args_with_mount_path(
        listen_addr,
        RUNTIME_PROXY_OPENAI_MOUNT_PATH,
        user_args,
    )
}
/// Build codex CLI arguments that redirect both backend base URLs at the
/// local proxy listener, followed by the user's own arguments.
fn runtime_proxy_codex_args_with_mount_path(
    listen_addr: std::net::SocketAddr,
    openai_mount_path: &str,
    user_args: &[OsString],
) -> Vec<OsString> {
    let chatgpt_override = format!(
        "chatgpt_base_url={}",
        toml_string_literal(&format!("http://{listen_addr}/backend-api"))
    );
    let openai_override = format!(
        "openai_base_url={}",
        toml_string_literal(&format!("http://{listen_addr}{openai_mount_path}"))
    );
    // Each override is passed as a `-c key=value` pair ahead of user args.
    let mut args: Vec<OsString> = Vec::with_capacity(4 + user_args.len());
    for entry in [chatgpt_override, openai_override] {
        args.push(OsString::from("-c"));
        args.push(OsString::from(entry));
    }
    args.extend(user_args.iter().cloned());
    args
}
#[cfg(test)]
/// Test-only convenience wrapper: start the rotation proxy without a
/// preferred listen address (an OS-assigned local port is used).
fn start_runtime_rotation_proxy(
    paths: &AppPaths,
    state: &AppState,
    current_profile: &str,
    upstream_base_url: String,
    include_code_review: bool,
) -> Result<RuntimeRotationProxy> {
    let preferred_listen_addr = None;
    start_runtime_rotation_proxy_with_listen_addr(
        paths,
        state,
        current_profile,
        upstream_base_url,
        include_code_review,
        preferred_listen_addr,
    )
}
/// Bind the auto-rotate proxy listener, restore persisted rotation state,
/// and spawn the worker threads that service proxied codex traffic.
///
/// `preferred_listen_addr`, when given, is tried first; on bind failure the
/// proxy logs the problem and falls back to an OS-assigned port on
/// 127.0.0.1. Returns a handle whose `Drop` impl shuts the workers down.
fn start_runtime_rotation_proxy_with_listen_addr(
    paths: &AppPaths,
    state: &AppState,
    current_profile: &str,
    upstream_base_url: String,
    include_code_review: bool,
    preferred_listen_addr: Option<&str>,
) -> Result<RuntimeRotationProxy> {
    let log_path = initialize_runtime_proxy_log_path();
    // --- Bind the HTTP listener: preferred address first, then fallback. ---
    let (server, listen_addr) = match preferred_listen_addr {
        Some(preferred) => match TinyServer::http(preferred) {
            Ok(server) => {
                let server = Arc::new(server);
                let listen_addr = server.server_addr().to_ip().with_context(|| {
                    format!(
                        "runtime auto-rotate proxy did not expose a TCP listen address after binding {preferred}"
                    )
                })?;
                (server, listen_addr)
            }
            Err(err) => {
                // Preferred address unavailable: record it, then retry on an
                // ephemeral local port.
                runtime_proxy_log_to_path(
                    &log_path,
                    &format!(
                        "runtime proxy preferred_listen_addr_unavailable requested={preferred} error={err}"
                    ),
                );
                let server = Arc::new(TinyServer::http("127.0.0.1:0").map_err(|fallback_err| {
                    anyhow::anyhow!(
                        "failed to bind runtime auto-rotate proxy on {preferred}: {err}; fallback bind also failed: {fallback_err}"
                    )
                })?);
                let listen_addr = server.server_addr().to_ip().context(
                    "runtime auto-rotate proxy did not expose a TCP listen address after fallback bind",
                )?;
                (server, listen_addr)
            }
        },
        None => {
            let server = Arc::new(TinyServer::http("127.0.0.1:0").map_err(|err| {
                anyhow::anyhow!("failed to bind runtime auto-rotate proxy: {err}")
            })?);
            let listen_addr = server
                .server_addr()
                .to_ip()
                .context("runtime auto-rotate proxy did not expose a TCP listen address")?;
            (server, listen_addr)
        }
    };
    // Holding the owner lock makes this instance responsible for persistence
    // ("owner"); otherwise it runs as a non-persisting "follower".
    let owner_lock = try_acquire_runtime_owner_lock(paths)?;
    let persistence_enabled = owner_lock.is_some();
    // --- Size the async runtime and the worker pools / admission limits. ---
    let async_worker_count = runtime_proxy_async_worker_count();
    let async_runtime = Arc::new(
        TokioRuntimeBuilder::new_multi_thread()
            .worker_threads(async_worker_count)
            .enable_all()
            .build()
            .context("failed to build runtime auto-rotate async runtime")?,
    );
    let worker_count = runtime_proxy_worker_count();
    let long_lived_worker_count = runtime_proxy_long_lived_worker_count();
    let long_lived_queue_capacity =
        runtime_proxy_long_lived_queue_capacity(long_lived_worker_count);
    let active_request_limit =
        runtime_proxy_active_request_limit(worker_count, long_lived_worker_count);
    let lane_admission = RuntimeProxyLaneAdmission::new(runtime_proxy_lane_limits(
        active_request_limit,
        worker_count,
        long_lived_worker_count,
    ));
    // --- Restore persisted state, falling back to the caller's snapshot. ---
    let persisted_state = AppState::load_with_recovery(paths).unwrap_or(RecoveredLoad {
        value: state.clone(),
        recovered_from_backup: false,
    });
    let mut restored_state = merge_runtime_state_snapshot(state.clone(), &persisted_state.value);
    let persisted_continuations =
        load_runtime_continuations_with_recovery(paths, &restored_state.profiles).unwrap_or(
            RecoveredLoad {
                value: RuntimeContinuationStore::default(),
                recovered_from_backup: false,
            },
        );
    let continuation_journal =
        load_runtime_continuation_journal_with_recovery(paths, &restored_state.profiles).unwrap_or(
            RecoveredLoad {
                value: RuntimeContinuationJournal::default(),
                recovered_from_backup: false,
            },
        );
    // Merge order: app-state-derived fallback, then the sidecar store, then
    // the journal (later sources layered on top of earlier ones).
    let fallback_continuations = runtime_continuation_store_from_app_state(&restored_state);
    let restored_continuations = merge_runtime_continuation_store(
        &merge_runtime_continuation_store(
            &fallback_continuations,
            &persisted_continuations.value,
            &restored_state.profiles,
        ),
        &continuation_journal.value.continuations,
        &restored_state.profiles,
    );
    // Continuations exist but no sidecar file does -> schedule a one-time
    // migration save later in startup.
    let continuation_sidecar_present = runtime_continuations_file_path(paths).exists()
        || runtime_continuations_last_good_file_path(paths).exists();
    let continuation_migration_needed = !continuation_sidecar_present
        && (restored_continuations != RuntimeContinuationStore::default());
    let restored_session_id_bindings = merge_profile_bindings(
        &restored_continuations.session_profile_bindings,
        &runtime_external_session_id_bindings(&restored_continuations.session_id_bindings),
        &restored_state.profiles,
    );
    let restored_runtime_session_id_bindings = merge_profile_bindings(
        &restored_continuations.session_id_bindings,
        &restored_continuations.session_profile_bindings,
        &restored_state.profiles,
    );
    restored_state.response_profile_bindings =
        restored_continuations.response_profile_bindings.clone();
    restored_state.session_profile_bindings = restored_session_id_bindings.clone();
    // --- Restore scores, usage snapshots, and backoff deadlines. ---
    let persisted_profile_scores =
        load_runtime_profile_scores_with_recovery(paths, &restored_state.profiles).unwrap_or(
            RecoveredLoad {
                value: BTreeMap::new(),
                recovered_from_backup: false,
            },
        );
    let persisted_usage_snapshots =
        load_runtime_usage_snapshots_with_recovery(paths, &restored_state.profiles).unwrap_or(
            RecoveredLoad {
                value: BTreeMap::new(),
                recovered_from_backup: false,
            },
        );
    let mut persisted_backoffs =
        load_runtime_profile_backoffs_with_recovery(paths, &restored_state.profiles).unwrap_or(
            RecoveredLoad {
                value: RuntimeProfileBackoffs::default(),
                recovered_from_backup: false,
            },
        );
    let startup_now = Local::now().timestamp();
    // Soften stale backoffs on startup; returns whether anything changed so
    // a state save can be scheduled below.
    let persisted_backoffs_softened = runtime_soften_persisted_backoffs_for_startup(
        &mut persisted_backoffs.value,
        &persisted_profile_scores.value,
        startup_now,
    );
    // --- Telemetry counts for the restore-summary log line. ---
    let persisted_profile_scores_count = persisted_profile_scores.value.len();
    let persisted_usage_snapshots_count = persisted_usage_snapshots.value.len();
    let persisted_response_binding_count = restored_continuations.response_profile_bindings.len();
    let persisted_session_binding_count = restored_continuations.session_profile_bindings.len();
    let persisted_turn_state_binding_count = restored_continuations.turn_state_bindings.len();
    let persisted_session_id_binding_count = restored_runtime_session_id_bindings.len();
    let persisted_retry_backoffs_count = persisted_backoffs.value.retry_backoff_until.len();
    let persisted_transport_backoffs_count = persisted_backoffs.value.transport_backoff_until.len();
    let persisted_route_circuit_count = persisted_backoffs.value.route_circuit_open_until.len();
    let expired_usage_snapshot_count = persisted_usage_snapshots
        .value
        .values()
        .filter(|snapshot| !runtime_usage_snapshot_is_usable(snapshot, startup_now))
        .count();
    // Score-map keys are namespaced: "__route_*" prefixes mark route-level
    // entries; everything else is a global per-profile score.
    let restored_global_scores_count = persisted_profile_scores
        .value
        .keys()
        .filter(|key| !key.starts_with("__route_"))
        .count();
    let restored_route_scores_count = persisted_profile_scores
        .value
        .keys()
        .filter(|key| key.starts_with("__route_health__"))
        .count();
    let restored_bad_pairing_count = persisted_profile_scores
        .value
        .keys()
        .filter(|key| key.starts_with("__route_bad_pairing__"))
        .count();
    let restored_success_streak_count = persisted_profile_scores
        .value
        .keys()
        .filter(|key| key.starts_with("__route_success__"))
        .count();
    // --- Assemble the shared proxy state handed to every worker. ---
    let shared = RuntimeRotationProxyShared {
        async_client: reqwest::Client::builder()
            .connect_timeout(Duration::from_millis(
                runtime_proxy_http_connect_timeout_ms(),
            ))
            .read_timeout(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms()))
            .build()
            .context("failed to build runtime auto-rotate async HTTP client")?,
        async_runtime,
        log_path: log_path.clone(),
        request_sequence: Arc::new(AtomicU64::new(1)),
        state_save_revision: Arc::new(AtomicU64::new(0)),
        local_overload_backoff_until: Arc::new(AtomicU64::new(0)),
        active_request_count: Arc::new(AtomicUsize::new(0)),
        active_request_limit,
        lane_admission: lane_admission.clone(),
        runtime: Arc::new(Mutex::new(RuntimeRotationState {
            paths: paths.clone(),
            state: restored_state.clone(),
            upstream_base_url: upstream_base_url.clone(),
            include_code_review,
            current_profile: current_profile.to_string(),
            profile_usage_auth: load_runtime_profile_usage_auth_cache(&restored_state),
            turn_state_bindings: restored_continuations.turn_state_bindings.clone(),
            session_id_bindings: restored_runtime_session_id_bindings,
            continuation_statuses: restored_continuations.statuses.clone(),
            profile_probe_cache: BTreeMap::new(),
            profile_usage_snapshots: persisted_usage_snapshots.value,
            profile_retry_backoff_until: persisted_backoffs.value.retry_backoff_until,
            profile_transport_backoff_until: persisted_backoffs.value.transport_backoff_until,
            profile_route_circuit_open_until: persisted_backoffs.value.route_circuit_open_until,
            profile_inflight: BTreeMap::new(),
            profile_health: persisted_profile_scores.value,
        })),
    };
    // --- Startup registration, logging, audit, and warmup. ---
    register_runtime_proxy_persistence_mode(&log_path, persistence_enabled);
    runtime_proxy_log_to_path(
        &log_path,
        &format!(
            "runtime proxy started listen_addr={listen_addr} current_profile={current_profile} include_code_review={include_code_review} upstream_base_url={upstream_base_url} persistence_mode={}",
            if persistence_enabled {
                "owner"
            } else {
                "follower"
            }
        ),
    );
    runtime_proxy_log_to_path(
        &log_path,
        &format!(
            "runtime_proxy_restore_counts persisted_scores={} persisted_usage_snapshots={} expired_usage_snapshots={} response_bindings={} session_bindings={} turn_state_bindings={} session_id_bindings={} retry_backoffs={} transport_backoffs={} route_circuits={} global_scores={} route_scores={} bad_pairing_scores={} success_streak_scores={} recovered_state={} recovered_continuations={} recovered_scores={} recovered_usage_snapshots={} recovered_backoffs={} recovered_continuation_journal={}",
            persisted_profile_scores_count,
            persisted_usage_snapshots_count,
            expired_usage_snapshot_count,
            persisted_response_binding_count,
            persisted_session_binding_count,
            persisted_turn_state_binding_count,
            persisted_session_id_binding_count,
            persisted_retry_backoffs_count,
            persisted_transport_backoffs_count,
            persisted_route_circuit_count,
            restored_global_scores_count,
            restored_route_scores_count,
            restored_bad_pairing_count,
            restored_success_streak_count,
            persisted_state.recovered_from_backup,
            persisted_continuations.recovered_from_backup,
            persisted_profile_scores.recovered_from_backup,
            persisted_usage_snapshots.recovered_from_backup,
            persisted_backoffs.recovered_from_backup,
            continuation_journal.recovered_from_backup,
        ),
    );
    audit_runtime_proxy_startup_state(&shared);
    schedule_runtime_startup_probe_warmup(&shared);
    // Persist softened backoffs / migrated continuations once at startup.
    if persisted_backoffs_softened && let Ok(runtime) = shared.runtime.lock() {
        schedule_runtime_state_save_from_runtime(&shared, &runtime, "startup_backoff_soften");
    }
    if continuation_migration_needed && let Ok(runtime) = shared.runtime.lock() {
        schedule_runtime_state_save_from_runtime(
            &shared,
            &runtime,
            "startup_continuation_migration",
        );
    }
    // --- Spawn worker threads. ---
    let shutdown = Arc::new(AtomicBool::new(false));
    let mut worker_threads = Vec::new();
    // Bounded hand-off channel feeding the dedicated long-lived worker pool.
    let (long_lived_sender, long_lived_receiver) =
        mpsc::sync_channel::<tiny_http::Request>(long_lived_queue_capacity);
    let long_lived_receiver = Arc::new(Mutex::new(long_lived_receiver));
    runtime_proxy_log_to_path(
        &log_path,
        &format!(
            "runtime proxy worker_count={worker_count} async_worker_count={async_worker_count} long_lived_worker_count={long_lived_worker_count} long_lived_queue_capacity={long_lived_queue_capacity} active_request_limit={active_request_limit} lane_limits=responses:{} compact:{} websocket:{} standard:{}",
            lane_admission.limits.responses,
            lane_admission.limits.compact,
            lane_admission.limits.websocket,
            lane_admission.limits.standard
        ),
    );
    // Long-lived workers drain the sync channel; each dequeue wakes any
    // threads blocked on the lane-admission condvar before handling.
    for _ in 0..long_lived_worker_count {
        let shutdown = Arc::clone(&shutdown);
        let shared = shared.clone();
        let receiver = Arc::clone(&long_lived_receiver);
        worker_threads.push(thread::spawn(move || {
            loop {
                let request = {
                    let guard = receiver.lock();
                    let Ok(receiver) = guard else {
                        // Poisoned receiver mutex: give up on this worker.
                        break;
                    };
                    receiver.recv()
                };
                match request {
                    Ok(request) => {
                        let (mutex, condvar) = &*shared.lane_admission.wait;
                        let _guard = mutex
                            .lock()
                            .unwrap_or_else(|poisoned| poisoned.into_inner());
                        condvar.notify_all();
                        drop(_guard);
                        handle_runtime_rotation_proxy_request(request, &shared);
                    }
                    // Sender dropped: no more long-lived requests will come.
                    Err(_) => break,
                }
                if shutdown.load(Ordering::SeqCst) {
                    break;
                }
            }
        }));
    }
    // Accept workers pull from the tiny_http server; long-lived requests are
    // handed off to the dedicated pool (rejecting with 503 on overload),
    // everything else is handled inline.
    for _ in 0..worker_count {
        let server: Arc<TinyServer> = Arc::clone(&server);
        let shutdown = Arc::clone(&shutdown);
        let shared = shared.clone();
        let long_lived_sender = long_lived_sender.clone();
        worker_threads.push(thread::spawn(move || {
            loop {
                match server.recv() {
                    Ok(request) => {
                        let websocket = is_tiny_http_websocket_upgrade(&request);
                        let long_lived =
                            runtime_proxy_request_is_long_lived(request.url(), websocket);
                        if long_lived {
                            match enqueue_runtime_proxy_long_lived_request_with_wait(
                                &long_lived_sender,
                                request,
                                &shared,
                            ) {
                                Ok(()) => {}
                                Err((RuntimeProxyQueueRejection::Full, request)) => {
                                    mark_runtime_proxy_local_overload(
                                        &shared,
                                        "long_lived_queue_full",
                                    );
                                    reject_runtime_proxy_overloaded_request(
                                        request,
                                        &shared,
                                        "long_lived_queue_full",
                                    );
                                }
                                Err((RuntimeProxyQueueRejection::Disconnected, request)) => {
                                    mark_runtime_proxy_local_overload(
                                        &shared,
                                        "long_lived_queue_disconnected",
                                    );
                                    reject_runtime_proxy_overloaded_request(
                                        request,
                                        &shared,
                                        "long_lived_queue_disconnected",
                                    );
                                }
                            }
                        } else {
                            handle_runtime_rotation_proxy_request(request, &shared);
                        }
                    }
                    // recv errors during shutdown end the loop; transient
                    // errors otherwise are ignored and the worker retries.
                    Err(_) if shutdown.load(Ordering::SeqCst) => break,
                    Err(_) => {}
                }
                if shutdown.load(Ordering::SeqCst) {
                    break;
                }
            }
        }));
    }
    Ok(RuntimeRotationProxy {
        server,
        shutdown,
        worker_threads,
        accept_worker_count: worker_count,
        listen_addr,
        log_path,
        active_request_count: Arc::clone(&shared.active_request_count),
        owner_lock,
    })
}
impl Drop for RuntimeRotationProxy {
    /// Tear the proxy down: deregister its persistence mode and broker
    /// metadata, signal shutdown, wake every blocked accept worker, join
    /// all worker threads, then release the owner lock (if held).
    fn drop(&mut self) {
        unregister_runtime_proxy_persistence_mode(&self.log_path);
        unregister_runtime_broker_metadata(&self.log_path);
        // Raise the flag first so workers exit their loops once unblocked.
        self.shutdown.store(true, Ordering::SeqCst);
        // One unblock call per accept worker parked in `server.recv()`.
        (0..self.accept_worker_count).for_each(|_| self.server.unblock());
        for worker in self.worker_threads.drain(..) {
            let _ = worker.join();
        }
        drop(self.owner_lock.take());
    }
}
/// Answer `request` with a 503 whose body format matches the route being
/// rejected (Anthropic error envelope, JSON error, or plain text), logging
/// the overload first. The respond result is deliberately ignored.
fn reject_runtime_proxy_overloaded_request(
    request: tiny_http::Request,
    shared: &RuntimeRotationProxyShared,
    reason: &str,
) {
    const MESSAGE: &str =
        "Runtime auto-rotate proxy is temporarily saturated. Retry the request.";
    let path = request.url().to_string();
    let websocket = is_tiny_http_websocket_upgrade(&request);
    let transport = if websocket { "websocket" } else { "http" };
    runtime_proxy_log(
        shared,
        format!("runtime_proxy_queue_overloaded transport={transport} path={path} reason={reason}"),
    );
    let response = if websocket {
        build_runtime_proxy_text_response(503, MESSAGE)
    } else if is_runtime_anthropic_messages_path(&path) {
        build_runtime_proxy_response_from_parts(build_runtime_anthropic_error_parts(
            503,
            runtime_anthropic_error_type_for_status(503),
            MESSAGE,
        ))
    } else if is_runtime_responses_path(&path) || is_runtime_compact_path(&path) {
        build_runtime_proxy_json_error_response(503, "service_unavailable", MESSAGE)
    } else {
        build_runtime_proxy_text_response(503, MESSAGE)
    };
    let _ = request.respond(response);
}
/// Extract the first `X-Prodex-Admin-Token` header value, trimmed; `None`
/// when the header is absent or blank.
fn runtime_proxy_admin_token(request: &tiny_http::Request) -> Option<String> {
    for header in request.headers() {
        if !header.field.equiv("X-Prodex-Admin-Token") {
            continue;
        }
        let token = header.value.as_str().trim();
        if token.is_empty() {
            return None;
        }
        return Some(token.to_string());
    }
    None
}
fn build_runtime_proxy_json_response(status: u16, body: String) -> tiny_http::ResponseBox {
let mut response = TinyResponse::from_string(body).with_status_code(status);
if let Ok(header) = TinyHeader::from_bytes("Content-Type", "application/json") {
response = response.with_header(header);
}
response.boxed()
}
/// Build an HTTP response with `status`, a string `body`, and the given
/// `Content-Type`. If the header cannot be constructed from the provided
/// bytes, the response is returned without a Content-Type header.
fn build_runtime_proxy_string_response(
    status: u16,
    body: String,
    content_type: &str,
) -> tiny_http::ResponseBox {
    let response = TinyResponse::from_string(body).with_status_code(status);
    match TinyHeader::from_bytes("Content-Type", content_type) {
        Ok(header) => response.with_header(header).boxed(),
        Err(_) => response.boxed(),
    }
}
/// Build a Prometheus text-exposition (format version 0.0.4) response.
fn build_runtime_proxy_prometheus_response(status: u16, body: String) -> tiny_http::ResponseBox {
    const PROMETHEUS_CONTENT_TYPE: &str = "text/plain; version=0.0.4; charset=utf-8";
    build_runtime_proxy_string_response(status, body, PROMETHEUS_CONTENT_TYPE)
}
/// Record `current_profile` in the broker metadata registered under
/// `log_path`, if any. A poisoned registry lock is recovered rather than
/// propagated.
fn update_runtime_broker_current_profile(log_path: &Path, current_profile: &str) {
    let registry = runtime_broker_metadata_by_log_path();
    let mut metadata_by_path = registry
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    let Some(metadata) = metadata_by_path.get_mut(log_path) else {
        return;
    };
    metadata.current_profile = current_profile.to_string();
}
/// Summarize continuation-binding counts plus lifecycle-state tallies
/// across the response, turn-state, and session-id binding maps.
fn runtime_broker_continuation_metrics(
    statuses: &RuntimeContinuationStatuses,
) -> RuntimeBrokerContinuationMetrics {
    let mut metrics = RuntimeBrokerContinuationMetrics {
        response_bindings: statuses.response.len(),
        turn_state_bindings: statuses.turn_state.len(),
        session_id_bindings: statuses.session_id.len(),
        warm: 0,
        verified: 0,
        suspect: 0,
        dead: 0,
    };
    // Tally each binding's lifecycle state across all three maps.
    let all_statuses = statuses
        .response
        .values()
        .chain(statuses.turn_state.values())
        .chain(statuses.session_id.values());
    for status in all_statuses {
        let bucket = match status.state {
            RuntimeContinuationBindingLifecycle::Warm => &mut metrics.warm,
            RuntimeContinuationBindingLifecycle::Verified => &mut metrics.verified,
            RuntimeContinuationBindingLifecycle::Suspect => &mut metrics.suspect,
            RuntimeContinuationBindingLifecycle::Dead => &mut metrics.dead,
        };
        *bucket += 1;
    }
    metrics
}
fn runtime_broker_prometheus_snapshot(
metadata: &RuntimeBrokerMetadata,
metrics: &RuntimeBrokerMetrics,
) -> runtime_metrics::RuntimeBrokerSnapshot {
let profile_inflight = metrics
.profile_inflight
.iter()
.map(|(profile, count)| (profile.clone(), *count as u64))
.collect();
runtime_metrics::RuntimeBrokerSnapshot {
broker_key: metadata.broker_key.clone(),
listen_addr: metadata.listen_addr.clone(),
pid: metrics.health.pid,
started_at_unix_seconds: metrics.health.started_at,
current_profile: metrics.health.current_profile.clone(),
include_code_review: metrics.health.include_code_review,
persistence_role: metrics.health.persistence_role.clone(),
active_requests: metrics.health.active_requests as u64,
active_request_limit: metrics.active_request_limit as u64,
local_overload_backoff_remaining_seconds: metrics.local_overload_backoff_remaining_seconds,
traffic: runtime_metrics::RuntimeBrokerTrafficMetrics {
responses: runtime_metrics::RuntimeBrokerLaneMetrics {
active: metrics.traffic.responses.active as u64,
limit: metrics.traffic.responses.limit as u64,
},
compact: runtime_metrics::RuntimeBrokerLaneMetrics {
active: metrics.traffic.compact.active as u64,
limit: metrics.traffic.compact.limit as u64,
},
websocket: runtime_metrics::RuntimeBrokerLaneMetrics {
active: metrics.traffic.websocket.active as u64,
limit: metrics.traffic.websocket.limit as u64,
},
standard: runtime_metrics::RuntimeBrokerLaneMetrics {
active: metrics.traffic.standard.active as u64,
limit: metrics.traffic.standard.limit as u64,
},
},
profile_inflight,
retry_backoffs: metrics.retry_backoffs as u64,
transport_backoffs: metrics.transport_backoffs as u64,
route_circuits: metrics.route_circuits as u64,
degraded_profiles: metrics.degraded_profiles as u64,
degraded_routes: metrics.degraded_routes as u64,
continuations: runtime_metrics::RuntimeBrokerContinuationMetrics {
response_bindings: metrics.continuations.response_bindings as u64,
turn_state_bindings: metrics.continuations.turn_state_bindings as u64,
session_id_bindings: metrics.continuations.session_id_bindings as u64,
warm: metrics.continuations.warm as u64,
verified: metrics.continuations.verified as u64,
suspect: metrics.continuations.suspect as u64,
dead: metrics.continuations.dead as u64,
},
}
}
/// Assemble a point-in-time metrics sample for the broker admin endpoints.
///
/// Locks the shared rotation state for the duration of the read; fails
/// only if that mutex is poisoned. Backoff and circuit counts include only
/// entries whose deadlines are still in the future at call time.
fn runtime_broker_metrics_snapshot(
    shared: &RuntimeRotationProxyShared,
    metadata: &RuntimeBrokerMetrics,
) -> Result<RuntimeBrokerMetrics> {
    let now = Local::now().timestamp();
    // Unsigned clock for comparing against the overload-backoff atomic.
    let now_u64 = now.max(0) as u64;
    let runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let health = RuntimeBrokerHealth {
        pid: std::process::id(),
        started_at: metadata.started_at,
        current_profile: metadata.current_profile.clone(),
        include_code_review: metadata.include_code_review,
        active_requests: shared.active_request_count.load(Ordering::SeqCst),
        instance_token: metadata.instance_token.clone(),
        persistence_role: if runtime_proxy_persistence_enabled(shared) {
            "owner".to_string()
        } else {
            "follower".to_string()
        },
    };
    // Profile-level health keys have no "__" namespace prefix; route-level
    // keys use "__route_health__:". A positive effective score marks the
    // entry as degraded.
    let degraded_profiles = runtime
        .profile_health
        .iter()
        .filter(|(key, entry)| {
            !key.starts_with("__") && runtime_profile_effective_health_score(entry, now) > 0
        })
        .count();
    let degraded_routes = runtime
        .profile_health
        .iter()
        .filter(|(key, entry)| {
            key.starts_with("__route_health__:")
                && runtime_profile_effective_health_score(entry, now) > 0
        })
        .count();
    Ok(RuntimeBrokerMetrics {
        health,
        active_request_limit: shared.active_request_limit,
        local_overload_backoff_remaining_seconds: shared
            .local_overload_backoff_until
            .load(Ordering::SeqCst)
            .saturating_sub(now_u64),
        traffic: RuntimeBrokerTrafficMetrics {
            responses: RuntimeBrokerLaneMetrics {
                active: shared
                    .lane_admission
                    .responses_active
                    .load(Ordering::SeqCst),
                limit: shared.lane_admission.limits.responses,
            },
            compact: RuntimeBrokerLaneMetrics {
                active: shared.lane_admission.compact_active.load(Ordering::SeqCst),
                limit: shared.lane_admission.limits.compact,
            },
            websocket: RuntimeBrokerLaneMetrics {
                active: shared
                    .lane_admission
                    .websocket_active
                    .load(Ordering::SeqCst),
                limit: shared.lane_admission.limits.websocket,
            },
            standard: RuntimeBrokerLaneMetrics {
                active: shared.lane_admission.standard_active.load(Ordering::SeqCst),
                limit: shared.lane_admission.limits.standard,
            },
        },
        profile_inflight: runtime.profile_inflight.clone(),
        // Count only deadlines that have not yet expired.
        retry_backoffs: runtime
            .profile_retry_backoff_until
            .values()
            .filter(|until| **until > now)
            .count(),
        transport_backoffs: runtime
            .profile_transport_backoff_until
            .values()
            .filter(|until| **until > now)
            .count(),
        route_circuits: runtime
            .profile_route_circuit_open_until
            .values()
            .filter(|until| **until > now)
            .count(),
        degraded_profiles,
        degraded_routes,
        continuations: runtime_broker_continuation_metrics(&runtime.continuation_statuses),
    })
}
/// Serve the `/__prodex/runtime/*` broker admin endpoints: health, metrics
/// (JSON and Prometheus), and profile activation.
///
/// Returns `None` when the path is not an admin endpoint so the request
/// falls through to normal proxying, and `Some(response)` otherwise. Every
/// endpoint requires the registered admin token header; activation also
/// requires POST with a JSON body carrying a non-empty `current_profile`.
fn handle_runtime_proxy_admin_request(
    request: &mut tiny_http::Request,
    shared: &RuntimeRotationProxyShared,
) -> Option<tiny_http::ResponseBox> {
    let path = path_without_query(request.url());
    if path != "/__prodex/runtime/health"
        && path != "/__prodex/runtime/metrics"
        && path != "/__prodex/runtime/metrics/prometheus"
        && path != "/__prodex/runtime/activate"
    {
        return None;
    }
    // Admin endpoints exist only when broker metadata was registered for
    // this proxy instance's log path.
    let Some(metadata) = runtime_broker_metadata_for_log_path(&shared.log_path) else {
        return Some(build_runtime_proxy_json_error_response(
            404,
            "not_found",
            "runtime broker admin endpoint is not enabled for this proxy",
        ));
    };
    if runtime_proxy_admin_token(request).as_deref() != Some(metadata.admin_token.as_str()) {
        return Some(build_runtime_proxy_json_error_response(
            403,
            "forbidden",
            "missing or invalid runtime broker admin token",
        ));
    }
    if path == "/__prodex/runtime/health" {
        let health = RuntimeBrokerHealth {
            pid: std::process::id(),
            started_at: metadata.started_at,
            current_profile: metadata.current_profile,
            include_code_review: metadata.include_code_review,
            active_requests: shared.active_request_count.load(Ordering::SeqCst),
            instance_token: metadata.instance_token,
            persistence_role: if runtime_proxy_persistence_enabled(shared) {
                "owner".to_string()
            } else {
                "follower".to_string()
            },
        };
        // `.ok()?` lets an (unlikely) serialization failure fall through to
        // ordinary proxying instead of answering with a broken body.
        let body = serde_json::to_string(&health).ok()?;
        return Some(build_runtime_proxy_json_response(200, body));
    }
    if path == "/__prodex/runtime/metrics" {
        let metrics = match runtime_broker_metrics_snapshot(shared, &metadata) {
            Ok(metrics) => metrics,
            Err(err) => {
                return Some(build_runtime_proxy_json_error_response(
                    500,
                    "internal_error",
                    &err.to_string(),
                ));
            }
        };
        let body = serde_json::to_string(&metrics).ok()?;
        return Some(build_runtime_proxy_json_response(200, body));
    }
    if path == "/__prodex/runtime/metrics/prometheus" {
        let metrics = match runtime_broker_metrics_snapshot(shared, &metadata) {
            Ok(metrics) => metrics,
            Err(err) => {
                return Some(build_runtime_proxy_json_error_response(
                    500,
                    "internal_error",
                    &err.to_string(),
                ));
            }
        };
        let snapshot = runtime_broker_prometheus_snapshot(&metadata, &metrics);
        let body = runtime_metrics::render_runtime_broker_prometheus(&snapshot);
        return Some(build_runtime_proxy_prometheus_response(200, body));
    }
    // Remaining endpoint: /__prodex/runtime/activate.
    if request.method().as_str() != "POST" {
        return Some(build_runtime_proxy_json_error_response(
            405,
            "method_not_allowed",
            "runtime broker activation requires POST",
        ));
    }
    let mut body = Vec::new();
    if let Err(err) = request.as_reader().read_to_end(&mut body) {
        return Some(build_runtime_proxy_json_error_response(
            400,
            "invalid_request",
            &format!("failed to read runtime broker activation body: {err}"),
        ));
    }
    let current_profile = match serde_json::from_slice::<serde_json::Value>(&body)
        .ok()
        .and_then(|value| {
            value
                .get("current_profile")
                .and_then(|value| value.as_str())
                .map(str::trim)
                .map(str::to_string)
        })
        .filter(|value| !value.is_empty())
    {
        Some(current_profile) => current_profile,
        None => {
            return Some(build_runtime_proxy_json_error_response(
                400,
                "invalid_request",
                "runtime broker activation requires a non-empty current_profile",
            ));
        }
    };
    let update_result = (|| -> Result<()> {
        let mut runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        runtime.current_profile = current_profile.clone();
        runtime.state.active_profile = Some(current_profile.clone());
        Ok(())
    })();
    if let Err(err) = update_result {
        return Some(build_runtime_proxy_json_error_response(
            500,
            "internal_error",
            &err.to_string(),
        ));
    }
    // Bug fix: this argument was the mojibake `¤t_profile` (an HTML-entity
    // mangling of `&current_profile`), which does not compile.
    update_runtime_broker_current_profile(&shared.log_path, &current_profile);
    runtime_proxy_log(
        shared,
        format!("runtime_broker_activate current_profile={current_profile}"),
    );
    audit_log_event_best_effort(
        "runtime_broker",
        "activate_profile",
        "success",
        serde_json::json!({
            "broker_key": metadata.broker_key,
            "listen_addr": metadata.listen_addr,
            "current_profile": current_profile,
        }),
    );
    Some(build_runtime_proxy_json_response(
        200,
        serde_json::json!({
            "ok": true,
            "current_profile": current_profile,
        })
        .to_string(),
    ))
}
fn handle_runtime_proxy_anthropic_compat_request(
request: &tiny_http::Request,
) -> Option<tiny_http::ResponseBox> {
let path = path_without_query(request.url());
let method = request.method().as_str();
if !method.eq_ignore_ascii_case("GET") && !method.eq_ignore_ascii_case("HEAD") {
return None;
}
match path {
"/" => Some(build_runtime_proxy_json_response(
200,
serde_json::json!({
"service": "prodex",
"status": "ok",
"version": env!("CARGO_PKG_VERSION"),
})
.to_string(),
)),
RUNTIME_PROXY_ANTHROPIC_HEALTH_PATH => Some(build_runtime_proxy_json_response(
200,
serde_json::json!({
"status": "ok",
})
.to_string(),
)),
RUNTIME_PROXY_ANTHROPIC_MODELS_PATH => Some(build_runtime_proxy_json_response(
200,
runtime_proxy_anthropic_models_list().to_string(),
)),
_ => runtime_proxy_anthropic_model_id_from_path(path).map(|model_id| {
build_runtime_proxy_json_response(
200,
runtime_proxy_anthropic_model_descriptor(model_id).to_string(),
)
}),
}
}
/// Build the Anthropic-style model-list envelope from the static responses
/// model catalogue. The list is never paginated (`has_more` is always
/// false), but the envelope still carries first/last ids.
fn runtime_proxy_anthropic_models_list() -> serde_json::Value {
    let data: Vec<serde_json::Value> = runtime_proxy_responses_model_descriptors()
        .iter()
        .map(|descriptor| runtime_proxy_anthropic_model_descriptor(descriptor.id))
        .collect();
    // Shared helper for pulling the "id" string out of a boundary entry.
    let id_of = |model: Option<&serde_json::Value>| {
        model
            .and_then(|model| model.get("id"))
            .and_then(serde_json::Value::as_str)
            .unwrap_or_default()
            .to_string()
    };
    let first_id = id_of(data.first());
    let last_id = id_of(data.last());
    serde_json::json!({
        "data": data,
        "first_id": first_id,
        "has_more": false,
        "last_id": last_id,
    })
}
fn runtime_proxy_anthropic_model_descriptor(model_id: &str) -> serde_json::Value {
let supported_effort_levels = runtime_proxy_responses_model_supported_effort_levels(model_id);
serde_json::json!({
"type": "model",
"id": model_id,
"display_name": runtime_proxy_anthropic_model_display_name(model_id),
"created_at": RUNTIME_PROXY_ANTHROPIC_MODEL_CREATED_AT,
"supportsEffort": true,
"supportedEffortLevels": supported_effort_levels,
})
}
/// Static catalogue of the responses-API models this proxy advertises.
///
/// `claude_alias` / `claude_picker_model` map entries onto Claude naming
/// for Anthropic-compat clients; `supports_xhigh` marks models that accept
/// the extra-high reasoning-effort level. Ordering matters: the first
/// entry supplies `first_id` in the Anthropic model-list envelope.
fn runtime_proxy_responses_model_descriptors() -> &'static [RuntimeProxyResponsesModelDescriptor] {
    &[
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.4",
            display_name: "GPT-5.4",
            description: "Latest frontier agentic coding model.",
            claude_alias: Some(RuntimeProxyClaudeModelAlias::Opus),
            claude_picker_model: Some("claude-opus-4-6"),
            supports_xhigh: true,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.4-mini",
            display_name: "GPT-5.4 Mini",
            description: "Smaller frontier agentic coding model.",
            claude_alias: Some(RuntimeProxyClaudeModelAlias::Haiku),
            claude_picker_model: Some("claude-haiku-4-5"),
            supports_xhigh: true,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.3-codex",
            display_name: "GPT-5.3 Codex",
            description: "Frontier Codex-optimized agentic coding model.",
            claude_alias: Some(RuntimeProxyClaudeModelAlias::Sonnet),
            claude_picker_model: Some("claude-sonnet-4-6"),
            supports_xhigh: true,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.2-codex",
            display_name: "GPT-5.2 Codex",
            description: "Frontier agentic coding model.",
            claude_alias: None,
            claude_picker_model: Some("claude-sonnet-4-5"),
            supports_xhigh: true,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.2",
            display_name: "GPT-5.2",
            description: "Optimized for professional work and long-running agents.",
            claude_alias: None,
            claude_picker_model: Some("claude-opus-4-5"),
            supports_xhigh: true,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.1-codex-max",
            display_name: "GPT-5.1 Codex Max",
            description: "Codex-optimized model for deep and fast reasoning.",
            claude_alias: None,
            claude_picker_model: Some("claude-opus-4-1"),
            supports_xhigh: false,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5.1-codex-mini",
            display_name: "GPT-5.1 Codex Mini",
            description: "Optimized for Codex. Cheaper, faster, but less capable.",
            claude_alias: None,
            claude_picker_model: Some("claude-haiku-4"),
            supports_xhigh: false,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5",
            display_name: "GPT-5",
            description: "General-purpose GPT-5 model.",
            claude_alias: None,
            claude_picker_model: Some("claude-sonnet-4"),
            supports_xhigh: false,
        },
        RuntimeProxyResponsesModelDescriptor {
            id: "gpt-5-mini",
            display_name: "GPT-5 Mini",
            description: "Smaller GPT-5 model for fast, lower-cost tasks.",
            claude_alias: None,
            claude_picker_model: Some("claude-haiku-3-5"),
            supports_xhigh: false,
        },
    ]
}
/// Human-readable display name for a model id; unknown ids fall back to
/// the id itself.
fn runtime_proxy_anthropic_model_display_name(model_id: &str) -> String {
    match runtime_proxy_responses_model_descriptor(model_id) {
        Some(descriptor) => descriptor.display_name.to_string(),
        None => model_id.to_string(),
    }
}
/// Extract the model id from a `<models path>/<id>` URL path; `None` for
/// the bare models path or any path not under it.
fn runtime_proxy_anthropic_model_id_from_path(path: &str) -> Option<&str> {
    // Strip the prefix and the separating slash in two steps instead of
    // allocating the joined `format!("{prefix}/")` string on every call.
    path.strip_prefix(RUNTIME_PROXY_ANTHROPIC_MODELS_PATH)
        .and_then(|rest| rest.strip_prefix('/'))
        .filter(|model_id| !model_id.is_empty())
}
/// Extend the local overload-backoff deadline to now + the configured
/// backoff window (minimum one second) and log the event.
fn mark_runtime_proxy_local_overload(shared: &RuntimeRotationProxyShared, reason: &str) {
    let now = Local::now().timestamp().max(0) as u64;
    let until = now.saturating_add(RUNTIME_PROXY_LOCAL_OVERLOAD_BACKOFF_SECONDS.max(1) as u64);
    // `fetch_max` performs the compare-and-extend atomically; the previous
    // separate load/store pair could lose a concurrent, later deadline
    // written between the two operations.
    shared
        .local_overload_backoff_until
        .fetch_max(until, Ordering::SeqCst);
    runtime_proxy_log(
        shared,
        format!("runtime_proxy_overload_backoff until={until} reason={reason}"),
    );
}
/// Reason an incoming request was denied admission to the proxy.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeProxyAdmissionRejection {
    /// The global active-request limit was reached.
    GlobalLimit,
    /// The per-lane limit for this route kind was reached.
    LaneLimit(RuntimeRouteKind),
}
/// Outcome of a failed enqueue onto the long-lived request channel.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeProxyQueueRejection {
    /// The bounded queue was at capacity.
    Full,
    /// The receiving side of the channel has shut down.
    Disconnected,
}
/// Classifies a request into its admission lane.
///
/// WebSocket upgrades always take the websocket lane; otherwise the lane is
/// derived from the request path: compact, responses (which also covers
/// Anthropic messages), or the default standard lane.
fn runtime_proxy_request_lane(path: &str, websocket: bool) -> RuntimeRouteKind {
    if websocket {
        return RuntimeRouteKind::Websocket;
    }
    if is_runtime_compact_path(path) {
        return RuntimeRouteKind::Compact;
    }
    let responses_like =
        is_runtime_responses_path(path) || is_runtime_anthropic_messages_path(path);
    if responses_like {
        RuntimeRouteKind::Responses
    } else {
        RuntimeRouteKind::Standard
    }
}
/// Reads the internal request-origin header, if present and non-empty.
///
/// The explicit `'a` lifetime was redundant: with a single input reference,
/// lifetime elision ties the returned `&str` to `headers` automatically.
fn runtime_proxy_request_origin(headers: &[(String, String)]) -> Option<&str> {
    runtime_proxy_request_header_value(headers, PRODEX_INTERNAL_REQUEST_ORIGIN_HEADER)
}
/// True when the request originated from the Anthropic-messages translation
/// layer, which prefers waiting for an in-flight slot over fast rejection.
fn runtime_proxy_request_prefers_interactive_inflight_wait(request: &RuntimeProxyRequest) -> bool {
    match runtime_proxy_request_origin(&request.headers) {
        Some(origin) => {
            origin.eq_ignore_ascii_case(PRODEX_INTERNAL_REQUEST_ORIGIN_ANTHROPIC_MESSAGES)
        }
        None => false,
    }
}
/// Responses-path requests and Anthropic-origin requests both opt into
/// waiting for an in-flight slot instead of failing fast.
fn runtime_proxy_request_prefers_inflight_wait(request: &RuntimeProxyRequest) -> bool {
    if is_runtime_responses_path(&request.path_and_query) {
        return true;
    }
    runtime_proxy_request_prefers_interactive_inflight_wait(request)
}
/// How long this request may wait for an in-flight slot to free up.
///
/// Anthropic-origin requests use the interactive messages-path budget,
/// responses-path requests use their own path's budget, and everything else
/// gets no wait at all.
fn runtime_proxy_request_inflight_wait_budget(
    request: &RuntimeProxyRequest,
    pressure_mode: bool,
) -> Duration {
    let interactive = runtime_proxy_request_prefers_interactive_inflight_wait(request);
    if interactive {
        return runtime_proxy_admission_wait_budget(
            RUNTIME_PROXY_ANTHROPIC_MESSAGES_PATH,
            pressure_mode,
        );
    }
    if runtime_proxy_request_prefers_inflight_wait(request) {
        runtime_proxy_admission_wait_budget(&request.path_and_query, pressure_mode)
    } else {
        Duration::ZERO
    }
}
/// Long-lived requests (websocket upgrades, responses, Anthropic messages)
/// are routed through the bounded long-lived worker queue.
fn runtime_proxy_request_is_long_lived(path: &str, websocket: bool) -> bool {
    if websocket {
        return true;
    }
    is_runtime_responses_path(path) || is_runtime_anthropic_messages_path(path)
}
/// Scales a base wait budget for interactive (Anthropic messages) traffic;
/// all other paths keep the base budget unchanged.
fn runtime_proxy_interactive_wait_budget_ms(path: &str, base_budget_ms: u64) -> u64 {
    match is_runtime_anthropic_messages_path(path) {
        true => base_budget_ms.saturating_mul(RUNTIME_PROXY_INTERACTIVE_WAIT_MULTIPLIER),
        false => base_budget_ms,
    }
}
/// Admission wait budget for `path`, using the tighter pressure-mode base
/// budget when the proxy is under load.
fn runtime_proxy_admission_wait_budget(path: &str, pressure_mode: bool) -> Duration {
    let base_budget_ms = match pressure_mode {
        true => runtime_proxy_pressure_admission_wait_budget_ms(),
        false => runtime_proxy_admission_wait_budget_ms(),
    };
    let budget_ms = runtime_proxy_interactive_wait_budget_ms(path, base_budget_ms);
    Duration::from_millis(budget_ms)
}
/// Queue-capacity wait budget for a long-lived request on `path`, using the
/// tighter pressure-mode base budget when the proxy is under load.
fn runtime_proxy_long_lived_queue_wait_budget(path: &str, pressure_mode: bool) -> Duration {
    let base_budget_ms = match pressure_mode {
        true => runtime_proxy_pressure_long_lived_queue_wait_budget_ms(),
        false => runtime_proxy_long_lived_queue_wait_budget_ms(),
    };
    let budget_ms = runtime_proxy_interactive_wait_budget_ms(path, base_budget_ms);
    Duration::from_millis(budget_ms)
}
/// True while the local-overload backoff deadline lies in the future.
fn runtime_proxy_local_overload_pressure_active(shared: &RuntimeRotationProxyShared) -> bool {
    let deadline = shared.local_overload_backoff_until.load(Ordering::SeqCst);
    deadline > Local::now().timestamp().max(0) as u64
}
/// True when the background maintenance queues (state saves, continuation
/// journal, probe refreshes) have built up enough backlog to count as
/// pressure.
fn runtime_proxy_background_queue_pressure_active() -> bool {
    let state_save_backlog = runtime_state_save_queue_backlog();
    let journal_backlog = runtime_continuation_journal_queue_backlog();
    let probe_refresh_backlog = runtime_probe_refresh_queue_backlog();
    runtime_proxy_queue_pressure_active(state_save_backlog, journal_backlog, probe_refresh_backlog)
}
/// Combined pressure signal: local overload backoff or background-queue
/// backlog.
fn runtime_proxy_pressure_mode_active(shared: &RuntimeRotationProxyShared) -> bool {
    if runtime_proxy_local_overload_pressure_active(shared) {
        return true;
    }
    runtime_proxy_background_queue_pressure_active()
}
/// Background-queue pressure only throttles the compact and standard lanes;
/// responses and websocket traffic keep their normal budgets.
fn runtime_proxy_background_queue_pressure_affects_route(route_kind: RuntimeRouteKind) -> bool {
    route_kind == RuntimeRouteKind::Compact || route_kind == RuntimeRouteKind::Standard
}
/// Resolves pressure mode for a specific lane from the two raw pressure
/// signals: local overload applies everywhere; background-queue pressure
/// applies only to lanes it is configured to affect.
fn runtime_proxy_pressure_mode_for_route(
    route_kind: RuntimeRouteKind,
    local_overload_pressure: bool,
    background_queue_pressure: bool,
) -> bool {
    if local_overload_pressure {
        return true;
    }
    background_queue_pressure && runtime_proxy_background_queue_pressure_affects_route(route_kind)
}
/// Samples both pressure signals and resolves pressure mode for `route_kind`.
fn runtime_proxy_pressure_mode_active_for_route(
    shared: &RuntimeRotationProxyShared,
    route_kind: RuntimeRouteKind,
) -> bool {
    let local_overload = runtime_proxy_local_overload_pressure_active(shared);
    let background_queue = runtime_proxy_background_queue_pressure_active();
    runtime_proxy_pressure_mode_for_route(route_kind, local_overload, background_queue)
}
/// Resolves pressure mode for a raw request path/transport pair by first
/// classifying it into its admission lane.
fn runtime_proxy_pressure_mode_active_for_request_path(
    shared: &RuntimeRotationProxyShared,
    path: &str,
    websocket: bool,
) -> bool {
    let lane = runtime_proxy_request_lane(path, websocket);
    runtime_proxy_pressure_mode_active_for_route(shared, lane)
}
fn runtime_proxy_sync_probe_pressure_mode_active(shared: &RuntimeRotationProxyShared) -> bool {
runtime_proxy_local_overload_pressure_active(shared)
|| runtime_proxy_background_queue_pressure_active()
}
/// Only responses-lane saturation is severe enough to flip the proxy into
/// its global overload backoff.
fn runtime_proxy_lane_limit_marks_global_overload(lane: RuntimeRouteKind) -> bool {
    matches!(lane, RuntimeRouteKind::Responses)
}
/// Under pressure, fresh compact requests (no session profile binding yet)
/// are shed; compact requests already bound to a profile pass through, and
/// nothing is shed when pressure mode is off.
fn runtime_proxy_should_shed_fresh_compact_request(
    pressure_mode: bool,
    session_profile: Option<&str>,
) -> bool {
    if !pressure_mode {
        return false;
    }
    session_profile.is_none()
}
/// One-time startup audit of the shared runtime state.
///
/// Counts inconsistencies left over from previous runs (managed profile
/// directories missing on disk, bindings/caches referencing profiles that no
/// longer exist), prunes the stale entries in place, emits one summary log
/// line, and schedules a state save when anything countable was removed.
fn audit_runtime_proxy_startup_state(shared: &RuntimeRotationProxyShared) {
    // Best effort: if the runtime mutex is poisoned, silently skip the audit.
    let Ok(mut runtime) = shared.runtime.lock() else {
        return;
    };
    let now = Local::now().timestamp();
    let orphan_managed_dirs = collect_orphan_managed_profile_dirs(&runtime.paths, &runtime.state);
    // Managed profiles whose codex_home directory vanished from disk.
    let missing_managed_dirs = runtime
        .state
        .profiles
        .values()
        .filter(|profile| profile.managed && !profile.codex_home.exists())
        .count();
    // A profile is "valid" if it is unmanaged, or managed with its dir intact.
    let valid_profiles = runtime
        .state
        .profiles
        .iter()
        .filter(|(_, profile)| !profile.managed || profile.codex_home.exists())
        .map(|(name, _)| name.clone())
        .collect::<BTreeSet<_>>();
    // Count (for the log line and the `changed` flag) each cache/binding
    // entry that points at an invalid profile. Counting happens before the
    // retain() prunes below so the numbers reflect what gets removed.
    let stale_response_bindings = runtime
        .state
        .response_profile_bindings
        .values()
        .filter(|binding| !valid_profiles.contains(&binding.profile_name))
        .count();
    let stale_session_bindings = runtime
        .state
        .session_profile_bindings
        .values()
        .filter(|binding| !valid_profiles.contains(&binding.profile_name))
        .count();
    let stale_probe_cache = runtime
        .profile_probe_cache
        .keys()
        .filter(|profile_name| !valid_profiles.contains(*profile_name))
        .count();
    let stale_usage_snapshots = runtime
        .profile_usage_snapshots
        .keys()
        .filter(|profile_name| !valid_profiles.contains(*profile_name))
        .count();
    let stale_retry_backoffs = runtime
        .profile_retry_backoff_until
        .keys()
        .filter(|profile_name| !valid_profiles.contains(*profile_name))
        .count();
    // Transport-backoff and route-circuit maps use composite keys, so the
    // profile component is extracted via helpers before validation.
    let stale_transport_backoffs = runtime
        .profile_transport_backoff_until
        .keys()
        .filter(|key| !runtime_profile_transport_backoff_key_valid(key, &valid_profiles))
        .count();
    let stale_route_circuits = runtime
        .profile_route_circuit_open_until
        .keys()
        .filter(|key| !valid_profiles.contains(runtime_profile_route_circuit_profile_name(key)))
        .count();
    let stale_health_scores = runtime
        .profile_health
        .keys()
        .filter(|key| !valid_profiles.contains(runtime_profile_score_profile_name(key)))
        .count();
    let active_profile_missing_dir = runtime
        .state
        .active_profile
        .as_deref()
        .and_then(|name| runtime.state.profiles.get(name))
        .is_some_and(|profile| profile.managed && !profile.codex_home.exists());
    // Prune every structure that references an invalid profile.
    runtime
        .state
        .response_profile_bindings
        .retain(|_, binding| valid_profiles.contains(&binding.profile_name));
    runtime
        .state
        .session_profile_bindings
        .retain(|_, binding| valid_profiles.contains(&binding.profile_name));
    // NOTE(review): turn_state_bindings and session_id_bindings are pruned
    // here but not counted above, so their removals alone do not set
    // `changed` — TODO confirm that is intentional.
    runtime
        .turn_state_bindings
        .retain(|_, binding| valid_profiles.contains(&binding.profile_name));
    runtime
        .session_id_bindings
        .retain(|_, binding| valid_profiles.contains(&binding.profile_name));
    runtime
        .profile_probe_cache
        .retain(|profile_name, _| valid_profiles.contains(profile_name));
    runtime
        .profile_usage_snapshots
        .retain(|profile_name, _| valid_profiles.contains(profile_name));
    runtime
        .profile_retry_backoff_until
        .retain(|profile_name, _| valid_profiles.contains(profile_name));
    runtime
        .profile_transport_backoff_until
        .retain(|key, _| runtime_profile_transport_backoff_key_valid(key, &valid_profiles));
    runtime
        .profile_route_circuit_open_until
        .retain(|key, _| valid_profiles.contains(runtime_profile_route_circuit_profile_name(key)));
    runtime
        .profile_health
        .retain(|key, _| valid_profiles.contains(runtime_profile_score_profile_name(key)));
    // Measure how many route circuits the time-based prune drops, on top of
    // the profile-validity prune that already ran above.
    let route_circuit_count_after_profile_prune = runtime.profile_route_circuit_open_until.len();
    prune_runtime_profile_route_circuits(&mut runtime, now);
    let expired_route_circuits = route_circuit_count_after_profile_prune
        .saturating_sub(runtime.profile_route_circuit_open_until.len());
    let changed = stale_response_bindings > 0
        || stale_session_bindings > 0
        || stale_probe_cache > 0
        || stale_usage_snapshots > 0
        || stale_retry_backoffs > 0
        || stale_transport_backoffs > 0
        || stale_route_circuits > 0
        || expired_route_circuits > 0
        || stale_health_scores > 0;
    runtime_proxy_log(
        shared,
        format!(
            "runtime_proxy_startup_audit missing_managed_dirs={missing_managed_dirs} orphan_managed_dirs={} stale_response_bindings={stale_response_bindings} stale_session_bindings={stale_session_bindings} stale_probe_cache={stale_probe_cache} stale_usage_snapshots={stale_usage_snapshots} stale_retry_backoffs={stale_retry_backoffs} stale_transport_backoffs={stale_transport_backoffs} stale_route_circuits={stale_route_circuits} expired_route_circuits={expired_route_circuits} stale_health_scores={stale_health_scores} active_profile_missing_dir={active_profile_missing_dir}",
            orphan_managed_dirs.len(),
        ),
    );
    if changed {
        // Persist the pruned state asynchronously rather than blocking startup.
        schedule_runtime_state_save_from_runtime(shared, &runtime, "startup_audit");
    }
    drop(runtime);
}
/// Tries to reserve one global and one lane admission slot without blocking.
///
/// On success returns a guard that releases both counters when dropped; on
/// failure returns which ceiling (global or lane) rejected the request. The
/// two counters are acquired with separate compare-exchange operations: if
/// the lane CAS loses a race after the global CAS succeeded, the global
/// increment is rolled back and the whole attempt retries.
fn try_acquire_runtime_proxy_active_request_slot(
    shared: &RuntimeRotationProxyShared,
    transport: &str,
    path: &str,
) -> Result<RuntimeProxyActiveRequestGuard, RuntimeProxyAdmissionRejection> {
    let lane = runtime_proxy_request_lane(path, transport == "websocket");
    let lane_active_count = shared.lane_admission.active_counter(lane);
    let lane_limit = shared.lane_admission.limit(lane);
    loop {
        // Check the global ceiling first; a reject here is logged and final
        // for this attempt (callers may retry with a wait budget).
        let active = shared.active_request_count.load(Ordering::SeqCst);
        if active >= shared.active_request_limit {
            runtime_proxy_log(
                shared,
                format!(
                    "runtime_proxy_active_limit_reached transport={transport} path={path} active={active} limit={}",
                    shared.active_request_limit
                ),
            );
            return Err(RuntimeProxyAdmissionRejection::GlobalLimit);
        }
        let lane_active = lane_active_count.load(Ordering::SeqCst);
        if lane_active >= lane_limit {
            runtime_proxy_log(
                shared,
                format!(
                    "runtime_proxy_lane_limit_reached transport={transport} path={path} lane={} active={lane_active} limit={lane_limit}",
                    runtime_route_kind_label(lane)
                ),
            );
            return Err(RuntimeProxyAdmissionRejection::LaneLimit(lane));
        }
        // CAS the global counter; failure means another thread won the race,
        // so loop back and re-read both counters.
        if shared
            .active_request_count
            .compare_exchange(
                active,
                active.saturating_add(1),
                Ordering::SeqCst,
                Ordering::SeqCst,
            )
            .is_ok()
        {
            // Global slot held; now CAS the lane counter.
            if lane_active_count
                .compare_exchange(
                    lane_active,
                    lane_active.saturating_add(1),
                    Ordering::SeqCst,
                    Ordering::SeqCst,
                )
                .is_ok()
            {
                return Ok(RuntimeProxyActiveRequestGuard {
                    active_request_count: Arc::clone(&shared.active_request_count),
                    lane_active_count,
                    wait: Arc::clone(&shared.lane_admission.wait),
                });
            }
            // Lane CAS lost a race: undo the global increment and retry.
            shared.active_request_count.fetch_sub(1, Ordering::SeqCst);
        }
    }
}
/// Acquires a global+lane admission slot, waiting on the shared condvar up
/// to a path/pressure-dependent budget instead of rejecting immediately.
///
/// Logs one line when waiting begins, one on recovery, and one when the
/// budget is exhausted; returns the last observed rejection in that case.
fn acquire_runtime_proxy_active_request_slot_with_wait(
    shared: &RuntimeRotationProxyShared,
    transport: &str,
    path: &str,
) -> Result<RuntimeProxyActiveRequestGuard, RuntimeProxyAdmissionRejection> {
    let started_at = Instant::now();
    // The wait budget is fixed at entry from the pressure mode sampled here.
    let pressure_mode =
        runtime_proxy_pressure_mode_active_for_request_path(shared, path, transport == "websocket");
    let budget = runtime_proxy_admission_wait_budget(path, pressure_mode);
    let mut waited = false;
    loop {
        match try_acquire_runtime_proxy_active_request_slot(shared, transport, path) {
            Ok(guard) => {
                if waited {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_admission_recovered transport={transport} path={path} waited_ms={}",
                            started_at.elapsed().as_millis()
                        ),
                    );
                }
                return Ok(guard);
            }
            Err(rejection) => {
                let elapsed = started_at.elapsed();
                if elapsed >= budget {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_admission_wait_exhausted transport={transport} path={path} waited_ms={} reason={} pressure_mode={pressure_mode}",
                            elapsed.as_millis(),
                            match rejection {
                                RuntimeProxyAdmissionRejection::GlobalLimit =>
                                    "active_request_limit",
                                RuntimeProxyAdmissionRejection::LaneLimit(lane) =>
                                    runtime_route_kind_label(lane),
                            }
                        ),
                    );
                    return Err(rejection);
                }
                // Log the start of the wait exactly once per acquisition.
                if !waited {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_admission_wait_started transport={transport} path={path} budget_ms={} wait_timeout_ms={} reason={} pressure_mode={pressure_mode}",
                            budget.as_millis(),
                            budget.saturating_sub(elapsed).as_millis(),
                            match rejection {
                                RuntimeProxyAdmissionRejection::GlobalLimit =>
                                    "active_request_limit",
                                RuntimeProxyAdmissionRejection::LaneLimit(lane) =>
                                    runtime_route_kind_label(lane),
                            }
                        ),
                    );
                }
                waited = true;
                let (mutex, condvar) = &*shared.lane_admission.wait;
                let wait_guard = mutex
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                // Re-check under the lock: a slot released between the failed
                // attempt above and acquiring the mutex would otherwise be a
                // missed wakeup.
                if let Ok(guard) =
                    try_acquire_runtime_proxy_active_request_slot(shared, transport, path)
                {
                    return Ok(guard);
                }
                let wait_for = budget.saturating_sub(elapsed);
                if !wait_for.is_zero() {
                    // Spurious wakeups are fine: the outer loop re-tries and
                    // re-checks the budget.
                    let _ = condvar
                        .wait_timeout(wait_guard, wait_for)
                        .unwrap_or_else(|poisoned| poisoned.into_inner());
                }
            }
        }
    }
}
/// Repeatedly attempts `try_enqueue` for a long-lived request, waiting on the
/// shared admission condvar for capacity up to a path/pressure-dependent
/// budget.
///
/// `try_enqueue` returns ownership of `item` on failure so the item can be
/// retried (or handed back to the caller in the final rejection). A `Full`
/// rejection is retried until the budget runs out; `Disconnected` is final.
fn wait_for_runtime_proxy_queue_capacity<T, F>(
    mut item: T,
    shared: &RuntimeRotationProxyShared,
    transport: &str,
    path: &str,
    mut try_enqueue: F,
) -> Result<(), (RuntimeProxyQueueRejection, T)>
where
    F: FnMut(T) -> Result<(), (RuntimeProxyQueueRejection, T)>,
{
    let started_at = Instant::now();
    // The wait budget is fixed at entry from the pressure mode sampled here.
    let pressure_mode =
        runtime_proxy_pressure_mode_active_for_request_path(shared, path, transport == "websocket");
    let budget = runtime_proxy_long_lived_queue_wait_budget(path, pressure_mode);
    let mut waited = false;
    loop {
        match try_enqueue(item) {
            Ok(()) => {
                if waited {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_queue_recovered transport={transport} path={path} waited_ms={}",
                            started_at.elapsed().as_millis()
                        ),
                    );
                }
                return Ok(());
            }
            Err((RuntimeProxyQueueRejection::Full, returned_item)) => {
                // Take the item back so it can be retried below.
                item = returned_item;
                let elapsed = started_at.elapsed();
                if elapsed >= budget {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_queue_wait_exhausted transport={transport} path={path} waited_ms={} reason=long_lived_queue_full pressure_mode={pressure_mode}",
                            elapsed.as_millis()
                        ),
                    );
                    return Err((RuntimeProxyQueueRejection::Full, item));
                }
                // Log the start of the wait exactly once per call.
                if !waited {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "runtime_proxy_queue_wait_started transport={transport} path={path} budget_ms={} wait_timeout_ms={} reason=long_lived_queue_full pressure_mode={pressure_mode}",
                            budget.as_millis(),
                            budget.saturating_sub(elapsed).as_millis()
                        ),
                    );
                }
                waited = true;
                let (mutex, condvar) = &*shared.lane_admission.wait;
                let wait_guard = mutex
                    .lock()
                    .unwrap_or_else(|poisoned| poisoned.into_inner());
                // Retry under the lock: capacity freed between the failed
                // attempt above and acquiring the mutex would otherwise be a
                // missed wakeup.
                match try_enqueue(item) {
                    Ok(()) => return Ok(()),
                    Err((RuntimeProxyQueueRejection::Full, returned_item)) => {
                        item = returned_item;
                    }
                    Err((RuntimeProxyQueueRejection::Disconnected, returned_item)) => {
                        runtime_proxy_log(
                            shared,
                            format!(
                                "runtime_proxy_queue_wait_exhausted transport={transport} path={path} waited_ms={} reason=long_lived_queue_disconnected",
                                started_at.elapsed().as_millis()
                            ),
                        );
                        return Err((RuntimeProxyQueueRejection::Disconnected, returned_item));
                    }
                }
                let wait_for = budget.saturating_sub(elapsed);
                if !wait_for.is_zero() {
                    // Spurious wakeups are fine: the outer loop re-tries and
                    // re-checks the budget.
                    let _ = condvar
                        .wait_timeout(wait_guard, wait_for)
                        .unwrap_or_else(|poisoned| poisoned.into_inner());
                }
                continue;
            }
            Err((RuntimeProxyQueueRejection::Disconnected, returned_item)) => {
                // Worker side is gone; retrying cannot help.
                runtime_proxy_log(
                    shared,
                    format!(
                        "runtime_proxy_queue_wait_exhausted transport={transport} path={path} waited_ms={} reason=long_lived_queue_disconnected",
                        started_at.elapsed().as_millis()
                    ),
                );
                return Err((RuntimeProxyQueueRejection::Disconnected, returned_item));
            }
        }
    }
}
/// Current value of the in-flight slot release revision counter.
fn runtime_profile_inflight_release_revision(shared: &RuntimeRotationProxyShared) -> u64 {
    let revision = &shared.lane_admission.inflight_release_revision;
    revision.load(Ordering::SeqCst)
}
/// Result of waiting for an in-flight profile slot to be released.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeProfileInFlightWaitOutcome {
    // The release-revision counter advanced: a slot was freed while waiting.
    InflightRelease,
    // The shared condvar was signalled for some unrelated reason.
    OtherNotify,
    // The wait budget elapsed with no relevant notification.
    Timeout,
}
fn runtime_profile_inflight_wait_outcome_label(
outcome: RuntimeProfileInFlightWaitOutcome,
) -> &'static str {
match outcome {
RuntimeProfileInFlightWaitOutcome::InflightRelease => "inflight_release",
RuntimeProfileInFlightWaitOutcome::OtherNotify => "other_notify",
RuntimeProfileInFlightWaitOutcome::Timeout => "timeout",
}
}
/// Waits up to `timeout` for the in-flight release revision to move past
/// `observed_revision`, classifying how the wait ended.
///
/// The revision is checked three times: before taking the condvar mutex,
/// after taking it (closing the race with a release that fires in between),
/// and once more after the timed wait, so a release is never misreported as
/// a timeout or an unrelated notify.
fn runtime_profile_inflight_wait_outcome_since(
    shared: &RuntimeRotationProxyShared,
    timeout: Duration,
    observed_revision: u64,
) -> RuntimeProfileInFlightWaitOutcome {
    // A zero budget means the caller does not want to block at all.
    if timeout.is_zero() {
        return RuntimeProfileInFlightWaitOutcome::Timeout;
    }
    // Cheap pre-check before touching the mutex.
    if runtime_profile_inflight_release_revision(shared) != observed_revision {
        return RuntimeProfileInFlightWaitOutcome::InflightRelease;
    }
    let (mutex, condvar) = &*shared.lane_admission.wait;
    let guard = mutex
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Re-check under the lock so a release between the pre-check and the
    // lock acquisition is not missed.
    if runtime_profile_inflight_release_revision(shared) != observed_revision {
        return RuntimeProfileInFlightWaitOutcome::InflightRelease;
    }
    let (_guard, result) = condvar
        .wait_timeout(guard, timeout)
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // Revision change wins over the timeout flag: a release that arrived
    // right at the deadline still counts as a release.
    if runtime_profile_inflight_release_revision(shared) != observed_revision {
        RuntimeProfileInFlightWaitOutcome::InflightRelease
    } else if result.timed_out() {
        RuntimeProfileInFlightWaitOutcome::Timeout
    } else {
        RuntimeProfileInFlightWaitOutcome::OtherNotify
    }
}
#[cfg(test)]
/// Test helper: true only when the wait ended because a slot was released.
fn wait_for_runtime_profile_inflight_relief_since(
    shared: &RuntimeRotationProxyShared,
    timeout: Duration,
    observed_revision: u64,
) -> bool {
    let outcome =
        runtime_profile_inflight_wait_outcome_since(shared, timeout, observed_revision);
    outcome == RuntimeProfileInFlightWaitOutcome::InflightRelease
}
/// Enqueues a long-lived request onto the worker channel, waiting (bounded)
/// for capacity instead of rejecting immediately when the queue is full.
fn enqueue_runtime_proxy_long_lived_request_with_wait(
    sender: &mpsc::SyncSender<tiny_http::Request>,
    request: tiny_http::Request,
    shared: &RuntimeRotationProxyShared,
) -> Result<(), (RuntimeProxyQueueRejection, tiny_http::Request)> {
    let path = request.url().to_string();
    let transport = match is_tiny_http_websocket_upgrade(&request) {
        true => "websocket",
        false => "http",
    };
    // Translate channel errors into queue rejections, handing the request
    // back to the retry loop on failure.
    let try_enqueue = |request: tiny_http::Request| {
        sender.try_send(request).map_err(|err| match err {
            TrySendError::Full(returned_request) => {
                (RuntimeProxyQueueRejection::Full, returned_request)
            }
            TrySendError::Disconnected(returned_request) => {
                (RuntimeProxyQueueRejection::Disconnected, returned_request)
            }
        })
    };
    wait_for_runtime_proxy_queue_capacity(request, shared, transport, &path, try_enqueue)
}
/// Top-level dispatcher for one proxied request.
///
/// Handles, in order: admin endpoints, Anthropic compat endpoints that need
/// no upstream call, admission (with bounded wait and overload marking),
/// websocket upgrades, then HTTP routing by path (Anthropic messages,
/// responses, everything else). Transport failures are logged and dropped
/// (the client connection is already unusable); other errors are mapped to
/// 502 replies in the shape the respective protocol expects.
fn handle_runtime_rotation_proxy_request(
    mut request: tiny_http::Request,
    shared: &RuntimeRotationProxyShared,
) {
    // Admin and compat endpoints bypass admission control entirely.
    if let Some(response) = handle_runtime_proxy_admin_request(&mut request, shared) {
        let _ = request.respond(response);
        return;
    }
    if let Some(response) = handle_runtime_proxy_anthropic_compat_request(&request) {
        let _ = request.respond(response);
        return;
    }
    let request_path = request.url().to_string();
    let request_transport = if is_tiny_http_websocket_upgrade(&request) {
        "websocket"
    } else {
        "http"
    };
    // Held for the rest of this function; dropping it releases the
    // global and lane admission slots.
    let _active_request_guard = match acquire_runtime_proxy_active_request_slot_with_wait(
        shared,
        request_transport,
        &request_path,
    ) {
        Ok(guard) => guard,
        Err(RuntimeProxyAdmissionRejection::GlobalLimit) => {
            mark_runtime_proxy_local_overload(shared, "active_request_limit");
            reject_runtime_proxy_overloaded_request(request, shared, "active_request_limit");
            return;
        }
        Err(RuntimeProxyAdmissionRejection::LaneLimit(lane)) => {
            let reason = format!("lane_limit:{}", runtime_route_kind_label(lane));
            // Only some lanes (responses) escalate to a global overload mark.
            if runtime_proxy_lane_limit_marks_global_overload(lane) {
                mark_runtime_proxy_local_overload(shared, &reason);
            }
            reject_runtime_proxy_overloaded_request(request, shared, &reason);
            return;
        }
    };
    let request_id = runtime_proxy_next_request_id(shared);
    // Websocket upgrades take over the raw connection; no body capture.
    if is_tiny_http_websocket_upgrade(&request) {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket upgrade path={}",
                request.url()
            ),
        );
        proxy_runtime_responses_websocket_request(request_id, request, shared);
        return;
    }
    // Drain the body into an owned snapshot before routing.
    let captured = match capture_runtime_proxy_request(&mut request) {
        Ok(captured) => captured,
        Err(err) => {
            runtime_proxy_log(
                shared,
                format!("request={request_id} transport=http capture_error={err}"),
            );
            let _ = request.respond(build_runtime_proxy_text_response(502, &err.to_string()));
            return;
        }
    };
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http path={} previous_response_id={:?} turn_state={:?} body_bytes={}",
            captured.path_and_query,
            runtime_request_previous_response_id(&captured),
            runtime_request_turn_state(&captured),
            captured.body.len()
        ),
    );
    // Opt-in verbose dump of Anthropic compat traffic for debugging.
    if is_runtime_anthropic_messages_path(&captured.path_and_query)
        && std::env::var_os("PRODEX_DEBUG_ANTHROPIC_COMPAT").is_some()
    {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http anthropic_compat headers={:?} body_snippet={}",
                captured.headers,
                runtime_proxy_body_snippet(&captured.body, 1024),
            ),
        );
    }
    // Anthropic messages: errors are wrapped in Anthropic-shaped JSON.
    if is_runtime_anthropic_messages_path(&captured.path_and_query) {
        let response = match proxy_runtime_anthropic_messages_request(request_id, &captured, shared)
        {
            Ok(response) => response,
            Err(err) => {
                if is_runtime_proxy_transport_failure(&err) {
                    // The client connection is already broken; just log.
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http anthropic_transport_failure={err:#}"
                        ),
                    );
                    return;
                } else {
                    runtime_proxy_log(
                        shared,
                        format!("request={request_id} transport=http anthropic_error={err:#}"),
                    );
                    RuntimeResponsesReply::Buffered(build_runtime_anthropic_error_parts(
                        502,
                        "api_error",
                        &err.to_string(),
                    ))
                }
            }
        };
        respond_runtime_responses_reply(request, response);
        return;
    }
    // Responses API: errors are plain-text 502 replies.
    if is_runtime_responses_path(&captured.path_and_query) {
        let response = match proxy_runtime_responses_request(request_id, &captured, shared) {
            Ok(response) => response,
            Err(err) => {
                if is_runtime_proxy_transport_failure(&err) {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http responses_transport_failure={err:#}"
                        ),
                    );
                    return;
                } else {
                    runtime_proxy_log(
                        shared,
                        format!("request={request_id} transport=http responses_error={err:#}"),
                    );
                    RuntimeResponsesReply::Buffered(build_runtime_proxy_text_response_parts(
                        502,
                        &err.to_string(),
                    ))
                }
            }
        };
        respond_runtime_responses_reply(request, response);
        return;
    }
    // Everything else goes through the standard pass-through path.
    let response = match proxy_runtime_standard_request(request_id, &captured, shared) {
        Ok(response) => response,
        Err(err) => {
            if is_runtime_proxy_transport_failure(&err) {
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http standard_transport_failure={err:#}"
                    ),
                );
                return;
            } else {
                runtime_proxy_log(
                    shared,
                    format!("request={request_id} transport=http standard_error={err:#}"),
                );
                build_runtime_proxy_text_response(502, &err.to_string())
            }
        }
    };
    let _ = request.respond(response);
}
/// Sends a responses-style reply to the client.
///
/// Buffered replies go through the normal tiny_http respond path; streaming
/// replies take over the raw connection writer.
fn respond_runtime_responses_reply(request: tiny_http::Request, response: RuntimeResponsesReply) {
    match response {
        RuntimeResponsesReply::Buffered(parts) => {
            let buffered = build_runtime_proxy_response_from_parts(parts);
            let _ = request.respond(buffered);
        }
        RuntimeResponsesReply::Streaming(streaming) => {
            let _ = write_runtime_streaming_response(request.into_writer(), streaming);
        }
    }
}
/// True when the request carries an `Upgrade: websocket` header
/// (case-insensitive value, per the HTTP upgrade mechanism).
fn is_tiny_http_websocket_upgrade(request: &tiny_http::Request) -> bool {
    for header in request.headers() {
        if header.field.equiv("Upgrade")
            && header.value.as_str().eq_ignore_ascii_case("websocket")
        {
            return true;
        }
    }
    false
}
/// Snapshots method, path, and headers, then drains the request body into an
/// owned `RuntimeProxyRequest`.
///
/// # Errors
/// Fails when reading the request body fails.
fn capture_runtime_proxy_request(request: &mut tiny_http::Request) -> Result<RuntimeProxyRequest> {
    let method = request.method().as_str().to_string();
    let path_and_query = request.url().to_string();
    let headers = runtime_proxy_request_headers(request);
    let mut body = Vec::new();
    request
        .as_reader()
        .read_to_end(&mut body)
        .context("failed to read proxied Codex request body")?;
    Ok(RuntimeProxyRequest {
        method,
        path_and_query,
        headers,
        body,
    })
}
/// Snapshots a websocket upgrade request; upgrades carry no body to drain.
fn capture_runtime_proxy_websocket_request(request: &tiny_http::Request) -> RuntimeProxyRequest {
    let headers = runtime_proxy_request_headers(request);
    RuntimeProxyRequest {
        method: request.method().as_str().to_string(),
        path_and_query: request.url().to_string(),
        headers,
        body: Vec::new(),
    }
}
/// Copies every request header into an owned `(name, value)` pair.
fn runtime_proxy_request_headers(request: &tiny_http::Request) -> Vec<(String, String)> {
    let mut headers = Vec::with_capacity(request.headers().len());
    for header in request.headers() {
        let name = header.field.as_str().as_str().to_string();
        let value = header.value.as_str().to_string();
        headers.push((name, value));
    }
    headers
}
/// An Anthropic `/v1/messages` request after translation into the upstream
/// wire format, plus the knobs needed to translate the reply back.
#[derive(Debug, Clone)]
struct RuntimeAnthropicMessagesRequest {
    // The rewritten request that is actually forwarded upstream.
    translated_request: RuntimeProxyRequest,
    // Model id exactly as the Anthropic client requested it (pre-mapping).
    requested_model: String,
    // Whether the client asked for a streaming (SSE) reply.
    stream: bool,
    // Whether the client enabled extended thinking.
    want_thinking: bool,
    // Which server-side tools the request declared (affects reply handling).
    server_tools: RuntimeAnthropicServerTools,
}
/// Server-side tools declared by an Anthropic request.
#[derive(Debug, Clone, Default)]
struct RuntimeAnthropicServerTools {
    // True when a `web_search_*` tool was present in the request's tools.
    web_search: bool,
}
impl RuntimeAnthropicServerTools {
    /// True when any declared server tool requires the reply to be
    /// translated from a fully buffered response (currently: web search).
    fn needs_buffered_translation(&self) -> bool {
        self.web_search
    }
}
/// Result of translating an Anthropic `tools` array.
#[derive(Debug, Clone, Default)]
struct RuntimeAnthropicTranslatedTools {
    // Tool definitions rewritten into the upstream format.
    tools: Vec<serde_json::Value>,
    // Flags for server-side tools encountered during translation.
    server_tools: RuntimeAnthropicServerTools,
}
/// Case-insensitive lookup of the FIRST header matching `name`.
///
/// The matched value is trimmed; a first match that trims to empty yields
/// `None` even if a later header with the same name has a value. (The
/// explicit `'a` is required: with two input references, elision cannot tie
/// the output lifetime to `headers`.)
fn runtime_proxy_request_header_value<'a>(
    headers: &'a [(String, String)],
    name: &str,
) -> Option<&'a str> {
    for (header_name, value) in headers {
        if !header_name.eq_ignore_ascii_case(name) {
            continue;
        }
        let trimmed = value.trim();
        if trimmed.is_empty() {
            return None;
        }
        return Some(trimmed);
    }
    None
}
/// Claude Code session id, read from the dedicated header first and the
/// legacy `session_id` header as a fallback.
fn runtime_proxy_claude_session_id(request: &RuntimeProxyRequest) -> Option<String> {
    for name in ["x-claude-code-session-id", "session_id"] {
        if let Some(value) = runtime_proxy_request_header_value(&request.headers, name) {
            return Some(value.to_string());
        }
    }
    None
}
/// Maps a Claude-style requested model name onto the concrete upstream model
/// id.
///
/// Precedence: process-level override, known descriptor (by id or by picker
/// name), names that already look like OpenAI/Codex models (passed through),
/// Claude alias families (opus/sonnet/haiku, with "best"/"default" treated
/// as opus), and finally the configured default.
fn runtime_proxy_claude_target_model(requested_model: &str) -> String {
    if let Some(override_model) = runtime_proxy_claude_model_override() {
        return override_model;
    }
    let normalized = requested_model.trim();
    let known = runtime_proxy_responses_model_descriptor(normalized)
        .or_else(|| runtime_proxy_claude_picker_model_descriptor(normalized));
    if let Some(descriptor) = known {
        return descriptor.id.to_string();
    }
    let lower = normalized.to_ascii_lowercase();
    let looks_like_openai = lower.starts_with("gpt-")
        || lower.starts_with("o1")
        || lower.starts_with("o3")
        || lower.starts_with("o4")
        || lower.contains("codex");
    if looks_like_openai {
        return normalized.to_string();
    }
    let alias = if lower == "best" || lower == "default" || lower.contains("opus") {
        Some(RuntimeProxyClaudeModelAlias::Opus)
    } else if lower.contains("sonnet") {
        Some(RuntimeProxyClaudeModelAlias::Sonnet)
    } else if lower.contains("haiku") {
        Some(RuntimeProxyClaudeModelAlias::Haiku)
    } else {
        None
    };
    match alias {
        Some(alias) => runtime_proxy_claude_alias_model(alias).id.to_string(),
        None => DEFAULT_PRODEX_CLAUDE_MODEL.to_string(),
    }
}
/// True when the Anthropic request body enables thinking
/// (`thinking.type` is `"enabled"` or `"adaptive"`).
fn runtime_proxy_anthropic_wants_thinking(value: &serde_json::Value) -> bool {
    let thinking_type = value
        .get("thinking")
        .and_then(|thinking| thinking.get("type"))
        .and_then(serde_json::Value::as_str);
    matches!(thinking_type, Some("enabled" | "adaptive"))
}
/// Maps an Anthropic reasoning-effort label onto the upstream effort scale.
///
/// `low`/`medium`/`high` pass through; `max` upgrades to `xhigh` only on
/// models that support it (falling back to `high`); anything else is `None`.
fn runtime_proxy_translate_anthropic_reasoning_effort(
    effort: &str,
    target_model: &str,
) -> Option<&'static str> {
    let normalized = effort.trim().to_ascii_lowercase();
    if normalized == "max" {
        let supports_xhigh = runtime_proxy_responses_model_supports_xhigh(target_model);
        return Some(if supports_xhigh { "xhigh" } else { "high" });
    }
    ["low", "medium", "high"]
        .into_iter()
        .find(|level| *level == normalized)
}
/// Resolves the upstream reasoning effort for an Anthropic request.
///
/// Precedence: process-level override, then the request's
/// `output_config.effort`, then a bucketing of `thinking.budget_tokens`
/// (<=2048 => low, <=8192 => medium, otherwise high). `None` when no source
/// provides an effort.
fn runtime_proxy_anthropic_reasoning_effort(
    value: &serde_json::Value,
    target_model: &str,
) -> Option<String> {
    if let Some(effort) = runtime_proxy_claude_reasoning_effort_override() {
        return Some(effort);
    }
    let configured = value
        .get("output_config")
        .and_then(|config| config.get("effort"))
        .and_then(serde_json::Value::as_str);
    if let Some(effort) = configured
        .and_then(|effort| runtime_proxy_translate_anthropic_reasoning_effort(effort, target_model))
    {
        return Some(effort.to_string());
    }
    let budget_tokens = value
        .get("thinking")
        .and_then(|thinking| thinking.get("budget_tokens"))
        .and_then(serde_json::Value::as_u64)?;
    let effort = match budget_tokens {
        0..=2_048 => "low",
        2_049..=8_192 => "medium",
        _ => "high",
    };
    Some(effort.to_string())
}
/// Flattens an Anthropic `system` field into one instructions string.
///
/// Accepts either a plain string or an array of `{type: "text"}` blocks
/// (joined with blank lines); returns `Ok(None)` when the field is absent or
/// effectively empty.
///
/// # Errors
/// Fails when `system` is present but neither a string nor an array.
fn runtime_proxy_anthropic_system_instructions(
    value: &serde_json::Value,
) -> Result<Option<String>> {
    let Some(system) = value.get("system") else {
        return Ok(None);
    };
    if let Some(text) = system.as_str() {
        let trimmed = text.trim();
        if trimmed.is_empty() {
            return Ok(None);
        }
        return Ok(Some(trimmed.to_string()));
    }
    let blocks = system
        .as_array()
        .context("Anthropic system content must be a string or an array of text blocks")?;
    let mut parts = Vec::new();
    for block in blocks {
        if block.get("type").and_then(serde_json::Value::as_str) != Some("text") {
            continue;
        }
        if let Some(text) = block.get("text").and_then(serde_json::Value::as_str) {
            parts.push(text);
        }
    }
    let text = parts.join("\n\n");
    if text.trim().is_empty() {
        Ok(None)
    } else {
        Ok(Some(text))
    }
}
/// Inserts an empty `properties` map into object-typed tool schemas that
/// lack one; every other schema is returned unchanged.
fn runtime_proxy_anthropic_normalize_tool_schema(schema: &serde_json::Value) -> serde_json::Value {
    let serde_json::Value::Object(map) = schema else {
        return schema.clone();
    };
    let is_object_type = map.get("type").and_then(serde_json::Value::as_str) == Some("object");
    if !is_object_type || map.contains_key("properties") {
        return schema.clone();
    }
    let mut normalized = map.clone();
    normalized.insert(
        "properties".to_string(),
        serde_json::Value::Object(serde_json::Map::new()),
    );
    serde_json::Value::Object(normalized)
}
/// Translates the Anthropic `tools` array into upstream tool definitions,
/// accumulating server-tool flags (e.g. web search) across all entries.
///
/// # Errors
/// Fails when `tools` is present but not an array, or when any individual
/// tool fails to translate.
fn runtime_proxy_translate_anthropic_tools(
    value: &serde_json::Value,
) -> Result<RuntimeAnthropicTranslatedTools> {
    let mut translated = RuntimeAnthropicTranslatedTools::default();
    let Some(tools) = value.get("tools") else {
        return Ok(translated);
    };
    let tools = tools
        .as_array()
        .context("Anthropic tools must be an array when present")?;
    for tool in tools {
        let (tool_value, tool_state) = runtime_proxy_translate_anthropic_tool(tool)?;
        translated.tools.push(tool_value);
        if tool_state.web_search {
            translated.server_tools.web_search = true;
        }
    }
    Ok(translated)
}
/// Translates one Anthropic tool definition into the upstream format.
///
/// `web_search_*` tools become a server-side `web_search` tool (carrying
/// over `allowed_domains` as filters and an optional `user_location`) and
/// set the web-search server-tool flag. Everything else becomes a
/// `function` tool with name, optional description, and a normalized
/// `input_schema` as parameters.
///
/// # Errors
/// Fails when a non-server tool is missing a non-empty `name`.
fn runtime_proxy_translate_anthropic_tool(
    tool: &serde_json::Value,
) -> Result<(serde_json::Value, RuntimeAnthropicServerTools)> {
    let tool_type = tool
        .get("type")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty());
    // Server-side web search: any `web_search_*` type variant maps to the
    // single upstream `web_search` tool.
    if tool_type.is_some_and(|value| value.starts_with("web_search_")) {
        let mut translated = serde_json::Map::new();
        translated.insert(
            "type".to_string(),
            serde_json::Value::String("web_search".to_string()),
        );
        // Keep only non-empty, trimmed domain strings; an all-empty list is
        // treated the same as no list at all.
        let allowed_domains = tool
            .get("allowed_domains")
            .and_then(serde_json::Value::as_array)
            .map(|domains| {
                domains
                    .iter()
                    .filter_map(|domain| {
                        domain
                            .as_str()
                            .map(str::trim)
                            .filter(|value| !value.is_empty())
                            .map(|value| serde_json::Value::String(value.to_string()))
                    })
                    .collect::<Vec<_>>()
            })
            .filter(|domains| !domains.is_empty());
        if let Some(allowed_domains) = allowed_domains {
            translated.insert(
                "filters".to_string(),
                serde_json::json!({
                    "allowed_domains": allowed_domains,
                }),
            );
        }
        // `user_location` passes through verbatim, but only when it is an
        // object.
        if let Some(user_location) = tool
            .get("user_location")
            .filter(|value| value.is_object())
            .cloned()
        {
            translated.insert("user_location".to_string(), user_location);
        }
        return Ok((
            serde_json::Value::Object(translated),
            RuntimeAnthropicServerTools { web_search: true },
        ));
    }
    // Client-side tool: translate to a function declaration.
    let name = tool
        .get("name")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .context("Anthropic tool definition is missing a non-empty name")?;
    let mut translated = serde_json::Map::new();
    translated.insert(
        "type".to_string(),
        serde_json::Value::String("function".to_string()),
    );
    translated.insert(
        "name".to_string(),
        serde_json::Value::String(name.to_string()),
    );
    if let Some(description) = tool
        .get("description")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        translated.insert(
            "description".to_string(),
            serde_json::Value::String(description.to_string()),
        );
    }
    if let Some(schema) = tool.get("input_schema") {
        translated.insert(
            "parameters".to_string(),
            runtime_proxy_anthropic_normalize_tool_schema(schema),
        );
    }
    Ok((
        serde_json::Value::Object(translated),
        RuntimeAnthropicServerTools::default(),
    ))
}
/// Translates Anthropic `tool_choice` into the upstream equivalent.
///
/// `auto` -> `"auto"`, `any` -> `"required"`, `tool` -> a function selector
/// (or `"required"` when it names the server-side `web_search` tool);
/// `Ok(None)` when no `tool_choice` is present.
///
/// # Errors
/// Fails on a missing/empty type, a `tool` choice without a name, or an
/// unsupported choice type.
fn runtime_proxy_translate_anthropic_tool_choice(
    value: &serde_json::Value,
    server_tools: &RuntimeAnthropicServerTools,
) -> Result<Option<serde_json::Value>> {
    let Some(choice) = value.get("tool_choice") else {
        return Ok(None);
    };
    let choice_type = choice
        .get("type")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .context("Anthropic tool_choice requires a non-empty type")?;
    match choice_type {
        "auto" => Ok(Some(serde_json::Value::String("auto".to_string()))),
        "any" => Ok(Some(serde_json::Value::String("required".to_string()))),
        "tool" => {
            let name = choice
                .get("name")
                .and_then(serde_json::Value::as_str)
                .map(str::trim)
                .filter(|value| !value.is_empty())
                .context("Anthropic tool_choice type=tool requires a non-empty name")?;
            // web_search is a server tool upstream: forcing it maps to
            // "required" rather than a function selector.
            if server_tools.web_search && name == "web_search" {
                return Ok(Some(serde_json::Value::String("required".to_string())));
            }
            Ok(Some(serde_json::json!({
                "type": "function",
                "name": name,
            })))
        }
        other => bail!("Unsupported Anthropic tool_choice type '{other}'"),
    }
}
/// Converts an Anthropic base64 image content block into a Responses-API
/// `input_image` part, or `None` when the block is not a usable base64 image
/// (missing source, non-base64 source, or blank media type/data).
fn runtime_proxy_translate_anthropic_image_part(
    block: &serde_json::Value,
) -> Option<serde_json::Value> {
    let source = block.get("source")?;
    let source_type = source.get("type").and_then(serde_json::Value::as_str)?;
    if source_type != "base64" {
        return None;
    }
    let media_type = source
        .get("media_type")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|media_type| !media_type.is_empty())?;
    let data = source
        .get("data")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|data| !data.is_empty())?;
    // Inline the payload as a data URL so no separate upload is needed.
    Some(serde_json::json!({
        "type": "input_image",
        "image_url": format!("data:{media_type};base64,{data}"),
    }))
}
/// Translates one Anthropic message (role + content) into zero or more
/// Responses-API input items.
///
/// String content maps to a single `{role, content}` item. Array content is
/// split into: a leading role message carrying the text/image payload, then
/// one `function_call` item per `tool_use` block and the items produced for
/// each `tool_result` block, preserving block order.
///
/// # Errors
/// Fails on unsupported roles and on malformed `tool_use`/`tool_result`
/// blocks.
fn runtime_proxy_translate_anthropic_message_content(
    role: &str,
    content: &serde_json::Value,
) -> Result<Vec<serde_json::Value>> {
    // Plain-string content maps 1:1 onto a single message item.
    if let Some(text) = content.as_str() {
        return Ok(vec![serde_json::json!({
            "role": role,
            "content": text,
        })]);
    }
    let blocks = content
        .as_array()
        .context("Anthropic message content must be a string or an array of content blocks")?;
    let mut input_items = Vec::new();
    let has_tool_blocks = blocks.iter().any(|block| {
        block
            .get("type")
            .and_then(serde_json::Value::as_str)
            .is_some_and(|block_type| matches!(block_type, "tool_use" | "tool_result"))
    });
    match role {
        "user" => {
            if let Some(message_content) =
                runtime_proxy_translate_anthropic_user_content_blocks(blocks)
            {
                input_items.push(serde_json::json!({
                    "role": "user",
                    "content": message_content,
                }));
            } else if !has_tool_blocks {
                // Emit an empty user turn only when the message carries no
                // tool traffic; otherwise the tool items below suffice.
                input_items.push(serde_json::json!({
                    "role": "user",
                    "content": "",
                }));
            }
        }
        "assistant" => {
            let text = runtime_proxy_translate_anthropic_text_blocks(blocks);
            // Same idea: drop an empty assistant turn when tool items will
            // represent this message instead.
            if !text.is_empty() || !has_tool_blocks {
                input_items.push(serde_json::json!({
                    "role": "assistant",
                    "content": text,
                }));
            }
        }
        other => bail!("Unsupported Anthropic role '{other}'"),
    }
    // Tool blocks are appended after the role message, in original order.
    for block in blocks {
        match block.get("type").and_then(serde_json::Value::as_str) {
            Some("tool_use") => {
                let name = block
                    .get("name")
                    .and_then(serde_json::Value::as_str)
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .context("Anthropic tool_use block requires a non-empty name")?;
                let call_id = block
                    .get("id")
                    .and_then(serde_json::Value::as_str)
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                    .context("Anthropic tool_use block requires a non-empty id")?;
                // A missing `input` is treated as an empty argument object.
                let arguments = serde_json::to_string(
                    block
                        .get("input")
                        .unwrap_or(&serde_json::Value::Object(serde_json::Map::new())),
                )
                .context("failed to serialize Anthropic tool_use input")?;
                input_items.push(serde_json::json!({
                    "type": "function_call",
                    "call_id": call_id,
                    "name": name,
                    "arguments": arguments,
                }));
            }
            Some("tool_result") => {
                input_items.extend(runtime_proxy_translate_anthropic_tool_result(block)?);
            }
            _ => {}
        }
    }
    Ok(input_items)
}
/// Builds the Responses-API `content` for a user message from Anthropic
/// content blocks.
///
/// Text-only messages collapse into a single newline-joined string. Once an
/// image block appears, the output switches to a parts array: buffered text
/// is flushed as `input_text` parts (preserving order), images become
/// `input_image` parts, and later text is appended as further `input_text`
/// parts. Returns `None` when nothing usable remains.
fn runtime_proxy_translate_anthropic_user_content_blocks(
    blocks: &[serde_json::Value],
) -> Option<serde_json::Value> {
    let mut text_blocks = Vec::new();
    let mut parts = Vec::new();
    let mut saw_image = false;
    for block in blocks {
        match block.get("type").and_then(serde_json::Value::as_str) {
            Some("text") => {
                let text = block
                    .get("text")
                    .and_then(serde_json::Value::as_str)
                    .unwrap_or_default();
                if saw_image {
                    parts.push(serde_json::json!({
                        "type": "input_text",
                        "text": text,
                    }));
                } else {
                    // Buffer text until we know whether the message mixes in
                    // images (which forces the parts representation).
                    text_blocks.push(text.to_string());
                }
            }
            Some("image") => {
                if !saw_image {
                    // First image: flush buffered text into ordered parts.
                    for text in text_blocks.drain(..) {
                        parts.push(serde_json::json!({
                            "type": "input_text",
                            "text": text,
                        }));
                    }
                }
                saw_image = true;
                // Unusable image blocks are silently dropped.
                if let Some(part) = runtime_proxy_translate_anthropic_image_part(block) {
                    parts.push(part);
                }
            }
            _ => {}
        }
    }
    if saw_image {
        (!parts.is_empty()).then_some(serde_json::Value::Array(parts))
    } else {
        let text = text_blocks.join("\n");
        (!text.is_empty()).then_some(serde_json::Value::String(text))
    }
}
/// Concatenates the `text` payloads of every `"text"` content block,
/// newline-separated; blocks of other types (or without a string `text`)
/// are skipped.
fn runtime_proxy_translate_anthropic_text_blocks(blocks: &[serde_json::Value]) -> String {
    let mut texts: Vec<&str> = Vec::new();
    for block in blocks {
        if block.get("type").and_then(serde_json::Value::as_str) != Some("text") {
            continue;
        }
        if let Some(text) = block.get("text").and_then(serde_json::Value::as_str) {
            texts.push(text);
        }
    }
    texts.join("\n")
}
/// Translates an Anthropic `tool_result` block into Responses-API input
/// items: one `function_call_output` carrying the textual output, plus an
/// optional trailing user message holding any image parts from the result.
///
/// # Errors
/// Fails when `tool_use_id` is missing/blank or when `content` is neither a
/// string nor an array of content blocks.
fn runtime_proxy_translate_anthropic_tool_result(
    block: &serde_json::Value,
) -> Result<Vec<serde_json::Value>> {
    let call_id = block
        .get("tool_use_id")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .context("Anthropic tool_result block requires a non-empty tool_use_id")?;
    let mut output_text = String::new();
    let mut image_parts = Vec::new();
    match block.get("content") {
        Some(serde_json::Value::String(text)) => {
            output_text = text.clone();
        }
        Some(serde_json::Value::Array(items)) => {
            // Join the text items; base64 images are collected separately
            // because the function output slot only carries text.
            output_text = items
                .iter()
                .filter_map(|item| {
                    (item.get("type").and_then(serde_json::Value::as_str) == Some("text"))
                        .then(|| item.get("text").and_then(serde_json::Value::as_str))
                        .flatten()
                })
                .collect::<Vec<_>>()
                .join("\n");
            image_parts.extend(
                items
                    .iter()
                    .filter_map(runtime_proxy_translate_anthropic_image_part),
            );
        }
        Some(_) => {
            bail!("Anthropic tool_result content must be a string or an array of content blocks")
        }
        None => {}
    }
    // Error results are surfaced in-band by prefixing the output text.
    if block
        .get("is_error")
        .and_then(serde_json::Value::as_bool)
        .unwrap_or(false)
    {
        output_text = if output_text.is_empty() {
            "Error".to_string()
        } else {
            format!("Error: {output_text}")
        };
    }
    let mut translated = vec![serde_json::json!({
        "type": "function_call_output",
        "call_id": call_id,
        "output": output_text,
    })];
    // Images ride along as a follow-up user message.
    if !image_parts.is_empty() {
        translated.push(serde_json::json!({
            "role": "user",
            "content": image_parts,
        }));
    }
    Ok(translated)
}
/// Translates an Anthropic `/v1/messages` request into an OpenAI
/// Responses-API request targeting the configured Claude-compatible model.
///
/// Builds the translated body (model, input items, streaming flag, optional
/// instructions/tools/tool_choice/reasoning), carries over a minimal header
/// set (content type, user agent, session id, internal origin marker), and
/// records the metadata needed to translate the response back.
///
/// Fixes over the previous version: the `stream` flag and the
/// wants-thinking check were each evaluated twice against the parsed body,
/// and the tools array was cloned into the translated body; both are now
/// computed once and the tools are moved instead of cloned.
///
/// # Errors
/// Fails when the body is not valid JSON, when required fields (`model`,
/// `messages`) are missing/empty, or when any message or tool definition is
/// malformed.
fn translate_runtime_anthropic_messages_request(
    request: &RuntimeProxyRequest,
) -> Result<RuntimeAnthropicMessagesRequest> {
    let value = serde_json::from_slice::<serde_json::Value>(&request.body)
        .context("failed to parse Anthropic request body as JSON")?;
    let requested_model = value
        .get("model")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .context("Anthropic request requires a non-empty model")?
        .to_string();
    let messages = value
        .get("messages")
        .and_then(serde_json::Value::as_array)
        .context("Anthropic request requires a messages array")?;
    if messages.is_empty() {
        bail!("Anthropic request requires at least one message");
    }
    let mut input = Vec::new();
    for message in messages {
        let role = message
            .get("role")
            .and_then(serde_json::Value::as_str)
            .map(str::trim)
            .filter(|value| !value.is_empty())
            .context("Anthropic message is missing a non-empty role")?;
        let content = message
            .get("content")
            .context("Anthropic message is missing content")?;
        input.extend(runtime_proxy_translate_anthropic_message_content(
            role, content,
        )?);
    }
    if input.is_empty() {
        // Never send an empty input array; fall back to a blank user turn.
        input.push(serde_json::json!({
            "role": "user",
            "content": "",
        }));
    }
    // Extracted once and reused both in the translated body and in the
    // returned metadata (previously parsed twice each).
    let stream = value
        .get("stream")
        .and_then(serde_json::Value::as_bool)
        .unwrap_or(false);
    let want_thinking = runtime_proxy_anthropic_wants_thinking(&value);
    let mut translated_body = serde_json::Map::new();
    let target_model = runtime_proxy_claude_target_model(&requested_model);
    translated_body.insert(
        "model".to_string(),
        serde_json::Value::String(target_model.clone()),
    );
    translated_body.insert("input".to_string(), serde_json::Value::Array(input));
    translated_body.insert("stream".to_string(), serde_json::Value::Bool(stream));
    // Never persist proxied conversations upstream.
    translated_body.insert("store".to_string(), serde_json::Value::Bool(false));
    if let Some(instructions) = runtime_proxy_anthropic_system_instructions(&value)? {
        translated_body.insert(
            "instructions".to_string(),
            serde_json::Value::String(instructions),
        );
    }
    let mut translated_tools = runtime_proxy_translate_anthropic_tools(&value)?;
    if !translated_tools.tools.is_empty() {
        // `mem::take` moves the tool list into the body without the clone
        // the previous version paid for; only `server_tools` is used below.
        translated_body.insert(
            "tools".to_string(),
            serde_json::Value::Array(std::mem::take(&mut translated_tools.tools)),
        );
    }
    if translated_tools.server_tools.web_search {
        translated_body.insert(
            "include".to_string(),
            serde_json::json!(["web_search_call.action.sources"]),
        );
    }
    if let Some(tool_choice) =
        runtime_proxy_translate_anthropic_tool_choice(&value, &translated_tools.server_tools)?
    {
        translated_body.insert("tool_choice".to_string(), tool_choice);
    }
    if let Some(effort) = runtime_proxy_anthropic_reasoning_effort(&value, &target_model) {
        translated_body.insert(
            "reasoning".to_string(),
            serde_json::json!({
                "summary": "auto",
                "effort": effort,
            }),
        );
    } else if want_thinking {
        translated_body.insert(
            "reasoning".to_string(),
            serde_json::json!({
                "summary": "auto",
            }),
        );
    }
    let mut translated_headers = vec![("Content-Type".to_string(), "application/json".to_string())];
    if let Some(user_agent) = runtime_proxy_request_header_value(&request.headers, "User-Agent") {
        translated_headers.push(("User-Agent".to_string(), user_agent.to_string()));
    }
    if let Some(session_id) = runtime_proxy_claude_session_id(request) {
        translated_headers.push(("session_id".to_string(), session_id));
    }
    translated_headers.push((
        PRODEX_INTERNAL_REQUEST_ORIGIN_HEADER.to_string(),
        PRODEX_INTERNAL_REQUEST_ORIGIN_ANTHROPIC_MESSAGES.to_string(),
    ));
    Ok(RuntimeAnthropicMessagesRequest {
        translated_request: RuntimeProxyRequest {
            method: request.method.clone(),
            path_and_query: format!("{RUNTIME_PROXY_OPENAI_UPSTREAM_PATH}/responses"),
            headers: translated_headers,
            body: serde_json::to_vec(&serde_json::Value::Object(translated_body))
                .context("failed to serialize translated Anthropic request")?,
        },
        requested_model,
        stream,
        want_thinking,
        server_tools: translated_tools.server_tools,
    })
}
/// Convenience wrapper: pulls `previous_response_id` out of the request's
/// JSON body, if present and non-blank.
fn runtime_request_previous_response_id(request: &RuntimeProxyRequest) -> Option<String> {
    runtime_request_previous_response_id_from_bytes(request.body.as_slice())
}
/// Routing-relevant metadata parsed from a websocket JSON request frame.
#[derive(Clone, Default)]
struct RuntimeWebsocketRequestMetadata {
    // Trimmed, non-empty `previous_response_id`, when present.
    previous_response_id: Option<String>,
    // Session id from `session_id` or `client_metadata.session_id`.
    session_id: Option<String>,
    // True when the request carries `*_call_output` input items with call
    // ids, i.e. it must be routed to the profile that produced the previous
    // response.
    requires_previous_response_affinity: bool,
}
/// Parses routing metadata out of a websocket request frame; unparseable
/// frames yield the default (all-empty) metadata.
fn parse_runtime_websocket_request_metadata(request_text: &str) -> RuntimeWebsocketRequestMetadata {
    match serde_json::from_str::<serde_json::Value>(request_text) {
        Ok(value) => RuntimeWebsocketRequestMetadata {
            previous_response_id: runtime_request_previous_response_id_from_value(&value),
            session_id: runtime_request_session_id_from_value(&value),
            requires_previous_response_affinity:
                runtime_request_value_requires_previous_response_affinity(&value),
        },
        Err(_) => RuntimeWebsocketRequestMetadata::default(),
    }
}
fn runtime_request_previous_response_id_from_bytes(body: &[u8]) -> Option<String> {
if body.is_empty() {
return None;
}
let value = serde_json::from_slice::<serde_json::Value>(body).ok()?;
runtime_request_previous_response_id_from_value(&value)
}
/// Test-only helper: extracts `previous_response_id` from a JSON request
/// string, mirroring `runtime_request_previous_response_id_from_bytes` for
/// callers that already hold text.
// Fix: the `#[cfg(test)]` attribute was duplicated three times; a single
// occurrence is sufficient (repeating the identical cfg predicate is
// redundant noise).
#[cfg(test)]
fn runtime_request_previous_response_id_from_text(request_text: &str) -> Option<String> {
    let value = serde_json::from_str::<serde_json::Value>(request_text).ok()?;
    runtime_request_previous_response_id_from_value(&value)
}
fn runtime_request_previous_response_id_from_value(value: &serde_json::Value) -> Option<String> {
value
.get("previous_response_id")
.and_then(serde_json::Value::as_str)
.map(str::trim)
.filter(|value| !value.is_empty())
.map(str::to_string)
}
/// Returns a copy of the request with `previous_response_id` stripped from
/// the JSON body, or `None` when there was no real (non-blank string) id to
/// strip or the body cannot be (re)serialized.
fn runtime_request_without_previous_response_id(
    request: &RuntimeProxyRequest,
) -> Option<RuntimeProxyRequest> {
    let mut value = serde_json::from_slice::<serde_json::Value>(&request.body).ok()?;
    let removed = value.as_object_mut()?.remove("previous_response_id")?;
    // Only rebuild the request when the removed field was a meaningful id.
    let had_id = removed
        .as_str()
        .map(str::trim)
        .is_some_and(|id| !id.is_empty());
    if !had_id {
        return None;
    }
    let body = serde_json::to_vec(&value).ok()?;
    Some(RuntimeProxyRequest {
        method: request.method.clone(),
        path_and_query: request.path_and_query.clone(),
        headers: request.headers.clone(),
        body,
    })
}
/// Returns a copy of the request with every `x-codex-turn-state` header
/// removed (case-insensitive match); all other fields are cloned as-is.
fn runtime_request_without_turn_state_header(request: &RuntimeProxyRequest) -> RuntimeProxyRequest {
    let mut headers = request.headers.clone();
    headers.retain(|(name, _)| !name.eq_ignore_ascii_case("x-codex-turn-state"));
    RuntimeProxyRequest {
        method: request.method.clone(),
        path_and_query: request.path_and_query.clone(),
        headers,
        body: request.body.clone(),
    }
}
/// Strips both continuation anchors — the body's `previous_response_id` and
/// the turn-state header — producing a request that can be routed to any
/// profile. `None` when there was no previous-response id to strip.
fn runtime_request_without_previous_response_affinity(
    request: &RuntimeProxyRequest,
) -> Option<RuntimeProxyRequest> {
    runtime_request_without_previous_response_id(request)
        .map(|stripped| runtime_request_without_turn_state_header(&stripped))
}
/// True when the request's `input` array contains a `*_call_output` item
/// carrying a non-blank `call_id` — such tool outputs must be delivered to
/// the profile that issued the original call.
fn runtime_request_value_requires_previous_response_affinity(value: &serde_json::Value) -> bool {
    let Some(items) = value.get("input").and_then(serde_json::Value::as_array) else {
        return false;
    };
    items
        .iter()
        .filter_map(serde_json::Value::as_object)
        .any(|object| {
            let is_call_output = object
                .get("type")
                .and_then(serde_json::Value::as_str)
                .unwrap_or_default()
                .ends_with("_call_output");
            let has_call_id = object
                .get("call_id")
                .and_then(serde_json::Value::as_str)
                .is_some_and(|call_id| !call_id.trim().is_empty());
            is_call_output && has_call_id
        })
}
/// Byte-level variant of the affinity check: parses the request body and
/// delegates; unparseable bodies count as "no affinity required".
fn runtime_request_requires_previous_response_affinity(request: &RuntimeProxyRequest) -> bool {
    serde_json::from_slice::<serde_json::Value>(&request.body)
        .ok()
        .is_some_and(|value| runtime_request_value_requires_previous_response_affinity(&value))
}
/// Text variant of the id-stripping helper: removes `previous_response_id`
/// from a JSON request string and reserializes it. `None` when the field is
/// absent, not a non-blank string, or the text cannot be (re)serialized.
fn runtime_request_text_without_previous_response_id(request_text: &str) -> Option<String> {
    let mut value = serde_json::from_str::<serde_json::Value>(request_text).ok()?;
    let removed = value.as_object_mut()?.remove("previous_response_id")?;
    match removed.as_str().map(str::trim) {
        Some(id) if !id.is_empty() => serde_json::to_string(&value).ok(),
        _ => None,
    }
}
/// Returns the first non-blank `x-codex-turn-state` header value (trimmed),
/// matching the header name case-insensitively.
fn runtime_request_turn_state(request: &RuntimeProxyRequest) -> Option<String> {
    for (name, value) in &request.headers {
        if !name.eq_ignore_ascii_case("x-codex-turn-state") {
            continue;
        }
        let trimmed = value.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }
    None
}
/// Reads a trimmed, non-empty session id from a parsed request value,
/// checking top-level `session_id` first and `client_metadata.session_id`
/// second.
fn runtime_request_session_id_from_value(value: &serde_json::Value) -> Option<String> {
    let direct = value.get("session_id").and_then(serde_json::Value::as_str);
    let nested = || value.get("client_metadata")?.get("session_id")?.as_str();
    let raw = direct.or_else(nested)?;
    let trimmed = raw.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
/// Extracts a session id from the JSON payload of the first
/// `x-codex-turn-metadata` header, when that header parses as JSON.
fn runtime_request_session_id_from_turn_metadata(request: &RuntimeProxyRequest) -> Option<String> {
    let metadata_header = request.headers.iter().find_map(|(name, value)| {
        name.eq_ignore_ascii_case("x-codex-turn-metadata")
            .then(|| value.as_str())
    })?;
    let metadata = serde_json::from_str::<serde_json::Value>(metadata_header).ok()?;
    runtime_request_session_id_from_value(&metadata)
}
/// Resolves the request's session id, in priority order: a non-blank
/// `session_id` header, then the turn-metadata header, then the JSON body.
fn runtime_request_session_id(request: &RuntimeProxyRequest) -> Option<String> {
    // 1) explicit `session_id` header (first non-blank value wins).
    let from_header = request.headers.iter().find_map(|(name, value)| {
        if !name.eq_ignore_ascii_case("session_id") {
            return None;
        }
        let trimmed = value.trim();
        (!trimmed.is_empty()).then(|| trimmed.to_string())
    });
    if let Some(session_id) = from_header {
        return Some(session_id);
    }
    // 2) `x-codex-turn-metadata` header payload.
    if let Some(session_id) = runtime_request_session_id_from_turn_metadata(request) {
        return Some(session_id);
    }
    // 3) the JSON body itself (only parsed when needed).
    let value = serde_json::from_slice::<serde_json::Value>(&request.body).ok()?;
    runtime_request_session_id_from_value(&value)
}
/// Whether a binding touched at `bound_at` (seconds) is old enough at `now`
/// that refreshing its timestamp should be persisted to disk.
fn runtime_binding_touch_should_persist(bound_at: i64, now: i64) -> bool {
    // These timestamps are stored with second precision. Require strictly more
    // than the interval so a boundary-crossing lookup does not persist nearly a
    // second early.
    now.saturating_sub(bound_at) > RUNTIME_BINDING_TOUCH_PERSIST_INTERVAL_SECONDS
}
/// Which continuation-binding namespace a status entry belongs to; selects
/// one of the three maps inside `RuntimeContinuationStatuses`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeContinuationBindingKind {
    // Keyed by `previous_response_id`.
    Response,
    // Keyed by the `x-codex-turn-state` header value.
    TurnState,
    // Keyed by session id.
    SessionId,
}
fn runtime_continuation_status_map(
statuses: &RuntimeContinuationStatuses,
kind: RuntimeContinuationBindingKind,
) -> &BTreeMap<String, RuntimeContinuationBindingStatus> {
match kind {
RuntimeContinuationBindingKind::Response => &statuses.response,
RuntimeContinuationBindingKind::TurnState => &statuses.turn_state,
RuntimeContinuationBindingKind::SessionId => &statuses.session_id,
}
}
fn runtime_continuation_status_map_mut(
statuses: &mut RuntimeContinuationStatuses,
kind: RuntimeContinuationBindingKind,
) -> &mut BTreeMap<String, RuntimeContinuationBindingStatus> {
match kind {
RuntimeContinuationBindingKind::Response => &mut statuses.response,
RuntimeContinuationBindingKind::TurnState => &mut statuses.turn_state,
RuntimeContinuationBindingKind::SessionId => &mut statuses.session_id,
}
}
/// Computes a monotonically increasing event timestamp for a status: `now`,
/// unless the last recorded event is at/after `now`, in which case one past
/// that last event (so successive events never share a timestamp).
fn runtime_continuation_next_event_at(status: &RuntimeContinuationBindingStatus, now: i64) -> i64 {
    match runtime_continuation_status_last_event_at(status) {
        Some(last) if last >= now => last.saturating_add(1),
        _ => now,
    }
}
/// Applies a "touch" (successful reuse observation) to a continuation
/// status. Suspect bindings recover to `Warm` once the grace window has
/// elapsed since the last not-found observation; any non-dead binding earns
/// a capped confidence bonus. Returns true when any field changed.
fn runtime_continuation_status_touches(
    status: &mut RuntimeContinuationBindingStatus,
    now: i64,
) -> bool {
    let before = status.clone();
    let event_at = runtime_continuation_next_event_at(&before, now);
    status.last_touched_at = Some(event_at);
    if status.state == RuntimeContinuationBindingLifecycle::Suspect {
        let grace_elapsed = status.last_not_found_at.is_some_and(|last| {
            event_at.saturating_sub(last) >= RUNTIME_CONTINUATION_SUSPECT_GRACE_SECONDS
        });
        if grace_elapsed {
            // The failure has aged out: rehabilitate the binding.
            status.state = RuntimeContinuationBindingLifecycle::Warm;
            status.not_found_streak = 0;
            status.last_not_found_at = None;
        }
    }
    // Every touch of a live (non-dead) binding earns a capped bonus.
    if status.state != RuntimeContinuationBindingLifecycle::Dead {
        status.confidence = status
            .confidence
            .saturating_add(RUNTIME_CONTINUATION_TOUCH_CONFIDENCE_BONUS)
            .min(RUNTIME_CONTINUATION_CONFIDENCE_MAX);
    }
    *status != before
}
/// Touches the status entry for `key` in the map selected by `kind`,
/// creating a default entry if none exists. Returns true when the entry
/// changed (caller may want to persist).
fn runtime_mark_continuation_status_touched(
    statuses: &mut RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    let map = runtime_continuation_status_map_mut(statuses, kind);
    let status = map.entry(key.to_string()).or_default();
    runtime_continuation_status_touches(status, now)
}
/// Whether a binding's verified status should be (re)written.
///
/// True when no status exists yet, the status is not currently `Verified`,
/// the verification came from a different route, or the last verification is
/// old enough to fall under the touch-persist cadence.
fn runtime_continuation_status_should_refresh_verified(
    statuses: &RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
    verified_route: Option<RuntimeRouteKind>,
) -> bool {
    let Some(status) = runtime_continuation_status_map(statuses, kind).get(key) else {
        return true;
    };
    if status.state != RuntimeContinuationBindingLifecycle::Verified {
        return true;
    }
    // A verification observed via a different route supersedes the old one.
    let verified_route_label = verified_route.map(runtime_route_kind_label);
    if status.last_verified_route.as_deref() != verified_route_label {
        return true;
    }
    status
        .last_verified_at
        .is_none_or(|last_verified_at| runtime_binding_touch_should_persist(last_verified_at, now))
}
/// Whether a touch on this continuation status is worth writing to disk.
///
/// Always persists for unknown keys and for suspect statuses whose grace
/// window has elapsed (the touch will rehabilitate them); otherwise follows
/// the regular touch-persist cadence.
fn runtime_continuation_status_should_persist_touch(
    statuses: &RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    let Some(status) = runtime_continuation_status_map(statuses, kind).get(key) else {
        return true;
    };
    // Grace elapsed on a suspect binding: the touch flips it back to Warm,
    // which must be persisted regardless of cadence.
    if status.state == RuntimeContinuationBindingLifecycle::Suspect
        && status.last_not_found_at.is_some_and(|last_not_found_at| {
            now.saturating_sub(last_not_found_at) >= RUNTIME_CONTINUATION_SUSPECT_GRACE_SECONDS
        })
    {
        return true;
    }
    status
        .last_touched_at
        .is_none_or(|last_touched_at| runtime_binding_touch_should_persist(last_touched_at, now))
}
/// Marks a continuation binding as `Verified`: clears not-found tracking,
/// resets failures, bumps success count and confidence (capped), and records
/// the route that performed the verification. Returns true when any field
/// actually changed (i.e. the caller should persist).
fn runtime_mark_continuation_status_verified(
    statuses: &mut RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
    verified_route: Option<RuntimeRouteKind>,
) -> bool {
    let status = runtime_continuation_status_map_mut(statuses, kind)
        .entry(key.to_string())
        .or_default();
    let previous = status.clone();
    // Per-binding monotonic event clock: never reuse the last event time.
    let event_at = runtime_continuation_next_event_at(&previous, now);
    status.state = RuntimeContinuationBindingLifecycle::Verified;
    status.last_touched_at = Some(event_at);
    status.last_verified_at = Some(event_at);
    status.last_verified_route =
        verified_route.map(|route_kind| runtime_route_kind_label(route_kind).to_string());
    status.last_not_found_at = None;
    status.not_found_streak = 0;
    status.success_count = status.success_count.saturating_add(1);
    status.failure_count = 0;
    status.confidence = status
        .confidence
        .saturating_add(RUNTIME_CONTINUATION_VERIFIED_CONFIDENCE_BONUS)
        .min(RUNTIME_CONTINUATION_CONFIDENCE_MAX);
    *status != previous
}
/// Records a not-found observation against a continuation binding.
///
/// Increments the not-found streak and failure count, applies a confidence
/// penalty, and transitions the binding to `Suspect` — or to `Dead` once the
/// streak limit is reached or the penalty exhausts previously positive
/// confidence. Returns true when any field changed.
fn runtime_mark_continuation_status_suspect(
    statuses: &mut RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    let status = runtime_continuation_status_map_mut(statuses, kind)
        .entry(key.to_string())
        .or_default();
    let previous = status.clone();
    let event_at = runtime_continuation_next_event_at(&previous, now);
    status.not_found_streak = status.not_found_streak.saturating_add(1);
    status.last_touched_at = Some(event_at);
    status.last_not_found_at = Some(event_at);
    status.failure_count = status.failure_count.saturating_add(1);
    let previous_confidence = status.confidence;
    status.confidence = status
        .confidence
        .saturating_sub(RUNTIME_CONTINUATION_SUSPECT_CONFIDENCE_PENALTY);
    // NOTE(review): a binding already at zero confidence is floored at 1,
    // so its death is decided by the streak limit rather than by the
    // confidence-exhaustion check below — confirm this is intentional.
    if previous_confidence == 0 {
        status.confidence = 1;
    }
    status.state = if status.not_found_streak >= RUNTIME_CONTINUATION_SUSPECT_NOT_FOUND_STREAK_LIMIT
        || (previous_confidence > 0 && status.confidence == 0)
    {
        RuntimeContinuationBindingLifecycle::Dead
    } else {
        RuntimeContinuationBindingLifecycle::Suspect
    };
    *status != previous
}
/// Forces a continuation binding to `Dead`: zeroes confidence, records the
/// not-found event, pins the streak at (at least) the limit, and bumps the
/// failure count. Returns true when any field changed.
fn runtime_mark_continuation_status_dead(
    statuses: &mut RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    let status = runtime_continuation_status_map_mut(statuses, kind)
        .entry(key.to_string())
        .or_default();
    let previous = status.clone();
    let event_at = runtime_continuation_next_event_at(&previous, now);
    status.state = RuntimeContinuationBindingLifecycle::Dead;
    status.confidence = 0;
    status.last_touched_at = Some(event_at);
    status.last_not_found_at = Some(event_at);
    // Keep the streak consistent with the dead state even when the caller
    // skips the suspect phase.
    status.not_found_streak = status
        .not_found_streak
        .max(RUNTIME_CONTINUATION_SUSPECT_NOT_FOUND_STREAK_LIMIT);
    status.failure_count = status.failure_count.saturating_add(1);
    *status != previous
}
/// True when the binding is currently `Suspect` (and not terminal) and its
/// last not-found observation is still inside the grace window.
fn runtime_continuation_status_recently_suspect(
    statuses: &RuntimeContinuationStatuses,
    kind: RuntimeContinuationBindingKind,
    key: &str,
    now: i64,
) -> bool {
    let Some(status) = runtime_continuation_status_map(statuses, kind).get(key) else {
        return false;
    };
    if status.state != RuntimeContinuationBindingLifecycle::Suspect
        || runtime_continuation_status_is_terminal(status)
    {
        return false;
    }
    status
        .last_not_found_at
        .is_some_and(|last| now.saturating_sub(last) < RUNTIME_CONTINUATION_SUSPECT_GRACE_SECONDS)
}
fn runtime_continuation_status_label(status: &RuntimeContinuationBindingStatus) -> &'static str {
match status.state {
RuntimeContinuationBindingLifecycle::Warm => "warm",
RuntimeContinuationBindingLifecycle::Verified => "verified",
RuntimeContinuationBindingLifecycle::Suspect => "suspect",
RuntimeContinuationBindingLifecycle::Dead => "dead",
}
}
/// Builds the binding-map key for a compact-route session lineage.
fn runtime_compact_session_lineage_key(session_id: &str) -> String {
    format!("{}{}", RUNTIME_COMPACT_SESSION_LINEAGE_PREFIX, session_id)
}
/// Builds the binding-map key for a compact-route turn-state lineage.
fn runtime_compact_turn_state_lineage_key(turn_state: &str) -> String {
    format!("{}{}", RUNTIME_COMPACT_TURN_STATE_LINEAGE_PREFIX, turn_state)
}
/// True when `key` belongs to the compact session-lineage namespace.
fn runtime_is_compact_session_lineage_key(key: &str) -> bool {
    key.strip_prefix(RUNTIME_COMPACT_SESSION_LINEAGE_PREFIX)
        .is_some()
}
/// Returns a copy of the session-id bindings with internal compact-lineage
/// entries filtered out, leaving only externally-originated session ids.
fn runtime_external_session_id_bindings(
    bindings: &BTreeMap<String, ResponseProfileBinding>,
) -> BTreeMap<String, ResponseProfileBinding> {
    let mut external = bindings.clone();
    external.retain(|key, _| !runtime_is_compact_session_lineage_key(key));
    external
}
/// Looks up — and touch-refreshes — the profile bound to a compact-route
/// lineage `key`, returning the bound profile name when the binding is still
/// usable.
///
/// `session_binding` selects the session-id tables (vs. the turn-state ones)
/// for both the binding map and the continuation-status namespace. Returns
/// `None` when the continuation status is stale, recently suspect, or
/// terminal, or when no binding to a still-configured profile exists.
/// Expects the caller to already hold the runtime lock (`runtime` is the
/// locked state).
fn runtime_touch_compact_lineage_binding(
    shared: &RuntimeRotationProxyShared,
    runtime: &mut RuntimeRotationState,
    key: &str,
    reason: &str,
    session_binding: bool,
) -> Option<String> {
    let now = Local::now().timestamp();
    let status_kind = if session_binding {
        RuntimeContinuationBindingKind::SessionId
    } else {
        RuntimeContinuationBindingKind::TurnState
    };
    // Stale "verified" statuses are aged out here; the downgrade is saved
    // immediately so it is not lost if the process exits.
    if runtime_age_stale_verified_continuation_status(
        &mut runtime.continuation_statuses,
        status_kind,
        key,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity={} profile=- reason=continuation_stale key={key}",
                if session_binding {
                    "compact_session"
                } else {
                    "compact_turn_state"
                }
            ),
        );
        schedule_runtime_binding_touch_save(shared, runtime, &format!("continuation_stale:{key}"));
        return None;
    }
    // Recently-suspect bindings are skipped without persisting anything.
    if runtime_continuation_status_recently_suspect(
        &runtime.continuation_statuses,
        status_kind,
        key,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity={} profile=- reason=continuation_recent_suspect key={key}",
                if session_binding {
                    "compact_session"
                } else {
                    "compact_turn_state"
                }
            ),
        );
        return None;
    }
    // Terminal (dead) bindings are never reused.
    if runtime_continuation_status_map(&runtime.continuation_statuses, status_kind)
        .get(key)
        .is_some_and(runtime_continuation_status_is_terminal)
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity={} profile=- reason=continuation_dead key={key}",
                if session_binding {
                    "compact_session"
                } else {
                    "compact_turn_state"
                }
            ),
        );
        return None;
    }
    let bindings = if session_binding {
        &mut runtime.session_id_bindings
    } else {
        &mut runtime.turn_state_bindings
    };
    // Only honor bindings that still point at a configured profile.
    let profile_name = bindings
        .get(key)
        .map(|binding| binding.profile_name.clone())
        .filter(|profile_name| runtime.state.profiles.contains_key(profile_name));
    let mut persist_touch = false;
    if let Some(profile_name) = profile_name.as_deref()
        && let Some(binding) = bindings.get_mut(key)
        && binding.profile_name == profile_name
    {
        // Refresh the binding's timestamp, but only schedule a save when the
        // previous touch is old enough (rate-limits disk writes).
        if runtime_binding_touch_should_persist(binding.bound_at, now) {
            persist_touch = true;
        }
        if binding.bound_at < now {
            binding.bound_at = now;
        }
        persist_touch = runtime_continuation_status_should_persist_touch(
            &runtime.continuation_statuses,
            status_kind,
            key,
            now,
        ) || persist_touch;
        let _ = runtime_mark_continuation_status_touched(
            &mut runtime.continuation_statuses,
            status_kind,
            key,
            now,
        );
    }
    if persist_touch {
        schedule_runtime_binding_touch_save(shared, runtime, reason);
    }
    profile_name
}
/// Resolves the profile bound to a compact-route follow-up, preferring the
/// turn-state lineage over the session-id lineage. Returns the profile name
/// together with a static label naming which affinity matched.
///
/// # Errors
/// Fails only when the shared runtime mutex is poisoned.
fn runtime_compact_followup_bound_profile(
    shared: &RuntimeRotationProxyShared,
    turn_state: Option<&str>,
    session_id: Option<&str>,
) -> Result<Option<(String, &'static str)>> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    // Turn state is the stronger (more specific) lineage; try it first.
    if let Some(turn_state) = turn_state.map(str::trim).filter(|value| !value.is_empty()) {
        let key = runtime_compact_turn_state_lineage_key(turn_state);
        if let Some(profile_name) = runtime_touch_compact_lineage_binding(
            shared,
            &mut runtime,
            &key,
            &format!("compact_turn_state_touch:{turn_state}"),
            false,
        ) {
            return Ok(Some((profile_name, "turn_state")));
        }
    }
    // Fall back to the session-id lineage.
    if let Some(session_id) = session_id.map(str::trim).filter(|value| !value.is_empty()) {
        let key = runtime_compact_session_lineage_key(session_id);
        if let Some(profile_name) = runtime_touch_compact_lineage_binding(
            shared,
            &mut runtime,
            &key,
            &format!("compact_session_touch:{session_id}"),
            true,
        ) {
            return Ok(Some((profile_name, "session_id")));
        }
    }
    Ok(None)
}
/// Builds the profile-health map key under which "previous response not
/// found" failures are recorded for a (response, profile, route) triple.
fn runtime_previous_response_negative_cache_key(
    previous_response_id: &str,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__previous_response_not_found__:{route}:{previous_response_id}:{profile_name}")
}
/// Returns the decayed failure count recorded in the negative cache for this
/// (response, profile, route) triple, using the cache-specific TTL.
fn runtime_previous_response_negative_cache_failures(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    previous_response_id: &str,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> u32 {
    let key = runtime_previous_response_negative_cache_key(
        previous_response_id,
        profile_name,
        route_kind,
    );
    runtime_profile_effective_score_from_map(
        profile_health,
        &key,
        now,
        RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_SECONDS,
    )
}
/// True when the negative cache still records at least one live failure for
/// this (response, profile, route) triple.
fn runtime_previous_response_negative_cache_active(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    previous_response_id: &str,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> bool {
    let failures = runtime_previous_response_negative_cache_failures(
        profile_health,
        previous_response_id,
        profile_name,
        route_kind,
        now,
    );
    failures > 0
}
/// Removes the negative-cache entries for this (response, profile) pair
/// across every route kind. Returns true when at least one entry existed.
fn clear_runtime_previous_response_negative_cache(
    runtime: &mut RuntimeRotationState,
    previous_response_id: &str,
    profile_name: &str,
) -> bool {
    let routes = [
        RuntimeRouteKind::Responses,
        RuntimeRouteKind::Websocket,
        RuntimeRouteKind::Compact,
        RuntimeRouteKind::Standard,
    ];
    let mut changed = false;
    for route_kind in routes {
        let key = runtime_previous_response_negative_cache_key(
            previous_response_id,
            profile_name,
            route_kind,
        );
        if runtime.profile_health.remove(&key).is_some() {
            changed = true;
        }
    }
    changed
}
/// Records that `previous_response_id` was reported not-found for a profile.
///
/// Bumps the per-(response, profile, route) negative-cache entry stored in
/// the profile-health map, marks the response continuation suspect, logs the
/// event, and schedules a state save. Once the failure threshold is crossed
/// it also bumps the profile's bad-pairing score. Returns the updated
/// failure count (0 when no usable response id was supplied).
fn note_runtime_previous_response_not_found(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    previous_response_id: Option<&str>,
    route_kind: RuntimeRouteKind,
) -> Result<u32> {
    // Nothing to record without a real response id.
    let Some(previous_response_id) = previous_response_id
        .map(str::trim)
        .filter(|value| !value.is_empty())
    else {
        return Ok(0);
    };
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    let key = runtime_previous_response_negative_cache_key(
        previous_response_id,
        profile_name,
        route_kind,
    );
    // Increment on top of the decayed current score, capped at the health
    // maximum.
    let next_failures = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &key,
        now,
        RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_SECONDS,
    )
    .saturating_add(1)
    .min(RUNTIME_PROFILE_HEALTH_MAX_SCORE);
    runtime.profile_health.insert(
        key,
        RuntimeProfileHealth {
            score: next_failures,
            updated_at: now,
        },
    );
    // A not-found report also moves the continuation toward Suspect/Dead.
    let _ = runtime_mark_continuation_status_suspect(
        &mut runtime.continuation_statuses,
        RuntimeContinuationBindingKind::Response,
        previous_response_id,
        now,
    );
    runtime_proxy_log(
        shared,
        format!(
            "previous_response_negative_cache profile={profile_name} route={} response_id={} failures={next_failures}",
            runtime_route_kind_label(route_kind),
            previous_response_id,
        ),
    );
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!(
            "previous_response_negative_cache:{profile_name}:{}",
            runtime_route_kind_label(route_kind)
        ),
    );
    // Release the runtime lock before the cross-cutting score bump below.
    drop(runtime);
    if next_failures >= RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_FAILURE_THRESHOLD {
        let _ = bump_runtime_profile_bad_pairing_score(
            shared,
            profile_name,
            route_kind,
            1,
            "previous_response_not_found",
        );
    }
    Ok(next_failures)
}
/// Schedules persistence of binding-touch bookkeeping.
///
/// Currently a thin alias for the general state-save scheduler; kept as a
/// separate entry point so touch-driven saves stay identifiable at call
/// sites.
fn schedule_runtime_binding_touch_save(
    shared: &RuntimeRotationProxyShared,
    runtime: &RuntimeRotationState,
    reason: &str,
) {
    schedule_runtime_state_save_from_runtime(shared, runtime, reason);
}
/// Resolves the profile bound to `previous_response_id` for
/// response-affinity routing, refreshing the binding's touch bookkeeping
/// when the binding is honored.
///
/// Returns `Ok(None)` when the continuation status is stale, dead, or
/// recently suspect, or when the negative cache says this (response,
/// profile, route) pairing recently failed; otherwise returns the bound
/// profile name, if a binding to a still-configured profile exists.
///
/// # Errors
/// Fails only when the shared runtime mutex is poisoned.
fn runtime_response_bound_profile(
    shared: &RuntimeRotationProxyShared,
    previous_response_id: &str,
    route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    // Resolve the binding up front so skip logs can name the profile; only
    // bindings to still-configured profiles count.
    let profile_name = runtime
        .state
        .response_profile_bindings
        .get(previous_response_id)
        .map(|binding| binding.profile_name.clone())
        .filter(|profile_name| runtime.state.profiles.contains_key(profile_name));
    // Stale "verified" statuses are aged out here; persist the downgrade.
    if runtime_age_stale_verified_continuation_status(
        &mut runtime.continuation_statuses,
        RuntimeContinuationBindingKind::Response,
        previous_response_id,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=previous_response profile={} reason=continuation_stale response_id={previous_response_id}",
                runtime_route_kind_label(route_kind),
                profile_name.as_deref().unwrap_or("-"),
            ),
        );
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("continuation_stale:{previous_response_id}"),
        );
        return Ok(None);
    }
    // Dead continuations are never reused.
    if runtime_continuation_status_map(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::Response,
    )
    .get(previous_response_id)
    .is_some_and(runtime_continuation_status_is_terminal)
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=previous_response profile=- reason=continuation_dead response_id={previous_response_id}",
                runtime_route_kind_label(route_kind),
            ),
        );
        return Ok(None);
    }
    // A recent not-found observation suppresses the affinity until the grace
    // window passes.
    if runtime_continuation_status_recently_suspect(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::Response,
        previous_response_id,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=previous_response profile=- reason=continuation_suspect response_id={previous_response_id}",
                runtime_route_kind_label(route_kind),
            ),
        );
        return Ok(None);
    }
    // The per-(response, profile, route) negative cache overrides a live
    // binding.
    if let Some(profile_name) = profile_name.as_deref()
        && runtime_previous_response_negative_cache_active(
            &runtime.profile_health,
            previous_response_id,
            profile_name,
            route_kind,
            now,
        )
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=previous_response profile={} reason=negative_cache response_id={}",
                runtime_route_kind_label(route_kind),
                profile_name,
                previous_response_id,
            ),
        );
        return Ok(None);
    }
    // Honoring the binding: refresh its timestamp and touch bookkeeping,
    // persisting only when the save cadence allows it.
    let mut persist_touch = false;
    if let Some(profile_name) = profile_name.as_deref()
        && let Some(binding) = runtime
            .state
            .response_profile_bindings
            .get_mut(previous_response_id)
        && binding.profile_name == profile_name
    {
        if runtime_binding_touch_should_persist(binding.bound_at, now) {
            persist_touch = true;
        }
        if binding.bound_at < now {
            binding.bound_at = now;
        }
        persist_touch = runtime_continuation_status_should_persist_touch(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            now,
        ) || persist_touch;
        let _ = runtime_mark_continuation_status_touched(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            now,
        );
    }
    if persist_touch {
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("response_touch:{previous_response_id}"),
        );
    }
    Ok(profile_name)
}
/// Resolves the profile bound to `turn_state`, if any, while enforcing the
/// continuation-status gates (stale, terminal, recently suspect).
///
/// Returns `Ok(None)` when there is no usable binding or the continuation is
/// gated; otherwise returns the bound profile name. As a side effect it may
/// refresh the binding's `bound_at` / continuation touch time and schedule a
/// persisted save when the touch helpers deem it worth writing.
///
/// Errors only when the shared runtime mutex is poisoned.
fn runtime_turn_state_bound_profile(
    shared: &RuntimeRotationProxyShared,
    turn_state: &str,
) -> Result<Option<String>> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    // Only honor the binding when the bound profile still exists in state.
    let profile_name = runtime
        .turn_state_bindings
        .get(turn_state)
        .map(|binding| binding.profile_name.clone())
        .filter(|profile_name| runtime.state.profiles.contains_key(profile_name));
    // Gate 1: a verified continuation that has aged out is dropped. The check
    // mutates the status map, so the mutation is persisted before returning.
    if runtime_age_stale_verified_continuation_status(
        &mut runtime.continuation_statuses,
        RuntimeContinuationBindingKind::TurnState,
        turn_state,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=responses affinity=turn_state profile={} reason=continuation_stale turn_state={turn_state}",
                profile_name.as_deref().unwrap_or("-"),
            ),
        );
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("continuation_stale:{turn_state}"),
        );
        return Ok(None);
    }
    // Gate 2: terminal ("dead") continuations never route by affinity.
    if runtime_continuation_status_map(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::TurnState,
    )
    .get(turn_state)
    .is_some_and(runtime_continuation_status_is_terminal)
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=responses affinity=turn_state profile=- reason=continuation_dead turn_state={turn_state}",
            ),
        );
        return Ok(None);
    }
    // Gate 3: recently-suspect continuations are skipped until they recover.
    if runtime_continuation_status_recently_suspect(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::TurnState,
        turn_state,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=responses affinity=turn_state profile=- reason=continuation_suspect turn_state={turn_state}",
            ),
        );
        return Ok(None);
    }
    // Touch the binding (monotonically advance bound_at) and the continuation
    // status; persist only when either touch helper says a save is warranted.
    let mut persist_touch = false;
    if let Some(profile_name) = profile_name.as_deref()
        && let Some(binding) = runtime.turn_state_bindings.get_mut(turn_state)
        && binding.profile_name == profile_name
    {
        if runtime_binding_touch_should_persist(binding.bound_at, now) {
            persist_touch = true;
        }
        if binding.bound_at < now {
            binding.bound_at = now;
        }
        // Evaluate the persistence question BEFORE marking the status touched,
        // since marking updates the timestamp the check reads.
        persist_touch = runtime_continuation_status_should_persist_touch(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            now,
        ) || persist_touch;
        let _ = runtime_mark_continuation_status_touched(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            now,
        );
    }
    if persist_touch {
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("turn_state_touch:{turn_state}"),
        );
    }
    Ok(profile_name)
}
/// Resolves the profile bound to `session_id`, if any, while enforcing the
/// continuation-status gates (stale, terminal, recently suspect).
///
/// Unlike the turn-state variant, a successful touch refreshes BOTH the
/// in-memory `session_id_bindings` map and the persisted
/// `state.session_profile_bindings` map. Returns `Ok(None)` when no usable
/// binding exists or the continuation is gated.
///
/// Errors only when the shared runtime mutex is poisoned.
fn runtime_session_bound_profile(
    shared: &RuntimeRotationProxyShared,
    session_id: &str,
) -> Result<Option<String>> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    // Only honor the binding when the bound profile still exists in state.
    let profile_name = runtime
        .session_id_bindings
        .get(session_id)
        .map(|binding| binding.profile_name.clone())
        .filter(|profile_name| runtime.state.profiles.contains_key(profile_name));
    // Gate 1: a verified continuation that has aged out is dropped. The check
    // mutates the status map, so the mutation is persisted before returning.
    if runtime_age_stale_verified_continuation_status(
        &mut runtime.continuation_statuses,
        RuntimeContinuationBindingKind::SessionId,
        session_id,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity=session_id profile={} reason=continuation_stale session_id={session_id}",
                profile_name.as_deref().unwrap_or("-"),
            ),
        );
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("continuation_stale:{session_id}"),
        );
        return Ok(None);
    }
    // Gate 2: terminal ("dead") continuations never route by affinity.
    if runtime_continuation_status_map(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::SessionId,
    )
    .get(session_id)
    .is_some_and(runtime_continuation_status_is_terminal)
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity=session_id profile=- reason=continuation_dead session_id={session_id}",
            ),
        );
        return Ok(None);
    }
    // Gate 3: recently-suspect continuations are skipped until they recover.
    if runtime_continuation_status_recently_suspect(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::SessionId,
        session_id,
        now,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route=compact affinity=session_id profile=- reason=continuation_suspect session_id={session_id}",
            ),
        );
        return Ok(None);
    }
    // Touch both binding maps (monotonically advance bound_at) and the
    // continuation status; persist only when a touch helper warrants a save.
    let mut persist_touch = false;
    if let Some(profile_name) = profile_name.as_deref() {
        if let Some(binding) = runtime.session_id_bindings.get_mut(session_id)
            && binding.profile_name == profile_name
        {
            if runtime_binding_touch_should_persist(binding.bound_at, now) {
                persist_touch = true;
            }
            if binding.bound_at < now {
                binding.bound_at = now;
            }
        }
        if let Some(binding) = runtime.state.session_profile_bindings.get_mut(session_id)
            && binding.profile_name == profile_name
        {
            if runtime_binding_touch_should_persist(binding.bound_at, now) {
                persist_touch = true;
            }
            if binding.bound_at < now {
                binding.bound_at = now;
            }
        }
        // Evaluate the persistence question BEFORE marking the status touched,
        // since marking updates the timestamp the check reads.
        persist_touch = runtime_continuation_status_should_persist_touch(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            now,
        ) || persist_touch;
        let _ = runtime_mark_continuation_status_touched(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            now,
        );
    }
    if persist_touch {
        schedule_runtime_binding_touch_save(
            shared,
            &runtime,
            &format!("session_touch:{session_id}"),
        );
    }
    Ok(profile_name)
}
/// Binds `turn_state` to `profile_name` and records the continuation as
/// verified for `verified_route`.
///
/// A blank/absent `turn_state` is a no-op. Refreshing the timestamp of an
/// unchanged binding does NOT by itself trigger a save; a save and a log line
/// are scheduled only when the binding is created/rebound or the verified
/// continuation status actually changes.
fn remember_runtime_turn_state(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    turn_state: Option<&str>,
    verified_route: RuntimeRouteKind,
) -> Result<()> {
    let Some(turn_state) = turn_state.map(str::trim).filter(|value| !value.is_empty()) else {
        return Ok(());
    };
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound_at = Local::now().timestamp();
    let mut changed = false;
    // true when the binding was created or rebound to a different profile;
    // a timestamp-only refresh of the same profile yields false.
    let should_refresh_binding = match runtime.turn_state_bindings.get_mut(turn_state) {
        Some(binding) if binding.profile_name == profile_name => {
            if binding.bound_at < bound_at {
                binding.bound_at = bound_at;
            }
            false
        }
        Some(binding) => {
            binding.profile_name = profile_name.to_string();
            binding.bound_at = bound_at;
            changed = true;
            true
        }
        None => {
            runtime.turn_state_bindings.insert(
                turn_state.to_string(),
                ResponseProfileBinding {
                    profile_name: profile_name.to_string(),
                    bound_at,
                },
            );
            changed = true;
            true
        }
    };
    // Mark the continuation verified when the binding moved, or when the
    // status itself is due for a verified refresh.
    if should_refresh_binding
        || runtime_continuation_status_should_refresh_verified(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            bound_at,
            Some(verified_route),
        )
    {
        changed = runtime_mark_continuation_status_verified(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            bound_at,
            Some(verified_route),
        ) || changed;
    }
    if changed {
        // Keep the binding map bounded before persisting.
        prune_profile_bindings(
            &mut runtime.turn_state_bindings,
            TURN_STATE_PROFILE_BINDING_LIMIT,
        );
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("turn_state:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!("binding turn_state profile={profile_name} value={turn_state}"),
        );
    } else {
        drop(runtime);
    }
    Ok(())
}
/// Binds `session_id` to `profile_name` in BOTH the in-memory
/// `session_id_bindings` map and the persisted
/// `state.session_profile_bindings` map, and records the continuation as
/// verified for `verified_route`.
///
/// A blank/absent `session_id` is a no-op. Timestamp-only refreshes of an
/// unchanged binding do not trigger a save; creation/rebind or a real status
/// change does, and also emits a log line.
fn remember_runtime_session_id(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    session_id: Option<&str>,
    verified_route: RuntimeRouteKind,
) -> Result<()> {
    let Some(session_id) = session_id.map(str::trim).filter(|value| !value.is_empty()) else {
        return Ok(());
    };
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound_at = Local::now().timestamp();
    let mut changed = false;
    // Set when either map creates or rebinds its entry (not on a pure
    // timestamp refresh of the same profile).
    let mut should_refresh_binding = false;
    // In-memory binding map.
    match runtime.session_id_bindings.get_mut(session_id) {
        Some(binding) if binding.profile_name == profile_name => {
            if binding.bound_at < bound_at {
                binding.bound_at = bound_at;
            }
        }
        Some(binding) => {
            binding.profile_name = profile_name.to_string();
            binding.bound_at = bound_at;
            changed = true;
            should_refresh_binding = true;
        }
        None => {
            runtime.session_id_bindings.insert(
                session_id.to_string(),
                ResponseProfileBinding {
                    profile_name: profile_name.to_string(),
                    bound_at,
                },
            );
            changed = true;
            should_refresh_binding = true;
        }
    }
    // Persisted binding map (kept in lockstep with the in-memory one).
    match runtime.state.session_profile_bindings.get_mut(session_id) {
        Some(binding) if binding.profile_name == profile_name => {
            if binding.bound_at < bound_at {
                binding.bound_at = bound_at;
            }
        }
        Some(binding) => {
            binding.profile_name = profile_name.to_string();
            binding.bound_at = bound_at;
            changed = true;
            should_refresh_binding = true;
        }
        None => {
            runtime.state.session_profile_bindings.insert(
                session_id.to_string(),
                ResponseProfileBinding {
                    profile_name: profile_name.to_string(),
                    bound_at,
                },
            );
            changed = true;
            should_refresh_binding = true;
        }
    }
    // Mark the continuation verified when a binding moved, or when the status
    // itself is due for a verified refresh.
    if should_refresh_binding
        || runtime_continuation_status_should_refresh_verified(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            bound_at,
            Some(verified_route),
        )
    {
        changed = runtime_mark_continuation_status_verified(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            bound_at,
            Some(verified_route),
        ) || changed;
    }
    if changed {
        // Keep both binding maps bounded before persisting.
        prune_profile_bindings(
            &mut runtime.session_id_bindings,
            SESSION_ID_PROFILE_BINDING_LIMIT,
        );
        prune_profile_bindings(
            &mut runtime.state.session_profile_bindings,
            SESSION_ID_PROFILE_BINDING_LIMIT,
        );
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("session_id:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!("binding session_id profile={profile_name} value={session_id}"),
        );
    } else {
        drop(runtime);
    }
    Ok(())
}
/// Binds the compact-lineage keys derived from `session_id` and/or
/// `turn_state` to `profile_name`, marking each continuation verified for
/// `verified_route`.
///
/// Both identifiers are optional; when both are blank/absent this is a no-op.
/// The lineage keys are namespaced via `runtime_compact_*_lineage_key` so they
/// do not collide with the plain session/turn-state bindings. A save is
/// scheduled only when a binding is created/rebound or a verified status
/// actually changes; note that unlike the non-lineage variants, no log line is
/// emitted here.
fn remember_runtime_compact_lineage(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    session_id: Option<&str>,
    turn_state: Option<&str>,
    verified_route: RuntimeRouteKind,
) -> Result<()> {
    let session_id = session_id.map(str::trim).filter(|value| !value.is_empty());
    let turn_state = turn_state.map(str::trim).filter(|value| !value.is_empty());
    if session_id.is_none() && turn_state.is_none() {
        return Ok(());
    }
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound_at = Local::now().timestamp();
    let mut changed = false;
    // Session-lineage branch: bind under the namespaced session key.
    if let Some(session_id) = session_id {
        let key = runtime_compact_session_lineage_key(session_id);
        // true only on create/rebind; a timestamp refresh yields false.
        let should_refresh_binding = match runtime.session_id_bindings.get_mut(&key) {
            Some(binding) if binding.profile_name == profile_name => {
                if binding.bound_at < bound_at {
                    binding.bound_at = bound_at;
                }
                false
            }
            Some(binding) => {
                binding.profile_name = profile_name.to_string();
                binding.bound_at = bound_at;
                changed = true;
                true
            }
            None => {
                runtime.session_id_bindings.insert(
                    key.clone(),
                    ResponseProfileBinding {
                        profile_name: profile_name.to_string(),
                        bound_at,
                    },
                );
                changed = true;
                true
            }
        };
        if should_refresh_binding
            || runtime_continuation_status_should_refresh_verified(
                &runtime.continuation_statuses,
                RuntimeContinuationBindingKind::SessionId,
                &key,
                bound_at,
                Some(verified_route),
            )
        {
            changed = runtime_mark_continuation_status_verified(
                &mut runtime.continuation_statuses,
                RuntimeContinuationBindingKind::SessionId,
                &key,
                bound_at,
                Some(verified_route),
            ) || changed;
        }
    }
    // Turn-state-lineage branch: same shape, namespaced turn-state key.
    if let Some(turn_state) = turn_state {
        let key = runtime_compact_turn_state_lineage_key(turn_state);
        let should_refresh_binding = match runtime.turn_state_bindings.get_mut(&key) {
            Some(binding) if binding.profile_name == profile_name => {
                if binding.bound_at < bound_at {
                    binding.bound_at = bound_at;
                }
                false
            }
            Some(binding) => {
                binding.profile_name = profile_name.to_string();
                binding.bound_at = bound_at;
                changed = true;
                true
            }
            None => {
                runtime.turn_state_bindings.insert(
                    key.clone(),
                    ResponseProfileBinding {
                        profile_name: profile_name.to_string(),
                        bound_at,
                    },
                );
                changed = true;
                true
            }
        };
        if should_refresh_binding
            || runtime_continuation_status_should_refresh_verified(
                &runtime.continuation_statuses,
                RuntimeContinuationBindingKind::TurnState,
                &key,
                bound_at,
                Some(verified_route),
            )
        {
            changed = runtime_mark_continuation_status_verified(
                &mut runtime.continuation_statuses,
                RuntimeContinuationBindingKind::TurnState,
                &key,
                bound_at,
                Some(verified_route),
            ) || changed;
        }
    }
    if changed {
        // Keep both binding maps bounded before persisting.
        prune_profile_bindings(
            &mut runtime.turn_state_bindings,
            TURN_STATE_PROFILE_BINDING_LIMIT,
        );
        prune_profile_bindings(
            &mut runtime.session_id_bindings,
            SESSION_ID_PROFILE_BINDING_LIMIT,
        );
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("compact_lineage:{profile_name}"),
        );
        drop(runtime);
    } else {
        drop(runtime);
    }
    Ok(())
}
/// Releases the compact-lineage bindings for `session_id` / `turn_state`,
/// but only when each binding is still owned by `profile_name`; released
/// continuations are marked dead so they are never routed by affinity again.
///
/// Returns `Ok(true)` when at least one binding was removed (a save is then
/// scheduled and a log line emitted with the supplied `reason`).
fn release_runtime_compact_lineage(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    session_id: Option<&str>,
    turn_state: Option<&str>,
    reason: &str,
) -> Result<bool> {
    let session_id = session_id.map(str::trim).filter(|value| !value.is_empty());
    let turn_state = turn_state.map(str::trim).filter(|value| !value.is_empty());
    if session_id.is_none() && turn_state.is_none() {
        return Ok(false);
    }
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let mut changed = false;
    let now = Local::now().timestamp();
    if let Some(session_id) = session_id {
        let key = runtime_compact_session_lineage_key(session_id);
        // Ownership check prevents clobbering a binding another profile took.
        if runtime
            .session_id_bindings
            .get(&key)
            .is_some_and(|binding| binding.profile_name == profile_name)
        {
            runtime.session_id_bindings.remove(&key);
            let _ = runtime_mark_continuation_status_dead(
                &mut runtime.continuation_statuses,
                RuntimeContinuationBindingKind::SessionId,
                &key,
                now,
            );
            changed = true;
        }
    }
    if let Some(turn_state) = turn_state {
        let key = runtime_compact_turn_state_lineage_key(turn_state);
        if runtime
            .turn_state_bindings
            .get(&key)
            .is_some_and(|binding| binding.profile_name == profile_name)
        {
            runtime.turn_state_bindings.remove(&key);
            let _ = runtime_mark_continuation_status_dead(
                &mut runtime.continuation_statuses,
                RuntimeContinuationBindingKind::TurnState,
                &key,
                now,
            );
            changed = true;
        }
    }
    if changed {
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("compact_lineage_release:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!(
                "compact_lineage_released profile={profile_name} reason={reason} session={} turn_state={}",
                session_id.unwrap_or("-"),
                turn_state.unwrap_or("-"),
            ),
        );
    } else {
        drop(runtime);
    }
    Ok(changed)
}
/// Binds every id in `response_ids` to `profile_name`, clearing any
/// previous-response negative-cache entries for the (id, profile) pair and
/// marking each continuation verified for `verified_route`.
///
/// An empty slice is a no-op. A save and a log line happen only when at least
/// one binding or status actually changed.
fn remember_runtime_response_ids(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    response_ids: &[String],
    verified_route: RuntimeRouteKind,
) -> Result<()> {
    if response_ids.is_empty() {
        return Ok(());
    }
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound_at = Local::now().timestamp();
    let mut changed = false;
    for response_id in response_ids {
        // A fresh successful binding invalidates any "not found" memory for
        // this (response_id, profile) pair.
        changed =
            clear_runtime_previous_response_negative_cache(&mut runtime, response_id, profile_name)
                || changed;
        // true only on create/rebind; a timestamp refresh yields false.
        let should_refresh_binding =
            match runtime.state.response_profile_bindings.get_mut(response_id) {
                Some(binding) if binding.profile_name == profile_name => {
                    if binding.bound_at < bound_at {
                        binding.bound_at = bound_at;
                    }
                    false
                }
                Some(binding) => {
                    binding.profile_name = profile_name.to_string();
                    binding.bound_at = bound_at;
                    changed = true;
                    true
                }
                None => {
                    runtime.state.response_profile_bindings.insert(
                        response_id.clone(),
                        ResponseProfileBinding {
                            profile_name: profile_name.to_string(),
                            bound_at,
                        },
                    );
                    changed = true;
                    true
                }
            };
        if should_refresh_binding
            || runtime_continuation_status_should_refresh_verified(
                &runtime.continuation_statuses,
                RuntimeContinuationBindingKind::Response,
                response_id,
                bound_at,
                Some(verified_route),
            )
        {
            changed = runtime_mark_continuation_status_verified(
                &mut runtime.continuation_statuses,
                RuntimeContinuationBindingKind::Response,
                response_id,
                bound_at,
                Some(verified_route),
            ) || changed;
        }
    }
    if changed {
        // Keep the binding map bounded before persisting.
        prune_profile_bindings(
            &mut runtime.state.response_profile_bindings,
            RESPONSE_PROFILE_BINDING_LIMIT,
        );
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("response_ids:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!(
                "binding response_ids profile={profile_name} count={} first={:?}",
                response_ids.len(),
                response_ids.first()
            ),
        );
    } else {
        drop(runtime);
    }
    Ok(())
}
/// Records `profile_name` as the confirmed owner of `previous_response_id`
/// after a successful upstream turn: clears the negative cache for the pair,
/// (re)binds the id, and marks the continuation verified for `verified_route`.
///
/// A blank/absent id is a no-op. A save and a log line happen only when the
/// binding, negative cache, or status actually changed.
fn remember_runtime_successful_previous_response_owner(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    previous_response_id: Option<&str>,
    verified_route: RuntimeRouteKind,
) -> Result<()> {
    let Some(previous_response_id) = previous_response_id
        .map(str::trim)
        .filter(|value| !value.is_empty())
    else {
        return Ok(());
    };
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound_at = Local::now().timestamp();
    // A confirmed success invalidates any "not found" memory for this pair.
    let mut changed = clear_runtime_previous_response_negative_cache(
        &mut runtime,
        previous_response_id,
        profile_name,
    );
    // true only on create/rebind; a timestamp refresh yields false.
    let should_refresh_binding = match runtime
        .state
        .response_profile_bindings
        .get_mut(previous_response_id)
    {
        Some(binding) if binding.profile_name == profile_name => {
            if binding.bound_at < bound_at {
                binding.bound_at = bound_at;
            }
            false
        }
        Some(binding) => {
            binding.profile_name = profile_name.to_string();
            binding.bound_at = bound_at;
            changed = true;
            true
        }
        None => {
            runtime.state.response_profile_bindings.insert(
                previous_response_id.to_string(),
                ResponseProfileBinding {
                    profile_name: profile_name.to_string(),
                    bound_at,
                },
            );
            changed = true;
            true
        }
    };
    if should_refresh_binding
        || runtime_continuation_status_should_refresh_verified(
            &runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            bound_at,
            Some(verified_route),
        )
    {
        changed = runtime_mark_continuation_status_verified(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            bound_at,
            Some(verified_route),
        ) || changed;
    }
    if changed {
        // Keep the binding map bounded before persisting.
        prune_profile_bindings(
            &mut runtime.state.response_profile_bindings,
            RESPONSE_PROFILE_BINDING_LIMIT,
        );
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("previous_response_owner:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!(
                "binding previous_response_owner profile={profile_name} response_id={previous_response_id}"
            ),
        );
    } else {
        drop(runtime);
    }
    Ok(())
}
/// Drops the response-profile binding for `previous_response_id`, but only
/// when it is still owned by `profile_name`.
///
/// Returns `Ok(true)` and schedules a state save (plus a log line) when the
/// binding was removed; `Ok(false)` when the id is blank/absent or owned by a
/// different profile. Errors only if the runtime mutex is poisoned.
fn clear_runtime_stale_previous_response_binding(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    previous_response_id: Option<&str>,
) -> Result<bool> {
    let previous_response_id = match previous_response_id.map(str::trim) {
        Some(value) if !value.is_empty() => value,
        _ => return Ok(false),
    };
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    // Only the owning profile may clear its own binding.
    let owned_by_profile = runtime
        .state
        .response_profile_bindings
        .get(previous_response_id)
        .is_some_and(|binding| binding.profile_name == profile_name);
    if !owned_by_profile {
        drop(runtime);
        return Ok(false);
    }
    runtime
        .state
        .response_profile_bindings
        .remove(previous_response_id);
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!("previous_response_binding_clear:{profile_name}"),
    );
    // Release the lock before logging to avoid holding it during I/O.
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "previous_response_binding_cleared profile={profile_name} response_id={previous_response_id}"
        ),
    );
    Ok(true)
}
/// Releases every affinity binding (previous response, turn state, session)
/// still owned by `profile_name` after a quota block, marking each released
/// continuation dead so it cannot be routed back to this profile.
///
/// Returns `Ok(true)` when at least one binding was removed; a save is then
/// scheduled and a log line emitted.
fn release_runtime_quota_blocked_affinity(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    previous_response_id: Option<&str>,
    turn_state: Option<&str>,
    session_id: Option<&str>,
) -> Result<bool> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let mut changed = false;
    let now = Local::now().timestamp();
    // Each branch only releases a binding the profile still owns.
    if let Some(previous_response_id) = previous_response_id
        && runtime
            .state
            .response_profile_bindings
            .get(previous_response_id)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        runtime
            .state
            .response_profile_bindings
            .remove(previous_response_id);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            now,
        );
        changed = true;
    }
    if let Some(turn_state) = turn_state
        && runtime
            .turn_state_bindings
            .get(turn_state)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        runtime.turn_state_bindings.remove(turn_state);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            now,
        );
        changed = true;
    }
    if let Some(session_id) = session_id
        && runtime
            .session_id_bindings
            .get(session_id)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        // Session bindings live in two maps; remove from both.
        runtime.session_id_bindings.remove(session_id);
        runtime.state.session_profile_bindings.remove(session_id);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            now,
        );
        changed = true;
    }
    if changed {
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("quota_release:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!(
                "quota_release_affinity profile={profile_name} previous_response_id={:?} turn_state={:?} session_id={:?}",
                previous_response_id, turn_state, session_id
            ),
        );
    } else {
        drop(runtime);
    }
    Ok(changed)
}
/// Releases the affinity bindings owned by `profile_name` after repeated
/// "previous response not found" failures.
///
/// The release is deferred (returns `Ok(false)` with a log line) until the
/// per-pair failure counter reaches
/// `RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_FAILURE_THRESHOLD`; once it does,
/// the previous-response, turn-state, and session bindings still owned by the
/// profile are removed and their continuations are marked dead.
fn release_runtime_previous_response_affinity(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    previous_response_id: Option<&str>,
    turn_state: Option<&str>,
    session_id: Option<&str>,
    route_kind: RuntimeRouteKind,
) -> Result<bool> {
    // Record the failure first; the returned count decides whether we act.
    let previous_response_failures = note_runtime_previous_response_not_found(
        shared,
        profile_name,
        previous_response_id,
        route_kind,
    )?;
    if previous_response_failures < RUNTIME_PREVIOUS_RESPONSE_NEGATIVE_CACHE_FAILURE_THRESHOLD {
        runtime_proxy_log(
            shared,
            format!(
                "previous_response_release_deferred profile={profile_name} route={} previous_response_id={:?} failures={previous_response_failures}",
                runtime_route_kind_label(route_kind),
                previous_response_id,
            ),
        );
        return Ok(false);
    }
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let mut changed = false;
    let now = Local::now().timestamp();
    // Each branch only releases a binding the profile still owns.
    if let Some(previous_response_id) = previous_response_id
        && runtime
            .state
            .response_profile_bindings
            .get(previous_response_id)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        runtime
            .state
            .response_profile_bindings
            .remove(previous_response_id);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::Response,
            previous_response_id,
            now,
        );
        changed = true;
    }
    if let Some(turn_state) = turn_state
        && runtime
            .turn_state_bindings
            .get(turn_state)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        runtime.turn_state_bindings.remove(turn_state);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::TurnState,
            turn_state,
            now,
        );
        changed = true;
    }
    if let Some(session_id) = session_id
        && runtime
            .session_id_bindings
            .get(session_id)
            .is_some_and(|binding| binding.profile_name == profile_name)
    {
        // Session bindings live in two maps; remove from both.
        runtime.session_id_bindings.remove(session_id);
        runtime.state.session_profile_bindings.remove(session_id);
        let _ = runtime_mark_continuation_status_dead(
            &mut runtime.continuation_statuses,
            RuntimeContinuationBindingKind::SessionId,
            session_id,
            now,
        );
        changed = true;
    }
    if changed {
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("previous_response_release:{profile_name}"),
        );
        // Release the lock before logging to avoid holding it during I/O.
        drop(runtime);
        runtime_proxy_log(
            shared,
            format!(
                "previous_response_release_affinity profile={profile_name} previous_response_id={:?} turn_state={:?} session_id={:?}",
                previous_response_id, turn_state, session_id
            ),
        );
    } else {
        drop(runtime);
    }
    Ok(changed)
}
/// Evicts the oldest entries (by `bound_at`) from `bindings` until at most
/// `max_entries` remain. A stable sort is used so that, among equal
/// timestamps, entries earlier in key order are evicted first.
fn prune_profile_bindings(
    bindings: &mut BTreeMap<String, ResponseProfileBinding>,
    max_entries: usize,
) {
    let excess = bindings.len().saturating_sub(max_entries);
    if excess == 0 {
        return;
    }
    // Snapshot (key, timestamp) pairs, order oldest-first, then drop the
    // first `excess` keys from the map.
    let mut entries: Vec<_> = bindings
        .iter()
        .map(|(key, binding)| (key.clone(), binding.bound_at))
        .collect();
    entries.sort_by_key(|&(_, bound_at)| bound_at);
    for (key, _) in entries.into_iter().take(excess) {
        bindings.remove(&key);
    }
}
/// Looks up the retry delay for `retry_index` in the configured backoff
/// schedule; `None` once the schedule is exhausted.
fn runtime_previous_response_retry_delay(retry_index: usize) -> Option<Duration> {
    let millis = *RUNTIME_PREVIOUS_RESPONSE_RETRY_DELAYS_MS.get(retry_index)?;
    Some(Duration::from_millis(millis))
}
/// Reports whether the pre-commit selection loop should stop: either the
/// attempt limit was reached or the wall-clock budget (measured from
/// `started_at`) has elapsed.
fn runtime_proxy_precommit_budget_exhausted(
    started_at: Instant,
    attempts: usize,
    continuation: bool,
    pressure_mode: bool,
) -> bool {
    let (attempt_limit, budget) = runtime_proxy_precommit_budget(continuation, pressure_mode);
    if attempts >= attempt_limit {
        return true;
    }
    started_at.elapsed() >= budget
}
/// Selects the (attempt limit, wall-clock budget) pair for a pre-commit
/// selection loop. Continuation requests get their own budget and take
/// precedence over pressure mode.
fn runtime_proxy_precommit_budget(continuation: bool, pressure_mode: bool) -> (usize, Duration) {
    let (attempt_limit, budget_ms) = match (continuation, pressure_mode) {
        (true, _) => (
            RUNTIME_PROXY_PRECOMMIT_CONTINUATION_ATTEMPT_LIMIT,
            RUNTIME_PROXY_PRECOMMIT_CONTINUATION_BUDGET_MS,
        ),
        (false, true) => (
            RUNTIME_PROXY_PRESSURE_PRECOMMIT_ATTEMPT_LIMIT,
            RUNTIME_PROXY_PRESSURE_PRECOMMIT_BUDGET_MS,
        ),
        (false, false) => (
            RUNTIME_PROXY_PRECOMMIT_ATTEMPT_LIMIT,
            RUNTIME_PROXY_PRECOMMIT_BUDGET_MS,
        ),
    };
    (attempt_limit, Duration::from_millis(budget_ms))
}
/// True when any continuation/affinity signal is present, meaning the request
/// should receive continuation-priority handling.
fn runtime_proxy_has_continuation_priority(
    previous_response_id: Option<&str>,
    pinned_profile: Option<&str>,
    request_turn_state: Option<&str>,
    turn_state_profile: Option<&str>,
    session_profile: Option<&str>,
) -> bool {
    [
        previous_response_id,
        pinned_profile,
        request_turn_state,
        turn_state_profile,
        session_profile,
    ]
    .iter()
    .any(Option::is_some)
}
/// Picks the profile that owns the wait-affinity slot, in priority order:
/// strict affinity, then turn-state, then the pinned profile (only when the
/// previous-response affinity is trusted), then the session profile.
fn runtime_wait_affinity_owner<'a>(
    strict_affinity_profile: Option<&'a str>,
    pinned_profile: Option<&'a str>,
    turn_state_profile: Option<&'a str>,
    session_profile: Option<&'a str>,
    trusted_previous_response_affinity: bool,
) -> Option<&'a str> {
    if let Some(owner) = strict_affinity_profile {
        return Some(owner);
    }
    if let Some(owner) = turn_state_profile {
        return Some(owner);
    }
    if trusted_previous_response_affinity {
        if let Some(owner) = pinned_profile {
            return Some(owner);
        }
    }
    session_profile
}
/// True only for a completely "fresh" request: no affinity signal of any kind
/// and no observed saturation or upstream failure — i.e. the current profile
/// may be used directly without going through full selection.
fn runtime_proxy_allows_direct_current_profile_fallback(
    previous_response_id: Option<&str>,
    pinned_profile: Option<&str>,
    request_turn_state: Option<&str>,
    turn_state_profile: Option<&str>,
    session_profile: Option<&str>,
    saw_inflight_saturation: bool,
    saw_upstream_failure: bool,
) -> bool {
    let any_affinity_signal = [
        previous_response_id,
        pinned_profile,
        request_turn_state,
        turn_state_profile,
        session_profile,
    ]
    .iter()
    .any(Option::is_some);
    !(any_affinity_signal || saw_inflight_saturation || saw_upstream_failure)
}
/// Returns the codex home directory configured for `profile_name`, or `None`
/// when the profile does not exist. Errors only if the runtime mutex is
/// poisoned.
fn runtime_profile_codex_home(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
) -> Result<Option<PathBuf>> {
    let guard = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let codex_home = guard
        .state
        .profiles
        .get(profile_name)
        .map(|profile| profile.codex_home.clone());
    Ok(codex_home)
}
/// Reports whether some OTHER profile (i.e. not `profile_name`) has
/// quota-compatible auth, meaning rotation away from this profile is viable.
fn runtime_has_alternative_quota_compatible_profile(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
) -> Result<bool> {
    let guard = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    for (name, profile) in &guard.state.profiles {
        if name.as_str() == profile_name {
            continue;
        }
        if read_auth_summary(&profile.codex_home).quota_compatible {
            return Ok(true);
        }
    }
    Ok(false)
}
/// Runs a synchronous (inline) quota probe for `profile_name`, logging a
/// `{context}_start` marker first. A missing profile is a silent no-op.
fn refresh_runtime_profile_quota_inline(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    context: &str,
) -> Result<()> {
    let codex_home = match runtime_profile_codex_home(shared, profile_name)? {
        Some(path) => path,
        None => return Ok(()),
    };
    runtime_proxy_log(shared, format!("{context}_start profile={profile_name}"));
    let jobs = vec![(profile_name.to_string(), codex_home)];
    run_runtime_probe_jobs_inline(shared, jobs, context);
    Ok(())
}
/// True when a pre-commit live probe is required before trusting `summary`:
/// only on the Responses/Websocket routes, only when the data did not already
/// come from a live probe, and only when either quota window is Critical or
/// Unknown.
fn runtime_quota_summary_requires_precommit_live_probe(
    summary: RuntimeQuotaSummary,
    source: Option<RuntimeQuotaSource>,
    route_kind: RuntimeRouteKind,
) -> bool {
    let affinity_route = matches!(
        route_kind,
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
    );
    if !affinity_route || matches!(source, Some(RuntimeQuotaSource::LiveProbe)) {
        return false;
    }
    let window_needs_probe = |status| {
        matches!(
            status,
            RuntimeQuotaWindowStatus::Critical | RuntimeQuotaWindowStatus::Unknown
        )
    };
    window_needs_probe(summary.five_hour.status) || window_needs_probe(summary.weekly.status)
}
/// True when, even after a probe attempt, the quota data for a
/// Responses/Websocket route still lacks a live source and either window
/// remains Unknown — i.e. the profile cannot be trusted for these routes.
fn runtime_quota_summary_requires_live_source_after_probe(
    summary: RuntimeQuotaSummary,
    source: Option<RuntimeQuotaSource>,
    route_kind: RuntimeRouteKind,
) -> bool {
    let affinity_route = matches!(
        route_kind,
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
    );
    if !affinity_route || matches!(source, Some(RuntimeQuotaSource::LiveProbe)) {
        return false;
    }
    matches!(summary.five_hour.status, RuntimeQuotaWindowStatus::Unknown)
        || matches!(summary.weekly.status, RuntimeQuotaWindowStatus::Unknown)
}
/// Fetches the quota summary for `profile_name` on `route_kind` and, when the
/// first look requires a live probe, runs one inline and re-reads the summary.
/// Returns whatever the final read produced (which may still be non-live).
fn ensure_runtime_profile_precommit_quota_ready(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    context: &str,
) -> Result<(RuntimeQuotaSummary, Option<RuntimeQuotaSource>)> {
    let first_look = runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)?;
    if !runtime_quota_summary_requires_precommit_live_probe(first_look.0, first_look.1, route_kind)
    {
        return Ok(first_look);
    }
    refresh_runtime_profile_quota_inline(shared, profile_name, context)?;
    runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)
}
/// Evaluates whether the currently-selected profile can be used as a direct
/// fallback for `route_kind`.
///
/// Returns `Ok(Some(profile_name))` when the current profile exists and
/// passes every gate; `Ok(None)` when it is excluded, in auth-failure
/// backoff, not quota-compatible, inflight-hard-limited, or (on
/// Responses/Websocket routes) fails the quota gates. Errors only if the
/// runtime mutex is poisoned.
///
/// Fixes over the previous version: the quota pre-commit guard reason is
/// computed once and reused for both the decision and the log line, and the
/// redundant `profile_name.clone()` in the snapshot tuple is removed.
fn runtime_proxy_direct_current_fallback_profile(
    shared: &RuntimeRotationProxyShared,
    excluded_profiles: &BTreeSet<String>,
    route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
    // Snapshot everything needed under the lock, then release it before
    // calling helpers that may take the lock again.
    let (profile_name, codex_home, auth_failure_active) = {
        let runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        let profile_name = runtime.current_profile.clone();
        let Some(profile) = runtime.state.profiles.get(&profile_name) else {
            return Ok(None);
        };
        let codex_home = profile.codex_home.clone();
        let now = Local::now().timestamp();
        let auth_failure_active =
            runtime_profile_auth_failure_active(&runtime, &profile_name, now);
        (profile_name, codex_home, auth_failure_active)
    };
    if excluded_profiles.contains(&profile_name) {
        return Ok(None);
    }
    if auth_failure_active {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_current route={} profile={} reason=auth_failure_backoff",
                runtime_route_kind_label(route_kind),
                profile_name,
            ),
        );
        return Ok(None);
    }
    // A profile whose auth material is not quota-compatible is never a safe
    // direct fallback.
    if !read_auth_summary(&codex_home).quota_compatible {
        return Ok(None);
    }
    if runtime_profile_inflight_hard_limited_for_context(
        shared,
        &profile_name,
        runtime_route_kind_inflight_context(route_kind),
    )? {
        return Ok(None);
    }
    // Quota gating only applies to the affinity-sensitive routes.
    if matches!(
        route_kind,
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
    ) {
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, &profile_name, route_kind)?;
        // Compute the guard reason once; it drives both the skip decision
        // and the reason shown in the log line.
        let guard_reason = runtime_quota_precommit_guard_reason(quota_summary, route_kind);
        if quota_source.is_none()
            || runtime_quota_summary_requires_live_source_after_probe(
                quota_summary,
                quota_source,
                route_kind,
            )
            || guard_reason.is_some()
        {
            runtime_proxy_log(
                shared,
                format!(
                    "selection_skip_current route={} profile={} reason={} quota_source={} {}",
                    runtime_route_kind_label(route_kind),
                    profile_name,
                    guard_reason.unwrap_or_else(|| {
                        runtime_quota_soft_affinity_rejection_reason(
                            quota_summary,
                            quota_source,
                            route_kind,
                        )
                    }),
                    quota_source
                        .map(runtime_quota_source_label)
                        .unwrap_or("unknown"),
                    runtime_quota_summary_log_fields(quota_summary),
                ),
            );
            return Ok(None);
        }
    }
    Ok(Some(profile_name))
}
/// User-facing message returned when local profile selection exhausts its
/// pre-commit retry budget without finding a healthy upstream.
fn runtime_proxy_local_selection_failure_message() -> &'static str {
    // The wording is observable behavior; keep it byte-for-byte.
    const MESSAGE: &str = "Runtime proxy could not secure a healthy upstream profile before the pre-commit retry budget was exhausted. Retry the request.";
    MESSAGE
}
/// Reports whether a quota window still has enough headroom for the
/// auto-rotate selector to consider its profile; any other status
/// (e.g. unknown or exhausted) disqualifies the window.
fn runtime_quota_window_usable_for_auto_rotate(status: RuntimeQuotaWindowStatus) -> bool {
    match status {
        RuntimeQuotaWindowStatus::Ready
        | RuntimeQuotaWindowStatus::Thin
        | RuntimeQuotaWindowStatus::Critical => true,
        _ => false,
    }
}
/// Decides whether a soft (non-hard) affinity candidate may be honoured:
/// quota evidence must exist, both quota windows must be usable, and no
/// pre-commit guard may be tripped for this route.
fn runtime_quota_summary_allows_soft_affinity(
    summary: RuntimeQuotaSummary,
    source: Option<RuntimeQuotaSource>,
    route_kind: RuntimeRouteKind,
) -> bool {
    // No evidence source at all means the summary cannot be trusted.
    if source.is_none() {
        return false;
    }
    if !runtime_quota_window_usable_for_auto_rotate(summary.five_hour.status) {
        return false;
    }
    if !runtime_quota_window_usable_for_auto_rotate(summary.weekly.status) {
        return false;
    }
    runtime_quota_precommit_guard_reason(summary, route_kind).is_none()
}
/// Names the reason a soft-affinity candidate was rejected, for logging.
/// Checked in order: missing/unknown quota evidence, pre-commit guard,
/// exhausted window, then the route pressure band as the catch-all.
fn runtime_quota_soft_affinity_rejection_reason(
    summary: RuntimeQuotaSummary,
    source: Option<RuntimeQuotaSource>,
    route_kind: RuntimeRouteKind,
) -> &'static str {
    let five_hour = summary.five_hour.status;
    let weekly = summary.weekly.status;
    let evidence_missing = source.is_none()
        || matches!(five_hour, RuntimeQuotaWindowStatus::Unknown)
        || matches!(weekly, RuntimeQuotaWindowStatus::Unknown);
    if evidence_missing {
        return "quota_windows_unavailable";
    }
    if let Some(reason) = runtime_quota_precommit_guard_reason(summary, route_kind) {
        return reason;
    }
    let any_window_exhausted = matches!(five_hour, RuntimeQuotaWindowStatus::Exhausted)
        || matches!(weekly, RuntimeQuotaWindowStatus::Exhausted);
    if any_window_exhausted {
        return "quota_exhausted";
    }
    runtime_quota_pressure_band_reason(summary.route_band)
}
/// Orders quota-evidence sources for candidate ranking on a given route
/// (lower keys sort first).
///
/// On the `Responses`/`Websocket` routes a persisted snapshot ranks after a
/// live probe; every other combination — including all sources on
/// `Compact`/`Standard` routes — shares rank 0.
fn runtime_quota_source_sort_key(
    route_kind: RuntimeRouteKind,
    source: RuntimeQuotaSource,
) -> usize {
    // The explicit LiveProbe arm in the original duplicated the `_ => 0`
    // catch-all (clippy `match_same_arms`); collapsing it changes no result.
    match (route_kind, source) {
        (
            RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket,
            RuntimeQuotaSource::PersistedSnapshot,
        ) => 1,
        _ => 0,
    }
}
fn runtime_profile_quota_summary_for_route(
shared: &RuntimeRotationProxyShared,
profile_name: &str,
route_kind: RuntimeRouteKind,
) -> Result<(RuntimeQuotaSummary, Option<RuntimeQuotaSource>)> {
let runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
let now = Local::now().timestamp();
Ok(runtime
.profile_probe_cache
.get(profile_name)
.filter(|entry| runtime_profile_usage_cache_is_fresh(entry, now))
.and_then(|entry| entry.result.as_ref().ok())
.map(|usage| {
(
runtime_quota_summary_for_route(usage, route_kind),
Some(RuntimeQuotaSource::LiveProbe),
)
})
.or_else(|| {
runtime
.profile_usage_snapshots
.get(profile_name)
.filter(|snapshot| runtime_usage_snapshot_is_usable(snapshot, now))
.map(|snapshot| {
(
runtime_quota_summary_from_usage_snapshot(snapshot, route_kind),
Some(RuntimeQuotaSource::PersistedSnapshot),
)
})
})
.unwrap_or((
RuntimeQuotaSummary {
five_hour: RuntimeQuotaWindowSummary {
status: RuntimeQuotaWindowStatus::Unknown,
remaining_percent: 0,
reset_at: i64::MAX,
},
weekly: RuntimeQuotaWindowSummary {
status: RuntimeQuotaWindowStatus::Unknown,
remaining_percent: 0,
reset_at: i64::MAX,
},
route_band: RuntimeQuotaPressureBand::Unknown,
},
None,
)))
}
/// Returns true when `bound_profile` is the recorded owner of
/// `previous_response_id` AND the continuation status (if any) for that
/// response id is in the `Verified` state. Missing id/profile, a missing
/// binding, or an owner mismatch all yield `Ok(false)`.
fn runtime_previous_response_affinity_is_trusted(
    shared: &RuntimeRotationProxyShared,
    previous_response_id: Option<&str>,
    bound_profile: Option<&str>,
) -> Result<bool> {
    // Both the continuation id and the candidate owner must be present.
    let (Some(previous_response_id), Some(bound_profile)) =
        (previous_response_id, bound_profile)
    else {
        return Ok(false);
    };
    let runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let binding = match runtime
        .state
        .response_profile_bindings
        .get(previous_response_id)
    {
        Some(binding) => binding,
        None => return Ok(false),
    };
    if binding.profile_name != bound_profile {
        return Ok(false);
    }
    // Trusted unless a continuation-status record exists in a non-verified state.
    let verified = runtime_continuation_status_map(
        &runtime.continuation_statuses,
        RuntimeContinuationBindingKind::Response,
    )
    .get(previous_response_id)
    .is_none_or(|status| status.state == RuntimeContinuationBindingLifecycle::Verified);
    Ok(verified)
}
/// Returns true when the recorded response→profile binding for
/// `previous_response_id` names exactly `bound_profile`; false when either
/// argument is absent or no matching binding exists.
fn runtime_previous_response_affinity_is_bound(
    shared: &RuntimeRotationProxyShared,
    previous_response_id: Option<&str>,
    bound_profile: Option<&str>,
) -> Result<bool> {
    let (Some(response_id), Some(expected_owner)) = (previous_response_id, bound_profile) else {
        return Ok(false);
    };
    let runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let bound = match runtime.state.response_profile_bindings.get(response_id) {
        Some(binding) => binding.profile_name == expected_owner,
        None => false,
    };
    Ok(bound)
}
/// True when `candidate_name` holds a hard (non-negotiable) affinity:
/// strict or turn-state affinity always binds; pinned affinity binds only
/// when trusted; session affinity binds only on the compact route.
fn runtime_candidate_has_hard_affinity(
    route_kind: RuntimeRouteKind,
    candidate_name: &str,
    strict_affinity_profile: Option<&str>,
    pinned_profile: Option<&str>,
    turn_state_profile: Option<&str>,
    session_profile: Option<&str>,
    trusted_previous_response_affinity: bool,
) -> bool {
    let is_candidate = |profile: Option<&str>| profile == Some(candidate_name);
    if is_candidate(strict_affinity_profile) || is_candidate(turn_state_profile) {
        return true;
    }
    if trusted_previous_response_affinity && is_candidate(pinned_profile) {
        return true;
    }
    route_kind == RuntimeRouteKind::Compact && is_candidate(session_profile)
}
/// Decides whether a quota-blocked affinity on `candidate_name` may be
/// released so another profile can serve the request.
///
/// Strict, turn-state, and (on compact routes) session affinities are never
/// released. A trusted pinned affinity is released only when the request does
/// not itself require previous-response continuity. Everything else is
/// releasable.
fn runtime_quota_blocked_affinity_is_releasable(
    route_kind: RuntimeRouteKind,
    candidate_name: &str,
    strict_affinity_profile: Option<&str>,
    pinned_profile: Option<&str>,
    turn_state_profile: Option<&str>,
    session_profile: Option<&str>,
    trusted_previous_response_affinity: bool,
    request_requires_previous_response_affinity: bool,
) -> bool {
    let is_candidate = |profile: Option<&str>| profile == Some(candidate_name);
    let unreleasable = is_candidate(strict_affinity_profile)
        || is_candidate(turn_state_profile)
        || (route_kind == RuntimeRouteKind::Compact && is_candidate(session_profile));
    if unreleasable {
        return false;
    }
    if trusted_previous_response_affinity && is_candidate(pinned_profile) {
        !request_requires_previous_response_affinity
    } else {
        true
    }
}
/// Minimum remaining-quota percentage required before sending on a route:
/// responses/websocket traffic uses the configured critical floor, while
/// compact/standard routes use a fixed floor of 1 percent.
fn runtime_quota_precommit_floor_percent(route_kind: RuntimeRouteKind) -> i64 {
    if matches!(
        route_kind,
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
    ) {
        runtime_proxy_responses_quota_critical_floor_percent()
    } else {
        1
    }
}
/// True when a quota window is both in a critical-or-worse state and at or
/// below the pre-commit floor percentage — i.e. sending now should be blocked.
fn runtime_quota_window_precommit_guard(
    window: RuntimeQuotaWindowSummary,
    floor_percent: i64,
) -> bool {
    let critically_low = matches!(
        window.status,
        RuntimeQuotaWindowStatus::Critical | RuntimeQuotaWindowStatus::Exhausted
    );
    critically_low && window.remaining_percent <= floor_percent
}
/// Returns the reason a request must NOT be committed against this quota
/// summary, or `None` when sending is allowed.
///
/// An exhausted route band blocks every route; the critical-floor window
/// check additionally blocks responses/websocket traffic.
fn runtime_quota_precommit_guard_reason(
    summary: RuntimeQuotaSummary,
    route_kind: RuntimeRouteKind,
) -> Option<&'static str> {
    let floor_percent = runtime_quota_precommit_floor_percent(route_kind);
    if summary.route_band == RuntimeQuotaPressureBand::Exhausted {
        return Some("quota_exhausted_before_send");
    }
    let floor_guarded_route = matches!(
        route_kind,
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
    );
    let window_below_floor = runtime_quota_window_precommit_guard(summary.five_hour, floor_percent)
        || runtime_quota_window_precommit_guard(summary.weekly, floor_percent);
    if floor_guarded_route && window_below_floor {
        return Some("quota_critical_floor_before_send");
    }
    None
}
/// Chooses the upstream profile for a responses-route request, honouring the
/// affinity hierarchy before falling back to general rotation.
///
/// Priority order:
/// 1. `strict_affinity_profile` — returned on hard affinity or acceptable
///    quota; otherwise selection fails outright (`Ok(None)`, no fallback).
/// 2. `pinned_profile` (previous-response binding) — falls through on rejection.
/// 3. `turn_state_profile` — falls through on rejection.
/// 4. Previous-response owner discovery, when `discover_previous_response_owner`
///    is set (replaces the remaining steps entirely).
/// 5. `session_profile` — falls through on rejection.
/// 6. Optimistic reuse of the currently selected profile.
/// 7. `next_runtime_response_candidate_for_route` (general rotation).
///
/// Profiles in `excluded_profiles` are never returned. Errors propagate only
/// from poisoned-lock failures inside the helpers.
fn select_runtime_response_candidate_for_route(
    shared: &RuntimeRotationProxyShared,
    excluded_profiles: &BTreeSet<String>,
    strict_affinity_profile: Option<&str>,
    pinned_profile: Option<&str>,
    turn_state_profile: Option<&str>,
    session_profile: Option<&str>,
    discover_previous_response_owner: bool,
    previous_response_id: Option<&str>,
    route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
    // 1. Strict affinity: this profile or nobody.
    if let Some(profile_name) = strict_affinity_profile {
        if excluded_profiles.contains(profile_name) {
            return Ok(None);
        }
        if runtime_candidate_has_hard_affinity(
            route_kind,
            profile_name,
            strict_affinity_profile,
            pinned_profile,
            turn_state_profile,
            session_profile,
            false,
        ) {
            return Ok(Some(profile_name.to_string()));
        }
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)?;
        // On responses/websocket routes, an owner with no quota evidence at
        // all is still allowed through.
        let compact_followup_owner_without_probe = matches!(
            route_kind,
            RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
        ) && quota_source.is_none();
        if runtime_quota_summary_allows_soft_affinity(quota_summary, quota_source, route_kind)
            || compact_followup_owner_without_probe
        {
            return Ok(Some(profile_name.to_string()));
        }
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=compact_followup profile={} reason={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                profile_name,
                runtime_quota_soft_affinity_rejection_reason(
                    quota_summary,
                    quota_source,
                    route_kind,
                ),
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        // Strict affinity could not be honoured: do not rotate elsewhere.
        return Ok(None);
    }
    // 2. Pinned (previous-response) affinity; falls through on quota rejection.
    if let Some(profile_name) = pinned_profile.filter(|name| !excluded_profiles.contains(*name)) {
        if runtime_previous_response_affinity_is_bound(
            shared,
            previous_response_id,
            pinned_profile,
        )? {
            return Ok(Some(profile_name.to_string()));
        }
        if runtime_candidate_has_hard_affinity(
            route_kind,
            profile_name,
            strict_affinity_profile,
            pinned_profile,
            turn_state_profile,
            session_profile,
            runtime_previous_response_affinity_is_trusted(
                shared,
                previous_response_id,
                pinned_profile,
            )?,
        ) {
            return Ok(Some(profile_name.to_string()));
        }
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)?;
        if quota_summary.route_band <= RuntimeQuotaPressureBand::Critical
            && runtime_quota_precommit_guard_reason(quota_summary, route_kind).is_none()
        {
            return Ok(Some(profile_name.to_string()));
        }
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=pinned profile={} reason={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                profile_name,
                runtime_quota_pressure_band_reason(quota_summary.route_band),
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
    }
    // 3. Turn-state affinity; also falls through on quota rejection.
    if let Some(profile_name) = turn_state_profile.filter(|name| !excluded_profiles.contains(*name))
    {
        if runtime_candidate_has_hard_affinity(
            route_kind,
            profile_name,
            strict_affinity_profile,
            pinned_profile,
            turn_state_profile,
            session_profile,
            false,
        ) {
            return Ok(Some(profile_name.to_string()));
        }
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)?;
        if quota_summary.route_band <= RuntimeQuotaPressureBand::Critical
            && runtime_quota_precommit_guard_reason(quota_summary, route_kind).is_none()
        {
            return Ok(Some(profile_name.to_string()));
        }
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=turn_state profile={} reason={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                profile_name,
                runtime_quota_pressure_band_reason(quota_summary.route_band),
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
    }
    // 4. Owner discovery replaces the rest of the pipeline when requested.
    if discover_previous_response_owner {
        return next_runtime_previous_response_candidate(
            shared,
            excluded_profiles,
            previous_response_id,
            route_kind,
        );
    }
    // 5. Session affinity, with carve-outs for probe-less compact owners and
    //    websocket reuse of the current profile.
    if let Some(profile_name) = session_profile.filter(|name| !excluded_profiles.contains(*name)) {
        if runtime_candidate_has_hard_affinity(
            route_kind,
            profile_name,
            strict_affinity_profile,
            pinned_profile,
            turn_state_profile,
            session_profile,
            false,
        ) {
            return Ok(Some(profile_name.to_string()));
        }
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, profile_name, route_kind)?;
        let compact_session_owner_without_probe =
            route_kind == RuntimeRouteKind::Compact && quota_source.is_none();
        let websocket_reuse_current_profile = route_kind == RuntimeRouteKind::Websocket
            && quota_source.is_none()
            && runtime_proxy_current_profile(shared)? == profile_name;
        if runtime_quota_summary_allows_soft_affinity(quota_summary, quota_source, route_kind)
            || compact_session_owner_without_probe
            || websocket_reuse_current_profile
        {
            return Ok(Some(profile_name.to_string()));
        }
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_affinity route={} affinity=session profile={} reason={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                profile_name,
                runtime_quota_soft_affinity_rejection_reason(
                    quota_summary,
                    quota_source,
                    route_kind,
                ),
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
    }
    // 6. Keep the current profile if it passes the optimistic-reuse checks.
    if let Some(profile_name) =
        runtime_proxy_optimistic_current_candidate_for_route(shared, excluded_profiles, route_kind)?
    {
        return Ok(Some(profile_name));
    }
    // 7. General rotation.
    next_runtime_response_candidate_for_route(shared, excluded_profiles, route_kind)
}
/// Finds a profile to serve a request that continues `previous_response_id`.
///
/// The recorded owner of the response id is preferred when it is not
/// excluded, still configured, and not under a negative-cache entry for this
/// route. Otherwise profiles are scanned in the usual selection order,
/// skipping entries that are excluded, negative-cached for this response id,
/// quota-incompatible, in auth-failure backoff, or blocked by quota guards.
/// Returns `Ok(None)` when no profile survives the filters.
fn next_runtime_previous_response_candidate(
    shared: &RuntimeRotationProxyShared,
    excluded_profiles: &BTreeSet<String>,
    previous_response_id: Option<&str>,
    route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
    // Snapshot the shared state under the lock, then evaluate lock-free.
    let (state, current_profile, profile_health, profile_usage_auth) = {
        let runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        (
            runtime.state.clone(),
            runtime.current_profile.clone(),
            runtime.profile_health.clone(),
            runtime.profile_usage_auth.clone(),
        )
    };
    let now = Local::now().timestamp();
    // Fast path: the profile recorded as owner of the previous response.
    if let Some(previous_response_id) = previous_response_id
        && let Some(binding) = state.response_profile_bindings.get(previous_response_id)
    {
        let owner = binding.profile_name.as_str();
        if !excluded_profiles.contains(owner)
            && state.profiles.contains_key(owner)
            && !runtime_previous_response_negative_cache_active(
                &profile_health,
                previous_response_id,
                owner,
                route_kind,
                now,
            )
        {
            return Ok(Some(owner.to_string()));
        }
    }
    // Fallback: walk profiles in selection order, taking the first usable one.
    for name in active_profile_selection_order(&state, &current_profile) {
        if excluded_profiles.contains(&name) {
            continue;
        }
        // Skip profiles that previously failed to serve this response id.
        if let Some(previous_response_id) = previous_response_id
            && runtime_previous_response_negative_cache_active(
                &profile_health,
                previous_response_id,
                &name,
                route_kind,
                now,
            )
        {
            runtime_proxy_log(
                shared,
                format!(
                    "selection_skip_affinity route={} affinity=previous_response_discovery profile={} reason=negative_cache response_id={}",
                    runtime_route_kind_label(route_kind),
                    name,
                    previous_response_id,
                ),
            );
            continue;
        }
        let Some(profile) = state.profiles.get(&name) else {
            continue;
        };
        // Only quota-compatible auth can serve this traffic.
        if !read_auth_summary(&profile.codex_home).quota_compatible {
            continue;
        }
        if runtime_profile_auth_failure_active_with_auth_cache(
            &profile_health,
            &profile_usage_auth,
            &name,
            now,
        ) {
            runtime_proxy_log(
                shared,
                format!(
                    "selection_skip_affinity route={} affinity=previous_response_discovery profile={} reason=auth_failure_backoff",
                    runtime_route_kind_label(route_kind),
                    name,
                ),
            );
            continue;
        }
        let (quota_summary, quota_source) =
            runtime_profile_quota_summary_for_route(shared, &name, route_kind)?;
        // Skip profiles that would be blocked before the request is sent.
        if quota_summary.route_band == RuntimeQuotaPressureBand::Exhausted
            || runtime_quota_precommit_guard_reason(quota_summary, route_kind).is_some()
        {
            runtime_proxy_log(
                shared,
                format!(
                    "selection_skip_affinity route={} affinity=previous_response_discovery profile={} reason={} quota_source={} {}",
                    runtime_route_kind_label(route_kind),
                    name,
                    runtime_quota_precommit_guard_reason(quota_summary, route_kind).unwrap_or_else(
                        || runtime_quota_pressure_band_reason(quota_summary.route_band)
                    ),
                    quota_source
                        .map(runtime_quota_source_label)
                        .unwrap_or("unknown"),
                    runtime_quota_summary_log_fields(quota_summary),
                ),
            );
            continue;
        }
        return Ok(Some(name));
    }
    Ok(None)
}
/// Decides whether the currently selected profile may optimistically serve
/// another request on `route_kind` without rotating.
///
/// One lock acquisition snapshots the current profile's health, backoff,
/// circuit state, in-flight count, and auth status; all subsequent checks run
/// lock-free on the snapshot. Any tripped guard logs a
/// `selection_skip_current` line naming the first blocking reason (in guard
/// order) and returns `Ok(None)`. On success the profile is returned after a
/// half-open route-circuit probe slot has been reserved, if one is required.
fn runtime_proxy_optimistic_current_candidate_for_route(
    shared: &RuntimeRotationProxyShared,
    excluded_profiles: &BTreeSet<String>,
    route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
    let pressure_mode = runtime_proxy_pressure_mode_active(shared);
    // Gather everything about the current profile under a single lock.
    let (
        current_profile,
        codex_home,
        has_alternative_quota_compatible_profile,
        in_selection_backoff,
        circuit_open_until,
        inflight_count,
        health_score,
        performance_score,
        auth_failure_active,
    ) = {
        let mut runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        let now = Local::now().timestamp();
        prune_runtime_profile_selection_backoff(&mut runtime, now);
        if excluded_profiles.contains(&runtime.current_profile) {
            return Ok(None);
        }
        let Some(profile) = runtime.state.profiles.get(&runtime.current_profile) else {
            return Ok(None);
        };
        (
            runtime.current_profile.clone(),
            profile.codex_home.clone(),
            // Is there any OTHER profile with quota-compatible auth to rotate to?
            runtime.state.profiles.iter().any(|(name, profile)| {
                name != &runtime.current_profile
                    && read_auth_summary(&profile.codex_home).quota_compatible
            }),
            runtime_profile_in_selection_backoff(
                &runtime,
                &runtime.current_profile,
                route_kind,
                now,
            ),
            runtime_profile_route_circuit_open_until(
                &runtime,
                &runtime.current_profile,
                route_kind,
                now,
            ),
            runtime_profile_inflight_count(&runtime, &runtime.current_profile),
            runtime_profile_health_score(&runtime, &runtime.current_profile, now, route_kind),
            runtime_profile_route_performance_score(
                &runtime.profile_health,
                &runtime.current_profile,
                now,
                route_kind,
            ),
            runtime_profile_auth_failure_active_with_auth_cache(
                &runtime.profile_health,
                &runtime.profile_usage_auth,
                &runtime.current_profile,
                now,
            ),
        )
    };
    let (quota_summary, quota_source) =
        runtime_profile_quota_summary_for_route(shared, &current_profile, route_kind)?;
    let inflight_soft_limit = runtime_profile_inflight_soft_limit(route_kind, pressure_mode);
    // Quota-evidence requirements only bite when an alternative profile exists.
    let quota_evidence_required =
        has_alternative_quota_compatible_profile && quota_source.is_none();
    // Responses/websocket routes additionally insist on LIVE probe evidence
    // when there is somewhere else to rotate to.
    let live_quota_probe_required = has_alternative_quota_compatible_profile
        && matches!(
            route_kind,
            RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket
        )
        && !matches!(quota_source, Some(RuntimeQuotaSource::LiveProbe));
    // With no alternative available, an unknown quota band does not block.
    let unknown_quota_allowed = quota_summary.route_band == RuntimeQuotaPressureBand::Unknown
        && !has_alternative_quota_compatible_profile;
    let quota_band_blocks_current =
        quota_summary.route_band > RuntimeQuotaPressureBand::Healthy && !unknown_quota_allowed;
    if auth_failure_active
        || in_selection_backoff
        || circuit_open_until.is_some()
        || health_score > 0
        || performance_score > 0
        || quota_evidence_required
        || live_quota_probe_required
        || inflight_count >= inflight_soft_limit
        || quota_band_blocks_current
    {
        // Report only the first blocking reason, in the same order as above.
        let reason = if auth_failure_active {
            "auth_failure_backoff"
        } else if in_selection_backoff {
            "selection_backoff"
        } else if circuit_open_until.is_some() {
            "route_circuit_open"
        } else if health_score > 0 {
            "profile_health"
        } else if performance_score > 0 {
            "profile_performance"
        } else if quota_evidence_required {
            "quota_probe_unavailable"
        } else if live_quota_probe_required {
            if matches!(quota_source, Some(RuntimeQuotaSource::PersistedSnapshot)) {
                "stale_persisted_quota"
            } else {
                "quota_probe_unavailable"
            }
        } else if quota_band_blocks_current {
            runtime_quota_pressure_band_reason(quota_summary.route_band)
        } else {
            "profile_inflight_soft_limit"
        };
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_current route={} profile={} reason={} inflight={} health={} performance={} soft_limit={} circuit_until={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                current_profile,
                reason,
                inflight_count,
                health_score,
                performance_score,
                inflight_soft_limit,
                circuit_open_until.unwrap_or_default(),
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(None);
    }
    // Auth must be quota-compatible for the current profile itself.
    if !read_auth_summary(&codex_home).quota_compatible {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_current route={} profile={} reason=auth_not_quota_compatible",
                runtime_route_kind_label(route_kind),
                current_profile
            ),
        );
        return Ok(None);
    }
    runtime_proxy_log(
        shared,
        format!(
            "selection_keep_current route={} profile={} inflight={} health={} performance={} quota_source={} {}",
            runtime_route_kind_label(route_kind),
            current_profile,
            inflight_count,
            health_score,
            performance_score,
            quota_source
                .map(runtime_quota_source_label)
                .unwrap_or("unknown"),
            runtime_quota_summary_log_fields(quota_summary),
        ),
    );
    // A half-open route circuit admits only one probe request at a time; if
    // the slot cannot be reserved, skip the current profile for this request.
    if !reserve_runtime_profile_route_circuit_half_open_probe(shared, &current_profile, route_kind)?
    {
        runtime_proxy_log(
            shared,
            format!(
                "selection_skip_current route={} profile={} reason=route_circuit_half_open_probe_wait inflight={} health={} performance={} quota_source={} {}",
                runtime_route_kind_label(route_kind),
                current_profile,
                inflight_count,
                health_score,
                performance_score,
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(None);
    }
    Ok(Some(current_profile))
}
/// Handles an incoming local websocket upgrade request for the responses
/// endpoint: validates the path and the `Sec-WebSocket-Key` header, completes
/// the 101 upgrade, then runs the full proxy session on the upgraded stream.
///
/// Failures are reported to the client (404/400 text responses before the
/// upgrade, a best-effort close afterwards) rather than propagated.
fn proxy_runtime_responses_websocket_request(
    request_id: u64,
    request: tiny_http::Request,
    shared: &RuntimeRotationProxyShared,
) {
    // Only Codex responses endpoints are eligible for the websocket proxy.
    if !is_runtime_responses_path(request.url()) {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket unsupported_path={}",
                request.url()
            ),
        );
        let _ = request.respond(build_runtime_proxy_text_response(
            404,
            "Runtime websocket proxy only supports Codex responses endpoints.",
        ));
        return;
    }
    // Snapshot the handshake request before `upgrade()` consumes the request.
    let handshake_request = capture_runtime_proxy_websocket_request(&request);
    let Some(websocket_key) = runtime_proxy_websocket_key(&handshake_request) else {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket missing_sec_websocket_key path={}",
                handshake_request.path_and_query
            ),
        );
        let _ = request.respond(build_runtime_proxy_text_response(
            400,
            "Missing Sec-WebSocket-Key header for runtime auto-rotate websocket proxy.",
        ));
        return;
    };
    // Complete the upgrade and wrap the raw stream as a server-side websocket
    // (the handshake has already been answered by the 101 response).
    let response = build_runtime_proxy_websocket_upgrade_response(&websocket_key);
    let upgraded = request.upgrade("websocket", response);
    let mut local_socket = WsSocket::from_raw_socket(upgraded, WsRole::Server, None);
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=websocket upgraded path={} previous_response_id={:?} turn_state={:?}",
            handshake_request.path_and_query,
            runtime_request_previous_response_id(&handshake_request),
            runtime_request_turn_state(&handshake_request)
        ),
    );
    if let Err(err) = run_runtime_proxy_websocket_session(
        request_id,
        &mut local_socket,
        &handshake_request,
        shared,
    ) {
        runtime_proxy_log(
            shared,
            format!("request={request_id} transport=websocket session_error={err:#}"),
        );
        // Attempt a graceful close only when the transport itself still works.
        if !is_runtime_proxy_transport_failure(&err) {
            let _ = local_socket.close(None);
        }
    }
}
/// Extracts the trimmed `Sec-WebSocket-Key` header value from a captured
/// handshake request; returns `None` when no header carries a non-blank key.
fn runtime_proxy_websocket_key(request: &RuntimeProxyRequest) -> Option<String> {
    for (name, value) in request.headers.iter() {
        if !name.eq_ignore_ascii_case("Sec-WebSocket-Key") {
            continue;
        }
        let key = value.trim();
        // A blank value does not satisfy the search; keep scanning.
        if !key.is_empty() {
            return Some(key.to_string());
        }
    }
    None
}
/// Builds the 101 Switching Protocols response for a websocket upgrade,
/// deriving `Sec-WebSocket-Accept` from the client's key.
fn build_runtime_proxy_websocket_upgrade_response(key: &str) -> TinyResponse<std::io::Empty> {
    let accept = derive_accept_key(key.as_bytes());
    let headers = [
        TinyHeader::from_bytes("Upgrade", "websocket").expect("upgrade header"),
        TinyHeader::from_bytes("Connection", "Upgrade").expect("connection header"),
        TinyHeader::from_bytes("Sec-WebSocket-Accept", accept.as_bytes()).expect("accept header"),
    ];
    let mut response = TinyResponse::new_empty(TinyStatusCode(101));
    for header in headers {
        response = response.with_header(header);
    }
    response
}
/// Per-connection cache of upstream websocket state, letting consecutive
/// turns on one local websocket reuse an already-open upstream socket.
#[derive(Default)]
struct RuntimeWebsocketSessionState {
    // Open upstream socket left over from the previous turn, if any.
    upstream_socket: Option<RuntimeUpstreamWebSocket>,
    // Profile that owns `upstream_socket`.
    profile_name: Option<String>,
    // Turn-state value the cached socket was stored with.
    turn_state: Option<String>,
    // In-flight guard held for the cached socket's profile while it is cached.
    inflight_guard: Option<RuntimeProfileInFlightGuard>,
    // When the last terminal event was recorded via `store`.
    last_terminal_at: Option<Instant>,
}
impl RuntimeWebsocketSessionState {
    /// True when the cached upstream socket can serve another turn: a socket
    /// is present, it belongs to `profile_name`, and any requested turn-state
    /// override matches the cached turn-state.
    fn can_reuse(&self, profile_name: &str, turn_state_override: Option<&str>) -> bool {
        if self.upstream_socket.is_none() {
            return false;
        }
        if self.profile_name.as_deref() != Some(profile_name) {
            return false;
        }
        match turn_state_override {
            None => true,
            Some(value) => self.turn_state.as_deref() == Some(value),
        }
    }
    /// Moves the cached upstream socket out of the session, leaving `None`.
    fn take_socket(&mut self) -> Option<RuntimeUpstreamWebSocket> {
        self.upstream_socket.take()
    }
    /// Time elapsed since the last recorded terminal event, if any.
    fn last_terminal_elapsed(&self) -> Option<Duration> {
        self.last_terminal_at.map(|timestamp| timestamp.elapsed())
    }
    /// Caches an upstream socket with its owning profile and turn-state and
    /// stamps the terminal timestamp. An existing in-flight guard is kept
    /// when the caller supplies no replacement.
    fn store(
        &mut self,
        socket: RuntimeUpstreamWebSocket,
        profile_name: &str,
        turn_state: Option<String>,
        inflight_guard: Option<RuntimeProfileInFlightGuard>,
    ) {
        self.upstream_socket = Some(socket);
        self.profile_name = Some(profile_name.to_string());
        self.turn_state = turn_state;
        self.last_terminal_at = Some(Instant::now());
        // Only overwrite the guard when a new one was actually handed over.
        if inflight_guard.is_some() {
            self.inflight_guard = inflight_guard;
        }
    }
    /// Drops the cached socket, profile, turn-state, and guard without an
    /// explicit close handshake; `last_terminal_at` is left untouched.
    fn reset(&mut self) {
        self.upstream_socket = None;
        self.profile_name = None;
        self.turn_state = None;
        self.inflight_guard = None;
    }
    /// Closes the upstream socket (best effort) and clears the cached
    /// profile, turn-state, and guard; `last_terminal_at` is left untouched.
    fn close(&mut self) {
        if let Some(mut socket) = self.upstream_socket.take() {
            let _ = socket.close(None);
        }
        self.profile_name = None;
        self.turn_state = None;
        self.inflight_guard = None;
    }
}
/// Registers `weight` units of in-flight work (derived from `context`)
/// against `profile_name` and returns a guard carrying that weight.
///
/// Errors only when the shared runtime lock is poisoned.
fn acquire_runtime_profile_inflight_guard(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    context: &'static str,
) -> Result<RuntimeProfileInFlightGuard> {
    let weight = runtime_profile_inflight_weight(context);
    let count;
    {
        // Bump the per-profile counter under the lock; saturating add keeps a
        // pathological counter from wrapping.
        let mut runtime = shared
            .runtime
            .lock()
            .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
        let slot = runtime
            .profile_inflight
            .entry(profile_name.to_string())
            .or_insert(0);
        *slot = slot.saturating_add(weight);
        count = *slot;
    }
    // Log outside the lock so a slow log sink cannot extend the critical section.
    runtime_proxy_log(
        shared,
        format!(
            "profile_inflight profile={profile_name} count={count} weight={weight} context={context} event=acquire"
        ),
    );
    Ok(RuntimeProfileInFlightGuard {
        shared: shared.clone(),
        profile_name: profile_name.to_string(),
        context,
        weight,
    })
}
/// Drives one local websocket connection: reads frames from the client and
/// forwards each text frame upstream via `proxy_runtime_websocket_text_message`,
/// keeping per-session upstream state in `RuntimeWebsocketSessionState`.
///
/// Returns `Ok(())` on a clean local close or silent disconnect; returns an
/// error on any other local read failure (after closing cached upstream state).
fn run_runtime_proxy_websocket_session(
    session_id: u64,
    local_socket: &mut RuntimeLocalWebSocket,
    handshake_request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
) -> Result<()> {
    let mut websocket_session = RuntimeWebsocketSessionState::default();
    loop {
        match local_socket.read() {
            Ok(WsMessage::Text(text)) => {
                // Each inbound text frame becomes its own proxied request.
                let message_id = runtime_proxy_next_request_id(shared);
                let request_metadata = parse_runtime_websocket_request_metadata(text.as_ref());
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={message_id} websocket_session={session_id} inbound_text previous_response_id={:?} turn_state={:?} bytes={}",
                        request_metadata.previous_response_id,
                        runtime_request_turn_state(handshake_request),
                        text.len()
                    ),
                );
                proxy_runtime_websocket_text_message(
                    session_id,
                    message_id,
                    local_socket,
                    handshake_request,
                    text.as_ref(),
                    &request_metadata,
                    shared,
                    &mut websocket_session,
                )?;
            }
            Ok(WsMessage::Binary(_)) => {
                // Binary frames are unsupported: report the error to the
                // client and keep the session alive.
                runtime_proxy_log(
                    shared,
                    format!("websocket_session={session_id} inbound_binary_rejected"),
                );
                send_runtime_proxy_websocket_error(
                    local_socket,
                    400,
                    "invalid_request_error",
                    "Binary websocket messages are not supported by the runtime auto-rotate proxy.",
                )?;
            }
            Ok(WsMessage::Ping(payload)) => {
                // Keep-alive: answer pings directly.
                local_socket
                    .send(WsMessage::Pong(payload))
                    .context("failed to respond to runtime websocket ping")?;
            }
            Ok(WsMessage::Pong(_)) | Ok(WsMessage::Frame(_)) => {}
            Ok(WsMessage::Close(frame)) => {
                // Client-initiated close: tear down the upstream cache and
                // echo the close frame back.
                runtime_proxy_log(
                    shared,
                    format!("websocket_session={session_id} local_close"),
                );
                websocket_session.close();
                let _ = local_socket.close(frame);
                break;
            }
            Err(WsError::ConnectionClosed) | Err(WsError::AlreadyClosed) => {
                // The local peer went away without a close frame; treat it as
                // a normal end of session.
                runtime_proxy_log(
                    shared,
                    format!("websocket_session={session_id} local_connection_closed"),
                );
                websocket_session.close();
                break;
            }
            Err(err) => {
                // Any other read error is surfaced to the caller after cleanup.
                runtime_proxy_log(
                    shared,
                    format!("websocket_session={session_id} local_read_error={err}"),
                );
                websocket_session.close();
                return Err(anyhow::anyhow!(
                    "runtime websocket session ended unexpectedly: {err}"
                ));
            }
        }
    }
    Ok(())
}
fn proxy_runtime_websocket_text_message(
session_id: u64,
request_id: u64,
local_socket: &mut RuntimeLocalWebSocket,
handshake_request: &RuntimeProxyRequest,
request_text: &str,
request_metadata: &RuntimeWebsocketRequestMetadata,
shared: &RuntimeRotationProxyShared,
websocket_session: &mut RuntimeWebsocketSessionState,
) -> Result<()> {
let mut handshake_request = handshake_request.clone();
let mut request_text = request_text.to_string();
let request_requires_previous_response_affinity =
request_metadata.requires_previous_response_affinity;
let mut previous_response_id = request_metadata.previous_response_id.clone();
let mut request_turn_state = runtime_request_turn_state(&handshake_request);
let request_session_id = runtime_request_session_id(&handshake_request)
.or_else(|| request_metadata.session_id.clone());
let mut bound_profile = previous_response_id
.as_deref()
.map(|response_id| {
runtime_response_bound_profile(shared, response_id, RuntimeRouteKind::Websocket)
})
.transpose()?
.flatten();
let mut trusted_previous_response_affinity = runtime_previous_response_affinity_is_trusted(
shared,
previous_response_id.as_deref(),
bound_profile.as_deref(),
)?;
let mut turn_state_profile = request_turn_state
.as_deref()
.map(|value| runtime_turn_state_bound_profile(shared, value))
.transpose()?
.flatten();
let mut compact_followup_profile = if previous_response_id.is_none()
&& bound_profile.is_none()
&& turn_state_profile.is_none()
{
runtime_compact_followup_bound_profile(
shared,
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?
} else {
None
};
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} compact_followup_owner profile={profile_name} source={source}"
),
);
}
let mut session_profile = if previous_response_id.is_none()
&& bound_profile.is_none()
&& turn_state_profile.is_none()
&& compact_followup_profile.is_none()
{
websocket_session.profile_name.clone().or(request_session_id
.as_deref()
.map(|session_id| runtime_session_bound_profile(shared, session_id))
.transpose()?
.flatten())
} else {
None
};
let mut pinned_profile = bound_profile.clone().or(compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.clone()));
let mut excluded_profiles = BTreeSet::new();
let mut last_failure = None;
let mut previous_response_retry_candidate: Option<String> = None;
let mut previous_response_retry_index = 0usize;
let mut candidate_turn_state_retry_profile: Option<String> = None;
let mut candidate_turn_state_retry_value: Option<String> = None;
let mut saw_inflight_saturation = false;
let mut selection_started_at = Instant::now();
let mut selection_attempts = 0usize;
let mut previous_response_fresh_fallback_used = false;
let mut saw_previous_response_not_found = false;
let mut websocket_reuse_fresh_retry_profiles = BTreeSet::new();
loop {
let pressure_mode =
runtime_proxy_pressure_mode_active_for_route(shared, RuntimeRouteKind::Websocket);
if runtime_proxy_precommit_budget_exhausted(
selection_started_at,
selection_attempts,
runtime_proxy_has_continuation_priority(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
),
pressure_mode,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} precommit_budget_exhausted attempts={selection_attempts} elapsed_ms={} pressure_mode={pressure_mode}",
selection_started_at.elapsed().as_millis()
),
);
if previous_response_id.is_some()
&& saw_previous_response_not_found
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=precommit_budget_exhausted"
),
);
request_text = fresh_request_text;
handshake_request = runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} compact_fresh_fallback_blocked profile={profile_name} source={source} reason=precommit_budget_exhausted"
),
);
match last_failure {
Some(RuntimeUpstreamFailureResponse::Websocket(payload)) => {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
}
_ if saw_inflight_saturation => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
)?;
}
_ => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
}
}
return Ok(());
}
if runtime_proxy_allows_direct_current_profile_fallback(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
saw_inflight_saturation,
last_failure.is_some(),
) {
if let Some(current_profile) = runtime_proxy_direct_current_fallback_profile(
shared,
&excluded_profiles,
RuntimeRouteKind::Websocket,
)? {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} direct_current_profile_fallback profile={current_profile} reason=precommit_budget_exhausted"
),
);
match attempt_runtime_websocket_request(
request_id,
local_socket,
&handshake_request,
&request_text,
previous_response_id.as_deref(),
request_session_id.as_deref(),
request_turn_state.as_deref(),
shared,
websocket_session,
¤t_profile,
request_turn_state.as_deref(),
)? {
RuntimeWebsocketAttempt::Delivered => return Ok(()),
RuntimeWebsocketAttempt::QuotaBlocked {
profile_name,
payload,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=quota_blocked via=direct_current_profile_fallback"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::Overloaded {
profile_name,
payload,
} => {
let overload_message =
extract_runtime_proxy_overload_message_from_websocket_payload(
&payload,
);
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overloaded route=websocket profile={profile_name} via=direct_current_profile_fallback message={}",
overload_message.as_deref().unwrap_or("-"),
),
);
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
let _ = bump_runtime_profile_health_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_OVERLOAD_HEALTH_PENALTY,
"websocket_overload",
);
let _ = bump_runtime_profile_bad_pairing_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_BAD_PAIRING_PENALTY,
"websocket_overload",
);
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overload_passthrough route=websocket profile={profile_name} reason=hard_affinity via=direct_current_profile_fallback"
),
);
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=upstream_overloaded via=direct_current_profile_fallback"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::PreviousResponseNotFound {
profile_name,
payload,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket route=websocket websocket_session={session_id} previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?} via=direct_current_profile_fallback",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref()
!= Some(profile_name.as_str())
{
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) = runtime_previous_response_retry_delay(
previous_response_retry_index,
)
{
previous_response_retry_index += 1;
last_failure =
Some(RuntimeUpstreamFailureResponse::Websocket(payload));
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry via=direct_current_profile_fallback",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity
{
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Websocket,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::ReuseWatchdogTripped { profile_name, .. } => {
excluded_profiles.insert(profile_name);
continue;
}
RuntimeWebsocketAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name} reason={reason} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason={reason} via=direct_current_profile_fallback"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
continue;
}
}
}
}
match last_failure {
Some(RuntimeUpstreamFailureResponse::Websocket(payload)) => {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
}
_ if saw_inflight_saturation => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
)?;
}
_ => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
}
}
return Ok(());
}
let Some(candidate_name) = select_runtime_response_candidate_for_route(
shared,
&excluded_profiles,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
previous_response_id.is_some(),
previous_response_id.as_deref(),
RuntimeRouteKind::Websocket,
)?
else {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} candidate_exhausted last_failure={}",
match &last_failure {
Some(RuntimeUpstreamFailureResponse::Websocket(_)) => "websocket",
Some(RuntimeUpstreamFailureResponse::Http(_)) => "http",
None => "none",
}
),
);
if previous_response_id.is_some()
&& saw_previous_response_not_found
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=candidate_exhausted"
),
);
request_text = fresh_request_text;
handshake_request = runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} compact_fresh_fallback_blocked profile={profile_name} source={source} reason=candidate_exhausted"
),
);
match last_failure {
Some(RuntimeUpstreamFailureResponse::Websocket(payload)) => {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
}
_ if saw_inflight_saturation => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
)?;
}
_ => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
}
}
return Ok(());
}
if runtime_proxy_allows_direct_current_profile_fallback(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
saw_inflight_saturation,
last_failure.is_some(),
) {
if let Some(current_profile) = runtime_proxy_direct_current_fallback_profile(
shared,
&excluded_profiles,
RuntimeRouteKind::Websocket,
)? {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} direct_current_profile_fallback profile={current_profile} reason=candidate_exhausted"
),
);
match attempt_runtime_websocket_request(
request_id,
local_socket,
&handshake_request,
&request_text,
previous_response_id.as_deref(),
request_session_id.as_deref(),
request_turn_state.as_deref(),
shared,
websocket_session,
¤t_profile,
request_turn_state.as_deref(),
)? {
RuntimeWebsocketAttempt::Delivered => return Ok(()),
RuntimeWebsocketAttempt::QuotaBlocked {
profile_name,
payload,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=quota_blocked via=direct_current_profile_fallback"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::Overloaded {
profile_name,
payload,
} => {
let overload_message =
extract_runtime_proxy_overload_message_from_websocket_payload(
&payload,
);
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overloaded route=websocket profile={profile_name} via=direct_current_profile_fallback message={}",
overload_message.as_deref().unwrap_or("-"),
),
);
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
let _ = bump_runtime_profile_health_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_OVERLOAD_HEALTH_PENALTY,
"websocket_overload",
);
let _ = bump_runtime_profile_bad_pairing_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_BAD_PAIRING_PENALTY,
"websocket_overload",
);
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overload_passthrough route=websocket profile={profile_name} reason=hard_affinity via=direct_current_profile_fallback"
),
);
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::PreviousResponseNotFound {
profile_name,
payload,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket route=websocket websocket_session={session_id} previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?} via=direct_current_profile_fallback",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref()
!= Some(profile_name.as_str())
{
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) = runtime_previous_response_retry_delay(
previous_response_retry_index,
)
{
previous_response_retry_index += 1;
last_failure =
Some(RuntimeUpstreamFailureResponse::Websocket(payload));
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry via=direct_current_profile_fallback",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity
{
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Websocket,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
continue;
}
RuntimeWebsocketAttempt::ReuseWatchdogTripped { profile_name, .. } => {
excluded_profiles.insert(profile_name);
continue;
}
RuntimeWebsocketAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name} reason={reason} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason={reason} via=direct_current_profile_fallback"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
continue;
}
}
}
}
match last_failure {
Some(RuntimeUpstreamFailureResponse::Websocket(payload)) => {
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
}
_ if saw_inflight_saturation => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
)?;
}
_ => {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
}
}
return Ok(());
};
selection_attempts = selection_attempts.saturating_add(1);
let turn_state_override =
if candidate_turn_state_retry_profile.as_deref() == Some(candidate_name.as_str()) {
candidate_turn_state_retry_value.as_deref()
} else {
request_turn_state.as_deref()
};
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} candidate={} pinned={:?} turn_state_profile={:?} turn_state_override={:?} excluded_count={}",
candidate_name,
pinned_profile,
turn_state_profile,
turn_state_override,
excluded_profiles.len()
),
);
let session_affinity_candidate =
session_profile.as_deref() == Some(candidate_name.as_str());
if previous_response_id.is_none()
&& pinned_profile.is_none()
&& turn_state_profile.is_none()
&& !session_affinity_candidate
&& runtime_profile_inflight_hard_limited_for_context(
shared,
&candidate_name,
"websocket_session",
)?
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} profile_inflight_saturated profile={candidate_name} hard_limit={}",
runtime_proxy_profile_inflight_hard_limit(),
),
);
excluded_profiles.insert(candidate_name);
saw_inflight_saturation = true;
continue;
}
match attempt_runtime_websocket_request(
request_id,
local_socket,
&handshake_request,
&request_text,
previous_response_id.as_deref(),
request_session_id.as_deref(),
request_turn_state.as_deref(),
shared,
websocket_session,
&candidate_name,
turn_state_override,
)? {
RuntimeWebsocketAttempt::Delivered => return Ok(()),
RuntimeWebsocketAttempt::QuotaBlocked {
profile_name,
payload,
} => {
let quota_message =
extract_runtime_proxy_quota_message_from_websocket_payload(&payload);
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked profile={profile_name}"
),
);
mark_runtime_profile_quota_quarantine(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
quota_message.as_deref(),
)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_usage_limit_passthrough route=websocket profile={profile_name} reason=hard_affinity"
),
);
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name}"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=quota_blocked"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
}
RuntimeWebsocketAttempt::Overloaded {
profile_name,
payload,
} => {
let overload_message =
extract_runtime_proxy_overload_message_from_websocket_payload(&payload);
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overloaded route=websocket profile={profile_name} message={}",
overload_message.as_deref().unwrap_or("-"),
),
);
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
let _ = bump_runtime_profile_health_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_OVERLOAD_HEALTH_PENALTY,
"websocket_overload",
);
let _ = bump_runtime_profile_bad_pairing_score(
shared,
&profile_name,
RuntimeRouteKind::Websocket,
RUNTIME_PROFILE_BAD_PAIRING_PENALTY,
"websocket_overload",
);
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} upstream_overload_passthrough route=websocket profile={profile_name} reason=hard_affinity"
),
);
forward_runtime_proxy_websocket_error(local_socket, &payload)?;
return Ok(());
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=upstream_overloaded"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
}
RuntimeWebsocketAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} local_selection_blocked profile={profile_name} reason={reason}"
),
);
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Websocket,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
send_runtime_proxy_websocket_error(
local_socket,
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)?;
return Ok(());
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} quota_blocked_affinity_released profile={profile_name} reason={reason}"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason={reason}"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
}
RuntimeWebsocketAttempt::ReuseWatchdogTripped {
profile_name,
event,
} => {
let reuse_terminal_idle = websocket_session.last_terminal_elapsed();
let retry_same_profile_with_fresh_connect = !websocket_reuse_fresh_retry_profiles
.contains(&profile_name)
&& (bound_profile.as_deref() == Some(profile_name.as_str())
|| turn_state_profile.as_deref() == Some(profile_name.as_str())
|| compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
|| (request_session_id.is_some()
&& session_profile.as_deref() == Some(profile_name.as_str())));
let reuse_failed_bound_previous_response = previous_response_id.is_some()
&& !previous_response_fresh_fallback_used
&& (bound_profile.as_deref() == Some(profile_name.as_str())
|| pinned_profile.as_deref() == Some(profile_name.as_str()));
let nonreplayable_previous_response_reuse = previous_response_id.is_some()
&& !previous_response_fresh_fallback_used
&& turn_state_override.is_none();
let stale_previous_response_reuse = nonreplayable_previous_response_reuse
&& turn_state_override.is_none()
&& reuse_terminal_idle.is_some_and(|elapsed| {
elapsed
>= Duration::from_millis(
runtime_proxy_websocket_previous_response_reuse_stale_ms(),
)
});
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} websocket_reuse_watchdog_timeout profile={profile_name} event={event}"
),
);
if nonreplayable_previous_response_reuse {
if stale_previous_response_reuse {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} websocket_reuse_stale_previous_response_blocked profile={profile_name} event={event} elapsed_ms={} threshold_ms={}",
reuse_terminal_idle
.map(|elapsed| elapsed.as_millis())
.unwrap_or(0),
runtime_proxy_websocket_previous_response_reuse_stale_ms(),
),
);
} else {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} websocket_reuse_previous_response_blocked profile={profile_name} event={event} reason=missing_turn_state elapsed_ms={}",
reuse_terminal_idle
.map(|elapsed| elapsed.as_millis())
.unwrap_or(0),
),
);
}
return Err(anyhow::anyhow!(
"runtime websocket upstream closed before response.completed for previous_response_id continuation without replayable turn_state: profile={profile_name} event={event}"
));
}
if retry_same_profile_with_fresh_connect {
websocket_reuse_fresh_retry_profiles.insert(profile_name.clone());
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} websocket_reuse_owner_fresh_retry profile={profile_name} event={event}"
),
);
continue;
}
if reuse_failed_bound_previous_response
&& !request_requires_previous_response_affinity
&& let Some(fresh_request_text) =
runtime_request_text_without_previous_response_id(&request_text)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_fresh_fallback reason=websocket_reuse_watchdog"
),
);
request_text = fresh_request_text;
handshake_request =
runtime_request_without_turn_state_header(&handshake_request);
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
websocket_reuse_fresh_retry_profiles.clear();
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
excluded_profiles.insert(profile_name);
}
RuntimeWebsocketAttempt::PreviousResponseNotFound {
profile_name,
payload,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket route=websocket websocket_session={session_id} previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?}",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref() != Some(profile_name.as_str()) {
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) =
runtime_previous_response_retry_delay(previous_response_retry_index)
{
previous_response_retry_index += 1;
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity {
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Websocket,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} websocket_session={session_id} previous_response_affinity_released profile={profile_name}"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
trusted_previous_response_affinity = false;
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Websocket(payload));
}
}
}
}
/// Performs one delivery attempt of a websocket request against a single
/// candidate profile, either reusing the session's stored upstream socket or
/// opening a fresh upstream connection.
///
/// Returns a `RuntimeWebsocketAttempt` that tells the caller's selection loop
/// how to proceed:
/// - `Delivered` — a terminal event was seen; every frame was forwarded to
///   `local_socket` and the upstream socket was stored back for reuse.
/// - `QuotaBlocked` / `Overloaded` — the upstream rejected the request before
///   the profile selection was committed.
/// - `PreviousResponseNotFound` — the upstream could not resolve the
///   continuation id (carries the last observed turn state for replay).
/// - `LocalSelectionBlocked` — local pre-send quota guards skipped this
///   profile without contacting the upstream.
/// - `ReuseWatchdogTripped` — a *reused* connection failed before commit, so
///   the caller may retry (fresh connects that fail are returned as `Err`
///   instead, since the upstream may already have started work).
fn attempt_runtime_websocket_request(
    request_id: u64,
    local_socket: &mut RuntimeLocalWebSocket,
    handshake_request: &RuntimeProxyRequest,
    request_text: &str,
    request_previous_response_id: Option<&str>,
    request_session_id: Option<&str>,
    request_turn_state: Option<&str>,
    shared: &RuntimeRotationProxyShared,
    websocket_session: &mut RuntimeWebsocketSessionState,
    profile_name: &str,
    turn_state_override: Option<&str>,
) -> Result<RuntimeWebsocketAttempt> {
    // Promote this profile to the committed/default slot only for fresh turns:
    // requests carrying any affinity hint must not steal the default.
    let promote_committed_profile = request_previous_response_id.is_none()
        && request_session_id.is_none()
        && request_turn_state.is_none();
    let (initial_quota_summary, initial_quota_source) =
        runtime_profile_quota_summary_for_route(shared, profile_name, RuntimeRouteKind::Websocket)?;
    // Guard #1: for continuation requests (any affinity hint present), a
    // quota summary that comes from a persisted snapshot AND already trips the
    // pre-commit guard is enough to skip this profile without probing.
    if (request_previous_response_id.is_some()
        || request_session_id.is_some()
        || request_turn_state.is_some())
        && matches!(
            initial_quota_source,
            Some(RuntimeQuotaSource::PersistedSnapshot)
        )
        && let Some(reason) =
            runtime_quota_precommit_guard_reason(initial_quota_summary, RuntimeRouteKind::Websocket)
    {
        websocket_session.close();
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket websocket_pre_send_skip profile={profile_name} reason={reason} quota_source={} {}",
                initial_quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(initial_quota_summary),
            ),
        );
        return Ok(RuntimeWebsocketAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason,
        });
    }
    let has_alternative_quota_profile =
        runtime_has_alternative_quota_compatible_profile(shared, profile_name)?;
    // Reprobe so the pre-commit decision is based on fresh quota data.
    let (quota_summary, quota_source) = ensure_runtime_profile_precommit_quota_ready(
        shared,
        profile_name,
        RuntimeRouteKind::Websocket,
        "websocket_precommit_reprobe",
    )?;
    // Guard #2: even after the reprobe the quota windows may be unavailable;
    // skip only when at least one alternative quota-compatible profile exists,
    // otherwise fall through and let the upstream decide.
    if runtime_quota_summary_requires_live_source_after_probe(
        quota_summary,
        quota_source,
        RuntimeRouteKind::Websocket,
    ) && has_alternative_quota_profile
    {
        websocket_session.close();
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket websocket_pre_send_skip profile={profile_name} reason=quota_windows_unavailable_after_reprobe quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeWebsocketAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason: "quota_windows_unavailable_after_reprobe",
        });
    }
    // Guard #3: the post-reprobe summary itself trips the pre-commit guard.
    if let Some(reason) =
        runtime_quota_precommit_guard_reason(quota_summary, RuntimeRouteKind::Websocket)
    {
        websocket_session.close();
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket websocket_pre_send_skip profile={profile_name} reason={reason} quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeWebsocketAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason,
        });
    }
    // Decide between reusing the session's stored upstream socket and opening
    // a fresh connection. `reuse_started_at` drives the reuse-watchdog logs.
    let reuse_existing_session = websocket_session.can_reuse(profile_name, turn_state_override);
    let reuse_started_at = reuse_existing_session.then(Instant::now);
    let precommit_started_at = Instant::now();
    let (mut upstream_socket, mut upstream_turn_state, mut inflight_guard) =
        if reuse_existing_session {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=websocket websocket_reuse_start profile={profile_name} turn_state_override={:?}",
                    turn_state_override
                ),
            );
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=websocket upstream_session=reuse profile={profile_name} turn_state_override={:?}",
                    turn_state_override
                ),
            );
            // No new inflight guard on reuse: the guard was handed to the
            // session in `websocket_session.store(...)` when the socket was
            // parked, so the session still owns it.
            (
                websocket_session
                    .take_socket()
                    .expect("runtime websocket session should keep its upstream socket"),
                websocket_session.turn_state.clone(),
                None,
            )
        } else {
            websocket_session.close();
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=websocket upstream_session=connect profile={profile_name} turn_state_override={:?}",
                    turn_state_override
                ),
            );
            match connect_runtime_proxy_upstream_websocket(
                request_id,
                handshake_request,
                shared,
                profile_name,
                turn_state_override,
            )? {
                // Fresh connect succeeded: acquire an inflight guard that will
                // later be moved into the session on a terminal event.
                RuntimeWebsocketConnectResult::Connected { socket, turn_state } => (
                    socket,
                    turn_state,
                    Some(acquire_runtime_profile_inflight_guard(
                        shared,
                        profile_name,
                        "websocket_session",
                    )?),
                ),
                RuntimeWebsocketConnectResult::QuotaBlocked(payload) => {
                    return Ok(RuntimeWebsocketAttempt::QuotaBlocked {
                        profile_name: profile_name.to_string(),
                        payload,
                    });
                }
                RuntimeWebsocketConnectResult::Overloaded(payload) => {
                    return Ok(RuntimeWebsocketAttempt::Overloaded {
                        profile_name: profile_name.to_string(),
                        payload,
                    });
                }
            }
        };
    // Tighten the I/O timeout until the first upstream frame arrives; it is
    // widened back to the stream-idle timeout on the first frame below.
    runtime_set_upstream_websocket_io_timeout(
        &mut upstream_socket,
        Some(Duration::from_millis(
            runtime_proxy_websocket_precommit_progress_timeout_ms(),
        )),
    )
    .context("failed to configure runtime websocket pre-commit timeout")?;
    if let Err(err) = upstream_socket.send(WsMessage::Text(request_text.to_string().into())) {
        let _ = upstream_socket.close(None);
        websocket_session.reset();
        let transport_error =
            anyhow::anyhow!("failed to send runtime websocket request upstream: {err}");
        note_runtime_profile_transport_failure(
            shared,
            profile_name,
            RuntimeRouteKind::Websocket,
            "websocket_upstream_send",
            &transport_error,
        );
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=websocket upstream_send_error profile={profile_name} error={err}"
            ),
        );
        // A reused socket that fails on send is retryable via the watchdog;
        // a fresh socket that fails on send is a hard transport error.
        if reuse_existing_session {
            return Ok(RuntimeWebsocketAttempt::ReuseWatchdogTripped {
                profile_name: profile_name.to_string(),
                event: "upstream_send_error",
            });
        }
        return Err(transport_error);
    }
    // `committed` flips once the profile selection is recorded; before that,
    // quota/overload/not-found frames are converted into retryable outcomes.
    let mut committed = false;
    let mut first_upstream_frame_seen = false;
    // Frames held back on a reused session until commit (flushed in order).
    let mut buffered_precommit_text_frames = Vec::new();
    let mut previous_response_owner_recorded = false;
    let mut precommit_hold_count = 0usize;
    loop {
        match upstream_socket.read() {
            Ok(WsMessage::Text(text)) => {
                let text = text.to_string();
                // First frame of any kind restores the normal idle timeout.
                if !first_upstream_frame_seen {
                    first_upstream_frame_seen = true;
                    runtime_set_upstream_websocket_io_timeout(
                        &mut upstream_socket,
                        Some(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms())),
                    )
                    .context("failed to restore runtime websocket idle timeout")?;
                }
                let inspected = inspect_runtime_websocket_text_frame(text.as_str());
                // Track the freshest turn state advertised by the upstream so
                // it can be persisted and stored with the session.
                if let Some(turn_state) = inspected.turn_state.as_deref() {
                    remember_runtime_turn_state(
                        shared,
                        profile_name,
                        Some(turn_state),
                        RuntimeRouteKind::Websocket,
                    )?;
                    upstream_turn_state = Some(turn_state.to_string());
                }
                // Pre-commit only: error-style frames abort this attempt and
                // surface as retryable outcomes to the selection loop.
                if !committed {
                    match inspected.retry_kind {
                        Some(RuntimeWebsocketRetryInspectionKind::QuotaBlocked) => {
                            let _ = upstream_socket.close(None);
                            websocket_session.reset();
                            return Ok(RuntimeWebsocketAttempt::QuotaBlocked {
                                profile_name: profile_name.to_string(),
                                payload: RuntimeWebsocketErrorPayload::Text(text),
                            });
                        }
                        Some(RuntimeWebsocketRetryInspectionKind::Overloaded) => {
                            let _ = upstream_socket.close(None);
                            websocket_session.reset();
                            return Ok(RuntimeWebsocketAttempt::Overloaded {
                                profile_name: profile_name.to_string(),
                                payload: RuntimeWebsocketErrorPayload::Text(text),
                            });
                        }
                        Some(RuntimeWebsocketRetryInspectionKind::PreviousResponseNotFound) => {
                            let _ = upstream_socket.close(None);
                            websocket_session.reset();
                            return Ok(RuntimeWebsocketAttempt::PreviousResponseNotFound {
                                profile_name: profile_name.to_string(),
                                payload: RuntimeWebsocketErrorPayload::Text(text),
                                turn_state: upstream_turn_state.clone(),
                            });
                        }
                        None => {}
                    }
                }
                // Reused session, still pre-commit, and the inspector says to
                // hold: buffer the frame instead of forwarding it, so it can
                // be replayed after commit (or dropped if the attempt fails).
                // Only the first hold is logged to keep the log volume down.
                if reuse_existing_session && !committed && inspected.precommit_hold {
                    if precommit_hold_count == 0 {
                        runtime_proxy_log(
                            shared,
                            format!(
                                "request={request_id} transport=websocket precommit_hold profile={profile_name} event_type={}",
                                inspected.event_type.as_deref().unwrap_or("-")
                            ),
                        );
                    }
                    precommit_hold_count = precommit_hold_count.saturating_add(1);
                    buffered_precommit_text_frames.push(RuntimeBufferedWebsocketTextFrame {
                        text,
                        response_ids: inspected.response_ids,
                    });
                    continue;
                }
                // First forwardable frame: commit the profile selection,
                // persist affinity state, and flush any held frames.
                if !committed {
                    remember_runtime_session_id(
                        shared,
                        profile_name,
                        request_session_id.as_deref(),
                        RuntimeRouteKind::Websocket,
                    )?;
                    remember_runtime_turn_state(
                        shared,
                        profile_name,
                        upstream_turn_state.as_deref(),
                        RuntimeRouteKind::Websocket,
                    )?;
                    let _ = commit_runtime_proxy_profile_selection_with_policy(
                        shared,
                        profile_name,
                        RuntimeRouteKind::Websocket,
                        promote_committed_profile,
                    )?;
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=websocket committed profile={profile_name}"
                        ),
                    );
                    committed = true;
                    forward_runtime_proxy_buffered_websocket_text_frames(
                        local_socket,
                        &mut buffered_precommit_text_frames,
                        shared,
                        profile_name,
                        request_previous_response_id.as_deref(),
                        request_session_id.as_deref(),
                        request_turn_state.as_deref(),
                        &mut previous_response_owner_recorded,
                    )?;
                }
                // Record the response ids seen in this frame for later
                // previous_response_id routing.
                remember_runtime_websocket_response_ids(
                    shared,
                    profile_name,
                    request_previous_response_id.as_deref(),
                    request_session_id.as_deref(),
                    request_turn_state.as_deref(),
                    &inspected.response_ids,
                    &mut previous_response_owner_recorded,
                )?;
                // NOTE: the with_context closure runs only on error, so the
                // session reset happens only when forwarding actually fails.
                local_socket
                    .send(WsMessage::Text(text.into()))
                    .with_context(|| {
                        websocket_session.reset();
                        "failed to forward runtime websocket text frame"
                    })?;
                // Terminal event: the turn is complete. Park the upstream
                // socket (and the inflight guard, if we acquired one) in the
                // session for potential reuse, then report success.
                if inspected.terminal_event {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=websocket terminal_event profile={profile_name} event_type={} precommit_hold_count={precommit_hold_count}",
                            inspected.event_type.as_deref().unwrap_or("-"),
                        ),
                    );
                    websocket_session.store(
                        upstream_socket,
                        profile_name,
                        upstream_turn_state,
                        inflight_guard.take(),
                    );
                    return Ok(RuntimeWebsocketAttempt::Delivered);
                }
            }
            // Binary frames cannot be inspected, so a binary frame forces an
            // immediate commit (same bookkeeping as the text path) and is then
            // forwarded verbatim.
            Ok(WsMessage::Binary(payload)) => {
                if !first_upstream_frame_seen {
                    first_upstream_frame_seen = true;
                    runtime_set_upstream_websocket_io_timeout(
                        &mut upstream_socket,
                        Some(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms())),
                    )
                    .context("failed to restore runtime websocket idle timeout")?;
                }
                if !committed {
                    remember_runtime_session_id(
                        shared,
                        profile_name,
                        request_session_id.as_deref(),
                        RuntimeRouteKind::Websocket,
                    )?;
                    remember_runtime_turn_state(
                        shared,
                        profile_name,
                        upstream_turn_state.as_deref(),
                        RuntimeRouteKind::Websocket,
                    )?;
                    let _ = commit_runtime_proxy_profile_selection_with_policy(
                        shared,
                        profile_name,
                        RuntimeRouteKind::Websocket,
                        promote_committed_profile,
                    )?;
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=websocket committed_binary profile={profile_name}"
                        ),
                    );
                    committed = true;
                    forward_runtime_proxy_buffered_websocket_text_frames(
                        local_socket,
                        &mut buffered_precommit_text_frames,
                        shared,
                        profile_name,
                        request_previous_response_id.as_deref(),
                        request_session_id.as_deref(),
                        request_turn_state.as_deref(),
                        &mut previous_response_owner_recorded,
                    )?;
                }
                local_socket
                    .send(WsMessage::Binary(payload))
                    .with_context(|| {
                        websocket_session.reset();
                        "failed to forward runtime websocket binary frame"
                    })?;
            }
            // Answer upstream pings directly; they are not forwarded locally.
            Ok(WsMessage::Ping(payload)) => {
                if !first_upstream_frame_seen {
                    first_upstream_frame_seen = true;
                    runtime_set_upstream_websocket_io_timeout(
                        &mut upstream_socket,
                        Some(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms())),
                    )
                    .context("failed to restore runtime websocket idle timeout")?;
                }
                upstream_socket
                    .send(WsMessage::Pong(payload))
                    .context("failed to respond to upstream websocket ping")?;
            }
            // Pong and raw frames are ignored, but still count as progress
            // for the first-frame timeout switch.
            Ok(WsMessage::Pong(_)) | Ok(WsMessage::Frame(_)) => {
                if !first_upstream_frame_seen {
                    first_upstream_frame_seen = true;
                    runtime_set_upstream_websocket_io_timeout(
                        &mut upstream_socket,
                        Some(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms())),
                    )
                    .context("failed to restore runtime websocket idle timeout")?;
                }
            }
            // Upstream closed before a terminal event: retryable through the
            // watchdog on a reused, uncommitted session; fatal otherwise.
            Ok(WsMessage::Close(frame)) => {
                websocket_session.reset();
                if let Some(started_at) = reuse_started_at {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "websocket_reuse_watchdog profile={profile_name} event=upstream_close_before_terminal elapsed_ms={} committed={committed}",
                            started_at.elapsed().as_millis()
                        ),
                    );
                }
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=websocket upstream_close_before_completed profile={profile_name}"
                    ),
                );
                // Close-frame details are deliberately discarded.
                let _ = frame;
                let transport_error =
                    anyhow::anyhow!("runtime websocket upstream closed before response.completed");
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Websocket,
                    "websocket_upstream_close",
                    &transport_error,
                );
                if reuse_existing_session && !committed {
                    return Ok(RuntimeWebsocketAttempt::ReuseWatchdogTripped {
                        profile_name: profile_name.to_string(),
                        event: "upstream_close_before_commit",
                    });
                }
                return Err(transport_error);
            }
            // Socket already closed at the protocol level — same handling as
            // an explicit close frame.
            Err(WsError::ConnectionClosed) | Err(WsError::AlreadyClosed) => {
                websocket_session.reset();
                if let Some(started_at) = reuse_started_at {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "websocket_reuse_watchdog profile={profile_name} event=connection_closed elapsed_ms={} committed={committed}",
                            started_at.elapsed().as_millis()
                        ),
                    );
                }
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=websocket upstream_connection_closed profile={profile_name}"
                    ),
                );
                let transport_error =
                    anyhow::anyhow!("runtime websocket upstream closed before response.completed");
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Websocket,
                    "websocket_upstream_connection_closed",
                    &transport_error,
                );
                if reuse_existing_session && !committed {
                    return Ok(RuntimeWebsocketAttempt::ReuseWatchdogTripped {
                        profile_name: profile_name.to_string(),
                        event: "connection_closed_before_commit",
                    });
                }
                return Err(transport_error);
            }
            Err(err) => {
                websocket_session.reset();
                // Special case: a timeout before ANY upstream frame arrived
                // means the pre-commit progress deadline expired. On reuse
                // this trips the watchdog (retryable); otherwise it is fatal.
                if !committed && !first_upstream_frame_seen && runtime_websocket_timeout_error(&err)
                {
                    let elapsed_ms = precommit_started_at.elapsed().as_millis();
                    runtime_proxy_log(
                        shared,
                        format!(
                            "websocket_precommit_frame_timeout profile={profile_name} event=no_first_upstream_frame_before_deadline elapsed_ms={elapsed_ms} reuse={reuse_existing_session}"
                        ),
                    );
                    let transport_error = anyhow::anyhow!(
                        "runtime websocket upstream produced no first frame before the pre-commit deadline: {err}"
                    );
                    note_runtime_profile_transport_failure(
                        shared,
                        profile_name,
                        RuntimeRouteKind::Websocket,
                        "websocket_first_frame_timeout",
                        &transport_error,
                    );
                    if reuse_existing_session {
                        runtime_proxy_log(
                            shared,
                            format!(
                                "websocket_reuse_watchdog profile={profile_name} event=no_first_upstream_frame_before_deadline elapsed_ms={elapsed_ms} committed={committed}"
                            ),
                        );
                        return Ok(RuntimeWebsocketAttempt::ReuseWatchdogTripped {
                            profile_name: profile_name.to_string(),
                            event: "no_first_upstream_frame_before_deadline",
                        });
                    }
                    return Err(transport_error);
                }
                // Any other read error after the first frame (or after
                // commit): retryable only on a reused, uncommitted session.
                if let Some(started_at) = reuse_started_at {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "websocket_reuse_watchdog profile={profile_name} event=read_error elapsed_ms={} committed={committed}",
                            started_at.elapsed().as_millis()
                        ),
                    );
                }
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=websocket upstream_read_error profile={profile_name} error={err}"
                    ),
                );
                let transport_error = anyhow::anyhow!(
                    "runtime websocket upstream failed before response.completed: {err}"
                );
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Websocket,
                    "websocket_upstream_read",
                    &transport_error,
                );
                if reuse_existing_session && !committed {
                    return Ok(RuntimeWebsocketAttempt::ReuseWatchdogTripped {
                        profile_name: profile_name.to_string(),
                        event: "upstream_read_error",
                    });
                }
                return Err(transport_error);
            }
        }
    }
}
fn connect_runtime_proxy_upstream_websocket(
request_id: u64,
handshake_request: &RuntimeProxyRequest,
shared: &RuntimeRotationProxyShared,
profile_name: &str,
turn_state_override: Option<&str>,
) -> Result<RuntimeWebsocketConnectResult> {
let runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?
.clone();
let auth = runtime_profile_usage_auth(shared, profile_name)?;
let upstream_url = runtime_proxy_upstream_websocket_url(
&runtime.upstream_base_url,
&handshake_request.path_and_query,
)?;
let mut request = upstream_url
.as_str()
.into_client_request()
.with_context(|| format!("failed to build runtime websocket request for {upstream_url}"))?;
for (name, value) in &handshake_request.headers {
if turn_state_override.is_some() && name.eq_ignore_ascii_case("x-codex-turn-state") {
continue;
}
if should_skip_runtime_request_header(name) {
continue;
}
let Ok(header_name) = WsHeaderName::from_bytes(name.as_bytes()) else {
continue;
};
let Ok(header_value) = WsHeaderValue::from_str(value) else {
continue;
};
request.headers_mut().insert(header_name, header_value);
}
if let Some(turn_state) = turn_state_override {
request.headers_mut().insert(
WsHeaderName::from_static("x-codex-turn-state"),
WsHeaderValue::from_str(turn_state)
.context("failed to encode websocket turn-state header")?,
);
}
request.headers_mut().insert(
WsHeaderName::from_static("authorization"),
WsHeaderValue::from_str(&format!("Bearer {}", auth.access_token))
.context("failed to encode websocket authorization header")?,
);
let user_agent =
runtime_proxy_effective_user_agent(&handshake_request.headers).unwrap_or("codex-cli");
request.headers_mut().insert(
WsHeaderName::from_static("user-agent"),
WsHeaderValue::from_str(user_agent).context("failed to encode websocket user-agent")?,
);
if let Some(account_id) = auth.account_id.as_deref() {
request.headers_mut().insert(
WsHeaderName::from_static("chatgpt-account-id"),
WsHeaderValue::from_str(account_id)
.context("failed to encode websocket account header")?,
);
}
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket upstream_connect_start profile={profile_name} url={upstream_url} turn_state_override={:?}",
turn_state_override
),
);
if runtime_take_fault_injection("PRODEX_RUNTIME_FAULT_UPSTREAM_CONNECT_ERROR_ONCE") {
let transport_error = anyhow::anyhow!("injected runtime websocket connect failure");
note_runtime_profile_transport_failure(
shared,
profile_name,
RuntimeRouteKind::Websocket,
"websocket_connect",
&transport_error,
);
return Err(transport_error);
}
let started_at = Instant::now();
match connect_runtime_proxy_upstream_websocket_with_timeout(request) {
Ok((socket, response, selected_addr, resolved_addrs, attempted_addrs)) => {
Ok(RuntimeWebsocketConnectResult::Connected {
socket,
turn_state: {
let turn_state = runtime_proxy_tungstenite_header_value(
response.headers(),
"x-codex-turn-state",
);
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket upstream_connect_ok profile={profile_name} status={} addr={} resolved_addrs={} attempted_addrs={} turn_state={:?}",
response.status().as_u16(),
selected_addr,
resolved_addrs,
attempted_addrs,
turn_state
),
);
note_runtime_profile_latency_observation(
shared,
profile_name,
RuntimeRouteKind::Websocket,
"connect",
started_at.elapsed().as_millis() as u64,
);
turn_state
},
})
}
Err(WsError::Http(response)) => {
let status = response.status().as_u16();
let body = response.body().clone().unwrap_or_default();
if matches!(status, 401 | 403) {
if status == 401 || extract_runtime_proxy_quota_message(&body).is_none() {
note_runtime_profile_auth_failure(
shared,
profile_name,
RuntimeRouteKind::Websocket,
status,
);
}
}
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=websocket upstream_connect_http profile={profile_name} status={status} body_bytes={}",
body.len()
),
);
if matches!(status, 403 | 429) && extract_runtime_proxy_quota_message(&body).is_some() {
return Ok(RuntimeWebsocketConnectResult::QuotaBlocked(
runtime_websocket_error_payload_from_http_body(&body),
));
}
if extract_runtime_proxy_overload_message(status, &body).is_some() {
return Ok(RuntimeWebsocketConnectResult::Overloaded(
runtime_websocket_error_payload_from_http_body(&body),
));
}
bail!("runtime websocket upstream rejected the handshake with HTTP {status}");
}
Err(err) => {
let failure_kind = runtime_transport_failure_kind_from_ws(&err);
log_runtime_upstream_connect_failure(
shared,
request_id,
"websocket",
profile_name,
failure_kind,
&err,
);
let transport_error =
anyhow::anyhow!("failed to connect runtime websocket upstream: {err}");
note_runtime_profile_transport_failure(
shared,
profile_name,
RuntimeRouteKind::Websocket,
"websocket_connect",
&transport_error,
);
Err(transport_error)
}
}
}
/// Classifies an upstream HTTP error body for replay over the local
/// websocket: empty, UTF-8 text, or opaque binary bytes.
fn runtime_websocket_error_payload_from_http_body(body: &[u8]) -> RuntimeWebsocketErrorPayload {
    match body {
        [] => RuntimeWebsocketErrorPayload::Empty,
        bytes => match std::str::from_utf8(bytes) {
            Ok(text) => RuntimeWebsocketErrorPayload::Text(text.to_string()),
            Err(_) => RuntimeWebsocketErrorPayload::Binary(bytes.to_vec()),
        },
    }
}
fn connect_runtime_proxy_upstream_websocket_with_timeout(
request: tungstenite::http::Request<()>,
) -> std::result::Result<
(
RuntimeUpstreamWebSocket,
tungstenite::handshake::client::Response,
SocketAddr,
usize,
usize,
),
WsError,
> {
let stream = connect_runtime_proxy_upstream_tcp_stream(request.uri())?;
let selected_addr = stream.selected_addr;
let resolved_addrs = stream.resolved_addrs;
let attempted_addrs = stream.attempted_addrs;
match client_tls_with_config(request, stream.stream, None, None) {
Ok((socket, response)) => Ok((
socket,
response,
selected_addr,
resolved_addrs,
attempted_addrs,
)),
Err(WsHandshakeError::Failure(err)) => Err(err),
Err(WsHandshakeError::Interrupted(_)) => {
unreachable!("blocking upstream websocket handshake should not interrupt")
}
}
}
/// Happy-eyeballs ordering for resolved addresses: split by family, lead with
/// IPv6 whenever at least one IPv6 address resolved, then strictly alternate
/// families until both lists are drained (relative order within a family is
/// preserved).
fn runtime_interleave_socket_addrs(addrs: Vec<SocketAddr>) -> Vec<SocketAddr> {
    let (v6, v4): (Vec<_>, Vec<_>) = addrs.into_iter().partition(SocketAddr::is_ipv6);
    let (lead, follow) = if v6.is_empty() { (v4, v6) } else { (v6, v4) };
    let mut ordered = Vec::with_capacity(lead.len().saturating_add(follow.len()));
    let mut lead_iter = lead.into_iter();
    let mut follow_iter = follow.into_iter();
    loop {
        match (lead_iter.next(), follow_iter.next()) {
            (None, None) => break,
            (first, second) => {
                ordered.extend(first);
                ordered.extend(second);
            }
        }
    }
    ordered
}
fn runtime_configure_upstream_tcp_stream(
stream: &TcpStream,
io_timeout: Duration,
) -> io::Result<()> {
stream.set_nodelay(true)?;
stream.set_read_timeout(Some(io_timeout))?;
stream.set_write_timeout(Some(io_timeout))?;
Ok(())
}
/// Spawns a fire-and-forget thread that attempts one TCP connect and reports
/// the outcome over `sender`. A dropped receiver simply discards the result
/// (the send error is ignored).
fn runtime_launch_websocket_tcp_connect_attempt(
    sender: mpsc::Sender<RuntimeWebsocketTcpAttemptResult>,
    addr: SocketAddr,
    connect_timeout: Duration,
) {
    thread::spawn(move || {
        let outcome = RuntimeWebsocketTcpAttemptResult {
            addr,
            result: TcpStream::connect_timeout(&addr, connect_timeout),
        };
        let _ = sender.send(outcome);
    });
}
/// Resolves the websocket URI's host and races TCP connection attempts across
/// the resolved addresses (happy-eyeballs style), returning the first stream
/// that connects.
///
/// Addresses are interleaved by family via `runtime_interleave_socket_addrs`.
/// While exactly one attempt is in flight and more addresses remain, a second
/// attempt is launched after `happy_eyeballs_delay`. Losing attempts keep
/// running on their detached threads; their results are discarded when this
/// function returns.
fn connect_runtime_proxy_upstream_tcp_stream(
    uri: &tungstenite::http::Uri,
) -> std::result::Result<RuntimeWebsocketTcpConnectSuccess, WsError> {
    let host = uri.host().ok_or(WsError::Url(WsUrlError::NoHostName))?;
    // Strip brackets from IPv6 literals ("[::1]" -> "::1") so host:port
    // resolution below accepts them.
    let host = if host.starts_with('[') && host.ends_with(']') {
        &host[1..host.len() - 1]
    } else {
        host
    };
    // Default ports by scheme: wss -> 443, anything else -> 80.
    let port = uri.port_u16().unwrap_or(match uri.scheme_str() {
        Some("wss") => 443,
        _ => 80,
    });
    let connect_timeout = Duration::from_millis(runtime_proxy_websocket_connect_timeout_ms());
    let io_timeout = Duration::from_millis(runtime_proxy_websocket_precommit_progress_timeout_ms());
    let happy_eyeballs_delay =
        Duration::from_millis(runtime_proxy_websocket_happy_eyeballs_delay_ms());
    let addrs = runtime_interleave_socket_addrs(
        (host, port)
            .to_socket_addrs()
            .map_err(WsError::Io)?
            .collect(),
    );
    if addrs.is_empty() {
        return Err(WsError::Url(WsUrlError::UnableToConnect(uri.to_string())));
    }
    let resolved_addrs = addrs.len();
    let (sender, receiver) = mpsc::channel::<RuntimeWebsocketTcpAttemptResult>();
    let mut next_index = 0usize;
    let mut attempted_addrs = 0usize;
    let mut in_flight = 0usize;
    let mut last_error = None;
    while next_index < addrs.len() || in_flight > 0 {
        // Launch the next attempt only when nothing is currently in flight.
        if in_flight == 0 && next_index < addrs.len() {
            runtime_launch_websocket_tcp_connect_attempt(
                sender.clone(),
                addrs[next_index],
                connect_timeout,
            );
            next_index += 1;
            attempted_addrs += 1;
            in_flight += 1;
        }
        // With exactly one attempt in flight and more addresses queued, wait
        // only happy_eyeballs_delay before starting a parallel attempt; in
        // every other state, block for the next attempt result.
        let next = if in_flight == 1 && next_index < addrs.len() && !happy_eyeballs_delay.is_zero()
        {
            match receiver.recv_timeout(happy_eyeballs_delay) {
                Ok(result) => Some(result),
                Err(RecvTimeoutError::Timeout) => {
                    runtime_launch_websocket_tcp_connect_attempt(
                        sender.clone(),
                        addrs[next_index],
                        connect_timeout,
                    );
                    next_index += 1;
                    attempted_addrs += 1;
                    in_flight += 1;
                    receiver.recv().ok()
                }
                Err(RecvTimeoutError::Disconnected) => None,
            }
        } else {
            receiver.recv().ok()
        };
        let Some(result) = next else {
            break;
        };
        in_flight = in_flight.saturating_sub(1);
        match result.result {
            Ok(stream) => {
                // First successful connect wins: apply socket options and
                // return immediately with the connect diagnostics.
                runtime_configure_upstream_tcp_stream(&stream, io_timeout).map_err(WsError::Io)?;
                return Ok(RuntimeWebsocketTcpConnectSuccess {
                    stream,
                    selected_addr: result.addr,
                    resolved_addrs,
                    attempted_addrs,
                });
            }
            Err(err) => {
                last_error = Some(err);
            }
        }
    }
    // Every attempt failed: surface the most recent I/O error, or a generic
    // unable-to-connect error if no attempt ever reported back.
    match last_error {
        Some(err) => Err(WsError::Io(err)),
        None => Err(WsError::Url(WsUrlError::UnableToConnect(uri.to_string()))),
    }
}
/// Synthesizes a JSON error frame (`{"type":"error","status":…,"error":…}`)
/// and sends it to the local websocket client.
fn send_runtime_proxy_websocket_error(
    local_socket: &mut RuntimeLocalWebSocket,
    status: u16,
    code: &str,
    message: &str,
) -> Result<()> {
    let frame = serde_json::json!({
        "type": "error",
        "status": status,
        "error": {
            "code": code,
            "message": message,
        }
    });
    local_socket
        .send(WsMessage::Text(frame.to_string().into()))
        .context("failed to send runtime websocket error frame")
}
/// Replays an upstream error payload to the local websocket client as a text
/// or binary frame; an empty payload sends nothing.
fn forward_runtime_proxy_websocket_error(
    local_socket: &mut RuntimeLocalWebSocket,
    payload: &RuntimeWebsocketErrorPayload,
) -> Result<()> {
    let (message, failure_context) = match payload {
        RuntimeWebsocketErrorPayload::Empty => return Ok(()),
        RuntimeWebsocketErrorPayload::Text(text) => (
            WsMessage::Text(text.clone().into()),
            "failed to forward runtime websocket text error frame",
        ),
        RuntimeWebsocketErrorPayload::Binary(bytes) => (
            WsMessage::Binary(bytes.clone().into()),
            "failed to forward runtime websocket binary error frame",
        ),
    };
    local_socket.send(message).context(failure_context)
}
/// Persists websocket response bookkeeping for `profile_name`: records the
/// previous-response owner exactly once per turn (the flag only flips after a
/// successful write), stores the new response ids, and — once at least one
/// response id is committed — releases any compact lineage hold (the release
/// result is ignored).
fn remember_runtime_websocket_response_ids(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    request_previous_response_id: Option<&str>,
    request_session_id: Option<&str>,
    request_turn_state: Option<&str>,
    response_ids: &[String],
    previous_response_owner_recorded: &mut bool,
) -> Result<()> {
    if !*previous_response_owner_recorded {
        remember_runtime_successful_previous_response_owner(
            shared,
            profile_name,
            request_previous_response_id,
            RuntimeRouteKind::Websocket,
        )?;
        *previous_response_owner_recorded = true;
    }
    remember_runtime_response_ids(
        shared,
        profile_name,
        response_ids,
        RuntimeRouteKind::Websocket,
    )?;
    if response_ids.first().is_some() {
        let _ = release_runtime_compact_lineage(
            shared,
            profile_name,
            request_session_id,
            request_turn_state,
            "response_committed",
        );
    }
    Ok(())
}
/// Flushes buffered text frames to the local websocket in arrival order,
/// recording each frame's response ids before that frame is forwarded. The
/// buffer is emptied even if a later send fails (frames are drained).
fn forward_runtime_proxy_buffered_websocket_text_frames(
    local_socket: &mut RuntimeLocalWebSocket,
    buffered_frames: &mut Vec<RuntimeBufferedWebsocketTextFrame>,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    request_previous_response_id: Option<&str>,
    request_session_id: Option<&str>,
    request_turn_state: Option<&str>,
    previous_response_owner_recorded: &mut bool,
) -> Result<()> {
    for frame in buffered_frames.drain(..) {
        remember_runtime_websocket_response_ids(
            shared,
            profile_name,
            request_previous_response_id,
            request_session_id,
            request_turn_state,
            &frame.response_ids,
            previous_response_owner_recorded,
        )?;
        let message = WsMessage::Text(frame.text.into());
        local_socket
            .send(message)
            .context("failed to forward buffered runtime websocket text frame")?;
    }
    Ok(())
}
/// Parses a websocket text frame as JSON and extracts routing metadata:
/// event type, turn state, response ids, retry classification, and the
/// precommit-hold / terminal-event flags. Non-JSON frames yield the default
/// (empty) inspection.
fn inspect_runtime_websocket_text_frame(payload: &str) -> RuntimeInspectedWebsocketTextFrame {
    let value = match serde_json::from_str::<serde_json::Value>(payload) {
        Ok(value) => value,
        Err(_) => return RuntimeInspectedWebsocketTextFrame::default(),
    };
    let event_type = runtime_response_event_type_from_value(&value);
    // Classification precedence: previous-response-not-found beats overload,
    // which beats quota-blocked.
    let retry_kind = extract_runtime_proxy_previous_response_message_from_value(&value)
        .map(|_| RuntimeWebsocketRetryInspectionKind::PreviousResponseNotFound)
        .or_else(|| {
            extract_runtime_proxy_overload_message_from_value(&value)
                .map(|_| RuntimeWebsocketRetryInspectionKind::Overloaded)
        })
        .or_else(|| {
            extract_runtime_proxy_quota_message_from_value(&value)
                .map(|_| RuntimeWebsocketRetryInspectionKind::QuotaBlocked)
        });
    let precommit_hold = event_type
        .as_deref()
        .is_some_and(runtime_proxy_precommit_hold_event_kind);
    let terminal_event = matches!(
        event_type.as_deref(),
        Some("response.completed" | "response.failed")
    );
    RuntimeInspectedWebsocketTextFrame {
        event_type,
        turn_state: extract_runtime_turn_state_from_value(&value),
        response_ids: extract_runtime_response_ids_from_value(&value),
        retry_kind,
        precommit_hold,
        terminal_event,
    }
}
fn runtime_response_event_type_from_value(value: &serde_json::Value) -> Option<String> {
value
.get("type")
.and_then(serde_json::Value::as_str)
.map(str::to_string)
}
#[cfg(test)]
/// Test helper: parse `payload` as JSON and return its event type, or `None`
/// when the payload is not valid JSON.
fn runtime_response_event_type(payload: &str) -> Option<String> {
    let value = serde_json::from_str::<serde_json::Value>(payload).ok()?;
    runtime_response_event_type_from_value(&value)
}
/// Returns true for the event kinds that keep a precommit hold open (early
/// response lifecycle events, before any terminal event arrives).
fn runtime_proxy_precommit_hold_event_kind(kind: &str) -> bool {
    const HOLD_EVENT_KINDS: [&str; 6] = [
        "response.created",
        "response.in_progress",
        "response.queued",
        "response.output_item.added",
        "response.content_part.added",
        "response.reasoning_summary_part.added",
    ];
    HOLD_EVENT_KINDS.contains(&kind)
}
#[cfg(test)]
/// Test helper: true when the payload's event type marks the end of a
/// response (`response.completed` or `response.failed`).
fn is_runtime_terminal_event(payload: &str) -> bool {
    matches!(
        runtime_response_event_type(payload).as_deref(),
        Some("response.completed" | "response.failed")
    )
}
/// Routes one buffered HTTP request through the auto-rotate profile pool.
///
/// Non-compact paths take the first half: the loop walks selection candidates
/// until an attempt succeeds, the pre-commit budget is exhausted, or no
/// candidate remains. Compact paths (`is_runtime_compact_path`) take the
/// second half, which additionally honors compact-lineage/session ownership
/// and grants the owning profile one conservative same-profile retry on
/// overload.
fn proxy_runtime_standard_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
) -> Result<tiny_http::ResponseBox> {
    let request_session_id = runtime_request_session_id(request);
    let request_turn_state = runtime_request_turn_state(request);
    // A session may be softly bound to a profile; that binding is dropped
    // below whenever the bound profile fails the request.
    let mut session_profile = request_session_id
        .as_deref()
        .map(|session_id| runtime_session_bound_profile(shared, session_id))
        .transpose()?
        .flatten();
    if !is_runtime_compact_path(&request.path_and_query) {
        let current_profile = runtime_proxy_current_profile(shared)?;
        // Prefer the session-bound profile when present, else the currently
        // selected profile.
        let preferred_profile = session_profile
            .clone()
            .unwrap_or_else(|| current_profile.clone());
        let pressure_mode =
            runtime_proxy_pressure_mode_active_for_route(shared, RuntimeRouteKind::Standard);
        let selection_started_at = Instant::now();
        let mut selection_attempts = 0usize;
        let mut excluded_profiles = BTreeSet::new();
        let mut last_failure = None;
        let mut saw_inflight_saturation = false;
        let (quota_summary, quota_source) = runtime_profile_quota_summary_for_route(
            shared,
            &preferred_profile,
            RuntimeRouteKind::Standard,
        )?;
        let preferred_is_session = session_profile.as_deref() == Some(preferred_profile.as_str());
        // Session-bound preferences go through the soft-affinity quota check;
        // otherwise only an exhausted quota band disqualifies the preference.
        let preferred_profile_usable = if preferred_is_session {
            runtime_quota_summary_allows_soft_affinity(
                quota_summary,
                quota_source,
                RuntimeRouteKind::Standard,
            )
        } else {
            quota_summary.route_band != RuntimeQuotaPressureBand::Exhausted
        };
        if !preferred_profile_usable {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http {} profile={} reason={} quota_source={} {}",
                    if preferred_is_session {
                        format!(
                            "selection_skip_affinity route={} affinity=session",
                            runtime_route_kind_label(RuntimeRouteKind::Standard)
                        )
                    } else {
                        format!(
                            "selection_skip_current route={}",
                            runtime_route_kind_label(RuntimeRouteKind::Standard)
                        )
                    },
                    preferred_profile,
                    if preferred_is_session {
                        runtime_quota_soft_affinity_rejection_reason(
                            quota_summary,
                            quota_source,
                            RuntimeRouteKind::Standard,
                        )
                    } else {
                        runtime_quota_pressure_band_reason(quota_summary.route_band)
                    },
                    quota_source
                        .map(runtime_quota_source_label)
                        .unwrap_or("unknown"),
                    runtime_quota_summary_log_fields(quota_summary),
                ),
            );
            excluded_profiles.insert(preferred_profile.clone());
        }
        loop {
            // Stop retrying once the attempt/elapsed budget is spent; fall
            // back to the most recent failure response if one was captured.
            if runtime_proxy_precommit_budget_exhausted(
                selection_started_at,
                selection_attempts,
                session_profile.is_some(),
                pressure_mode,
            ) {
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http standard_precommit_budget_exhausted attempts={selection_attempts} elapsed_ms={} pressure_mode={pressure_mode}",
                        selection_started_at.elapsed().as_millis()
                    ),
                );
                return match last_failure {
                    Some(response) => Ok(response),
                    None if saw_inflight_saturation => Ok(build_runtime_proxy_json_error_response(
                        503,
                        "service_unavailable",
                        "All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
                    )),
                    None => Ok(build_runtime_proxy_text_response(
                        503,
                        runtime_proxy_local_selection_failure_message(),
                    )),
                };
            }
            // First pass tries the preferred profile directly; once anything
            // is excluded, ask the selector for the next candidate.
            let candidate_name = if excluded_profiles.is_empty() {
                preferred_profile.clone()
            } else if let Some(candidate_name) = select_runtime_response_candidate_for_route(
                shared,
                &excluded_profiles,
                None,
                None,
                None,
                None,
                false,
                None,
                RuntimeRouteKind::Standard,
            )? {
                candidate_name
            } else {
                // No candidate left: surface the best failure seen so far.
                return match last_failure {
                    Some(response) => Ok(response),
                    None if saw_inflight_saturation => Ok(build_runtime_proxy_json_error_response(
                        503,
                        "service_unavailable",
                        "All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
                    )),
                    None => Ok(build_runtime_proxy_text_response(
                        503,
                        runtime_proxy_local_selection_failure_message(),
                    )),
                };
            };
            selection_attempts = selection_attempts.saturating_add(1);
            // Skip candidates whose in-flight request count hit the hard cap.
            if runtime_profile_inflight_hard_limited_for_context(
                shared,
                &candidate_name,
                "standard_http",
            )? {
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http profile_inflight_saturated profile={candidate_name} hard_limit={}",
                        runtime_proxy_profile_inflight_hard_limit(),
                    ),
                );
                excluded_profiles.insert(candidate_name);
                saw_inflight_saturation = true;
                continue;
            }
            match attempt_runtime_noncompact_standard_request(
                request_id,
                request,
                shared,
                &candidate_name,
            )? {
                RuntimeStandardAttempt::Success {
                    profile_name: _,
                    response,
                } => return Ok(response),
                // Quota-classified failure: back the profile off, release any
                // quota-blocked affinity to it, and try the next candidate.
                RuntimeStandardAttempt::RetryableFailure {
                    profile_name,
                    response,
                    overload: _,
                } => {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http standard_retryable_failure profile={profile_name}"
                        ),
                    );
                    mark_runtime_profile_retry_backoff(shared, &profile_name)?;
                    let released_affinity = release_runtime_quota_blocked_affinity(
                        shared,
                        &profile_name,
                        None,
                        None,
                        request_session_id.as_deref(),
                    )?;
                    if session_profile.as_deref() == Some(profile_name.as_str()) {
                        session_profile = None;
                    }
                    if released_affinity {
                        runtime_proxy_log(
                            shared,
                            format!(
                                "request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} route=standard"
                            ),
                        );
                    }
                    excluded_profiles.insert(profile_name);
                    last_failure = Some(response);
                }
                // Quota was already known (locally) to be exhausted before
                // sending; exclude the profile without recording a failure.
                RuntimeStandardAttempt::LocalSelectionBlocked { profile_name } => {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http local_selection_blocked profile={profile_name} route=standard reason=quota_exhausted_before_send"
                        ),
                    );
                    if session_profile.as_deref() == Some(profile_name.as_str()) {
                        session_profile = None;
                    }
                    excluded_profiles.insert(profile_name);
                }
            }
        }
    }
    // Compact path: determine which profile owns the compact lineage for this
    // turn-state/session, falling back to the session binding and then to the
    // currently selected profile.
    let current_profile = runtime_proxy_current_profile(shared)?;
    let mut compact_followup_profile = runtime_compact_followup_bound_profile(
        shared,
        request_turn_state.as_deref(),
        request_session_id.as_deref(),
    )?;
    if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http compact_followup_owner profile={profile_name} source={source}"
            ),
        );
    }
    let initial_compact_affinity_profile = compact_followup_profile
        .as_ref()
        .map(|(profile_name, _)| profile_name.as_str())
        .or(session_profile.as_deref());
    let compact_owner_profile = compact_followup_profile
        .as_ref()
        .map(|(profile_name, _)| profile_name.clone())
        .or(session_profile.clone())
        .unwrap_or_else(|| current_profile.clone());
    let pressure_mode =
        runtime_proxy_pressure_mode_active_for_route(shared, RuntimeRouteKind::Compact);
    // Under pressure, compact requests that have no existing affinity are
    // shed immediately with a 503 rather than competing for candidates.
    if runtime_proxy_should_shed_fresh_compact_request(
        pressure_mode,
        initial_compact_affinity_profile,
    ) {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http compact_pressure_shed reason=fresh_request pressure_mode={pressure_mode}"
            ),
        );
        return Ok(build_runtime_proxy_json_error_response(
            503,
            "service_unavailable",
            "Fresh compact requests are temporarily deferred while the runtime proxy is under pressure. Retry the request.",
        ));
    }
    let mut excluded_profiles = BTreeSet::new();
    let mut conservative_overload_retried_profiles = BTreeSet::new();
    let mut last_failure = None;
    let mut saw_inflight_saturation = false;
    let selection_started_at = Instant::now();
    let mut selection_attempts = 0usize;
    loop {
        if runtime_proxy_precommit_budget_exhausted(
            selection_started_at,
            selection_attempts,
            compact_followup_profile.is_some() || session_profile.is_some(),
            pressure_mode,
        ) {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http compact_precommit_budget_exhausted attempts={selection_attempts} elapsed_ms={} pressure_mode={pressure_mode}",
                    selection_started_at.elapsed().as_millis()
                ),
            );
            return match last_failure {
                Some(response) => Ok(response),
                None if saw_inflight_saturation => Ok(build_runtime_proxy_json_error_response(
                    503,
                    "service_unavailable",
                    "All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
                )),
                None if compact_followup_profile.is_some() || session_profile.is_some() => {
                    Ok(build_runtime_proxy_json_error_response(
                        503,
                        "service_unavailable",
                        runtime_proxy_local_selection_failure_message(),
                    ))
                }
                // Nothing was ever attempted and no affinity exists: make one
                // final direct attempt on the compact owner profile.
                None => match attempt_runtime_standard_request(
                    request_id,
                    request,
                    shared,
                    &compact_owner_profile,
                    runtime_candidate_has_hard_affinity(
                        RuntimeRouteKind::Compact,
                        &compact_owner_profile,
                        compact_followup_profile
                            .as_ref()
                            .map(|(profile_name, _)| profile_name.as_str()),
                        None,
                        None,
                        session_profile.as_deref(),
                        false,
                    ),
                )? {
                    RuntimeStandardAttempt::Success {
                        profile_name,
                        response,
                    } => {
                        commit_runtime_proxy_profile_selection_with_notice(
                            shared,
                            &profile_name,
                            RuntimeRouteKind::Compact,
                        )?;
                        Ok(response)
                    }
                    RuntimeStandardAttempt::RetryableFailure { response, .. } => Ok(response),
                    RuntimeStandardAttempt::LocalSelectionBlocked { .. } => {
                        Ok(build_runtime_proxy_json_error_response(
                            503,
                            "service_unavailable",
                            runtime_proxy_local_selection_failure_message(),
                        ))
                    }
                },
            };
        }
        selection_attempts = selection_attempts.saturating_add(1);
        let Some(candidate_name) = select_runtime_response_candidate_for_route(
            shared,
            &excluded_profiles,
            compact_followup_profile
                .as_ref()
                .map(|(profile_name, _)| profile_name.as_str()),
            None,
            None,
            session_profile.as_deref(),
            false,
            None,
            RuntimeRouteKind::Compact,
        )?
        else {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http compact_candidate_exhausted last_failure={}",
                    if last_failure.is_some() {
                        "http"
                    } else {
                        "none"
                    }
                ),
            );
            // Same fallback ladder as the budget-exhausted branch above.
            return match last_failure {
                Some(response) => Ok(response),
                None if saw_inflight_saturation => Ok(build_runtime_proxy_json_error_response(
                    503,
                    "service_unavailable",
                    "All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
                )),
                None if compact_followup_profile.is_some() || session_profile.is_some() => {
                    Ok(build_runtime_proxy_json_error_response(
                        503,
                        "service_unavailable",
                        runtime_proxy_local_selection_failure_message(),
                    ))
                }
                None => match attempt_runtime_standard_request(
                    request_id,
                    request,
                    shared,
                    &compact_owner_profile,
                    runtime_candidate_has_hard_affinity(
                        RuntimeRouteKind::Compact,
                        &compact_owner_profile,
                        compact_followup_profile
                            .as_ref()
                            .map(|(profile_name, _)| profile_name.as_str()),
                        None,
                        None,
                        session_profile.as_deref(),
                        false,
                    ),
                )? {
                    RuntimeStandardAttempt::Success {
                        profile_name,
                        response,
                    } => {
                        commit_runtime_proxy_profile_selection_with_notice(
                            shared,
                            &profile_name,
                            RuntimeRouteKind::Compact,
                        )?;
                        Ok(response)
                    }
                    RuntimeStandardAttempt::RetryableFailure { response, .. } => Ok(response),
                    RuntimeStandardAttempt::LocalSelectionBlocked { .. } => {
                        Ok(build_runtime_proxy_json_error_response(
                            503,
                            "service_unavailable",
                            runtime_proxy_local_selection_failure_message(),
                        ))
                    }
                },
            };
        };
        if excluded_profiles.contains(&candidate_name) {
            continue;
        }
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http compact_candidate={} excluded_count={}",
                candidate_name,
                excluded_profiles.len()
            ),
        );
        // Lineage/session owners bypass the in-flight hard limit so an owned
        // follow-up is not starved by unrelated traffic.
        let session_affinity_candidate = compact_followup_profile
            .as_ref()
            .is_some_and(|(owner, _)| owner == &candidate_name)
            || session_profile.as_deref() == Some(candidate_name.as_str());
        if runtime_profile_inflight_hard_limited_for_context(
            shared,
            &candidate_name,
            "compact_http",
        )? && !session_affinity_candidate
        {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http profile_inflight_saturated profile={candidate_name} hard_limit={}",
                    runtime_proxy_profile_inflight_hard_limit(),
                ),
            );
            excluded_profiles.insert(candidate_name);
            saw_inflight_saturation = true;
            continue;
        }
        match attempt_runtime_standard_request(
            request_id,
            request,
            shared,
            &candidate_name,
            runtime_candidate_has_hard_affinity(
                RuntimeRouteKind::Compact,
                &candidate_name,
                compact_followup_profile
                    .as_ref()
                    .map(|(profile_name, _)| profile_name.as_str()),
                None,
                None,
                session_profile.as_deref(),
                false,
            ),
        )? {
            RuntimeStandardAttempt::Success {
                profile_name,
                response,
            } => {
                commit_runtime_proxy_profile_selection_with_notice(
                    shared,
                    &profile_name,
                    RuntimeRouteKind::Compact,
                )?;
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http compact_committed profile={profile_name}"
                    ),
                );
                return Ok(response);
            }
            RuntimeStandardAttempt::RetryableFailure {
                profile_name,
                response,
                overload,
            } => {
                // Quota failures release the profile's affinity and compact
                // lineage; overloads keep both so the owner can be retried.
                let mut released_affinity = false;
                let mut released_compact_lineage = false;
                if !overload {
                    released_affinity = release_runtime_quota_blocked_affinity(
                        shared,
                        &profile_name,
                        None,
                        None,
                        request_session_id.as_deref(),
                    )?;
                    released_compact_lineage = release_runtime_compact_lineage(
                        shared,
                        &profile_name,
                        request_session_id.as_deref(),
                        None,
                        "quota_blocked",
                    )?;
                    if session_profile.as_deref() == Some(profile_name.as_str()) {
                        session_profile = None;
                    }
                    if compact_followup_profile
                        .as_ref()
                        .is_some_and(|(owner, _)| owner == &profile_name)
                    {
                        compact_followup_profile = None;
                    }
                }
                // An overloaded owner (lineage, session, or current profile)
                // gets exactly one conservative same-profile retry.
                let should_retry_same_profile = overload
                    && !conservative_overload_retried_profiles.contains(&profile_name)
                    && (compact_followup_profile
                        .as_ref()
                        .is_some_and(|(owner, _)| owner == &profile_name)
                        || session_profile.as_deref() == Some(profile_name.as_str())
                        || current_profile == profile_name);
                if should_retry_same_profile {
                    conservative_overload_retried_profiles.insert(profile_name.clone());
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http compact_overload_conservative_retry profile={profile_name} delay_ms={RUNTIME_PROXY_COMPACT_OWNER_RETRY_DELAY_MS} reason=non_blocking_retry"
                        ),
                    );
                    last_failure = Some(response);
                    continue;
                }
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http compact_retryable_failure profile={profile_name}"
                    ),
                );
                mark_runtime_profile_retry_backoff(shared, &profile_name)?;
                // A quota failure on a hard-affinity owner is terminal for
                // this request: no other profile may take over the lineage.
                if !overload
                    && runtime_candidate_has_hard_affinity(
                        RuntimeRouteKind::Compact,
                        &profile_name,
                        compact_followup_profile
                            .as_ref()
                            .map(|(profile_name, _)| profile_name.as_str()),
                        None,
                        None,
                        session_profile.as_deref(),
                        false,
                    )
                {
                    return Ok(response);
                }
                if released_affinity {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} route=compact"
                        ),
                    );
                }
                if released_compact_lineage {
                    runtime_proxy_log(
                        shared,
                        format!(
                            "request={request_id} transport=http compact_lineage_released profile={profile_name} reason=quota_blocked"
                        ),
                    );
                }
                // Overloads additionally dent the profile's health/pairing
                // scores so the selector deprioritizes it (errors ignored).
                if overload {
                    let _ = bump_runtime_profile_health_score(
                        shared,
                        &profile_name,
                        RuntimeRouteKind::Compact,
                        RUNTIME_PROFILE_OVERLOAD_HEALTH_PENALTY,
                        "compact_overload",
                    );
                    let _ = bump_runtime_profile_bad_pairing_score(
                        shared,
                        &profile_name,
                        RuntimeRouteKind::Compact,
                        RUNTIME_PROFILE_BAD_PAIRING_PENALTY,
                        "compact_overload",
                    );
                }
                excluded_profiles.insert(profile_name);
                last_failure = Some(response);
            }
            RuntimeStandardAttempt::LocalSelectionBlocked { profile_name } => {
                runtime_proxy_log(
                    shared,
                    format!(
                        "request={request_id} transport=http local_selection_blocked profile={profile_name} route=compact reason=quota_exhausted_before_send"
                    ),
                );
                excluded_profiles.insert(profile_name);
            }
        }
    }
}
/// Sends one non-compact request using `profile_name`'s credentials and
/// classifies the outcome for the caller's selection loop.
///
/// Returns `LocalSelectionBlocked` when the locally known quota is already
/// exhausted (nothing is sent), `RetryableFailure` for 403/429 responses that
/// carry a recognizable quota message, and `Success` otherwise — including
/// non-quota upstream errors, which are forwarded to the client as-is.
fn attempt_runtime_noncompact_standard_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
) -> Result<RuntimeStandardAttempt> {
    let request_session_id = runtime_request_session_id(request);
    let (quota_summary, quota_source) =
        runtime_profile_quota_summary_for_route(shared, profile_name, RuntimeRouteKind::Standard)?;
    // Cheap local gate: never send when the quota band is already exhausted.
    if quota_summary.route_band == RuntimeQuotaPressureBand::Exhausted {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http standard_pre_send_skip profile={profile_name} route=standard quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeStandardAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
        });
    }
    // The guard occupies the profile's in-flight slot for the whole attempt.
    let _inflight_guard =
        acquire_runtime_profile_inflight_guard(shared, profile_name, "standard_http")?;
    let response =
        send_runtime_proxy_upstream_request(request_id, request, shared, profile_name, None)
            .map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Standard,
                    "standard_upstream_request",
                    &err,
                );
                err
            })?;
    // Usage responses are buffered so the quota probe cache can be refreshed
    // from the body before it is passed back to the client.
    if request.path_and_query.ends_with("/backend-api/wham/usage") {
        let parts = buffer_runtime_proxy_async_response_parts(shared, response, Vec::new())
            .map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Standard,
                    "standard_buffer_usage_response",
                    &err,
                );
                err
            })?;
        // An unparsable usage body is tolerated; the cache just isn't updated.
        if let Ok(usage) = serde_json::from_slice::<UsageResponse>(&parts.body) {
            update_runtime_profile_probe_cache_with_usage(shared, profile_name, usage)?;
        }
        remember_runtime_session_id(
            shared,
            profile_name,
            request_session_id.as_deref(),
            RuntimeRouteKind::Standard,
        )?;
        return Ok(RuntimeStandardAttempt::Success {
            profile_name: profile_name.to_string(),
            response: build_runtime_proxy_response_from_parts(parts),
        });
    }
    // Successful responses are streamed straight through to the client.
    if response.status().is_success() {
        remember_runtime_session_id(
            shared,
            profile_name,
            request_session_id.as_deref(),
            RuntimeRouteKind::Standard,
        )?;
        let response =
            forward_runtime_proxy_response(shared, response, Vec::new()).map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Standard,
                    "standard_forward_response",
                    &err,
                );
                err
            })?;
        return Ok(RuntimeStandardAttempt::Success {
            profile_name: profile_name.to_string(),
            response,
        });
    }
    // Error responses are buffered so the body can be classified below.
    let status = response.status().as_u16();
    let parts =
        buffer_runtime_proxy_async_response_parts(shared, response, Vec::new()).map_err(|err| {
            note_runtime_profile_transport_failure(
                shared,
                profile_name,
                RuntimeRouteKind::Standard,
                "standard_buffer_response",
                &err,
            );
            err
        })?;
    // Only 403/429 bodies carrying a recognizable quota message count as
    // retryable on another profile; other 403/429s are logged and forwarded.
    let retryable_quota =
        matches!(status, 403 | 429) && extract_runtime_proxy_quota_message(&parts.body).is_some();
    if matches!(status, 403 | 429) && !retryable_quota {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http standard_quota_unclassified profile={profile_name} status={status} body_snippet={}",
                runtime_proxy_body_snippet(&parts.body, 240),
            ),
        );
    }
    let response = build_runtime_proxy_response_from_parts(parts);
    if retryable_quota {
        return Ok(RuntimeStandardAttempt::RetryableFailure {
            profile_name: profile_name.to_string(),
            response,
            overload: false,
        });
    }
    if matches!(status, 401 | 403) {
        note_runtime_profile_auth_failure(shared, profile_name, RuntimeRouteKind::Standard, status);
    }
    remember_runtime_session_id(
        shared,
        profile_name,
        request_session_id.as_deref(),
        RuntimeRouteKind::Standard,
    )?;
    // Non-quota failures are surfaced to the client as a "success" attempt so
    // the caller does not rotate to another profile.
    Ok(RuntimeStandardAttempt::Success {
        profile_name: profile_name.to_string(),
        response,
    })
}
/// Sends `request` upstream on behalf of `profile_name` and classifies the
/// outcome for the rotation loop.
///
/// Pre-send, the profile's quota summary for the compact route is consulted:
/// an exhausted band short-circuits with `LocalSelectionBlocked` unless the
/// caller opted in via `allow_quota_exhausted_send`. After the upstream call,
/// a 403/429 carrying a recognizable quota message — or any recognizable
/// overload response — is surfaced as `RetryableFailure`; everything else
/// (including 401/403 auth failures, which are additionally recorded via
/// `note_runtime_profile_auth_failure`) is returned as `Success` so the
/// caller forwards the response as-is.
///
/// NOTE(review): despite the `standard` name, the quota lookup, the inflight
/// guard context ("compact_http"), and all transport-failure accounting use
/// `RuntimeRouteKind::Compact` — confirm this mirroring of the compact-route
/// handler is intentional.
fn attempt_runtime_standard_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    allow_quota_exhausted_send: bool,
) -> Result<RuntimeStandardAttempt> {
    let request_session_id = runtime_request_session_id(request);
    // The request path is immutable for the lifetime of this attempt, so
    // classify it once instead of re-evaluating the predicate four times.
    let is_compact_path = is_runtime_compact_path(&request.path_and_query);
    let (quota_summary, quota_source) =
        runtime_profile_quota_summary_for_route(shared, profile_name, RuntimeRouteKind::Compact)?;
    if quota_summary.route_band == RuntimeQuotaPressureBand::Exhausted
        && !allow_quota_exhausted_send
    {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http standard_pre_send_skip profile={profile_name} route=compact quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeStandardAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
        });
    } else if quota_summary.route_band == RuntimeQuotaPressureBand::Exhausted {
        // Caller explicitly allowed sending despite exhaustion; record it.
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http compact_pre_send_allow_quota_exhausted profile={profile_name} quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
    }
    // Hold the profile's inflight slot for the whole upstream round-trip.
    let _inflight_guard =
        acquire_runtime_profile_inflight_guard(shared, profile_name, "compact_http")?;
    let response =
        send_runtime_proxy_upstream_request(request_id, request, shared, profile_name, None)
            .map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Compact,
                    "compact_upstream_request",
                    &err,
                );
                err
            })?;
    if !is_compact_path || response.status().is_success() {
        // For compact requests, capture the turn-state header before the
        // response is forwarded (forwarding consumes the response).
        let response_turn_state = is_compact_path
            .then(|| runtime_proxy_header_value(response.headers(), "x-codex-turn-state"))
            .flatten();
        let response =
            forward_runtime_proxy_response(shared, response, Vec::new()).map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Compact,
                    "compact_forward_response",
                    &err,
                );
                err
            })?;
        remember_runtime_session_id(
            shared,
            profile_name,
            request_session_id.as_deref(),
            if is_compact_path {
                RuntimeRouteKind::Compact
            } else {
                RuntimeRouteKind::Standard
            },
        )?;
        if is_compact_path {
            // Record compact lineage so follow-up requests can be routed back
            // to the owning profile.
            remember_runtime_compact_lineage(
                shared,
                profile_name,
                request_session_id.as_deref(),
                response_turn_state.as_deref(),
                RuntimeRouteKind::Compact,
            )?;
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http compact_committed_owner profile={profile_name} session={} turn_state={}",
                    request_session_id.as_deref().unwrap_or("-"),
                    response_turn_state.as_deref().unwrap_or("-"),
                ),
            );
        }
        return Ok(RuntimeStandardAttempt::Success {
            profile_name: profile_name.to_string(),
            response,
        });
    }
    // Compact request that failed upstream: buffer the body so it can be
    // inspected for quota/overload markers and still be replayed downstream.
    let status = response.status().as_u16();
    let parts =
        buffer_runtime_proxy_async_response_parts(shared, response, Vec::new()).map_err(|err| {
            note_runtime_profile_transport_failure(
                shared,
                profile_name,
                RuntimeRouteKind::Compact,
                "compact_buffer_response",
                &err,
            );
            err
        })?;
    let retryable_quota =
        matches!(status, 403 | 429) && extract_runtime_proxy_quota_message(&parts.body).is_some();
    let retryable_overload = extract_runtime_proxy_overload_message(status, &parts.body).is_some();
    if matches!(status, 403 | 429) && !retryable_quota {
        // 403/429 without a recognizable quota message: log a body snippet so
        // the unclassified error shape can be diagnosed later.
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http compact_quota_unclassified profile={profile_name} status={status} body_snippet={}",
                runtime_proxy_body_snippet(&parts.body, 240),
            ),
        );
    }
    let response = build_runtime_proxy_response_from_parts(parts);
    if retryable_quota || retryable_overload {
        return Ok(RuntimeStandardAttempt::RetryableFailure {
            profile_name: profile_name.to_string(),
            response,
            overload: retryable_overload,
        });
    }
    if matches!(status, 401 | 403) {
        note_runtime_profile_auth_failure(shared, profile_name, RuntimeRouteKind::Compact, status);
    }
    Ok(RuntimeStandardAttempt::Success {
        profile_name: profile_name.to_string(),
        response,
    })
}
/// Handles an Anthropic-style messages request by translating it into the
/// internal responses format, routing it through the shared responses
/// pipeline, and translating the reply back to the Anthropic wire shape.
///
/// A translation failure is reported to the client as a buffered 400
/// `invalid_request_error` rather than propagated as an `Err`.
fn proxy_runtime_anthropic_messages_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
) -> Result<RuntimeResponsesReply> {
    let translation = match translate_runtime_anthropic_messages_request(request) {
        Ok(translation) => translation,
        Err(err) => {
            let error_parts = build_runtime_anthropic_error_parts(
                400,
                "invalid_request_error",
                &err.to_string(),
            );
            return Ok(RuntimeResponsesReply::Buffered(error_parts));
        }
    };
    // Route the translated request through the ordinary responses path, then
    // map its reply back into the Anthropic format expected by the caller.
    let upstream_reply =
        proxy_runtime_responses_request(request_id, &translation.translated_request, shared)?;
    translate_runtime_responses_reply_to_anthropic(upstream_reply, &translation)
}
fn proxy_runtime_responses_request(
request_id: u64,
request: &RuntimeProxyRequest,
shared: &RuntimeRotationProxyShared,
) -> Result<RuntimeResponsesReply> {
let mut request = request.clone();
let request_requires_previous_response_affinity =
runtime_request_requires_previous_response_affinity(&request);
let mut previous_response_id = runtime_request_previous_response_id(&request);
let mut request_turn_state = runtime_request_turn_state(&request);
let request_session_id = runtime_request_session_id(&request);
let mut bound_profile = previous_response_id
.as_deref()
.map(|response_id| {
runtime_response_bound_profile(shared, response_id, RuntimeRouteKind::Responses)
})
.transpose()?
.flatten();
let mut trusted_previous_response_affinity = runtime_previous_response_affinity_is_trusted(
shared,
previous_response_id.as_deref(),
bound_profile.as_deref(),
)?;
let mut turn_state_profile = request_turn_state
.as_deref()
.map(|value| runtime_turn_state_bound_profile(shared, value))
.transpose()?
.flatten();
let mut compact_followup_profile = if previous_response_id.is_none()
&& bound_profile.is_none()
&& turn_state_profile.is_none()
{
runtime_compact_followup_bound_profile(
shared,
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?
} else {
None
};
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http compact_followup_owner profile={profile_name} source={source}"
),
);
}
let mut session_profile = if previous_response_id.is_none()
&& bound_profile.is_none()
&& turn_state_profile.is_none()
&& compact_followup_profile.is_none()
{
request_session_id
.as_deref()
.map(|session_id| runtime_session_bound_profile(shared, session_id))
.transpose()?
.flatten()
} else {
None
};
let mut pinned_profile = bound_profile.clone().or(compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.clone()));
let mut excluded_profiles = BTreeSet::new();
let mut last_failure = None;
let mut previous_response_retry_candidate: Option<String> = None;
let mut previous_response_retry_index = 0usize;
let mut candidate_turn_state_retry_profile: Option<String> = None;
let mut candidate_turn_state_retry_value: Option<String> = None;
let mut saw_inflight_saturation = false;
let mut selection_started_at = Instant::now();
let mut selection_attempts = 0usize;
let mut previous_response_fresh_fallback_used = false;
let mut saw_previous_response_not_found = false;
loop {
let pressure_mode =
runtime_proxy_pressure_mode_active_for_route(shared, RuntimeRouteKind::Responses);
if runtime_proxy_precommit_budget_exhausted(
selection_started_at,
selection_attempts,
runtime_proxy_has_continuation_priority(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
),
pressure_mode,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http precommit_budget_exhausted attempts={selection_attempts} elapsed_ms={} pressure_mode={pressure_mode}",
selection_started_at.elapsed().as_millis()
),
);
if previous_response_id.is_some()
&& saw_previous_response_not_found
&& !previous_response_fresh_fallback_used
&& !runtime_request_requires_previous_response_affinity(&request)
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason=precommit_budget_exhausted"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http compact_fresh_fallback_blocked profile={profile_name} source={source} reason=precommit_budget_exhausted"
),
);
return Ok(match last_failure {
Some(RuntimeUpstreamFailureResponse::Http(response)) => response,
_ if saw_inflight_saturation => {
RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
))
}
_ => RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)),
});
}
if runtime_proxy_allows_direct_current_profile_fallback(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
saw_inflight_saturation,
last_failure.is_some(),
) {
if let Some(current_profile) = runtime_proxy_direct_current_fallback_profile(
shared,
&excluded_profiles,
RuntimeRouteKind::Responses,
)? {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http direct_current_profile_fallback profile={current_profile} reason=precommit_budget_exhausted"
),
);
match attempt_runtime_responses_request(
request_id,
&request,
shared,
¤t_profile,
request_turn_state.as_deref(),
)? {
RuntimeResponsesAttempt::Success {
profile_name,
response,
} => {
if saw_previous_response_not_found {
remember_runtime_successful_previous_response_owner(
shared,
&profile_name,
previous_response_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
}
commit_runtime_proxy_profile_selection_with_notice(
shared,
&profile_name,
RuntimeRouteKind::Responses,
)?;
let _ = release_runtime_compact_lineage(
shared,
&profile_name,
request_session_id.as_deref(),
request_turn_state.as_deref(),
"response_committed_post_commit",
);
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http committed profile={profile_name} via=direct_current_profile_fallback"
),
);
return Ok(response);
}
RuntimeResponsesAttempt::QuotaBlocked {
profile_name,
response,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
return Ok(response);
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason=quota_blocked via=direct_current_profile_fallback"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
continue;
}
RuntimeResponsesAttempt::PreviousResponseNotFound {
profile_name,
response,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http route=responses previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?} via=direct_current_profile_fallback",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref()
!= Some(profile_name.as_str())
{
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) = runtime_previous_response_retry_delay(
previous_response_retry_index,
)
{
previous_response_retry_index += 1;
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry via=direct_current_profile_fallback",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity
{
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
continue;
}
RuntimeResponsesAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
return Ok(RuntimeResponsesReply::Buffered(
build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
),
));
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} reason={reason} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason={reason} via=direct_current_profile_fallback"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
continue;
}
}
}
}
return Ok(match last_failure {
Some(RuntimeUpstreamFailureResponse::Http(response)) => response,
_ if saw_inflight_saturation => {
RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
))
}
_ => RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)),
});
}
let Some(candidate_name) = select_runtime_response_candidate_for_route(
shared,
&excluded_profiles,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
previous_response_id.is_some(),
previous_response_id.as_deref(),
RuntimeRouteKind::Responses,
)?
else {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http candidate_exhausted last_failure={}",
match &last_failure {
Some(RuntimeUpstreamFailureResponse::Http(_)) => "http",
Some(RuntimeUpstreamFailureResponse::Websocket(_)) => "websocket",
None => "none",
}
),
);
if runtime_proxy_maybe_wait_for_interactive_inflight_relief(
request_id,
&request,
shared,
&excluded_profiles,
RuntimeRouteKind::Responses,
selection_started_at,
runtime_proxy_has_continuation_priority(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
),
runtime_wait_affinity_owner(
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
),
)? {
continue;
}
if previous_response_id.is_some()
&& saw_previous_response_not_found
&& !previous_response_fresh_fallback_used
&& !runtime_request_requires_previous_response_affinity(&request)
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason=candidate_exhausted"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
if let Some((profile_name, source)) = compact_followup_profile.as_ref() {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http compact_fresh_fallback_blocked profile={profile_name} source={source} reason=candidate_exhausted"
),
);
return Ok(match last_failure {
Some(RuntimeUpstreamFailureResponse::Http(response)) => response,
_ if saw_inflight_saturation => {
RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
))
}
_ => RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)),
});
}
if runtime_proxy_allows_direct_current_profile_fallback(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
saw_inflight_saturation,
last_failure.is_some(),
) {
if let Some(current_profile) = runtime_proxy_direct_current_fallback_profile(
shared,
&excluded_profiles,
RuntimeRouteKind::Responses,
)? {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http direct_current_profile_fallback profile={current_profile} reason=candidate_exhausted"
),
);
match attempt_runtime_responses_request(
request_id,
&request,
shared,
¤t_profile,
request_turn_state.as_deref(),
)? {
RuntimeResponsesAttempt::Success {
profile_name,
response,
} => {
if saw_previous_response_not_found {
remember_runtime_successful_previous_response_owner(
shared,
&profile_name,
previous_response_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
}
commit_runtime_proxy_profile_selection_with_notice(
shared,
&profile_name,
RuntimeRouteKind::Responses,
)?;
let _ = release_runtime_compact_lineage(
shared,
&profile_name,
request_session_id.as_deref(),
request_turn_state.as_deref(),
"response_committed_post_commit",
);
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http committed profile={profile_name} via=direct_current_profile_fallback"
),
);
return Ok(response);
}
RuntimeResponsesAttempt::QuotaBlocked {
profile_name,
response,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
return Ok(response);
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason=quota_blocked via=direct_current_profile_fallback"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
continue;
}
RuntimeResponsesAttempt::PreviousResponseNotFound {
profile_name,
response,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http route=responses previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?} via=direct_current_profile_fallback",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref()
!= Some(profile_name.as_str())
{
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) = runtime_previous_response_retry_delay(
previous_response_retry_index,
)
{
previous_response_retry_index += 1;
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry via=direct_current_profile_fallback",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity
{
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_affinity_released profile={profile_name} via=direct_current_profile_fallback"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
continue;
}
RuntimeResponsesAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
return Ok(RuntimeResponsesReply::Buffered(
build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
),
));
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref()
== Some(profile_name.as_str())
{
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} reason={reason} via=direct_current_profile_fallback"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason={reason} via=direct_current_profile_fallback"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
session_profile = None;
pinned_profile = None;
turn_state_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
continue;
}
}
}
}
return Ok(match last_failure {
Some(RuntimeUpstreamFailureResponse::Http(response)) => response,
_ if saw_inflight_saturation => {
RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
"All runtime auto-rotate candidates are temporarily saturated. Retry the request.",
))
}
_ => RuntimeResponsesReply::Buffered(build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
)),
});
};
selection_attempts = selection_attempts.saturating_add(1);
let turn_state_override =
if candidate_turn_state_retry_profile.as_deref() == Some(candidate_name.as_str()) {
candidate_turn_state_retry_value.as_deref()
} else {
request_turn_state.as_deref()
};
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http candidate={} pinned={:?} turn_state_profile={:?} turn_state_override={:?} excluded_count={}",
candidate_name,
pinned_profile,
turn_state_profile,
turn_state_override,
excluded_profiles.len()
),
);
if previous_response_id.is_none()
&& pinned_profile.is_none()
&& turn_state_profile.is_none()
&& runtime_profile_inflight_hard_limited_for_context(
shared,
&candidate_name,
"responses_http",
)?
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http profile_inflight_saturated profile={candidate_name} hard_limit={}",
runtime_proxy_profile_inflight_hard_limit(),
),
);
saw_inflight_saturation = true;
if runtime_proxy_maybe_wait_for_interactive_inflight_relief(
request_id,
&request,
shared,
&excluded_profiles,
RuntimeRouteKind::Responses,
selection_started_at,
runtime_proxy_has_continuation_priority(
previous_response_id.as_deref(),
pinned_profile.as_deref(),
request_turn_state.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
),
runtime_wait_affinity_owner(
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
),
)? {
continue;
}
excluded_profiles.insert(candidate_name);
continue;
}
match attempt_runtime_responses_request(
request_id,
&request,
shared,
&candidate_name,
turn_state_override,
)? {
RuntimeResponsesAttempt::Success {
profile_name,
response,
} => {
if saw_previous_response_not_found {
remember_runtime_successful_previous_response_owner(
shared,
&profile_name,
previous_response_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
}
commit_runtime_proxy_profile_selection_with_notice(
shared,
&profile_name,
RuntimeRouteKind::Responses,
)?;
let _ = release_runtime_compact_lineage(
shared,
&profile_name,
request_session_id.as_deref(),
request_turn_state.as_deref(),
"response_committed_post_commit",
);
runtime_proxy_log(
shared,
format!("request={request_id} transport=http committed profile={profile_name}"),
);
return Ok(response);
}
RuntimeResponsesAttempt::QuotaBlocked {
profile_name,
response,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked profile={profile_name}"
),
);
let quota_message =
extract_runtime_proxy_quota_message_from_response_reply(&response);
mark_runtime_profile_quota_quarantine(
shared,
&profile_name,
RuntimeRouteKind::Responses,
quota_message.as_deref(),
)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http upstream_usage_limit_passthrough route=responses profile={profile_name} reason=hard_affinity"
),
);
return Ok(response);
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name}"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason=quota_blocked"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
}
RuntimeResponsesAttempt::LocalSelectionBlocked {
profile_name,
reason,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http local_selection_blocked profile={profile_name} route=responses reason={reason}"
),
);
mark_runtime_profile_retry_backoff(shared, &profile_name)?;
if !runtime_quota_blocked_affinity_is_releasable(
RuntimeRouteKind::Responses,
&profile_name,
compact_followup_profile
.as_ref()
.map(|(profile_name, _)| profile_name.as_str()),
pinned_profile.as_deref(),
turn_state_profile.as_deref(),
session_profile.as_deref(),
trusted_previous_response_affinity,
request_requires_previous_response_affinity,
) {
return Ok(RuntimeResponsesReply::Buffered(
build_runtime_proxy_json_error_parts(
503,
"service_unavailable",
runtime_proxy_local_selection_failure_message(),
),
));
}
let released_affinity = release_runtime_quota_blocked_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
)?;
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
previous_response_retry_index = 0;
}
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http quota_blocked_affinity_released profile={profile_name} reason={reason}"
),
);
}
if previous_response_id.is_some()
&& trusted_previous_response_affinity
&& !previous_response_fresh_fallback_used
&& !request_requires_previous_response_affinity
&& let Some(fresh_request) =
runtime_request_without_previous_response_affinity(&request)
{
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_fresh_fallback reason={reason}"
),
);
request = fresh_request;
previous_response_id = None;
request_turn_state = None;
previous_response_fresh_fallback_used = true;
saw_previous_response_not_found = false;
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
trusted_previous_response_affinity = false;
bound_profile = None;
pinned_profile = None;
turn_state_profile = None;
session_profile = None;
excluded_profiles.clear();
last_failure = None;
selection_started_at = Instant::now();
selection_attempts = 0;
continue;
}
excluded_profiles.insert(profile_name);
}
RuntimeResponsesAttempt::PreviousResponseNotFound {
profile_name,
response,
turn_state,
} => {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http route=responses previous_response_not_found profile={profile_name} retry_index={previous_response_retry_index} replay_turn_state={:?}",
turn_state
),
);
saw_previous_response_not_found = true;
if previous_response_retry_candidate.as_deref() != Some(profile_name.as_str()) {
previous_response_retry_candidate = Some(profile_name.clone());
previous_response_retry_index = 0;
}
let has_turn_state_retry = turn_state.is_some();
if has_turn_state_retry {
candidate_turn_state_retry_profile = Some(profile_name.clone());
candidate_turn_state_retry_value = turn_state;
}
if has_turn_state_retry
&& let Some(delay) =
runtime_previous_response_retry_delay(previous_response_retry_index)
{
previous_response_retry_index += 1;
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_retry_immediate profile={profile_name} delay_ms={} reason=non_blocking_retry",
delay.as_millis()
),
);
continue;
}
previous_response_retry_candidate = None;
previous_response_retry_index = 0;
if !has_turn_state_retry && !request_requires_previous_response_affinity {
let _ = clear_runtime_stale_previous_response_binding(
shared,
&profile_name,
previous_response_id.as_deref(),
)?;
}
let released_affinity = release_runtime_previous_response_affinity(
shared,
&profile_name,
previous_response_id.as_deref(),
request_turn_state.as_deref(),
request_session_id.as_deref(),
RuntimeRouteKind::Responses,
)?;
if released_affinity {
runtime_proxy_log(
shared,
format!(
"request={request_id} transport=http previous_response_affinity_released profile={profile_name}"
),
);
}
if bound_profile.as_deref() == Some(profile_name.as_str()) {
bound_profile = None;
}
if session_profile.as_deref() == Some(profile_name.as_str()) {
session_profile = None;
}
if candidate_turn_state_retry_profile.as_deref() == Some(profile_name.as_str()) {
candidate_turn_state_retry_profile = None;
candidate_turn_state_retry_value = None;
}
if pinned_profile.as_deref() == Some(profile_name.as_str()) {
pinned_profile = None;
}
trusted_previous_response_affinity = false;
if turn_state_profile.as_deref() == Some(profile_name.as_str()) {
turn_state_profile = None;
}
if compact_followup_profile
.as_ref()
.is_some_and(|(owner, _)| owner == &profile_name)
{
compact_followup_profile = None;
}
excluded_profiles.insert(profile_name);
last_failure = Some(RuntimeUpstreamFailureResponse::Http(response));
}
}
}
}
/// Attempt one upstream `/responses` HTTP request against `profile_name` and
/// classify the outcome for the caller's rotation loop.
///
/// Outcomes:
/// - `LocalSelectionBlocked` — a pre-send quota guard vetoed the profile and
///   nothing was sent upstream;
/// - `QuotaBlocked` — upstream returned 403/429 carrying a quota message;
/// - `PreviousResponseNotFound` — upstream returned 400 carrying a
///   previous-response message (optionally with a replay turn state from the
///   `x-codex-turn-state` response header);
/// - `Success` — everything else, including non-retryable error statuses that
///   are passed through to the client verbatim.
fn attempt_runtime_responses_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    turn_state_override: Option<&str>,
) -> Result<RuntimeResponsesAttempt> {
    // Parse affinity markers from the request once up front; they are reused
    // on the success path below instead of re-parsing the request.
    let request_session_id = runtime_request_session_id(request);
    let request_previous_response_id = runtime_request_previous_response_id(request);
    let request_turn_state = runtime_request_turn_state(request);
    let (initial_quota_summary, initial_quota_source) =
        runtime_profile_quota_summary_for_route(shared, profile_name, RuntimeRouteKind::Responses)?;
    // For affinity-bearing requests, a persisted (possibly stale) quota
    // snapshot that already trips the precommit guard is enough to skip the
    // profile without sending anything upstream.
    if (request_previous_response_id.is_some()
        || request_session_id.is_some()
        || request_turn_state.is_some())
        && matches!(
            initial_quota_source,
            Some(RuntimeQuotaSource::PersistedSnapshot)
        )
        && let Some(reason) =
            runtime_quota_precommit_guard_reason(initial_quota_summary, RuntimeRouteKind::Responses)
    {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http responses_pre_send_skip profile={profile_name} route=responses reason={reason} quota_source={} {}",
                initial_quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(initial_quota_summary),
            ),
        );
        return Ok(RuntimeResponsesAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason,
        });
    }
    let has_alternative_quota_profile =
        runtime_has_alternative_quota_compatible_profile(shared, profile_name)?;
    // Reprobe quota before committing to this profile.
    let (quota_summary, quota_source) = ensure_runtime_profile_precommit_quota_ready(
        shared,
        profile_name,
        RuntimeRouteKind::Responses,
        "responses_precommit_reprobe",
    )?;
    // If the reprobe still yields no live quota source and another
    // quota-compatible profile exists, prefer skipping this one.
    if runtime_quota_summary_requires_live_source_after_probe(
        quota_summary,
        quota_source,
        RuntimeRouteKind::Responses,
    ) && has_alternative_quota_profile
    {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http responses_pre_send_skip profile={profile_name} route=responses reason=quota_windows_unavailable_after_reprobe quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeResponsesAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason: "quota_windows_unavailable_after_reprobe",
        });
    }
    if let Some(reason) =
        runtime_quota_precommit_guard_reason(quota_summary, RuntimeRouteKind::Responses)
    {
        runtime_proxy_log(
            shared,
            format!(
                "request={request_id} transport=http responses_pre_send_skip profile={profile_name} route=responses reason={reason} quota_source={} {}",
                quota_source
                    .map(runtime_quota_source_label)
                    .unwrap_or("unknown"),
                runtime_quota_summary_log_fields(quota_summary),
            ),
        );
        return Ok(RuntimeResponsesAttempt::LocalSelectionBlocked {
            profile_name: profile_name.to_string(),
            reason,
        });
    }
    // Hold an inflight slot for this profile across the upstream call; the
    // guard is handed to `prepare_runtime_proxy_responses_success` on the
    // success path and dropped (released) on every other path.
    let inflight_guard =
        acquire_runtime_profile_inflight_guard(shared, profile_name, "responses_http")?;
    let response = send_runtime_proxy_upstream_responses_request(
        request_id,
        request,
        shared,
        profile_name,
        turn_state_override,
    )
    .map_err(|err| {
        note_runtime_profile_transport_failure(
            shared,
            profile_name,
            RuntimeRouteKind::Responses,
            "responses_upstream_request",
            &err,
        );
        err
    })?;
    let response_turn_state = runtime_proxy_header_value(response.headers(), "x-codex-turn-state");
    if !response.status().is_success() {
        let status = response.status().as_u16();
        let parts = buffer_runtime_proxy_async_response_parts(shared, response, Vec::new())
            .map_err(|err| {
                note_runtime_profile_transport_failure(
                    shared,
                    profile_name,
                    RuntimeRouteKind::Responses,
                    "responses_buffer_response",
                    &err,
                );
                err
            })?;
        // Two retryable failure shapes: 403/429 with a quota message, and 400
        // with a previous-response message.
        let retryable_quota = matches!(status, 403 | 429)
            && extract_runtime_proxy_quota_message(&parts.body).is_some();
        let retryable_previous =
            status == 400 && extract_runtime_proxy_previous_response_message(&parts.body).is_some();
        let response = RuntimeResponsesReply::Buffered(parts);
        if retryable_quota {
            return Ok(RuntimeResponsesAttempt::QuotaBlocked {
                profile_name: profile_name.to_string(),
                response,
            });
        }
        if retryable_previous {
            return Ok(RuntimeResponsesAttempt::PreviousResponseNotFound {
                profile_name: profile_name.to_string(),
                response,
                turn_state: response_turn_state,
            });
        }
        if matches!(status, 401 | 403) {
            note_runtime_profile_auth_failure(
                shared,
                profile_name,
                RuntimeRouteKind::Responses,
                status,
            );
        }
        // Non-retryable upstream error: pass it through to the client as a
        // committed attempt so the rotation loop stops retrying.
        return Ok(RuntimeResponsesAttempt::Success {
            profile_name: profile_name.to_string(),
            response,
        });
    }
    prepare_runtime_proxy_responses_success(
        request_id,
        // Reuse the affinity values parsed at the top instead of re-parsing
        // the request body here.
        request_previous_response_id.as_deref(),
        request_session_id.as_deref(),
        request_turn_state.as_deref(),
        response,
        shared,
        profile_name,
        inflight_guard,
    )
    .map_err(|err| {
        note_runtime_profile_transport_failure(
            shared,
            profile_name,
            RuntimeRouteKind::Responses,
            "responses_prepare_success",
            &err,
        );
        err
    })
}
fn next_runtime_response_candidate_for_route(
shared: &RuntimeRotationProxyShared,
excluded_profiles: &BTreeSet<String>,
route_kind: RuntimeRouteKind,
) -> Result<Option<String>> {
let now = Local::now().timestamp();
let pressure_mode = runtime_proxy_pressure_mode_active_for_route(shared, route_kind);
let sync_probe_pressure_mode = runtime_proxy_sync_probe_pressure_mode_active(shared);
let inflight_soft_limit = runtime_profile_inflight_soft_limit(route_kind, pressure_mode);
let (
state,
current_profile,
include_code_review,
upstream_base_url,
cached_reports,
mut cached_usage_snapshots,
profile_usage_auth,
mut retry_backoff_until,
mut transport_backoff_until,
mut route_circuit_open_until,
profile_inflight,
profile_health,
) = {
let mut runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
prune_runtime_profile_selection_backoff(&mut runtime, now);
(
runtime.state.clone(),
runtime.current_profile.clone(),
runtime.include_code_review,
runtime.upstream_base_url.clone(),
runtime.profile_probe_cache.clone(),
runtime.profile_usage_snapshots.clone(),
runtime.profile_usage_auth.clone(),
runtime.profile_retry_backoff_until.clone(),
runtime.profile_transport_backoff_until.clone(),
runtime.profile_route_circuit_open_until.clone(),
runtime.profile_inflight.clone(),
runtime.profile_health.clone(),
)
};
let mut reports = Vec::new();
let mut cold_start_probe_jobs = Vec::new();
for (order_index, name) in active_profile_selection_order(&state, ¤t_profile)
.into_iter()
.enumerate()
{
if excluded_profiles.contains(&name) {
continue;
}
let Some(profile) = state.profiles.get(&name) else {
continue;
};
if let Some(entry) = cached_reports.get(&name) {
reports.push(RunProfileProbeReport {
name: name.clone(),
order_index,
auth: entry.auth.clone(),
result: entry.result.clone(),
});
if runtime_profile_probe_cache_freshness(entry, now)
!= RuntimeProbeCacheFreshness::Fresh
{
schedule_runtime_probe_refresh(shared, &name, &profile.codex_home);
}
} else {
let auth = read_auth_summary(&profile.codex_home);
reports.push(RunProfileProbeReport {
name: name.clone(),
order_index,
auth,
result: Err("runtime quota snapshot unavailable".to_string()),
});
cold_start_probe_jobs.push(RunProfileProbeJob {
name,
order_index,
codex_home: profile.codex_home.clone(),
});
}
}
cold_start_probe_jobs.sort_by_key(|job| {
let quota_summary = cached_usage_snapshots
.get(&job.name)
.filter(|snapshot| runtime_usage_snapshot_is_usable(snapshot, now))
.map(|snapshot| runtime_quota_summary_from_usage_snapshot(snapshot, route_kind))
.unwrap_or(RuntimeQuotaSummary {
five_hour: RuntimeQuotaWindowSummary {
status: RuntimeQuotaWindowStatus::Unknown,
remaining_percent: 0,
reset_at: i64::MAX,
},
weekly: RuntimeQuotaWindowSummary {
status: RuntimeQuotaWindowStatus::Unknown,
remaining_percent: 0,
reset_at: i64::MAX,
},
route_band: RuntimeQuotaPressureBand::Unknown,
});
(
runtime_quota_pressure_sort_key_for_route_from_summary(quota_summary),
job.order_index,
)
});
reports.sort_by_key(|report| report.order_index);
let mut candidates = ready_profile_candidates(
&reports,
include_code_review,
Some(current_profile.as_str()),
&state,
Some(&cached_usage_snapshots),
);
let best_candidate_order_index = candidates
.iter()
.filter(|candidate| !excluded_profiles.contains(&candidate.name))
.filter(|candidate| {
!runtime_profile_name_in_selection_backoff(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
)
})
.filter(|candidate| {
!runtime_profile_auth_failure_active_with_auth_cache(
&profile_health,
&profile_usage_auth,
&candidate.name,
now,
)
})
.filter(|candidate| {
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight)
< inflight_soft_limit
})
.map(|candidate| candidate.order_index)
.min();
let should_sync_probe_cold_start = !sync_probe_pressure_mode
&& !cold_start_probe_jobs.is_empty()
&& (candidates.is_empty()
|| best_candidate_order_index.is_none()
|| best_candidate_order_index.is_some_and(|best_order_index| {
cold_start_probe_jobs
.iter()
.any(|job| job.order_index < best_order_index)
}));
if sync_probe_pressure_mode && !cold_start_probe_jobs.is_empty() {
runtime_proxy_log(
shared,
format!(
"selection_skip_sync_probe route={} reason=pressure_mode cold_start_jobs={}",
runtime_route_kind_label(route_kind),
cold_start_probe_jobs.len(),
),
);
}
if should_sync_probe_cold_start {
let base_url = Some(upstream_base_url.clone());
let sync_jobs = cold_start_probe_jobs
.iter()
.filter(|job| {
candidates.is_empty()
|| best_candidate_order_index.is_none()
|| best_candidate_order_index
.is_some_and(|best_order_index| job.order_index < best_order_index)
})
.take(RUNTIME_PROFILE_SYNC_PROBE_FALLBACK_LIMIT)
.map(|job| RunProfileProbeJob {
name: job.name.clone(),
order_index: job.order_index,
codex_home: job.codex_home.clone(),
})
.collect::<Vec<_>>();
let probed_names = sync_jobs
.iter()
.map(|job| job.name.clone())
.collect::<BTreeSet<_>>();
let fresh_reports = map_parallel(sync_jobs, |job| {
let auth = read_auth_summary(&job.codex_home);
let result = if auth.quota_compatible {
fetch_usage(&job.codex_home, base_url.as_deref()).map_err(|err| err.to_string())
} else {
Err("auth mode is not quota-compatible".to_string())
};
RunProfileProbeReport {
name: job.name,
order_index: job.order_index,
auth,
result,
}
});
for report in &fresh_reports {
apply_runtime_profile_probe_result(
shared,
&report.name,
report.auth.clone(),
report.result.clone(),
)?;
}
{
let runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
cached_usage_snapshots = runtime.profile_usage_snapshots.clone();
retry_backoff_until = runtime.profile_retry_backoff_until.clone();
transport_backoff_until = runtime.profile_transport_backoff_until.clone();
route_circuit_open_until = runtime.profile_route_circuit_open_until.clone();
}
for fresh_report in fresh_reports {
if let Some(existing) = reports
.iter_mut()
.find(|report| report.name == fresh_report.name)
{
*existing = fresh_report;
}
}
reports.sort_by_key(|report| report.order_index);
candidates = ready_profile_candidates(
&reports,
include_code_review,
Some(current_profile.as_str()),
&state,
Some(&cached_usage_snapshots),
);
for job in cold_start_probe_jobs
.into_iter()
.filter(|job| !probed_names.contains(&job.name))
{
schedule_runtime_probe_refresh(shared, &job.name, &job.codex_home);
}
} else {
if sync_probe_pressure_mode && !cold_start_probe_jobs.is_empty() {
runtime_proxy_log(
shared,
format!(
"selection_skip_sync_probe route={} reason=pressure_mode cold_start_profiles={}",
runtime_route_kind_label(route_kind),
cold_start_probe_jobs.len()
),
);
}
for job in cold_start_probe_jobs {
schedule_runtime_probe_refresh(shared, &job.name, &job.codex_home);
}
}
let available_candidates = candidates
.into_iter()
.enumerate()
.filter(|(_, candidate)| !excluded_profiles.contains(&candidate.name))
.collect::<Vec<_>>();
let mut ready_candidates = available_candidates
.iter()
.filter(|(_, candidate)| {
!runtime_profile_name_in_selection_backoff(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
)
})
.collect::<Vec<_>>();
ready_candidates.sort_by_key(|(index, candidate)| {
(
runtime_quota_pressure_sort_key_for_route(&candidate.usage, route_kind),
runtime_quota_source_sort_key(route_kind, candidate.quota_source),
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight),
runtime_profile_health_sort_key(&candidate.name, &profile_health, now, route_kind),
*index,
runtime_profile_selection_jitter(shared, &candidate.name, route_kind),
)
});
for (index, candidate) in ready_candidates {
let inflight = runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight);
if runtime_profile_auth_failure_active_with_auth_cache(
&profile_health,
&profile_usage_auth,
&candidate.name,
now,
) {
runtime_proxy_log(
shared,
format!(
"selection_skip_current route={} profile={} reason=auth_failure_backoff inflight={} health={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
inflight,
runtime_profile_health_sort_key(
&candidate.name,
&profile_health,
now,
route_kind
),
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(runtime_quota_summary_for_route(
&candidate.usage,
route_kind
)),
),
);
continue;
}
if inflight >= inflight_soft_limit {
let quota_summary = runtime_quota_summary_for_route(&candidate.usage, route_kind);
runtime_proxy_log(
shared,
format!(
"selection_skip_current route={} profile={} reason=profile_inflight_soft_limit inflight={} soft_limit={} health={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
inflight,
inflight_soft_limit,
runtime_profile_health_sort_key(
&candidate.name,
&profile_health,
now,
route_kind
),
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(quota_summary),
),
);
continue;
}
if !reserve_runtime_profile_route_circuit_half_open_probe(
shared,
&candidate.name,
route_kind,
)? {
let quota_summary = runtime_quota_summary_for_route(&candidate.usage, route_kind);
runtime_proxy_log(
shared,
format!(
"selection_skip_current route={} profile={} reason=route_circuit_half_open_probe_wait inflight={} health={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
inflight,
runtime_profile_health_sort_key(
&candidate.name,
&profile_health,
now,
route_kind
),
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(quota_summary),
),
);
continue;
}
let quota_summary = runtime_quota_summary_for_route(&candidate.usage, route_kind);
runtime_proxy_log(
shared,
format!(
"selection_pick route={} profile={} mode=ready inflight={} health={} order={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
inflight,
runtime_profile_health_sort_key(&candidate.name, &profile_health, now, route_kind),
index,
format!(
"quota_source={} {}",
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(quota_summary)
),
),
);
return Ok(Some(candidate.name.clone()));
}
let mut fallback_candidates = available_candidates.into_iter().collect::<Vec<_>>();
fallback_candidates.sort_by_key(|(index, candidate)| {
(
runtime_profile_backoff_sort_key(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
),
runtime_quota_pressure_sort_key_for_route(&candidate.usage, route_kind),
runtime_quota_source_sort_key(route_kind, candidate.quota_source),
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight),
runtime_profile_health_sort_key(&candidate.name, &profile_health, now, route_kind),
*index,
runtime_profile_selection_jitter(shared, &candidate.name, route_kind),
)
});
let mut fallback = None;
for (index, candidate) in fallback_candidates {
if runtime_profile_auth_failure_active_with_auth_cache(
&profile_health,
&profile_usage_auth,
&candidate.name,
now,
) {
runtime_proxy_log(
shared,
format!(
"selection_skip_current route={} profile={} reason=auth_failure_backoff inflight={} health={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight),
runtime_profile_health_sort_key(
&candidate.name,
&profile_health,
now,
route_kind
),
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(runtime_quota_summary_for_route(
&candidate.usage,
route_kind
)),
),
);
continue;
}
if !reserve_runtime_profile_route_circuit_half_open_probe(
shared,
&candidate.name,
route_kind,
)? {
let quota_summary = runtime_quota_summary_for_route(&candidate.usage, route_kind);
runtime_proxy_log(
shared,
format!(
"selection_skip_current route={} profile={} reason=route_circuit_half_open_probe_wait inflight={} health={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight),
runtime_profile_health_sort_key(
&candidate.name,
&profile_health,
now,
route_kind
),
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(quota_summary),
),
);
continue;
}
let quota_summary = runtime_quota_summary_for_route(&candidate.usage, route_kind);
runtime_proxy_log(
shared,
format!(
"selection_pick route={} profile={} mode=backoff inflight={} health={} backoff={:?} order={} quota_source={} {}",
runtime_route_kind_label(route_kind),
candidate.name,
runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight),
runtime_profile_health_sort_key(&candidate.name, &profile_health, now, route_kind),
runtime_profile_backoff_sort_key(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
),
index,
runtime_quota_source_label(candidate.quota_source),
runtime_quota_summary_log_fields(quota_summary),
),
);
fallback = Some(candidate.name);
break;
}
if fallback.is_none() {
runtime_proxy_log(
shared,
format!(
"selection_pick route={} profile=none mode=exhausted excluded_count={}",
runtime_route_kind_label(route_kind),
excluded_profiles.len()
),
);
}
Ok(fallback)
}
fn runtime_waitable_inflight_candidates_for_route(
shared: &RuntimeRotationProxyShared,
excluded_profiles: &BTreeSet<String>,
route_kind: RuntimeRouteKind,
wait_affinity_owner: Option<&str>,
) -> Result<BTreeSet<String>> {
let now = Local::now().timestamp();
let pressure_mode = runtime_proxy_pressure_mode_active_for_route(shared, route_kind);
let inflight_soft_limit = runtime_profile_inflight_soft_limit(route_kind, pressure_mode);
let (
state,
current_profile,
include_code_review,
cached_reports,
cached_usage_snapshots,
profile_usage_auth,
retry_backoff_until,
transport_backoff_until,
route_circuit_open_until,
profile_inflight,
profile_health,
) = {
let mut runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
prune_runtime_profile_selection_backoff(&mut runtime, now);
(
runtime.state.clone(),
runtime.current_profile.clone(),
runtime.include_code_review,
runtime.profile_probe_cache.clone(),
runtime.profile_usage_snapshots.clone(),
runtime.profile_usage_auth.clone(),
runtime.profile_retry_backoff_until.clone(),
runtime.profile_transport_backoff_until.clone(),
runtime.profile_route_circuit_open_until.clone(),
runtime.profile_inflight.clone(),
runtime.profile_health.clone(),
)
};
let mut waitable_profiles = BTreeSet::new();
let mut reports = Vec::new();
for (order_index, name) in active_profile_selection_order(&state, ¤t_profile)
.into_iter()
.enumerate()
{
if excluded_profiles.contains(&name) {
continue;
}
if wait_affinity_owner.is_some_and(|owner| owner != name) {
continue;
}
let Some(entry) = cached_reports.get(&name) else {
continue;
};
reports.push(RunProfileProbeReport {
name,
order_index,
auth: entry.auth.clone(),
result: entry.result.clone(),
});
}
for candidate in ready_profile_candidates(
&reports,
include_code_review,
Some(current_profile.as_str()),
&state,
Some(&cached_usage_snapshots),
) {
if excluded_profiles.contains(&candidate.name) {
continue;
}
if runtime_profile_name_in_selection_backoff(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
) {
continue;
}
if runtime_profile_auth_failure_active_with_auth_cache(
&profile_health,
&profile_usage_auth,
&candidate.name,
now,
) {
continue;
}
if runtime_quota_precommit_guard_reason(
runtime_quota_summary_for_route(&candidate.usage, route_kind),
route_kind,
)
.is_some()
{
continue;
}
if runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight)
>= inflight_soft_limit
{
waitable_profiles.insert(candidate.name.clone());
}
}
Ok(waitable_profiles)
}
fn runtime_any_waited_candidate_relieved(
shared: &RuntimeRotationProxyShared,
waited_profiles: &BTreeSet<String>,
route_kind: RuntimeRouteKind,
) -> Result<bool> {
if waited_profiles.is_empty() {
return Ok(false);
}
let now = Local::now().timestamp();
let pressure_mode = runtime_proxy_pressure_mode_active_for_route(shared, route_kind);
let inflight_soft_limit = runtime_profile_inflight_soft_limit(route_kind, pressure_mode);
let (
state,
current_profile,
include_code_review,
cached_reports,
cached_usage_snapshots,
profile_usage_auth,
retry_backoff_until,
transport_backoff_until,
route_circuit_open_until,
profile_inflight,
profile_health,
) = {
let mut runtime = shared
.runtime
.lock()
.map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
prune_runtime_profile_selection_backoff(&mut runtime, now);
(
runtime.state.clone(),
runtime.current_profile.clone(),
runtime.include_code_review,
runtime.profile_probe_cache.clone(),
runtime.profile_usage_snapshots.clone(),
runtime.profile_usage_auth.clone(),
runtime.profile_retry_backoff_until.clone(),
runtime.profile_transport_backoff_until.clone(),
runtime.profile_route_circuit_open_until.clone(),
runtime.profile_inflight.clone(),
runtime.profile_health.clone(),
)
};
let mut reports = Vec::new();
for (order_index, name) in active_profile_selection_order(&state, ¤t_profile)
.into_iter()
.enumerate()
{
if !waited_profiles.contains(&name) {
continue;
}
let Some(entry) = cached_reports.get(&name) else {
continue;
};
reports.push(RunProfileProbeReport {
name,
order_index,
auth: entry.auth.clone(),
result: entry.result.clone(),
});
}
for candidate in ready_profile_candidates(
&reports,
include_code_review,
Some(current_profile.as_str()),
&state,
Some(&cached_usage_snapshots),
) {
if !waited_profiles.contains(&candidate.name) {
continue;
}
if runtime_profile_name_in_selection_backoff(
&candidate.name,
&retry_backoff_until,
&transport_backoff_until,
&route_circuit_open_until,
route_kind,
now,
) {
continue;
}
if runtime_profile_auth_failure_active_with_auth_cache(
&profile_health,
&profile_usage_auth,
&candidate.name,
now,
) {
continue;
}
if runtime_quota_precommit_guard_reason(
runtime_quota_summary_for_route(&candidate.usage, route_kind),
route_kind,
)
.is_some()
{
continue;
}
if runtime_profile_inflight_sort_key(&candidate.name, &profile_inflight)
< inflight_soft_limit
{
return Ok(true);
}
}
Ok(false)
}
/// Optionally blocks an interactive HTTP request until one of the waitable
/// candidate profiles releases an in-flight slot for `route_kind`, or the wait
/// budget expires.
///
/// Returns `Ok(true)` only when a wake-up produced *useful* relief (per
/// `runtime_any_waited_candidate_relieved`); `Ok(false)` means the caller
/// should proceed immediately (zero budget, no waitable candidates, exhausted
/// precommit budget, or timeout).
fn runtime_proxy_maybe_wait_for_interactive_inflight_relief(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    excluded_profiles: &BTreeSet<String>,
    route_kind: RuntimeRouteKind,
    selection_started_at: Instant,
    continuation: bool,
    wait_affinity_owner: Option<&str>,
) -> Result<bool> {
    let pressure_mode = runtime_proxy_pressure_mode_active_for_route(shared, route_kind);
    let wait_budget = runtime_proxy_request_inflight_wait_budget(request, pressure_mode);
    if wait_budget.is_zero() {
        return Ok(false);
    }
    let waited_profiles = runtime_waitable_inflight_candidates_for_route(
        shared,
        excluded_profiles,
        route_kind,
        wait_affinity_owner,
    )?;
    if waited_profiles.is_empty() {
        return Ok(false);
    }
    // The wait may not push the overall selection past its precommit budget:
    // cap it by whatever budget remains since selection started.
    let (_, precommit_budget) = runtime_proxy_precommit_budget(continuation, pressure_mode);
    let remaining_budget = precommit_budget.saturating_sub(selection_started_at.elapsed());
    let total_wait_budget = wait_budget.min(remaining_budget);
    if total_wait_budget.is_zero() {
        return Ok(false);
    }
    let wait_deadline = Instant::now() + total_wait_budget;
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http inflight_wait_started route={} wait_ms={}",
            runtime_route_kind_label(route_kind),
            total_wait_budget.as_millis()
        ),
    );
    let started_at = Instant::now();
    // Revision token: only wake-ups newer than this count as fresh signals.
    let mut observed_revision = runtime_profile_inflight_release_revision(shared);
    let mut signaled = false;
    let mut useful_relief = false;
    let mut wake_source = RuntimeProfileInFlightWaitOutcome::Timeout;
    loop {
        let remaining_wait = wait_deadline.saturating_duration_since(Instant::now());
        if remaining_wait.is_zero() {
            break;
        }
        match runtime_profile_inflight_wait_outcome_since(shared, remaining_wait, observed_revision)
        {
            RuntimeProfileInFlightWaitOutcome::InflightRelease => {
                signaled = true;
                wake_source = RuntimeProfileInFlightWaitOutcome::InflightRelease;
                observed_revision = runtime_profile_inflight_release_revision(shared);
                // A release happened somewhere; only stop waiting if it freed
                // capacity on a profile we are actually waiting for.
                useful_relief =
                    runtime_any_waited_candidate_relieved(shared, &waited_profiles, route_kind)?;
                if useful_relief {
                    break;
                }
            }
            RuntimeProfileInFlightWaitOutcome::OtherNotify => {
                // Unrelated notification: remember we were signaled and keep waiting.
                signaled = true;
                wake_source = RuntimeProfileInFlightWaitOutcome::OtherNotify;
                observed_revision = runtime_profile_inflight_release_revision(shared);
            }
            RuntimeProfileInFlightWaitOutcome::Timeout => {
                // Preserve the last real wake source in the log if one was seen.
                if !signaled {
                    wake_source = RuntimeProfileInFlightWaitOutcome::Timeout;
                }
                break;
            }
        }
    }
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http inflight_wait_finished route={} waited_ms={} signaled={signaled} useful={} wake_source={}",
            runtime_route_kind_label(route_kind),
            started_at.elapsed().as_millis(),
            useful_relief,
            runtime_profile_inflight_wait_outcome_label(wake_source),
        ),
    );
    Ok(useful_relief)
}
/// True while a probe-cache entry is young enough to count as fresh usage data.
fn runtime_profile_usage_cache_is_fresh(entry: &RuntimeProfileProbeCacheEntry, now: i64) -> bool {
    let age_seconds = now.saturating_sub(entry.checked_at);
    age_seconds <= RUNTIME_PROFILE_USAGE_CACHE_FRESH_SECONDS
}
/// Classifies a probe-cache entry by age: fresh, stale-but-usable (within the
/// grace window), or expired.
fn runtime_profile_probe_cache_freshness(
    entry: &RuntimeProfileProbeCacheEntry,
    now: i64,
) -> RuntimeProbeCacheFreshness {
    match now.saturating_sub(entry.checked_at) {
        age if age <= RUNTIME_PROFILE_USAGE_CACHE_FRESH_SECONDS => {
            RuntimeProbeCacheFreshness::Fresh
        }
        age if age <= RUNTIME_PROFILE_USAGE_CACHE_STALE_GRACE_SECONDS => {
            RuntimeProbeCacheFreshness::StaleUsable
        }
        _ => RuntimeProbeCacheFreshness::Expired,
    }
}
/// Records a freshly fetched `usage` payload in the probe cache for
/// `profile_name`.
///
/// The auth summary is re-read from the profile's codex home so the cached
/// result stays tagged with the current auth mode; an unknown profile falls
/// back to a quota-compatible "chatgpt" summary.
///
/// # Errors
/// Fails only when the runtime mutex is poisoned.
fn update_runtime_profile_probe_cache_with_usage(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    usage: UsageResponse,
) -> Result<()> {
    let auth = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?
        .state
        .profiles
        .get(profile_name)
        .map(|profile| read_auth_summary(&profile.codex_home))
        // `unwrap_or_else` so the fallback summary (and its String allocation)
        // is only built when the profile is actually missing; the previous
        // `unwrap_or` constructed it unconditionally.
        .unwrap_or_else(|| AuthSummary {
            label: "chatgpt".to_string(),
            quota_compatible: true,
        });
    apply_runtime_profile_probe_result(shared, profile_name, auth, Ok(usage))
}
/// Returns the currently selected profile name under the runtime lock.
fn runtime_proxy_current_profile(shared: &RuntimeRotationProxyShared) -> Result<String> {
    let runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    Ok(runtime.current_profile.clone())
}
/// True while `profile_name` still has an unexpired retry backoff.
fn runtime_profile_in_retry_backoff(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
) -> bool {
    match runtime.profile_retry_backoff_until.get(profile_name) {
        Some(until) => *until > now,
        None => false,
    }
}
/// True while `profile_name` has an unexpired transport backoff for `route_kind`.
fn runtime_profile_in_transport_backoff(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> bool {
    let active_until = runtime_profile_transport_backoff_until_from_map(
        &runtime.profile_transport_backoff_until,
        profile_name,
        route_kind,
        now,
    );
    active_until.is_some()
}
/// Current in-flight request count for `profile_name` (0 when untracked).
fn runtime_profile_inflight_count(runtime: &RuntimeRotationState, profile_name: &str) -> usize {
    match runtime.profile_inflight.get(profile_name) {
        Some(&count) => count,
        None => 0,
    }
}
/// Weight a request `context` contributes toward the per-profile in-flight
/// hard limit (same mapping as `runtime_profile_inflight_weight`: interactive
/// session contexts count double, everything else once).
fn runtime_profile_inflight_hard_limit_context(context: &str) -> usize {
    if context == "websocket_session" || context == "responses_http" {
        2
    } else {
        1
    }
}
/// True when admitting a request of `context` would push `profile_name` past
/// the per-profile in-flight hard limit.
fn runtime_profile_inflight_hard_limited_for_context(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    context: &str,
) -> Result<bool> {
    let hard_limit = runtime_proxy_profile_inflight_hard_limit();
    let runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let current = runtime_profile_inflight_count(&runtime, profile_name);
    let weight = runtime_profile_inflight_hard_limit_context(context);
    Ok(current.saturating_add(weight) > hard_limit)
}
/// True while either a retry backoff or a per-route transport backoff blocks
/// selecting `profile_name`.
fn runtime_profile_in_selection_backoff(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> bool {
    if runtime_profile_in_retry_backoff(runtime, profile_name, now) {
        return true;
    }
    runtime_profile_in_transport_backoff(runtime, profile_name, route_kind, now)
}
/// Combined health penalty for `profile_name`: global score + route-specific
/// score + coupled-route score, with saturating addition throughout.
fn runtime_profile_health_score(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let global = runtime_profile_global_health_score(runtime, profile_name, now);
    let route = runtime_profile_route_health_score(runtime, profile_name, now, route_kind);
    let coupling = runtime_profile_route_coupling_score(runtime, profile_name, now, route_kind);
    global.saturating_add(route).saturating_add(coupling)
}
/// Route kinds whose health history bleeds into `route_kind`'s score: the
/// pairing is symmetric (responses <-> websocket, compact <-> standard).
fn runtime_route_coupled_kinds(route_kind: RuntimeRouteKind) -> &'static [RuntimeRouteKind] {
    match route_kind {
        RuntimeRouteKind::Websocket => &[RuntimeRouteKind::Responses],
        RuntimeRouteKind::Responses => &[RuntimeRouteKind::Websocket],
        RuntimeRouteKind::Standard => &[RuntimeRouteKind::Compact],
        RuntimeRouteKind::Compact => &[RuntimeRouteKind::Standard],
    }
}
/// Coupled-route health contribution for `profile_name`, read from the
/// runtime's health map.
fn runtime_profile_route_coupling_score(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let health_map = &runtime.profile_health;
    runtime_profile_route_coupling_score_from_map(health_map, profile_name, now, route_kind)
}
/// Sums, over each route coupled to `route_kind`, the integer average of that
/// route's health score and its bad-pairing score (saturating arithmetic
/// throughout so a degenerate map can never overflow).
fn runtime_profile_route_coupling_score_from_map(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    profile_name: &str,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let mut total: u32 = 0;
    for &coupled_kind in runtime_route_coupled_kinds(route_kind) {
        let route_score = runtime_profile_effective_health_score_from_map(
            profile_health,
            &runtime_profile_route_health_key(profile_name, coupled_kind),
            now,
        );
        let bad_pairing_score = runtime_profile_effective_score_from_map(
            profile_health,
            &runtime_profile_route_bad_pairing_key(profile_name, coupled_kind),
            now,
            RUNTIME_PROFILE_BAD_PAIRING_DECAY_SECONDS,
        );
        // Halve the pair so coupled routes weigh less than the route itself.
        let averaged = route_score.saturating_add(bad_pairing_score) / 2;
        total = total.saturating_add(averaged);
    }
    total
}
/// Deterministic tie-breaking jitter for profile selection, hashed from the
/// current request sequence number, the profile name, and the route label.
fn runtime_profile_selection_jitter(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> u64 {
    let sequence = shared.request_sequence.load(Ordering::Relaxed);
    let mut hasher = DefaultHasher::new();
    sequence.hash(&mut hasher);
    profile_name.hash(&mut hasher);
    runtime_route_kind_label(route_kind).hash(&mut hasher);
    hasher.finish()
}
/// Drops retry-backoff entries whose deadline has already passed.
fn prune_runtime_profile_retry_backoff(runtime: &mut RuntimeRotationState, now: i64) {
    let backoffs = &mut runtime.profile_retry_backoff_until;
    backoffs.retain(|_, until| *until > now);
}
/// Drops transport-backoff entries whose deadline has already passed.
fn prune_runtime_profile_transport_backoff(runtime: &mut RuntimeRotationState, now: i64) {
    let backoffs = &mut runtime.profile_transport_backoff_until;
    backoffs.retain(|_, until| *until > now);
}
/// Drops route-circuit entries that have expired AND whose route health has
/// fully decayed; an expired entry for a still-unhealthy route is kept so the
/// half-open probe path can still see it.
fn prune_runtime_profile_route_circuits(runtime: &mut RuntimeRotationState, now: i64) {
    // Disjoint field borrows: read health while mutating the circuit map.
    let profile_health = &runtime.profile_health;
    runtime
        .profile_route_circuit_open_until
        .retain(|key, until| {
            *until > now
                || runtime_profile_effective_health_score_from_map(
                    profile_health,
                    &runtime_profile_route_circuit_health_key(key),
                    now,
                ) > 0
        });
}
/// Runs all selection-backoff pruning passes: retry backoffs, transport
/// backoffs, and route circuits (the three maps are disjoint).
fn prune_runtime_profile_selection_backoff(runtime: &mut RuntimeRotationState, now: i64) {
    prune_runtime_profile_retry_backoff(runtime, now);
    prune_runtime_profile_transport_backoff(runtime, now);
    prune_runtime_profile_route_circuits(runtime, now);
}
/// True when any of the three backoff mechanisms — plain retry backoff,
/// per-route transport backoff, or an open route circuit — currently blocks
/// selecting `profile_name` for `route_kind`.
fn runtime_profile_name_in_selection_backoff(
    profile_name: &str,
    retry_backoff_until: &BTreeMap<String, i64>,
    transport_backoff_until: &BTreeMap<String, i64>,
    route_circuit_open_until: &BTreeMap<String, i64>,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> bool {
    let still_active = |until: Option<&i64>| until.is_some_and(|until| *until > now);
    if still_active(retry_backoff_until.get(profile_name)) {
        return true;
    }
    if runtime_profile_transport_backoff_until_from_map(
        transport_backoff_until,
        profile_name,
        route_kind,
        now,
    )
    .is_some()
    {
        return true;
    }
    let circuit_key = runtime_profile_route_circuit_key(profile_name, route_kind);
    still_active(route_circuit_open_until.get(&circuit_key))
}
/// Sort key ranking profiles by how entangled they are in backoffs for
/// `route_kind`.
///
/// The leading discriminant encodes *which* backoffs are currently active
/// (0 = none, 1 = circuit only, 2 = transport only, 3 = retry only,
/// 4-6 = the three pairings, 7 = all three), so less-encumbered profiles sort
/// first. The remaining fields order profiles within the same class by
/// earliest then latest expiry, so the soonest-to-recover profile wins ties.
fn runtime_profile_backoff_sort_key(
    profile_name: &str,
    retry_backoff_until: &BTreeMap<String, i64>,
    transport_backoff_until: &BTreeMap<String, i64>,
    route_circuit_open_until: &BTreeMap<String, i64>,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> (usize, i64, i64, i64) {
    // Each deadline only counts while it is still in the future.
    let retry_until = retry_backoff_until
        .get(profile_name)
        .copied()
        .filter(|until| *until > now);
    let transport_until = runtime_profile_transport_backoff_until_from_map(
        transport_backoff_until,
        profile_name,
        route_kind,
        now,
    );
    let circuit_until = route_circuit_open_until
        .get(&runtime_profile_route_circuit_key(profile_name, route_kind))
        .copied()
        .filter(|until| *until > now);
    match (circuit_until, transport_until, retry_until) {
        (None, None, None) => (0, 0, 0, 0),
        (Some(circuit_until), None, None) => (1, circuit_until, 0, 0),
        (None, Some(transport_until), None) => (2, transport_until, 0, 0),
        (None, None, Some(retry_until)) => (3, retry_until, 0, 0),
        (Some(circuit_until), Some(transport_until), None) => (
            4,
            circuit_until.min(transport_until),
            circuit_until.max(transport_until),
            0,
        ),
        (Some(circuit_until), None, Some(retry_until)) => (
            5,
            circuit_until.min(retry_until),
            circuit_until.max(retry_until),
            0,
        ),
        (None, Some(transport_until), Some(retry_until)) => (
            6,
            transport_until.min(retry_until),
            transport_until.max(retry_until),
            0,
        ),
        // All three active: min and max of the triple, with the retry deadline
        // as the final tie-breaker.
        (Some(circuit_until), Some(transport_until), Some(retry_until)) => (
            7,
            circuit_until.min(transport_until.min(retry_until)),
            circuit_until.max(transport_until.max(retry_until)),
            retry_until,
        ),
    }
}
/// Aggregate health penalty used when ordering candidate profiles for
/// `route_kind`: base profile score + route score + bad-pairing score +
/// coupled-route score + route performance score, saturating throughout.
fn runtime_profile_health_sort_key(
    profile_name: &str,
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let components = [
        runtime_profile_effective_health_score_from_map(profile_health, profile_name, now),
        runtime_profile_effective_health_score_from_map(
            profile_health,
            &runtime_profile_route_health_key(profile_name, route_kind),
            now,
        ),
        runtime_profile_effective_score_from_map(
            profile_health,
            &runtime_profile_route_bad_pairing_key(profile_name, route_kind),
            now,
            RUNTIME_PROFILE_BAD_PAIRING_DECAY_SECONDS,
        ),
        runtime_profile_route_coupling_score_from_map(
            profile_health,
            profile_name,
            now,
            route_kind,
        ),
        runtime_profile_route_performance_score(profile_health, profile_name, now, route_kind),
    ];
    components.into_iter().fold(0_u32, u32::saturating_add)
}
/// In-flight load for `profile_name`, used as a selection sort key (0 when the
/// profile has no tracked in-flight requests).
fn runtime_profile_inflight_sort_key(
    profile_name: &str,
    profile_inflight: &BTreeMap<String, usize>,
) -> usize {
    match profile_inflight.get(profile_name) {
        Some(&count) => count,
        None => 0,
    }
}
/// In-flight weight of a request context: interactive session contexts
/// (websocket, responses HTTP) count double; everything else counts once.
fn runtime_profile_inflight_weight(context: &str) -> usize {
    if matches!(context, "websocket_session" | "responses_http") {
        2
    } else {
        1
    }
}
/// Maps a route kind onto its in-flight accounting context label.
fn runtime_route_kind_inflight_context(route_kind: RuntimeRouteKind) -> &'static str {
    match route_kind {
        RuntimeRouteKind::Standard => "standard_http",
        RuntimeRouteKind::Compact => "compact_http",
        RuntimeRouteKind::Responses => "responses_http",
        RuntimeRouteKind::Websocket => "websocket_session",
    }
}
/// Per-profile in-flight soft limit for `route_kind`. Under pressure the base
/// limit shrinks — interactive routes by 1, batch routes by 2 — but never
/// below 1.
fn runtime_profile_inflight_soft_limit(route_kind: RuntimeRouteKind, pressure_mode: bool) -> usize {
    let base = runtime_proxy_profile_inflight_soft_limit().max(1);
    if !pressure_mode {
        return base;
    }
    let reduction = match route_kind {
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket => 1,
        RuntimeRouteKind::Compact | RuntimeRouteKind::Standard => 2,
    };
    base.saturating_sub(reduction).max(1)
}
/// Stable log/reason label for a quota pressure band.
fn runtime_quota_pressure_band_reason(band: RuntimeQuotaPressureBand) -> &'static str {
    match band {
        RuntimeQuotaPressureBand::Unknown => "quota_unknown",
        RuntimeQuotaPressureBand::Exhausted => "quota_exhausted",
        RuntimeQuotaPressureBand::Critical => "quota_critical",
        RuntimeQuotaPressureBand::Thin => "quota_thin",
        RuntimeQuotaPressureBand::Healthy => "quota_healthy",
    }
}
/// Stable log/reason label for a quota window status.
fn runtime_quota_window_status_reason(status: RuntimeQuotaWindowStatus) -> &'static str {
    match status {
        RuntimeQuotaWindowStatus::Unknown => "unknown",
        RuntimeQuotaWindowStatus::Exhausted => "exhausted",
        RuntimeQuotaWindowStatus::Critical => "critical",
        RuntimeQuotaWindowStatus::Thin => "thin",
        RuntimeQuotaWindowStatus::Ready => "ready",
    }
}
/// Summarizes one usage window (`label` is "5h" or "weekly") into a status
/// band plus remaining percentage and reset time. A missing window is reported
/// as Unknown with a sentinel reset of `i64::MAX`.
fn runtime_quota_window_summary(usage: &UsageResponse, label: &str) -> RuntimeQuotaWindowSummary {
    let window = match required_main_window_snapshot(usage, label) {
        Some(window) => window,
        None => {
            return RuntimeQuotaWindowSummary {
                status: RuntimeQuotaWindowStatus::Unknown,
                remaining_percent: 0,
                reset_at: i64::MAX,
            };
        }
    };
    let remaining = window.remaining_percent;
    // Thresholds: 0% => exhausted, <=5% => critical, <=15% => thin.
    let status = if remaining == 0 {
        RuntimeQuotaWindowStatus::Exhausted
    } else if remaining <= 5 {
        RuntimeQuotaWindowStatus::Critical
    } else if remaining <= 15 {
        RuntimeQuotaWindowStatus::Thin
    } else {
        RuntimeQuotaWindowStatus::Ready
    };
    RuntimeQuotaWindowSummary {
        status,
        remaining_percent: remaining,
        reset_at: window.reset_at,
    }
}
/// Builds the full quota summary for a route: both window summaries plus the
/// route-specific pressure band.
fn runtime_quota_summary_for_route(
    usage: &UsageResponse,
    route_kind: RuntimeRouteKind,
) -> RuntimeQuotaSummary {
    let five_hour = runtime_quota_window_summary(usage, "5h");
    let weekly = runtime_quota_window_summary(usage, "weekly");
    let route_band = runtime_quota_pressure_band_for_route(usage, route_kind);
    RuntimeQuotaSummary {
        five_hour,
        weekly,
        route_band,
    }
}
/// Latest concrete reset time among windows that trip the precommit guard for
/// `route_kind`; `None` when no window blocks or every blocking window has an
/// unknown (`i64::MAX`) reset time.
fn runtime_quota_summary_blocking_reset_at(
    summary: RuntimeQuotaSummary,
    route_kind: RuntimeRouteKind,
) -> Option<i64> {
    let floor_percent = runtime_quota_precommit_floor_percent(route_kind);
    let mut latest: Option<i64> = None;
    for window in [summary.five_hour, summary.weekly] {
        if !runtime_quota_window_precommit_guard(window, floor_percent) {
            continue;
        }
        if window.reset_at == i64::MAX {
            continue;
        }
        latest = Some(latest.map_or(window.reset_at, |current| current.max(window.reset_at)));
    }
    latest
}
/// Converts a live usage payload into the compact per-profile snapshot that
/// gets persisted, stamping it with the current local time.
fn runtime_profile_usage_snapshot_from_usage(usage: &UsageResponse) -> RuntimeProfileUsageSnapshot {
    let five_hour = runtime_quota_window_summary(usage, "5h");
    let weekly = runtime_quota_window_summary(usage, "weekly");
    let checked_at = Local::now().timestamp();
    RuntimeProfileUsageSnapshot {
        checked_at,
        five_hour_status: five_hour.status,
        five_hour_remaining_percent: five_hour.remaining_percent,
        five_hour_reset_at: five_hour.reset_at,
        weekly_status: weekly.status,
        weekly_remaining_percent: weekly.remaining_percent,
        weekly_reset_at: weekly.reset_at,
    }
}
fn runtime_quota_summary_from_usage_snapshot(
snapshot: &RuntimeProfileUsageSnapshot,
route_kind: RuntimeRouteKind,
) -> RuntimeQuotaSummary {
let five_hour = RuntimeQuotaWindowSummary {
status: snapshot.five_hour_status,
remaining_percent: snapshot.five_hour_remaining_percent,
reset_at: snapshot.five_hour_reset_at,
};
let weekly = RuntimeQuotaWindowSummary {
status: snapshot.weekly_status,
remaining_percent: snapshot.weekly_remaining_percent,
reset_at: snapshot.weekly_reset_at,
};
let route_band = [
five_hour.status,
weekly.status,
match route_kind {
RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket => weekly.status,
RuntimeRouteKind::Compact | RuntimeRouteKind::Standard => five_hour.status,
},
]
.into_iter()
.fold(RuntimeQuotaPressureBand::Healthy, |band, status| {
band.max(match status {
RuntimeQuotaWindowStatus::Ready => RuntimeQuotaPressureBand::Healthy,
RuntimeQuotaWindowStatus::Thin => RuntimeQuotaPressureBand::Thin,
RuntimeQuotaWindowStatus::Critical => RuntimeQuotaPressureBand::Critical,
RuntimeQuotaWindowStatus::Exhausted => RuntimeQuotaPressureBand::Exhausted,
RuntimeQuotaWindowStatus::Unknown => RuntimeQuotaPressureBand::Unknown,
})
});
RuntimeQuotaSummary {
five_hour,
weekly,
route_band,
}
}
/// True while either window is exhausted with a concrete reset time still in
/// the future — i.e. a quota hold is currently in force.
fn runtime_profile_usage_snapshot_hold_active(
    snapshot: &RuntimeProfileUsageSnapshot,
    now: i64,
) -> bool {
    for (status, reset_at) in [
        (snapshot.five_hour_status, snapshot.five_hour_reset_at),
        (snapshot.weekly_status, snapshot.weekly_reset_at),
    ] {
        if matches!(status, RuntimeQuotaWindowStatus::Exhausted)
            && reset_at != i64::MAX
            && reset_at > now
        {
            return true;
        }
    }
    false
}
/// True when either window is exhausted with a concrete reset time that has
/// already passed — the snapshot's hold is stale and should be refreshed.
fn runtime_profile_usage_snapshot_hold_expired(
    snapshot: &RuntimeProfileUsageSnapshot,
    now: i64,
) -> bool {
    for (status, reset_at) in [
        (snapshot.five_hour_status, snapshot.five_hour_reset_at),
        (snapshot.weekly_status, snapshot.weekly_reset_at),
    ] {
        if matches!(status, RuntimeQuotaWindowStatus::Exhausted)
            && reset_at != i64::MAX
            && reset_at <= now
        {
            return true;
        }
    }
    false
}
/// Extracts a local-time quota reset timestamp from a human-readable upstream
/// message containing "try again at ...".
///
/// Two formats are attempted:
/// 1. Bare time-of-day ("7:30 PM") — interpreted as the next occurrence of
///    that local time (today, or tomorrow if it has already passed).
/// 2. Full date ("Mar 3, 2025 7:30 PM") — parsed as `%b %d, %Y %I:%M %p`.
///
/// Returns `None` when neither format parses or when DST makes the resulting
/// local datetime unrepresentable.
fn runtime_proxy_quota_reset_at_from_message(message: &str) -> Option<i64> {
    // `to_ascii_lowercase` is length-preserving, so this byte offset is also
    // valid in the original (mixed-case) message.
    let marker = message.to_ascii_lowercase().find("try again at ")?;
    let candidate = message
        .get(marker + "try again at ".len()..)?
        .trim()
        .trim_end_matches('.');
    let now = Local::now();
    // Format 1: "<time> <AM|PM>" as the first two whitespace-separated tokens.
    if let Some((time_text, meridiem)) = candidate
        .split_whitespace()
        .collect::<Vec<_>>()
        .get(..2)
        .and_then(|parts| {
            if parts.len() == 2 {
                Some((parts[0], parts[1]))
            } else {
                None
            }
        })
        && let Ok(time) =
            chrono::NaiveTime::parse_from_str(&format!("{time_text} {meridiem}"), "%I:%M %p")
    {
        let mut naive = now.date_naive().and_time(time);
        // `single()` handles the normal case; `earliest()` covers ambiguous
        // local times around DST transitions.
        let mut parsed = Local
            .from_local_datetime(&naive)
            .single()
            .or_else(|| Local.from_local_datetime(&naive).earliest())?;
        if parsed.timestamp() <= now.timestamp() {
            // The stated time already passed today: roll to tomorrow.
            naive = naive.checked_add_signed(chrono::Duration::days(1))?;
            parsed = Local
                .from_local_datetime(&naive)
                .single()
                .or_else(|| Local.from_local_datetime(&naive).earliest())?;
        }
        return Some(parsed.timestamp());
    }
    // Format 2: "<Mon> <day>[,] <year> <time> <AM|PM>" — needs five tokens.
    let mut parts = candidate
        .split_whitespace()
        .map(|part| part.to_string())
        .collect::<Vec<_>>();
    if parts.len() < 5 {
        return None;
    }
    // Normalize the day token to "<digits>," so ordinal suffixes or missing
    // commas ("3rd", "3") still match the `%d,` in the format string.
    let day_digits = parts[1]
        .trim_end_matches(',')
        .chars()
        .take_while(|ch| ch.is_ascii_digit())
        .collect::<String>();
    if day_digits.is_empty() {
        return None;
    }
    parts[1] = format!("{day_digits},");
    let normalized = parts[..5].join(" ");
    let naive = chrono::NaiveDateTime::parse_from_str(&normalized, "%b %d, %Y %I:%M %p").ok()?;
    Local
        .from_local_datetime(&naive)
        .single()
        .or_else(|| Local.from_local_datetime(&naive).earliest())
        .map(|datetime| datetime.timestamp())
}
/// Best-known quota reset time for `profile_name` on `route_kind`, preferring
/// the live probe cache and falling back to the persisted usage snapshot.
/// Only reset times strictly in the future are returned.
fn runtime_profile_known_quota_reset_at(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> Option<i64> {
    let now = Local::now().timestamp();
    let from_probe_cache = runtime
        .profile_probe_cache
        .get(profile_name)
        .and_then(|entry| entry.result.as_ref().ok())
        .map(|usage| runtime_quota_summary_for_route(usage, route_kind))
        .and_then(|summary| runtime_quota_summary_blocking_reset_at(summary, route_kind))
        .filter(|reset_at| *reset_at > now);
    if from_probe_cache.is_some() {
        return from_probe_cache;
    }
    runtime
        .profile_usage_snapshots
        .get(profile_name)
        .and_then(|snapshot| {
            runtime_quota_summary_blocking_reset_at(
                runtime_quota_summary_from_usage_snapshot(snapshot, route_kind),
                route_kind,
            )
        })
        .filter(|reset_at| *reset_at > now)
}
/// Quarantines `profile_name` after a quota rejection: evicts its probe cache,
/// marks both snapshot windows exhausted, and installs a retry backoff lasting
/// until the best-known quota reset time (or a fallback duration when no reset
/// time can be determined).
///
/// The reset time is resolved in priority order: parsed from the upstream
/// `quota_message`, then from cached/persisted usage data, then the fallback.
///
/// # Errors
/// Fails only when the runtime mutex is poisoned.
fn mark_runtime_profile_quota_quarantine(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    quota_message: Option<&str>,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    prune_runtime_profile_selection_backoff(&mut runtime, now);
    // Prefer an explicit reset time from the upstream message, then any reset
    // already known from cached usage; only future times count.
    let resolved_reset_at = quota_message
        .and_then(runtime_proxy_quota_reset_at_from_message)
        .or_else(|| runtime_profile_known_quota_reset_at(&runtime, profile_name, route_kind))
        .filter(|reset_at| *reset_at > now);
    let until = resolved_reset_at
        .unwrap_or_else(|| now.saturating_add(RUNTIME_PROFILE_QUOTA_QUARANTINE_FALLBACK_SECONDS));
    // Drop stale probe data so the quarantine snapshot below is authoritative.
    runtime.profile_probe_cache.remove(profile_name);
    let snapshot = runtime
        .profile_usage_snapshots
        .entry(profile_name.to_string())
        .or_insert(RuntimeProfileUsageSnapshot {
            checked_at: now,
            five_hour_status: RuntimeQuotaWindowStatus::Unknown,
            five_hour_remaining_percent: 0,
            five_hour_reset_at: i64::MAX,
            weekly_status: RuntimeQuotaWindowStatus::Unknown,
            weekly_remaining_percent: 0,
            weekly_reset_at: i64::MAX,
        });
    // Mark both windows exhausted; keep the later of any existing concrete
    // reset time and the quarantine deadline (i64::MAX means "unknown").
    snapshot.checked_at = now;
    snapshot.five_hour_status = RuntimeQuotaWindowStatus::Exhausted;
    snapshot.five_hour_remaining_percent = 0;
    snapshot.five_hour_reset_at = if snapshot.five_hour_reset_at == i64::MAX {
        until
    } else {
        snapshot.five_hour_reset_at.max(until)
    };
    snapshot.weekly_status = RuntimeQuotaWindowStatus::Exhausted;
    snapshot.weekly_remaining_percent = 0;
    snapshot.weekly_reset_at = if snapshot.weekly_reset_at == i64::MAX {
        until
    } else {
        snapshot.weekly_reset_at.max(until)
    };
    // Retry backoff only ever extends — never shortens — an existing one.
    runtime
        .profile_retry_backoff_until
        .entry(profile_name.to_string())
        .and_modify(|current| *current = (*current).max(until))
        .or_insert(until);
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!("profile_retry_backoff:{profile_name}"),
    );
    // Release the lock before logging to keep the critical section short.
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_quota_quarantine profile={profile_name} route={} until={} reset_at={} message={}",
            runtime_route_kind_label(route_kind),
            until,
            resolved_reset_at.unwrap_or(i64::MAX),
            quota_message.unwrap_or("-"),
        ),
    );
    runtime_proxy_log(
        shared,
        format!("profile_retry_backoff profile={profile_name} until={until}"),
    );
    Ok(())
}
/// Reconstructs a minimal `UsageResponse` from a persisted snapshot so code
/// paths that expect live usage data can run against persisted state.
fn usage_from_runtime_usage_snapshot(snapshot: &RuntimeProfileUsageSnapshot) -> UsageResponse {
    // Sentinel i64::MAX means "reset time unknown" and maps to None.
    let window = |remaining_percent, reset_at: i64, window_seconds| UsageWindow {
        used_percent: Some((100 - remaining_percent).clamp(0, 100)),
        reset_at: (reset_at != i64::MAX).then_some(reset_at),
        limit_window_seconds: Some(window_seconds),
    };
    UsageResponse {
        email: None,
        plan_type: None,
        rate_limit: Some(WindowPair {
            // 5-hour window = 18 000 s; weekly window = 604 800 s.
            primary_window: Some(window(
                snapshot.five_hour_remaining_percent,
                snapshot.five_hour_reset_at,
                18_000,
            )),
            secondary_window: Some(window(
                snapshot.weekly_remaining_percent,
                snapshot.weekly_reset_at,
                604_800,
            )),
        }),
        code_review_rate_limit: None,
        additional_rate_limits: Vec::new(),
    }
}
/// Clones the three live backoff maps into a standalone snapshot for
/// persistence.
fn runtime_profile_backoffs_snapshot(runtime: &RuntimeRotationState) -> RuntimeProfileBackoffs {
    let retry_backoff_until = runtime.profile_retry_backoff_until.clone();
    let transport_backoff_until = runtime.profile_transport_backoff_until.clone();
    let route_circuit_open_until = runtime.profile_route_circuit_open_until.clone();
    RuntimeProfileBackoffs {
        retry_backoff_until,
        transport_backoff_until,
        route_circuit_open_until,
    }
}
/// Clones all continuation-related bindings and statuses into a standalone
/// store snapshot for persistence.
fn runtime_continuation_store_snapshot(runtime: &RuntimeRotationState) -> RuntimeContinuationStore {
    let response_profile_bindings = runtime.state.response_profile_bindings.clone();
    let session_profile_bindings = runtime.state.session_profile_bindings.clone();
    let turn_state_bindings = runtime.turn_state_bindings.clone();
    let session_id_bindings = runtime.session_id_bindings.clone();
    let statuses = runtime.continuation_statuses.clone();
    RuntimeContinuationStore {
        response_profile_bindings,
        session_profile_bindings,
        turn_state_bindings,
        session_id_bindings,
        statuses,
    }
}
/// Softens a persisted backoff map at startup: entries that already expired
/// are dropped, and surviving deadlines are capped at
/// `now + max_future_seconds` (negative budgets count as zero). Returns true
/// when anything was removed or shortened.
fn runtime_soften_persisted_backoff_map_for_startup(
    backoffs: &mut BTreeMap<String, i64>,
    now: i64,
    max_future_seconds: i64,
) -> bool {
    let cap = now.saturating_add(max_future_seconds.max(0));
    let mut changed = false;
    backoffs.retain(|_, until| {
        if *until <= now {
            // Already expired: drop the entry entirely.
            changed = true;
            return false;
        }
        let capped = (*until).min(cap);
        changed |= capped != *until;
        *until = capped;
        true
    });
    changed
}
/// Parses a route label back into its `RuntimeRouteKind`; unknown labels yield
/// `None`.
fn runtime_route_kind_from_label(label: &str) -> Option<RuntimeRouteKind> {
    let kind = match label {
        "standard" => RuntimeRouteKind::Standard,
        "compact" => RuntimeRouteKind::Compact,
        "responses" => RuntimeRouteKind::Responses,
        "websocket" => RuntimeRouteKind::Websocket,
        _ => return None,
    };
    Some(kind)
}
/// Half-open probe duration for a persisted route-circuit key, scaled by the
/// route's current health score. Malformed keys or unknown route labels fall
/// back to the base probe duration.
fn runtime_profile_route_circuit_probe_seconds(
    profile_scores: &BTreeMap<String, RuntimeProfileHealth>,
    route_profile_key: &str,
    now: i64,
) -> i64 {
    runtime_profile_route_key_parts(route_profile_key, "__route_circuit__:")
        .and_then(|(route_label, profile_name)| {
            runtime_route_kind_from_label(route_label)
                .map(|route_kind| (profile_name, route_kind))
        })
        .map(|(profile_name, route_kind)| {
            let score = runtime_profile_effective_health_score_from_map(
                profile_scores,
                &runtime_profile_route_health_key(profile_name, route_kind),
                now,
            );
            runtime_profile_circuit_half_open_probe_seconds(score)
        })
        .unwrap_or(RUNTIME_PROFILE_CIRCUIT_HALF_OPEN_PROBE_SECONDS)
}
/// Startup softening for persisted route circuits: expired entries are dropped
/// and surviving deadlines are capped at `now` plus the health-derived
/// half-open probe window. Returns true when anything changed.
fn runtime_soften_persisted_route_circuits_for_startup(
    route_circuit_open_until: &mut BTreeMap<String, i64>,
    profile_scores: &BTreeMap<String, RuntimeProfileHealth>,
    now: i64,
) -> bool {
    let mut changed = false;
    route_circuit_open_until.retain(|route_profile_key, until| {
        if *until <= now {
            changed = true;
            return false;
        }
        let cap = now.saturating_add(runtime_profile_route_circuit_probe_seconds(
            profile_scores,
            route_profile_key,
            now,
        ));
        let capped = (*until).min(cap);
        changed |= capped != *until;
        *until = capped;
        true
    });
    changed
}
/// Applies all startup softening passes to a persisted backoff snapshot.
/// NOTE(review): retry backoffs are not softened here — presumably because
/// they can encode quota reset times that must survive restarts; confirm.
fn runtime_soften_persisted_backoffs_for_startup(
    backoffs: &mut RuntimeProfileBackoffs,
    profile_scores: &BTreeMap<String, RuntimeProfileHealth>,
    now: i64,
) -> bool {
    let transport_changed = runtime_soften_persisted_backoff_map_for_startup(
        &mut backoffs.transport_backoff_until,
        now,
        RUNTIME_PROFILE_TRANSPORT_BACKOFF_SECONDS,
    );
    let circuits_changed = runtime_soften_persisted_route_circuits_for_startup(
        &mut backoffs.route_circuit_open_until,
        profile_scores,
        now,
    );
    transport_changed || circuits_changed
}
// Circuit-breaker tuning for per-profile, per-route failure handling.
// Health score above which open/probe durations start doubling
// (see `runtime_profile_circuit_open_seconds`).
const RUNTIME_PROFILE_CIRCUIT_OPEN_THRESHOLD: u32 = 4;
// Base open duration; doubled per severity/reopen stage.
const RUNTIME_PROFILE_CIRCUIT_OPEN_SECONDS: i64 = 20;
// Hard cap on the open duration (shorter under cfg(test) to keep tests fast).
const RUNTIME_PROFILE_CIRCUIT_OPEN_MAX_SECONDS: i64 = if cfg!(test) { 320 } else { 600 };
// Base half-open probe reservation window.
const RUNTIME_PROFILE_CIRCUIT_HALF_OPEN_PROBE_SECONDS: i64 = 5;
// Cap on the health-scaled half-open probe window.
const RUNTIME_PROFILE_CIRCUIT_HALF_OPEN_PROBE_MAX_SECONDS: i64 = if cfg!(test) { 20 } else { 60 };
// Decay window applied to the reopen-stage counter.
const RUNTIME_PROFILE_CIRCUIT_REOPEN_DECAY_SECONDS: i64 = if cfg!(test) { 12 } else { 1_800 };
// Maximum reopen stage that still escalates the open duration.
const RUNTIME_PROFILE_CIRCUIT_REOPEN_MAX_STAGE: u32 = 4;
/// Open-circuit duration: the base duration doubles once per point of health
/// score above the open threshold (capped at 3) plus once per reopen stage
/// (capped at the max stage), bounded by the route-circuit maximum.
fn runtime_profile_circuit_open_seconds(score: u32, reopen_stage: u32) -> i64 {
    let severity = score
        .saturating_sub(RUNTIME_PROFILE_CIRCUIT_OPEN_THRESHOLD)
        .min(3);
    let stage = reopen_stage.min(RUNTIME_PROFILE_CIRCUIT_REOPEN_MAX_STAGE);
    // Shift count is at most 3 + 4 = 7, so checked_shl cannot fail here; the
    // unwrap_or is purely defensive.
    let multiplier = 1_i64
        .checked_shl(severity.saturating_add(stage))
        .unwrap_or(i64::MAX);
    RUNTIME_PROFILE_CIRCUIT_OPEN_SECONDS
        .saturating_mul(multiplier)
        .min(RUNTIME_PROFILE_CIRCUIT_OPEN_MAX_SECONDS)
}
/// Half-open probe window: the base duration doubles once per point of health
/// score above the open threshold (capped at 3), bounded by the probe maximum.
fn runtime_profile_circuit_half_open_probe_seconds(score: u32) -> i64 {
    let severity = score
        .saturating_sub(RUNTIME_PROFILE_CIRCUIT_OPEN_THRESHOLD)
        .min(3);
    // Shift count is at most 3; checked_shl cannot fail here.
    let multiplier = 1_i64.checked_shl(severity).unwrap_or(i64::MAX);
    RUNTIME_PROFILE_CIRCUIT_HALF_OPEN_PROBE_SECONDS
        .saturating_mul(multiplier)
        .min(RUNTIME_PROFILE_CIRCUIT_HALF_OPEN_PROBE_MAX_SECONDS)
}
/// Backoff-map key for an open circuit on (`profile_name`, `route_kind`).
fn runtime_profile_route_circuit_key(profile_name: &str, route_kind: RuntimeRouteKind) -> String {
    let route_label = runtime_route_kind_label(route_kind);
    format!("__route_circuit__:{route_label}:{profile_name}")
}
/// Extracts the profile name — the segment after the last ':' — from a
/// circuit key; a key with no ':' is returned unchanged.
fn runtime_profile_route_circuit_profile_name(key: &str) -> &str {
    match key.rfind(':') {
        Some(index) => &key[index + 1..],
        None => key,
    }
}
/// Maps a circuit key onto the matching route-health key by swapping the first
/// occurrence of the circuit marker for the health marker.
fn runtime_profile_route_circuit_health_key(key: &str) -> String {
    const FROM: &str = "__route_circuit__";
    const TO: &str = "__route_health__";
    match key.find(FROM) {
        Some(start) => {
            let mut health_key = String::with_capacity(key.len());
            health_key.push_str(&key[..start]);
            health_key.push_str(TO);
            health_key.push_str(&key[start + FROM.len()..]);
            health_key
        }
        None => key.to_string(),
    }
}
/// Health-map key tracking reopen escalation for a circuit on
/// (`profile_name`, `route_kind`).
fn runtime_profile_route_circuit_reopen_key(
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route_label = runtime_route_kind_label(route_kind);
    format!("__route_circuit_reopen__:{route_label}:{profile_name}")
}
/// Deadline until which the route circuit stays open, when still in the future.
fn runtime_profile_route_circuit_open_until(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> Option<i64> {
    let key = runtime_profile_route_circuit_key(profile_name, route_kind);
    match runtime.profile_route_circuit_open_until.get(&key) {
        Some(&until) if until > now => Some(until),
        _ => None,
    }
}
/// Attempts to claim a half-open probe slot for the route circuit on
/// (`profile_name`, `route_kind`).
///
/// Returns `Ok(true)` when the caller may send a probe request: either no
/// circuit exists, the circuit expired with fully decayed health (cleared
/// outright), or a probe reservation was just taken. Returns `Ok(false)`
/// while the circuit is still open — including while another caller holds the
/// probe reservation, since reserving re-extends the open deadline.
fn reserve_runtime_profile_route_circuit_half_open_probe(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> Result<bool> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    let key = runtime_profile_route_circuit_key(profile_name, route_kind);
    let route_label = runtime_route_kind_label(route_kind);
    // No circuit recorded at all: nothing blocks this route.
    let Some(until) = runtime.profile_route_circuit_open_until.get(&key).copied() else {
        return Ok(true);
    };
    // Circuit (or an in-progress probe reservation) still open: deny.
    if until > now {
        return Ok(false);
    }
    let health_key = runtime_profile_route_circuit_health_key(&key);
    let reopen_key = runtime_profile_route_circuit_reopen_key(profile_name, route_kind);
    let health_score =
        runtime_profile_effective_health_score_from_map(&runtime.profile_health, &health_key, now);
    if health_score == 0 {
        // Health fully decayed while the circuit sat expired: clear the
        // circuit and its reopen escalation entirely.
        runtime.profile_route_circuit_open_until.remove(&key);
        runtime.profile_health.remove(&reopen_key);
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("profile_circuit_clear:{profile_name}:{route_label}"),
        );
        return Ok(true);
    }
    // Still unhealthy: reserve a probe window by re-extending the circuit
    // deadline, so concurrent callers see `until > now` and back off.
    let probe_seconds = runtime_profile_circuit_half_open_probe_seconds(health_score);
    let reserve_until = now.saturating_add(probe_seconds);
    runtime
        .profile_route_circuit_open_until
        .insert(key, reserve_until);
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!("profile_circuit_half_open_probe:{profile_name}:{route_label}"),
    );
    // Release the lock before logging.
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_circuit_half_open_probe profile={profile_name} route={} until={reserve_until} health={health_score} probe_seconds={probe_seconds}",
            route_label
        ),
    );
    Ok(true)
}
/// Removes the route circuit for (`profile_name`, `route_kind`); true when an
/// entry actually existed.
fn clear_runtime_profile_circuit_for_route(
    runtime: &mut RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> bool {
    let key = runtime_profile_route_circuit_key(profile_name, route_kind);
    runtime
        .profile_route_circuit_open_until
        .remove(&key)
        .is_some()
}
/// Log label for where quota data came from.
/// NOTE(review): `LiveProbe` logs as "probe_cache" — confirm this naming is
/// intentional before renaming either side.
fn runtime_quota_source_label(source: RuntimeQuotaSource) -> &'static str {
    match source {
        RuntimeQuotaSource::PersistedSnapshot => "persisted_snapshot",
        RuntimeQuotaSource::LiveProbe => "probe_cache",
    }
}
/// Whether a persisted usage snapshot may still drive decisions: an active
/// exhaustion hold keeps it usable regardless of age, an expired hold
/// invalidates it, and otherwise plain age against the stale-grace window
/// decides.
fn runtime_usage_snapshot_is_usable(snapshot: &RuntimeProfileUsageSnapshot, now: i64) -> bool {
    if runtime_profile_usage_snapshot_hold_active(snapshot, now) {
        return true;
    }
    if runtime_profile_usage_snapshot_hold_expired(snapshot, now) {
        return false;
    }
    let age = now.saturating_sub(snapshot.checked_at);
    age <= RUNTIME_PROFILE_USAGE_CACHE_STALE_GRACE_SECONDS
}
/// Renders a quota summary as space-separated `key=value` log fields.
fn runtime_quota_summary_log_fields(summary: RuntimeQuotaSummary) -> String {
    let band = runtime_quota_pressure_band_reason(summary.route_band);
    let five_hour = summary.five_hour;
    let weekly = summary.weekly;
    format!(
        "quota_band={band} five_hour_status={} five_hour_remaining={} five_hour_reset_at={} weekly_status={} weekly_remaining={} weekly_reset_at={}",
        runtime_quota_window_status_reason(five_hour.status),
        five_hour.remaining_percent,
        five_hour.reset_at,
        runtime_quota_window_status_reason(weekly.status),
        weekly.remaining_percent,
        weekly.reset_at,
    )
}
/// Sort key ordering profiles by quota pressure for `route_kind` (ascending =
/// better): pressure band and pressure scores first, then remaining
/// percentages wrapped in `Reverse` so more headroom sorts earlier, then
/// reset times as final tie-breakers.
fn runtime_quota_pressure_sort_key_for_route(
    usage: &UsageResponse,
    route_kind: RuntimeRouteKind,
) -> (
    RuntimeQuotaPressureBand,
    i64,
    i64,
    i64,
    Reverse<i64>,
    Reverse<i64>,
    Reverse<i64>,
    i64,
    i64,
) {
    let band = runtime_quota_pressure_band_for_route(usage, route_kind);
    let score = ready_profile_score_for_route(usage, route_kind);
    (
        band,
        score.total_pressure,
        score.weekly_pressure,
        score.five_hour_pressure,
        Reverse(score.reserve_floor),
        Reverse(score.weekly_remaining),
        Reverse(score.five_hour_remaining),
        score.weekly_reset_at,
        score.five_hour_reset_at,
    )
}
fn runtime_quota_pressure_sort_key_for_route_from_summary(
summary: RuntimeQuotaSummary,
) -> (
RuntimeQuotaPressureBand,
i64,
i64,
i64,
Reverse<i64>,
Reverse<i64>,
Reverse<i64>,
i64,
i64,
) {
(
summary.route_band,
match summary.route_band {
RuntimeQuotaPressureBand::Healthy => 0,
RuntimeQuotaPressureBand::Thin => 1,
RuntimeQuotaPressureBand::Critical => 2,
RuntimeQuotaPressureBand::Exhausted => 3,
RuntimeQuotaPressureBand::Unknown => 4,
},
match summary.weekly.status {
RuntimeQuotaWindowStatus::Ready => 0,
RuntimeQuotaWindowStatus::Thin => 1,
RuntimeQuotaWindowStatus::Critical => 2,
RuntimeQuotaWindowStatus::Exhausted => 3,
RuntimeQuotaWindowStatus::Unknown => 4,
},
match summary.five_hour.status {
RuntimeQuotaWindowStatus::Ready => 0,
RuntimeQuotaWindowStatus::Thin => 1,
RuntimeQuotaWindowStatus::Critical => 2,
RuntimeQuotaWindowStatus::Exhausted => 3,
RuntimeQuotaWindowStatus::Unknown => 4,
},
Reverse(
summary
.weekly
.remaining_percent
.min(summary.five_hour.remaining_percent),
),
Reverse(summary.weekly.remaining_percent),
Reverse(summary.five_hour.remaining_percent),
summary.weekly.reset_at,
summary.five_hour.reset_at,
)
}
/// Classifies a profile's quota pressure for a route from its usage windows.
///
/// `Unknown` when either main window ("weekly"/"5h") is missing, `Exhausted`
/// when either is at 0% remaining; otherwise the route-specific thresholds
/// decide between `Critical`, `Thin`, and `Healthy`.
fn runtime_quota_pressure_band_for_route(
    usage: &UsageResponse,
    route_kind: RuntimeRouteKind,
) -> RuntimeQuotaPressureBand {
    let Some(weekly) = required_main_window_snapshot(usage, "weekly") else {
        return RuntimeQuotaPressureBand::Unknown;
    };
    let Some(five_hour) = required_main_window_snapshot(usage, "5h") else {
        return RuntimeQuotaPressureBand::Unknown;
    };
    let weekly_left = weekly.remaining_percent;
    let five_hour_left = five_hour.remaining_percent;
    if weekly_left == 0 || five_hour_left == 0 {
        return RuntimeQuotaPressureBand::Exhausted;
    }
    // The responses/websocket lanes use wider thresholds than compact/standard,
    // i.e. they are flagged thin/critical earlier.
    let (thin_weekly, thin_five_hour, critical_weekly, critical_five_hour) = match route_kind {
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket => (20, 10, 10, 5),
        RuntimeRouteKind::Compact | RuntimeRouteKind::Standard => (10, 5, 5, 3),
    };
    if weekly_left <= critical_weekly || five_hour_left <= critical_five_hour {
        return RuntimeQuotaPressureBand::Critical;
    }
    if weekly_left <= thin_weekly || five_hour_left <= thin_five_hour {
        return RuntimeQuotaPressureBand::Thin;
    }
    RuntimeQuotaPressureBand::Healthy
}
/// Effective (time-decayed) health score for `entry`, using the standard
/// health decay window `RUNTIME_PROFILE_HEALTH_DECAY_SECONDS`.
fn runtime_profile_effective_health_score(entry: &RuntimeProfileHealth, now: i64) -> u32 {
    runtime_profile_effective_score(entry, now, RUNTIME_PROFILE_HEALTH_DECAY_SECONDS)
}
/// Computes the decayed value of a stored score: one point is forgiven for
/// every full `decay_seconds` elapsed since `entry.updated_at`, saturating
/// at zero so the score never underflows.
fn runtime_profile_effective_score(
    entry: &RuntimeProfileHealth,
    now: i64,
    decay_seconds: i64,
) -> u32 {
    let elapsed = now.saturating_sub(entry.updated_at);
    // `.max(1)` guards against a zero/negative decay window; the clamp keeps
    // the i64 -> u32 cast lossless.
    let decayed_points = elapsed
        .saturating_div(decay_seconds.max(1))
        .clamp(0, i64::from(u32::MAX)) as u32;
    entry.score.saturating_sub(decayed_points)
}
/// Looks up `key` in the health map and returns its decayed health score,
/// or 0 when no entry exists.
fn runtime_profile_effective_health_score_from_map(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    key: &str,
    now: i64,
) -> u32 {
    profile_health
        .get(key)
        .map_or(0, |entry| runtime_profile_effective_health_score(entry, now))
}
/// Looks up `key` in the health map and returns its score decayed with the
/// caller-provided `decay_seconds` window, or 0 when no entry exists.
fn runtime_profile_effective_score_from_map(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    key: &str,
    now: i64,
    decay_seconds: i64,
) -> u32 {
    match profile_health.get(key) {
        Some(entry) => runtime_profile_effective_score(entry, now, decay_seconds),
        None => 0,
    }
}
/// Decayed health score stored under the bare profile name (the
/// profile-wide entry, as opposed to the `__route_*__`-prefixed per-route
/// keys). Returns 0 when no entry exists.
fn runtime_profile_global_health_score(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
) -> u32 {
    runtime_profile_effective_health_score_from_map(&runtime.profile_health, profile_name, now)
}
/// Health-map key for a profile's per-route health entry.
fn runtime_profile_route_health_key(profile_name: &str, route_kind: RuntimeRouteKind) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__route_health__:{route}:{profile_name}")
}
/// Health-map key for a profile's per-route "bad pairing" score.
fn runtime_profile_route_bad_pairing_key(
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__route_bad_pairing__:{route}:{profile_name}")
}
/// Health-map key for a profile's per-route success-streak counter.
fn runtime_profile_route_success_streak_key(
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__route_success__:{route}:{profile_name}")
}
/// Health-map key for a profile's per-route latency/performance score.
fn runtime_profile_route_performance_key(
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> String {
    let route = runtime_route_kind_label(route_kind);
    format!("__route_performance__:{route}:{profile_name}")
}
/// Decayed health score for `profile_name` on a specific route (0 when no
/// entry exists).
fn runtime_profile_route_health_score(
    runtime: &RuntimeRotationState,
    profile_name: &str,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let key = runtime_profile_route_health_key(profile_name, route_kind);
    runtime_profile_effective_health_score_from_map(&runtime.profile_health, &key, now)
}
/// Aggregated latency/performance score for a profile on a route: the
/// route's own decayed score plus half of each coupled route's score
/// (see `runtime_route_coupled_kinds`), all with saturating arithmetic.
fn runtime_profile_route_performance_score(
    profile_health: &BTreeMap<String, RuntimeProfileHealth>,
    profile_name: &str,
    now: i64,
    route_kind: RuntimeRouteKind,
) -> u32 {
    let score_for = |kind: RuntimeRouteKind| {
        runtime_profile_effective_score_from_map(
            profile_health,
            &runtime_profile_route_performance_key(profile_name, kind),
            now,
            RUNTIME_PROFILE_PERFORMANCE_DECAY_SECONDS,
        )
    };
    let mut total = score_for(route_kind);
    // Coupled routes contribute at half weight.
    for coupled_kind in runtime_route_coupled_kinds(route_kind).iter().copied() {
        total = total.saturating_add(score_for(coupled_kind).saturating_div(2));
    }
    total
}
/// Maps an observed latency to a penalty score using per-(route, stage)
/// thresholds. Thresholds are (good, warn, poor, severe) in milliseconds;
/// penalties step 0 / 2 / 4 / 7 and cap at
/// `RUNTIME_PROFILE_LATENCY_PENALTY_MAX` beyond the severe bound.
fn runtime_profile_latency_penalty(
    elapsed_ms: u64,
    route_kind: RuntimeRouteKind,
    stage: &str,
) -> u32 {
    let (good_ms, warn_ms, poor_ms, severe_ms) = match (route_kind, stage) {
        // Primary-lane first-byte / websocket connect get looser thresholds.
        (RuntimeRouteKind::Responses, "ttfb") | (RuntimeRouteKind::Websocket, "connect") => {
            (120, 300, 700, 1_500)
        }
        (RuntimeRouteKind::Compact, _) | (RuntimeRouteKind::Standard, _) => (80, 180, 400, 900),
        _ => (100, 250, 600, 1_200),
    };
    if elapsed_ms <= good_ms {
        0
    } else if elapsed_ms <= warn_ms {
        2
    } else if elapsed_ms <= poor_ms {
        4
    } else if elapsed_ms <= severe_ms {
        7
    } else {
        RUNTIME_PROFILE_LATENCY_PENALTY_MAX
    }
}
/// Stores (or clears, when `next_score` is 0) a profile's per-route
/// latency/performance score, capped at `RUNTIME_PROFILE_LATENCY_PENALTY_MAX`,
/// then logs the new value. Does not schedule a state save.
fn update_runtime_profile_route_performance(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    next_score: u32,
    reason: &str,
) -> Result<()> {
    let clamped_score = next_score.min(RUNTIME_PROFILE_LATENCY_PENALTY_MAX);
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    let key = runtime_profile_route_performance_key(profile_name, route_kind);
    // A zero score means "no penalty" — drop the entry to keep the map small.
    if next_score == 0 {
        runtime.profile_health.remove(&key);
    } else {
        runtime.profile_health.insert(
            key,
            RuntimeProfileHealth {
                score: clamped_score,
                updated_at: now,
            },
        );
    }
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_latency profile={profile_name} route={} score={} reason={reason}",
            runtime_route_kind_label(route_kind),
            clamped_score,
        ),
    );
    Ok(())
}
/// Folds a latency sample into the profile's per-route performance score.
///
/// A clean sample (zero penalty) decays the score by 2; otherwise the score
/// moves toward the observed penalty with a 2:1 weighting on the prior value,
/// rounded up. A poisoned lock is treated as "no prior score".
fn note_runtime_profile_latency_observation(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    stage: &str,
    elapsed_ms: u64,
) {
    let performance_key = runtime_profile_route_performance_key(profile_name, route_kind);
    let current_score = match shared.runtime.lock() {
        Ok(runtime) => runtime_profile_effective_score_from_map(
            &runtime.profile_health,
            &performance_key,
            Local::now().timestamp(),
            RUNTIME_PROFILE_PERFORMANCE_DECAY_SECONDS,
        ),
        Err(_) => 0,
    };
    let observed = runtime_profile_latency_penalty(elapsed_ms, route_kind, stage);
    let next_score = if observed == 0 {
        current_score.saturating_sub(2)
    } else {
        (((current_score as u64) * 2) + (observed as u64)).div_ceil(3) as u32
    };
    // Best-effort update; a poisoned lock inside is not worth surfacing here.
    let _ = update_runtime_profile_route_performance(
        shared,
        profile_name,
        route_kind,
        next_score,
        &format!("{stage}_{elapsed_ms}ms"),
    );
}
/// Applies the fixed transport-failure penalty on top of the profile's
/// current per-route performance score, capped at
/// `RUNTIME_PROFILE_LATENCY_PENALTY_MAX`, and stores the result.
fn note_runtime_profile_latency_failure(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    stage: &str,
) {
    let performance_key = runtime_profile_route_performance_key(profile_name, route_kind);
    let current_score = match shared.runtime.lock() {
        Ok(runtime) => runtime_profile_effective_score_from_map(
            &runtime.profile_health,
            &performance_key,
            Local::now().timestamp(),
            RUNTIME_PROFILE_PERFORMANCE_DECAY_SECONDS,
        ),
        Err(_) => 0,
    };
    let next_score = current_score
        .saturating_add(RUNTIME_PROFILE_TRANSPORT_FAILURE_HEALTH_PENALTY)
        .min(RUNTIME_PROFILE_LATENCY_PENALTY_MAX);
    // Best-effort update; failure to record is intentionally swallowed.
    let _ = update_runtime_profile_route_performance(
        shared,
        profile_name,
        route_kind,
        next_score,
        stage,
    );
}
/// Stable lowercase label for a route kind, used in health-map keys and
/// log lines.
fn runtime_route_kind_label(route_kind: RuntimeRouteKind) -> &'static str {
    match route_kind {
        RuntimeRouteKind::Responses => "responses",
        RuntimeRouteKind::Compact => "compact",
        RuntimeRouteKind::Websocket => "websocket",
        RuntimeRouteKind::Standard => "standard",
    }
}
/// Coarse classification of a transport-level failure. Drives the health
/// penalty (`runtime_profile_transport_health_penalty`) and the log label
/// (`runtime_transport_failure_kind_label`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RuntimeTransportFailureKind {
    // Hostname/DNS resolution failed.
    Dns,
    // Timed out while establishing the connection.
    ConnectTimeout,
    // Peer refused the connection.
    ConnectRefused,
    // Connection reset by the peer.
    ConnectReset,
    // TLS negotiation / certificate failure.
    TlsHandshake,
    // Established connection aborted mid-exchange.
    ConnectionAborted,
    // Write on a connection the peer had already closed.
    BrokenPipe,
    // Stream ended before the expected data arrived.
    UnexpectedEof,
    // Timed out while reading the response (reqwest `is_timeout`).
    ReadTimeout,
    // Upstream closed the stream before `response.completed` was observed.
    UpstreamClosedBeforeCommit,
    // Recognized as transport-related but matching no specific class.
    Other,
}
/// Stable snake_case label for a transport failure kind, used in log lines.
fn runtime_transport_failure_kind_label(kind: RuntimeTransportFailureKind) -> &'static str {
    match kind {
        RuntimeTransportFailureKind::Dns => "dns",
        RuntimeTransportFailureKind::ConnectTimeout => "connect_timeout",
        RuntimeTransportFailureKind::ConnectRefused => "connection_refused",
        RuntimeTransportFailureKind::ConnectReset => "connection_reset",
        RuntimeTransportFailureKind::TlsHandshake => "tls_handshake",
        RuntimeTransportFailureKind::ConnectionAborted => "connection_aborted",
        RuntimeTransportFailureKind::BrokenPipe => "broken_pipe",
        RuntimeTransportFailureKind::UnexpectedEof => "unexpected_eof",
        RuntimeTransportFailureKind::ReadTimeout => "read_timeout",
        RuntimeTransportFailureKind::UpstreamClosedBeforeCommit => "upstream_closed_before_commit",
        RuntimeTransportFailureKind::Other => "other",
    }
}
/// Coarse, grep-friendly marker for an upstream connect failure; unknown or
/// unclassified failures fall back to the generic connect-error marker.
fn runtime_upstream_connect_failure_marker(
    failure_kind: Option<RuntimeTransportFailureKind>,
) -> &'static str {
    let Some(kind) = failure_kind else {
        return "upstream_connect_error";
    };
    match kind {
        RuntimeTransportFailureKind::ConnectTimeout | RuntimeTransportFailureKind::ReadTimeout => {
            "upstream_connect_timeout"
        }
        RuntimeTransportFailureKind::Dns => "upstream_connect_dns_error",
        RuntimeTransportFailureKind::TlsHandshake => "upstream_tls_handshake_error",
        _ => "upstream_connect_error",
    }
}
/// Logs an upstream connect failure with both the coarse marker (for
/// grepping) and the precise failure class.
fn log_runtime_upstream_connect_failure(
    shared: &RuntimeRotationProxyShared,
    request_id: u64,
    transport: &str,
    profile_name: &str,
    failure_kind: Option<RuntimeTransportFailureKind>,
    error: &impl std::fmt::Display,
) {
    let marker = runtime_upstream_connect_failure_marker(failure_kind);
    let class = failure_kind
        .map(runtime_transport_failure_kind_label)
        .unwrap_or("unknown");
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport={transport} {marker} profile={profile_name} class={class} error={error}"
        ),
    );
}
fn runtime_transport_failure_kind_from_message(
message: &str,
) -> Option<RuntimeTransportFailureKind> {
let message = message.to_ascii_lowercase();
if message.contains("dns")
|| message.contains("failed to lookup address information")
|| message.contains("no such host")
|| message.contains("name or service not known")
{
Some(RuntimeTransportFailureKind::Dns)
} else if message.contains("tls")
|| message.contains("handshake")
|| message.contains("certificate")
{
Some(RuntimeTransportFailureKind::TlsHandshake)
} else if message.contains("connection refused") {
Some(RuntimeTransportFailureKind::ConnectRefused)
} else if message.contains("timed out") || message.contains("timeout") {
Some(RuntimeTransportFailureKind::ConnectTimeout)
} else if message.contains("connection reset") {
Some(RuntimeTransportFailureKind::ConnectReset)
} else if message.contains("broken pipe") {
Some(RuntimeTransportFailureKind::BrokenPipe)
} else if message.contains("unexpected eof") {
Some(RuntimeTransportFailureKind::UnexpectedEof)
} else if message.contains("connection aborted") {
Some(RuntimeTransportFailureKind::ConnectionAborted)
} else if message.contains("stream closed before response.completed")
|| message.contains("closed before response.completed")
{
Some(RuntimeTransportFailureKind::UpstreamClosedBeforeCommit)
} else if message.contains("unable to connect") {
Some(RuntimeTransportFailureKind::Other)
} else {
None
}
}
/// Classifies an `io::Error` by its `ErrorKind`, falling back to text
/// matching on the rendered message for kinds with no direct mapping.
fn runtime_transport_failure_kind_from_io_error(
    err: &io::Error,
) -> Option<RuntimeTransportFailureKind> {
    let mapped = match err.kind() {
        io::ErrorKind::TimedOut => Some(RuntimeTransportFailureKind::ConnectTimeout),
        io::ErrorKind::ConnectionRefused => Some(RuntimeTransportFailureKind::ConnectRefused),
        io::ErrorKind::ConnectionReset => Some(RuntimeTransportFailureKind::ConnectReset),
        io::ErrorKind::ConnectionAborted => Some(RuntimeTransportFailureKind::ConnectionAborted),
        io::ErrorKind::BrokenPipe => Some(RuntimeTransportFailureKind::BrokenPipe),
        io::ErrorKind::UnexpectedEof => Some(RuntimeTransportFailureKind::UnexpectedEof),
        _ => None,
    };
    mapped.or_else(|| runtime_transport_failure_kind_from_message(&err.to_string()))
}
fn runtime_transport_failure_kind_from_reqwest(
err: &reqwest::Error,
) -> Option<RuntimeTransportFailureKind> {
if err.is_timeout() {
return Some(RuntimeTransportFailureKind::ReadTimeout);
}
std::error::Error::source(err)
.and_then(|source| source.downcast_ref::<io::Error>())
.and_then(runtime_transport_failure_kind_from_io_error)
.or_else(|| runtime_transport_failure_kind_from_message(&err.to_string()))
}
/// Classifies a websocket error: I/O errors defer to the io classifier, TLS
/// errors are handshake failures, and a closed/already-closed socket counts
/// as the upstream closing before the response committed. Everything else
/// falls back to message-text matching.
fn runtime_transport_failure_kind_from_ws(err: &WsError) -> Option<RuntimeTransportFailureKind> {
    match err {
        WsError::Io(io) => runtime_transport_failure_kind_from_io_error(io),
        WsError::Tls(_) => Some(RuntimeTransportFailureKind::TlsHandshake),
        WsError::ConnectionClosed | WsError::AlreadyClosed => {
            Some(RuntimeTransportFailureKind::UpstreamClosedBeforeCommit)
        }
        _ => runtime_transport_failure_kind_from_message(&err.to_string()),
    }
}
/// Walks an error's cause chain looking for a classifiable transport
/// failure. For each cause, the typed classifiers (reqwest, websocket, io)
/// are tried before falling back to message-text matching; the first hit
/// anywhere in the chain wins.
fn runtime_proxy_transport_failure_kind(
    err: &anyhow::Error,
) -> Option<RuntimeTransportFailureKind> {
    err.chain().find_map(|cause| {
        if let Some(reqwest_error) = cause.downcast_ref::<reqwest::Error>()
            && let Some(kind) = runtime_transport_failure_kind_from_reqwest(reqwest_error)
        {
            return Some(kind);
        }
        if let Some(ws_error) = cause.downcast_ref::<WsError>()
            && let Some(kind) = runtime_transport_failure_kind_from_ws(ws_error)
        {
            return Some(kind);
        }
        if let Some(io_error) = cause.downcast_ref::<io::Error>()
            && let Some(kind) = runtime_transport_failure_kind_from_io_error(io_error)
        {
            return Some(kind);
        }
        runtime_transport_failure_kind_from_message(&cause.to_string())
    })
}
fn runtime_profile_transport_health_penalty(kind: RuntimeTransportFailureKind) -> u32 {
match kind {
RuntimeTransportFailureKind::Dns
| RuntimeTransportFailureKind::ConnectTimeout
| RuntimeTransportFailureKind::ConnectRefused
| RuntimeTransportFailureKind::ConnectReset
| RuntimeTransportFailureKind::TlsHandshake => {
RUNTIME_PROFILE_CONNECT_FAILURE_HEALTH_PENALTY
}
RuntimeTransportFailureKind::BrokenPipe
| RuntimeTransportFailureKind::ConnectionAborted
| RuntimeTransportFailureKind::UnexpectedEof
| RuntimeTransportFailureKind::ReadTimeout
| RuntimeTransportFailureKind::UpstreamClosedBeforeCommit
| RuntimeTransportFailureKind::Other => RUNTIME_PROFILE_TRANSPORT_FAILURE_HEALTH_PENALTY,
}
}
/// Wipes the per-route success-streak counter for a profile (called when a
/// failure is recorded).
fn reset_runtime_profile_success_streak(
    runtime: &mut RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) {
    let streak_key = runtime_profile_route_success_streak_key(profile_name, route_kind);
    runtime.profile_health.remove(&streak_key);
}
/// Raises the (decayed) "bad pairing" score for a profile/route pair by
/// `delta`, capped at `RUNTIME_PROFILE_HEALTH_MAX_SCORE`. Also resets the
/// route's success streak, schedules a state save, and logs the change.
fn bump_runtime_profile_bad_pairing_score(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    delta: u32,
    reason: &str,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    let key = runtime_profile_route_bad_pairing_key(profile_name, route_kind);
    let current = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &key,
        now,
        RUNTIME_PROFILE_BAD_PAIRING_DECAY_SECONDS,
    );
    let next_score = current
        .saturating_add(delta)
        .min(RUNTIME_PROFILE_HEALTH_MAX_SCORE);
    reset_runtime_profile_success_streak(&mut runtime, profile_name, route_kind);
    let entry = RuntimeProfileHealth {
        score: next_score,
        updated_at: now,
    };
    runtime.profile_health.insert(key, entry);
    let save_reason = format!(
        "profile_bad_pairing:{profile_name}:{}",
        runtime_route_kind_label(route_kind)
    );
    schedule_runtime_state_save_from_runtime(shared, &runtime, &save_reason);
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_bad_pairing profile={profile_name} route={} score={next_score} delta={delta} reason={reason}",
            runtime_route_kind_label(route_kind)
        ),
    );
    Ok(())
}
/// Raises the (decayed) per-route health score for `profile_name` by
/// `delta` (capped at `RUNTIME_PROFILE_HEALTH_MAX_SCORE`) and resets the
/// route's success streak. When the new score reaches
/// `RUNTIME_PROFILE_CIRCUIT_OPEN_THRESHOLD` the route's circuit breaker is
/// opened (or extended); consecutive re-opens escalate a staged reopen
/// counter that lengthens the open window. Schedules a state save and logs
/// the outcome.
fn bump_runtime_profile_health_score(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    delta: u32,
    reason: &str,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    let key = runtime_profile_route_health_key(profile_name, route_kind);
    // New score = decayed current score + delta, capped at the max.
    let next_score = runtime
        .profile_health
        .get(&key)
        .map(|entry| runtime_profile_effective_health_score(entry, now))
        .unwrap_or(0)
        .saturating_add(delta)
        .min(RUNTIME_PROFILE_HEALTH_MAX_SCORE);
    reset_runtime_profile_success_streak(&mut runtime, profile_name, route_kind);
    runtime.profile_health.insert(
        key,
        RuntimeProfileHealth {
            score: next_score,
            updated_at: now,
        },
    );
    let circuit_until = if next_score >= RUNTIME_PROFILE_CIRCUIT_OPEN_THRESHOLD {
        let circuit_key = runtime_profile_route_circuit_key(profile_name, route_kind);
        // If a circuit window already exists this is a re-open: escalate the
        // (decayed) reopen stage up to the max; a first open starts at 0.
        let reopen_stage = if runtime
            .profile_route_circuit_open_until
            .contains_key(&circuit_key)
        {
            runtime_profile_effective_score_from_map(
                &runtime.profile_health,
                &runtime_profile_route_circuit_reopen_key(profile_name, route_kind),
                now,
                RUNTIME_PROFILE_CIRCUIT_REOPEN_DECAY_SECONDS,
            )
            .saturating_add(1)
            .min(RUNTIME_PROFILE_CIRCUIT_REOPEN_MAX_STAGE)
        } else {
            0
        };
        // Stage 0 clears any persisted reopen counter; otherwise record the
        // new stage (stored in the health map with its own decay window).
        if reopen_stage == 0 {
            runtime
                .profile_health
                .remove(&runtime_profile_route_circuit_reopen_key(
                    profile_name,
                    route_kind,
                ));
        } else {
            runtime.profile_health.insert(
                runtime_profile_route_circuit_reopen_key(profile_name, route_kind),
                RuntimeProfileHealth {
                    score: reopen_stage,
                    updated_at: now,
                },
            );
        }
        let until = now.saturating_add(runtime_profile_circuit_open_seconds(
            next_score,
            reopen_stage,
        ));
        // Never shorten an already-open window; keep the later deadline.
        runtime
            .profile_route_circuit_open_until
            .entry(circuit_key)
            .and_modify(|current| *current = (*current).max(until))
            .or_insert(until);
        Some((until, reopen_stage))
    } else {
        None
    };
    schedule_runtime_state_save_from_runtime(
        shared,
        &runtime,
        &format!(
            "profile_health:{profile_name}:{}",
            runtime_route_kind_label(route_kind)
        ),
    );
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_health profile={profile_name} route={} score={next_score} delta={delta} reason={reason}",
            runtime_route_kind_label(route_kind)
        ),
    );
    if let Some((until, reopen_stage)) = circuit_until {
        runtime_proxy_log(
            shared,
            format!(
                "profile_circuit_open profile={profile_name} route={} until={until} reopen_stage={reopen_stage} reason={reason} score={next_score}",
                runtime_route_kind_label(route_kind)
            ),
        );
    }
    Ok(())
}
/// Applies one success to a profile's per-route health: increments the
/// (decayed) success streak and subtracts a recovery amount from the
/// route's health score — the base recovery plus one bonus point once the
/// streak reaches two. When the score hits zero both entries are removed;
/// when no health entry exists at all, only a stale streak entry is cleared.
fn recover_runtime_profile_health_for_route(
    runtime: &mut RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) {
    let key = runtime_profile_route_health_key(profile_name, route_kind);
    let streak_key = runtime_profile_route_success_streak_key(profile_name, route_kind);
    let Some(current_score) = runtime
        .profile_health
        .get(&key)
        .map(|entry| runtime_profile_effective_health_score(entry, now))
    else {
        // Nothing to recover; drop any leftover streak counter.
        runtime.profile_health.remove(&streak_key);
        return;
    };
    let next_streak = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &streak_key,
        now,
        RUNTIME_PROFILE_SUCCESS_STREAK_DECAY_SECONDS,
    )
    .saturating_add(1)
    .min(RUNTIME_PROFILE_SUCCESS_STREAK_MAX);
    // Base recovery, plus 1 bonus point once a streak is established.
    let recovery = RUNTIME_PROFILE_HEALTH_SUCCESS_RECOVERY_SCORE
        .saturating_add(next_streak.saturating_sub(1).min(1));
    let next_score = current_score.saturating_sub(recovery);
    if next_score == 0 {
        runtime.profile_health.remove(&key);
        runtime.profile_health.remove(&streak_key);
    } else {
        runtime.profile_health.insert(
            key,
            RuntimeProfileHealth {
                score: next_score,
                updated_at: now,
            },
        );
        runtime.profile_health.insert(
            streak_key,
            RuntimeProfileHealth {
                score: next_streak,
                updated_at: now,
            },
        );
    }
}
/// Places `profile_name` on a fixed-length retry backoff: prunes stale
/// selection backoffs, drops the profile's probe-cache entry, records the
/// deadline, schedules a state save, and logs.
fn mark_runtime_profile_retry_backoff(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    prune_runtime_profile_selection_backoff(&mut runtime, now);
    runtime.profile_probe_cache.remove(profile_name);
    let until = now.saturating_add(RUNTIME_PROFILE_RETRY_BACKOFF_SECONDS);
    runtime
        .profile_retry_backoff_until
        .insert(profile_name.to_string(), until);
    let save_reason = format!("profile_retry_backoff:{profile_name}");
    schedule_runtime_state_save_from_runtime(shared, &runtime, &save_reason);
    drop(runtime);
    let message = format!("profile_retry_backoff profile={profile_name} until={until}");
    runtime_proxy_log(shared, message);
    Ok(())
}
/// Starts (or doubles) the transport backoff for a profile/route pair.
///
/// The next window doubles any time still remaining on the current one,
/// clamped between the base and max backoff constants; an expired or absent
/// window restarts at the base. Also prunes stale selection backoffs, drops
/// the profile's probe-cache entry, schedules a state save, and logs.
fn mark_runtime_profile_transport_backoff(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    context: &str,
) -> Result<()> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let now = Local::now().timestamp();
    prune_runtime_profile_selection_backoff(&mut runtime, now);
    runtime.profile_probe_cache.remove(profile_name);
    let route_key = runtime_profile_transport_backoff_key(profile_name, route_kind);
    let remaining_seconds = runtime_profile_transport_backoff_until_from_map(
        &runtime.profile_transport_backoff_until,
        profile_name,
        route_kind,
        now,
    )
    .map_or(0, |until| until.saturating_sub(now));
    let next_backoff_seconds = if remaining_seconds > 0 {
        remaining_seconds.saturating_mul(2).clamp(
            RUNTIME_PROFILE_TRANSPORT_BACKOFF_SECONDS,
            RUNTIME_PROFILE_TRANSPORT_BACKOFF_MAX_SECONDS,
        )
    } else {
        RUNTIME_PROFILE_TRANSPORT_BACKOFF_SECONDS
    };
    let until = now.saturating_add(next_backoff_seconds);
    // Never shorten an existing window; keep the later deadline.
    let slot = runtime
        .profile_transport_backoff_until
        .entry(route_key)
        .or_insert(until);
    *slot = (*slot).max(until);
    let save_reason = format!(
        "profile_transport_backoff:{profile_name}:{}",
        runtime_route_kind_label(route_kind)
    );
    schedule_runtime_state_save_from_runtime(shared, &runtime, &save_reason);
    drop(runtime);
    runtime_proxy_log(
        shared,
        format!(
            "profile_transport_backoff profile={profile_name} route={} until={until} seconds={next_backoff_seconds} context={context}",
            runtime_route_kind_label(route_kind)
        ),
    );
    Ok(())
}
/// Records a transport-level failure for a profile/route pair: logs the
/// failure class, bumps the route health and bad-pairing scores, records a
/// latency failure, and starts (or extends) the transport backoff. Errors
/// that do not classify as transport failures are ignored entirely.
fn note_runtime_profile_transport_failure(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    context: &str,
    err: &anyhow::Error,
) {
    let Some(failure_kind) = runtime_proxy_transport_failure_kind(err) else {
        return;
    };
    let route_label = runtime_route_kind_label(route_kind);
    let class_label = runtime_transport_failure_kind_label(failure_kind);
    runtime_proxy_log(
        shared,
        format!(
            "profile_transport_failure profile={profile_name} route={route_label} class={class_label} context={context}"
        ),
    );
    // All bookkeeping below is best-effort; a poisoned lock must not abort
    // failure handling.
    let _ = bump_runtime_profile_health_score(
        shared,
        profile_name,
        route_kind,
        runtime_profile_transport_health_penalty(failure_kind),
        context,
    );
    let _ = bump_runtime_profile_bad_pairing_score(
        shared,
        profile_name,
        route_kind,
        RUNTIME_PROFILE_BAD_PAIRING_PENALTY,
        context,
    );
    note_runtime_profile_latency_failure(shared, profile_name, route_kind, context);
    let _ = mark_runtime_profile_transport_backoff(shared, profile_name, route_kind, context);
}
/// Removes both the route-scoped transport-backoff entry and the bare
/// profile-name entry for `profile_name`; returns true when either existed.
fn clear_runtime_profile_transport_backoff_for_route(
    runtime: &mut RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> bool {
    let route_key = runtime_profile_transport_backoff_key(profile_name, route_kind);
    let removed_route = runtime
        .profile_transport_backoff_until
        .remove(&route_key)
        .is_some();
    let removed_profile = runtime
        .profile_transport_backoff_until
        .remove(profile_name)
        .is_some();
    removed_route || removed_profile
}
/// Commits a profile selection for `route_kind` with current-profile
/// tracking enabled; returns whether the runtime's current profile actually
/// switched.
fn commit_runtime_proxy_profile_selection(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> Result<bool> {
    commit_runtime_proxy_profile_selection_with_policy(shared, profile_name, route_kind, true)
}
/// Commits `profile_name` as the selected profile for `route_kind`.
///
/// With `track_current_profile` set, the runtime's current profile is
/// updated and — for every route except the compact lane — the persisted
/// globally active profile as well. A commit also clears the profile's
/// retry backoff, the route's transport backoff and circuit breaker, and
/// applies success recovery to its route health. State is saved only when
/// any of those actually changed. Returns whether the runtime's current
/// profile switched.
fn commit_runtime_proxy_profile_selection_with_policy(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    track_current_profile: bool,
) -> Result<bool> {
    let mut runtime = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?;
    let switch_runtime_profile = track_current_profile && runtime.current_profile != profile_name;
    // Compact-lane selections never drive the globally active profile.
    let switch_global_profile =
        track_current_profile && !matches!(route_kind, RuntimeRouteKind::Compact);
    let switched = switch_runtime_profile;
    let now = Local::now().timestamp();
    // A successful commit wipes this profile's negative signals for the route.
    let cleared_retry_backoff = runtime
        .profile_retry_backoff_until
        .remove(profile_name)
        .is_some();
    let cleared_transport_backoff =
        clear_runtime_profile_transport_backoff_for_route(&mut runtime, profile_name, route_kind);
    let cleared_route_circuit =
        clear_runtime_profile_circuit_for_route(&mut runtime, profile_name, route_kind);
    let cleared_health =
        clear_runtime_profile_health_for_route(&mut runtime, profile_name, route_kind, now);
    if switch_runtime_profile {
        runtime.current_profile = profile_name.to_string();
    }
    let state_changed =
        switch_global_profile && runtime.state.active_profile.as_deref() != Some(profile_name);
    if switch_global_profile {
        runtime.state.active_profile = Some(profile_name.to_string());
        record_run_selection(&mut runtime.state, profile_name);
    }
    // Persist only when something observable changed.
    let should_persist = switched
        || state_changed
        || cleared_retry_backoff
        || cleared_transport_backoff
        || cleared_route_circuit
        || cleared_health;
    if should_persist {
        schedule_runtime_state_save_from_runtime(
            shared,
            &runtime,
            &format!("profile_commit:{profile_name}"),
        );
    }
    drop(runtime);
    if switch_runtime_profile {
        // Keep the broker's view of the current profile in sync.
        update_runtime_broker_current_profile(&shared.log_path, profile_name);
    }
    runtime_proxy_log(
        shared,
        format!(
            "profile_commit profile={profile_name} route={} switched={switched} persisted={should_persist} track_current_profile={track_current_profile} cleared_route_circuit={cleared_route_circuit}",
            runtime_route_kind_label(route_kind),
        ),
    );
    Ok(switched)
}
/// Clears/recovers a profile's health bookkeeping for a route after a
/// successful commit: removes the profile-wide health entry, applies
/// success recovery to the per-route health score, and drops the route's
/// bad-pairing and circuit-reopen entries. Returns true when any tracked
/// value changed, so the caller knows whether to persist state.
fn clear_runtime_profile_health_for_route(
    runtime: &mut RuntimeRotationState,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
    now: i64,
) -> bool {
    let mut changed = runtime.profile_health.remove(profile_name).is_some();
    // Capture pre-recovery scores so a change is detected even when the
    // recovery/removals below drop the entries outright.
    let previous_route_score =
        runtime_profile_route_health_score(runtime, profile_name, now, route_kind);
    let previous_bad_pairing = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &runtime_profile_route_bad_pairing_key(profile_name, route_kind),
        now,
        RUNTIME_PROFILE_BAD_PAIRING_DECAY_SECONDS,
    );
    let previous_circuit_reopen = runtime_profile_effective_score_from_map(
        &runtime.profile_health,
        &runtime_profile_route_circuit_reopen_key(profile_name, route_kind),
        now,
        RUNTIME_PROFILE_CIRCUIT_REOPEN_DECAY_SECONDS,
    );
    recover_runtime_profile_health_for_route(runtime, profile_name, route_kind, now);
    changed = changed || previous_route_score > 0;
    changed = changed || previous_bad_pairing > 0;
    changed = changed || previous_circuit_reopen > 0;
    changed = runtime
        .profile_health
        .remove(&runtime_profile_route_bad_pairing_key(
            profile_name,
            route_kind,
        ))
        .is_some()
        || changed;
    changed = runtime
        .profile_health
        .remove(&runtime_profile_route_circuit_reopen_key(
            profile_name,
            route_kind,
        ))
        .is_some()
        || changed;
    changed
}
/// Same as `commit_runtime_proxy_profile_selection` but discards the
/// "did we switch" flag, propagating only errors.
fn commit_runtime_proxy_profile_selection_with_notice(
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    route_kind: RuntimeRouteKind,
) -> Result<()> {
    commit_runtime_proxy_profile_selection(shared, profile_name, route_kind).map(|_| ())
}
/// Proxies an HTTP request upstream on behalf of `profile_name`.
///
/// Rebuilds the incoming request against the configured upstream base URL,
/// forwards the caller's headers (minus skipped ones and, when overridden,
/// `x-codex-turn-state`), re-authenticates with the profile's bearer token
/// and optional account id, then sends it on the shared async runtime.
/// Records a connect-latency observation and notes 401s as auth failures.
/// Connect failures are logged with their failure class and returned as
/// contextualized errors.
fn send_runtime_proxy_upstream_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    turn_state_override: Option<&str>,
) -> Result<reqwest::Response> {
    let started_at = Instant::now();
    // Only the upstream base URL is needed from the rotation state; cloning
    // just that string avoids copying the whole state under the lock.
    let upstream_base_url = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?
        .upstream_base_url
        .clone();
    let auth = runtime_profile_usage_auth(shared, profile_name)?;
    let upstream_url = runtime_proxy_upstream_url(&upstream_base_url, &request.path_and_query);
    let method = reqwest::Method::from_bytes(request.method.as_bytes()).with_context(|| {
        format!(
            "failed to proxy unsupported HTTP method '{}' for runtime auto-rotate",
            request.method
        )
    })?;
    let mut upstream_request = shared.async_client.request(method, &upstream_url);
    for (name, value) in &request.headers {
        // Drop the inbound turn-state header when we are about to override it.
        if turn_state_override.is_some() && name.eq_ignore_ascii_case("x-codex-turn-state") {
            continue;
        }
        if should_skip_runtime_request_header(name) {
            continue;
        }
        upstream_request = upstream_request.header(name.as_str(), value.as_str());
    }
    if let Some(turn_state) = turn_state_override {
        upstream_request = upstream_request.header("x-codex-turn-state", turn_state);
    }
    upstream_request = upstream_request
        .header("Authorization", format!("Bearer {}", auth.access_token))
        .body(request.body.clone());
    // Preserve the client's user agent when one is present; otherwise fall
    // back to a generic one.
    if let Some(user_agent) = runtime_proxy_effective_user_agent(&request.headers) {
        upstream_request = upstream_request.header("User-Agent", user_agent);
    } else {
        upstream_request = upstream_request.header("User-Agent", "codex-cli");
    }
    if let Some(account_id) = auth.account_id.as_deref() {
        upstream_request = upstream_request.header("ChatGPT-Account-Id", account_id);
    }
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http upstream_start profile={profile_name} method={} url={} turn_state_override={:?} previous_response_id={:?}",
            request.method,
            upstream_url,
            turn_state_override,
            runtime_request_previous_response_id(request)
        ),
    );
    // Test hook: simulate a connect failure exactly once when requested.
    if runtime_take_fault_injection("PRODEX_RUNTIME_FAULT_UPSTREAM_CONNECT_ERROR_ONCE") {
        bail!("injected runtime upstream connect failure");
    }
    let response = match shared
        .async_runtime
        .block_on(async move { upstream_request.send().await })
    {
        Ok(response) => response,
        Err(err) => {
            log_runtime_upstream_connect_failure(
                shared,
                request_id,
                "http",
                profile_name,
                runtime_transport_failure_kind_from_reqwest(&err),
                &err,
            );
            return Err(anyhow::Error::new(err).context(format!(
                "failed to proxy runtime request for profile '{}' to {}",
                profile_name, upstream_url
            )));
        }
    };
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http upstream_response profile={profile_name} status={} content_type={:?} turn_state={:?}",
            response.status().as_u16(),
            response
                .headers()
                .get(reqwest::header::CONTENT_TYPE)
                .and_then(|value| value.to_str().ok()),
            runtime_proxy_header_value(response.headers(), "x-codex-turn-state")
        ),
    );
    // NOTE(review): this previously matched `401 | 403` but only acted on
    // 401, leaving the 403 arm dead; the collapsed check keeps behavior
    // identical. Confirm whether 403 should also count as an auth failure.
    if response.status().as_u16() == 401 {
        note_runtime_profile_auth_failure(
            shared,
            profile_name,
            runtime_proxy_request_lane(&request.path_and_query, false),
            response.status().as_u16(),
        );
    }
    note_runtime_profile_latency_observation(
        shared,
        profile_name,
        runtime_proxy_request_lane(&request.path_and_query, false),
        "connect",
        started_at.elapsed().as_millis() as u64,
    );
    Ok(response)
}
/// Proxies an HTTP request upstream for the responses lane, mirroring
/// `send_runtime_proxy_upstream_request` but logging with `upstream_async_*`
/// markers and attributing latency/auth observations to
/// `RuntimeRouteKind::Responses` directly.
fn send_runtime_proxy_upstream_responses_request(
    request_id: u64,
    request: &RuntimeProxyRequest,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    turn_state_override: Option<&str>,
) -> Result<reqwest::Response> {
    let started_at = Instant::now();
    // Only the upstream base URL is needed from the rotation state; cloning
    // just that string avoids copying the whole state under the lock.
    let upstream_base_url = shared
        .runtime
        .lock()
        .map_err(|_| anyhow::anyhow!("runtime auto-rotate state is poisoned"))?
        .upstream_base_url
        .clone();
    let auth = runtime_profile_usage_auth(shared, profile_name)?;
    let upstream_url = runtime_proxy_upstream_url(&upstream_base_url, &request.path_and_query);
    let method = reqwest::Method::from_bytes(request.method.as_bytes()).with_context(|| {
        format!(
            "failed to proxy unsupported HTTP method '{}' for runtime auto-rotate",
            request.method
        )
    })?;
    let mut upstream_request = shared.async_client.request(method, &upstream_url);
    for (name, value) in &request.headers {
        // Drop the inbound turn-state header when we are about to override it.
        if turn_state_override.is_some() && name.eq_ignore_ascii_case("x-codex-turn-state") {
            continue;
        }
        if should_skip_runtime_request_header(name) {
            continue;
        }
        upstream_request = upstream_request.header(name.as_str(), value.as_str());
    }
    if let Some(turn_state) = turn_state_override {
        upstream_request = upstream_request.header("x-codex-turn-state", turn_state);
    }
    upstream_request = upstream_request
        .header("Authorization", format!("Bearer {}", auth.access_token))
        .body(request.body.clone());
    // Preserve the client's user agent when one is present; otherwise fall
    // back to a generic one.
    if let Some(user_agent) = runtime_proxy_effective_user_agent(&request.headers) {
        upstream_request = upstream_request.header("User-Agent", user_agent);
    } else {
        upstream_request = upstream_request.header("User-Agent", "codex-cli");
    }
    if let Some(account_id) = auth.account_id.as_deref() {
        upstream_request = upstream_request.header("ChatGPT-Account-Id", account_id);
    }
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http upstream_async_start profile={profile_name} method={} url={} turn_state_override={:?} previous_response_id={:?}",
            request.method,
            upstream_url,
            turn_state_override,
            runtime_request_previous_response_id(request)
        ),
    );
    // Test hook: simulate a connect failure exactly once when requested.
    if runtime_take_fault_injection("PRODEX_RUNTIME_FAULT_UPSTREAM_CONNECT_ERROR_ONCE") {
        bail!("injected runtime upstream connect failure");
    }
    let response = match shared
        .async_runtime
        .block_on(async move { upstream_request.send().await })
    {
        Ok(response) => response,
        Err(err) => {
            log_runtime_upstream_connect_failure(
                shared,
                request_id,
                "http",
                profile_name,
                runtime_transport_failure_kind_from_reqwest(&err),
                &err,
            );
            return Err(anyhow::Error::new(err).context(format!(
                "failed to proxy runtime request for profile '{}' to {}",
                profile_name, upstream_url
            )));
        }
    };
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http upstream_async_response profile={profile_name} status={} content_type={:?} turn_state={:?}",
            response.status().as_u16(),
            response
                .headers()
                .get(reqwest::header::CONTENT_TYPE)
                .and_then(|value| value.to_str().ok()),
            runtime_proxy_header_value(response.headers(), "x-codex-turn-state")
        ),
    );
    // NOTE(review): this previously matched `401 | 403` but only acted on
    // 401, leaving the 403 arm dead; the collapsed check keeps behavior
    // identical. Confirm whether 403 should also count as an auth failure.
    if response.status().as_u16() == 401 {
        note_runtime_profile_auth_failure(
            shared,
            profile_name,
            RuntimeRouteKind::Responses,
            response.status().as_u16(),
        );
    }
    note_runtime_profile_latency_observation(
        shared,
        profile_name,
        RuntimeRouteKind::Responses,
        "connect",
        started_at.elapsed().as_millis() as u64,
    );
    Ok(response)
}
fn runtime_proxy_upstream_url(base_url: &str, path_and_query: &str) -> String {
let base_url = base_url.trim_end_matches('/');
let normalized_path_and_query = runtime_proxy_normalize_openai_path(path_and_query);
if base_url.contains("/backend-api")
&& let Some(suffix) = normalized_path_and_query
.as_ref()
.strip_prefix("/backend-api")
{
return format!("{base_url}{suffix}");
}
if normalized_path_and_query.starts_with('/') {
return format!("{base_url}{normalized_path_and_query}");
}
format!("{base_url}/{normalized_path_and_query}")
}
/// Builds the websocket form of the proxied upstream URL, mapping
/// `http` → `ws` and `https` → `wss`. URLs that are already `ws`/`wss`
/// pass through untouched; any other scheme is rejected.
fn runtime_proxy_upstream_websocket_url(base_url: &str, path_and_query: &str) -> Result<String> {
    let upstream_url = runtime_proxy_upstream_url(base_url, path_and_query);
    let mut url = reqwest::Url::parse(&upstream_url)
        .with_context(|| format!("failed to parse upstream websocket URL {}", upstream_url))?;
    // Decide which websocket scheme (if any) must replace the current one.
    let replacement = match url.scheme() {
        "http" => Some("ws"),
        "https" => Some("wss"),
        "ws" | "wss" => None,
        scheme => bail!(
            "unsupported upstream websocket scheme '{scheme}' in {}",
            upstream_url
        ),
    };
    if let Some(scheme) = replacement {
        // set_scheme only permits special-scheme to special-scheme changes,
        // which http(s) → ws(s) satisfies.
        url.set_scheme(scheme)
            .map_err(|_| anyhow::anyhow!("failed to set websocket scheme for {upstream_url}"))?;
    }
    Ok(url.to_string())
}
/// Returns true for request headers that must not be forwarded upstream:
/// credentials (re-added from the rotated profile), hop-by-hop framing
/// headers, websocket handshake headers, and internal
/// `x-prodex-internal-*` headers. Matching is case-insensitive.
fn should_skip_runtime_request_header(name: &str) -> bool {
    const SKIP_EXACT: [&str; 7] = [
        "authorization",
        "chatgpt-account-id",
        "connection",
        "content-length",
        "host",
        "transfer-encoding",
        "upgrade",
    ];
    let lower = name.to_ascii_lowercase();
    if SKIP_EXACT.contains(&lower.as_str()) {
        return true;
    }
    lower.starts_with("sec-websocket-") || lower.starts_with("x-prodex-internal-")
}
/// Picks the first non-empty `User-Agent` value from the client's headers
/// (case-insensitive name match); empty values are skipped so a later
/// non-empty header can still win.
fn runtime_proxy_effective_user_agent(headers: &[(String, String)]) -> Option<&str> {
    for (name, value) in headers {
        if name.eq_ignore_ascii_case("user-agent") && !value.is_empty() {
            return Some(value.as_str());
        }
    }
    None
}
/// Returns true for upstream response headers that are dropped before
/// relaying to the local client (framing/transport headers that the local
/// writer regenerates itself). Matching is case-insensitive.
fn should_skip_runtime_response_header(name: &str) -> bool {
    const SKIP: [&str; 6] = [
        "connection",
        "content-encoding",
        "content-length",
        "date",
        "server",
        "transfer-encoding",
    ];
    SKIP.contains(&name.to_ascii_lowercase().as_str())
}
fn forward_runtime_proxy_response(
shared: &RuntimeRotationProxyShared,
response: reqwest::Response,
prelude: Vec<u8>,
) -> Result<tiny_http::ResponseBox> {
let parts = buffer_runtime_proxy_async_response_parts(shared, response, prelude)?;
Ok(build_runtime_proxy_response_from_parts(parts))
}
/// Classifies a non-error upstream `/responses` reply and decides how it is
/// relayed to the local client.
///
/// Non-SSE replies are fully buffered; SSE replies are inspected via a
/// bounded lookahead so that quota-blocked or "previous response not found"
/// events can be detected before any bytes are committed to the client,
/// allowing the caller to retry on another profile. Session/turn bookkeeping
/// (`remember_runtime_*`) is recorded up front so later attempts see it.
///
/// NOTE(review): the ordering of the `remember_runtime_*` calls appears
/// deliberate (owner before session before turn state) — preserve it.
fn prepare_runtime_proxy_responses_success(
    request_id: u64,
    request_previous_response_id: Option<&str>,
    request_session_id: Option<&str>,
    request_turn_state: Option<&str>,
    response: reqwest::Response,
    shared: &RuntimeRotationProxyShared,
    profile_name: &str,
    inflight_guard: RuntimeProfileInFlightGuard,
) -> Result<RuntimeResponsesAttempt> {
    let turn_state = runtime_proxy_header_value(response.headers(), "x-codex-turn-state");
    // Record which profile now owns the previous_response_id lineage.
    remember_runtime_successful_previous_response_owner(
        shared,
        profile_name,
        request_previous_response_id,
        RuntimeRouteKind::Responses,
    )?;
    remember_runtime_session_id(
        shared,
        profile_name,
        request_session_id,
        RuntimeRouteKind::Responses,
    )?;
    remember_runtime_turn_state(
        shared,
        profile_name,
        turn_state.as_deref(),
        RuntimeRouteKind::Responses,
    )?;
    // SSE detection is based purely on the upstream Content-Type header.
    let is_sse = response
        .headers()
        .get(reqwest::header::CONTENT_TYPE)
        .and_then(|value| value.to_str().ok())
        .is_some_and(|value| value.contains("text/event-stream"));
    runtime_proxy_log(
        shared,
        format!(
            "request={request_id} transport=http prepare_success profile={profile_name} sse={is_sse} turn_state={:?}",
            turn_state
        ),
    );
    if !is_sse {
        // Plain (non-streaming) reply: buffer it whole, harvest response ids
        // from the body, and release any pending compact lineage on commit.
        let parts = buffer_runtime_proxy_async_response_parts(shared, response, Vec::new())?;
        let response_ids = extract_runtime_response_ids_from_body_bytes(&parts.body);
        if !response_ids.is_empty() {
            remember_runtime_response_ids(
                shared,
                profile_name,
                &response_ids,
                RuntimeRouteKind::Responses,
            )?;
            // Best-effort: lineage release failure does not fail the request.
            let _ = release_runtime_compact_lineage(
                shared,
                profile_name,
                request_session_id,
                request_turn_state,
                "response_committed",
            );
        }
        return Ok(RuntimeResponsesAttempt::Success {
            profile_name: profile_name.to_string(),
            response: RuntimeResponsesReply::Buffered(parts),
        });
    }
    // Streaming reply: capture status and relayable headers, then hand the
    // body to a background prefetch task so the lookahead can be bounded.
    let status = response.status().as_u16();
    let mut headers = Vec::new();
    for (name, value) in response.headers() {
        if should_skip_runtime_response_header(name.as_str()) {
            continue;
        }
        if let Ok(value) = value.to_str() {
            headers.push((name.to_string(), value.to_string()));
        }
    }
    let mut prefetch = RuntimePrefetchStream::spawn(
        response,
        Arc::clone(&shared.async_runtime),
        shared.log_path.clone(),
        request_id,
    );
    // Peek at the first SSE events without consuming them for the client;
    // whatever was read is returned as `prelude` and replayed later.
    let lookahead = inspect_runtime_sse_lookahead(&mut prefetch, &shared.log_path, request_id)?;
    let (prelude, response_ids) = match lookahead {
        RuntimeSseInspection::Commit {
            prelude,
            response_ids,
        } => {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http sse_commit profile={profile_name} prelude_bytes={} response_ids={}",
                    prelude.len(),
                    response_ids.len()
                ),
            );
            (prelude, response_ids)
        }
        RuntimeSseInspection::QuotaBlocked(prelude) => {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http sse_quota_blocked profile={profile_name} prelude_bytes={}",
                    prelude.len()
                ),
            );
            // Caller may retry on another profile; the stream (with prelude
            // replayed) is still handed back in case no retry is possible.
            return Ok(RuntimeResponsesAttempt::QuotaBlocked {
                profile_name: profile_name.to_string(),
                response: RuntimeResponsesReply::Streaming(RuntimeStreamingResponse {
                    status,
                    headers: headers.clone(),
                    body: Box::new(prefetch.into_reader(prelude)),
                    request_id,
                    profile_name: profile_name.to_string(),
                    log_path: shared.log_path.clone(),
                    shared: shared.clone(),
                    _inflight_guard: Some(inflight_guard),
                }),
            });
        }
        RuntimeSseInspection::PreviousResponseNotFound(prelude) => {
            runtime_proxy_log(
                shared,
                format!(
                    "request={request_id} transport=http route=responses previous_response_not_found profile={profile_name} stage=sse_prelude prelude_bytes={}",
                    prelude.len()
                ),
            );
            return Ok(RuntimeResponsesAttempt::PreviousResponseNotFound {
                profile_name: profile_name.to_string(),
                response: RuntimeResponsesReply::Streaming(RuntimeStreamingResponse {
                    status,
                    headers: headers.clone(),
                    body: Box::new(prefetch.into_reader(prelude)),
                    request_id,
                    profile_name: profile_name.to_string(),
                    log_path: shared.log_path.clone(),
                    shared: shared.clone(),
                    _inflight_guard: Some(inflight_guard),
                }),
                turn_state,
            });
        }
    };
    // Commit path: persist the ids found during lookahead before streaming.
    remember_runtime_response_ids(
        shared,
        profile_name,
        &response_ids,
        RuntimeRouteKind::Responses,
    )?;
    if !response_ids.is_empty() {
        let _ = release_runtime_compact_lineage(
            shared,
            profile_name,
            request_session_id,
            request_turn_state,
            "response_committed",
        );
    }
    // Wrap the stream in a tap reader so ids appearing later in the stream
    // are also recorded; the prelude is replayed both to the client and to
    // the tap's scanner (seeded with already-known ids to avoid duplicates).
    Ok(RuntimeResponsesAttempt::Success {
        profile_name: profile_name.to_string(),
        response: RuntimeResponsesReply::Streaming(RuntimeStreamingResponse {
            status,
            headers,
            body: Box::new(RuntimeSseTapReader::new(
                prefetch.into_reader(prelude.clone()),
                shared.clone(),
                profile_name.to_string(),
                &prelude,
                &response_ids,
            )),
            request_id,
            profile_name: profile_name.to_string(),
            log_path: shared.log_path.clone(),
            shared: shared.clone(),
            _inflight_guard: Some(inflight_guard),
        }),
    })
}
impl RuntimeSseTapState {
    /// Scans proxied SSE bytes line-by-line. Bytes accumulate in `self.line`
    /// until a newline; a blank line terminates one SSE event, at which point
    /// the buffered `data:` payloads are mined for response ids.
    fn observe(&mut self, shared: &RuntimeRotationProxyShared, profile_name: &str, chunk: &[u8]) {
        for byte in chunk {
            self.line.push(*byte);
            if *byte != b'\n' {
                continue;
            }
            let line_text = String::from_utf8_lossy(&self.line);
            let trimmed = line_text.trim_end_matches(['\r', '\n']);
            if trimmed.is_empty() {
                // Blank line = event boundary: flush collected data lines.
                self.remember_response_ids(shared, profile_name, RuntimeRouteKind::Responses);
                self.data_lines.clear();
                self.line.clear();
                continue;
            }
            if let Some(payload) = trimmed.strip_prefix("data:") {
                self.data_lines.push(payload.trim_start().to_string());
            }
            self.line.clear();
        }
    }
    /// Flushes any trailing, unterminated event when the stream ends.
    fn finish(&mut self, shared: &RuntimeRotationProxyShared, profile_name: &str) {
        self.remember_response_ids(shared, profile_name, RuntimeRouteKind::Responses);
    }
    /// Persists response ids found in the current event's `data:` payloads,
    /// de-duplicating against ids already recorded for this stream.
    fn remember_response_ids(
        &mut self,
        shared: &RuntimeRotationProxyShared,
        profile_name: &str,
        verified_route: RuntimeRouteKind,
    ) {
        let fresh_ids = extract_runtime_response_ids_from_sse(&self.data_lines)
            .into_iter()
            .filter(|response_id| self.remembered_response_ids.insert(response_id.clone()))
            .collect::<Vec<_>>();
        if fresh_ids.is_empty() {
            return;
        }
        // Best-effort persistence: a store failure must not break the stream.
        let _ = remember_runtime_response_ids(shared, profile_name, &fresh_ids, verified_route);
    }
}
/// `Read` adapter that tees an SSE stream: bytes pass through unchanged to
/// the local client while `state` scans them for response ids to persist.
struct RuntimeSseTapReader {
    // Underlying proxied stream (typically a RuntimePrefetchReader).
    inner: Box<dyn Read + Send>,
    // Shared proxy state used by the tap to record response ids.
    shared: RuntimeRotationProxyShared,
    // Profile the stream belongs to; ids are remembered under this name.
    profile_name: String,
    // Incremental SSE line-scanner state, seeded with already-known ids.
    state: RuntimeSseTapState,
}
impl Read for RuntimePrefetchReader {
    /// Drains `pending` (current chunk cursor) first, then the in-memory
    /// `backlog` (chunks pushed back during lookahead), then blocks on the
    /// channel from the prefetch task, with an idle timeout.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if self.finished {
            return Ok(0);
        }
        loop {
            let read = self.pending.read(buf)?;
            if read > 0 {
                return Ok(read);
            }
            // Current chunk exhausted — find the next one.
            let next = if let Some(chunk) = self.backlog.pop_front() {
                // Backlog chunks already had their queued bytes released
                // when first received, so no accounting here.
                Some(chunk)
            } else {
                match self
                    .receiver
                    .recv_timeout(Duration::from_millis(runtime_proxy_stream_idle_timeout_ms()))
                {
                    Ok(chunk) => {
                        if let RuntimePrefetchChunk::Data(bytes) = &chunk {
                            // Release backpressure accounting for bytes
                            // leaving the bounded prefetch queue.
                            runtime_prefetch_release_queued_bytes(&self.shared, bytes.len());
                        }
                        Some(chunk)
                    }
                    Err(RecvTimeoutError::Timeout) => {
                        self.finished = true;
                        return Err(io::Error::new(
                            io::ErrorKind::TimedOut,
                            "runtime upstream stream idle timed out",
                        ));
                    }
                    Err(RecvTimeoutError::Disconnected) => {
                        // Sender gone: surface a recorded terminal error if
                        // the worker stored one; otherwise treat as EOF.
                        if let Some((kind, message)) = runtime_prefetch_terminal_error(&self.shared)
                        {
                            self.finished = true;
                            return Err(io::Error::new(kind, message));
                        }
                        None
                    }
                }
            };
            match next {
                Some(RuntimePrefetchChunk::Data(chunk)) => {
                    self.pending = Cursor::new(chunk);
                }
                Some(RuntimePrefetchChunk::End) | None => {
                    self.finished = true;
                    return Ok(0);
                }
                Some(RuntimePrefetchChunk::Error(kind, message)) => {
                    self.finished = true;
                    return Err(io::Error::new(kind, message));
                }
            }
        }
    }
}
impl Drop for RuntimePrefetchReader {
    // Abort the background prefetch task as soon as the reader goes away so
    // it stops pulling chunks from the upstream response.
    fn drop(&mut self) {
        self.worker_abort.abort();
    }
}
impl RuntimeSseTapReader {
    /// Builds a tap reader around `inner`. The scanner state is seeded with
    /// the response ids already harvested during lookahead (so they are not
    /// re-recorded) and the prelude bytes are replayed through it so that a
    /// partially-scanned event carries over correctly.
    fn new(
        inner: impl Read + Send + 'static,
        shared: RuntimeRotationProxyShared,
        profile_name: String,
        prelude: &[u8],
        remembered_response_ids: &[String],
    ) -> Self {
        let mut state = RuntimeSseTapState::default();
        state.remembered_response_ids = remembered_response_ids.iter().cloned().collect();
        state.observe(&shared, &profile_name, prelude);
        Self {
            inner: Box::new(inner),
            shared,
            profile_name,
            state,
        }
    }
}
impl Read for RuntimeSseTapReader {
    /// Forwards the inner read result unchanged; on data, feeds the bytes to
    /// the SSE scanner; on EOF, flushes the scanner; on error, records a
    /// transport failure for the owning profile before propagating.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        match self.inner.read(buf) {
            Err(err) => {
                let transport_error =
                    anyhow::Error::new(io::Error::new(err.kind(), err.to_string()));
                note_runtime_profile_transport_failure(
                    &self.shared,
                    &self.profile_name,
                    RuntimeRouteKind::Responses,
                    "sse_read",
                    &transport_error,
                );
                Err(err)
            }
            Ok(0) => {
                self.state.finish(&self.shared, &self.profile_name);
                Ok(0)
            }
            Ok(read) => {
                self.state
                    .observe(&self.shared, &self.profile_name, &buf[..read]);
                Ok(read)
            }
        }
    }
}
/// Hand-writes an HTTP/1.1 chunked response to the raw client socket,
/// relaying the (possibly infinite) upstream body as it arrives.
///
/// SSE bodies are flushed after every chunk so events reach the client
/// promptly; other bodies are flushed only after the first chunk and at the
/// end. Every failure point logs a structured line to the proxy log before
/// propagating, and read-side transport failures are recorded against the
/// profile's latency stats.
fn write_runtime_streaming_response(
    writer: Box<dyn Write + Send + 'static>,
    mut response: RuntimeStreamingResponse,
) -> io::Result<()> {
    let mut writer = writer;
    // SSE detection drives per-chunk flushing below.
    let flush_each_chunk = response.headers.iter().any(|(name, value)| {
        name.eq_ignore_ascii_case("content-type")
            && value.to_ascii_lowercase().contains("text/event-stream")
    });
    let started_at = Instant::now();
    // Shared logger for any failure while writing to the local client.
    let log_writer_error = |stage: &str,
                            chunk_count: usize,
                            total_bytes: usize,
                            err: &io::Error| {
        runtime_proxy_log_to_path(
            &response.log_path,
            &format!(
                "local_writer_error request={} transport=http profile={} stage={} chunks={} bytes={} elapsed_ms={} error={}",
                response.request_id,
                response.profile_name,
                stage,
                chunk_count,
                total_bytes,
                started_at.elapsed().as_millis(),
                err
            ),
        );
    };
    runtime_proxy_log_to_path(
        &response.log_path,
        &format!(
            "request={} transport=http stream_start profile={} status={}",
            response.request_id, response.profile_name, response.status
        ),
    );
    // Reason phrase for the status line; "OK" is a fallback when the code has
    // no canonical reason (cosmetic only — clients key off the numeric code).
    let status = reqwest::StatusCode::from_u16(response.status)
        .ok()
        .and_then(|status| status.canonical_reason().map(str::to_string))
        .unwrap_or_else(|| "OK".to_string());
    write!(
        writer,
        "HTTP/1.1 {} {}\r\nTransfer-Encoding: chunked\r\nConnection: close\r\n",
        response.status, status
    )
    .map_err(|err| {
        log_writer_error("headers_start", 0, 0, &err);
        err
    })?;
    for (name, value) in response.headers {
        write!(writer, "{name}: {value}\r\n").map_err(|err| {
            log_writer_error("header_line", 0, 0, &err);
            err
        })?;
    }
    writer.write_all(b"\r\n").map_err(|err| {
        log_writer_error("headers_end", 0, 0, &err);
        err
    })?;
    writer.flush().map_err(|err| {
        log_writer_error("headers_flush", 0, 0, &err);
        err
    })?;
    let mut buffer = [0_u8; 8192];
    let mut total_bytes = 0usize;
    let mut chunk_count = 0usize;
    loop {
        let read = match response.body.read(&mut buffer) {
            Ok(read) => read,
            Err(err) => {
                runtime_proxy_log_to_path(
                    &response.log_path,
                    &format!(
                        "request={} transport=http stream_read_error profile={} chunks={} bytes={} elapsed_ms={} error={}",
                        response.request_id,
                        response.profile_name,
                        chunk_count,
                        total_bytes,
                        started_at.elapsed().as_millis(),
                        err
                    ),
                );
                let transport_error =
                    anyhow::Error::new(io::Error::new(err.kind(), err.to_string()));
                // Only transport-class failures count against the profile.
                if is_runtime_proxy_transport_failure(&transport_error) {
                    note_runtime_profile_latency_failure(
                        &response.shared,
                        &response.profile_name,
                        RuntimeRouteKind::Responses,
                        "stream_read_error",
                    );
                }
                return Err(err);
            }
        };
        if read == 0 {
            break;
        }
        // Test hook: simulate an upstream read failure on the first chunk.
        if chunk_count == 0
            && runtime_take_fault_injection("PRODEX_RUNTIME_FAULT_STREAM_READ_ERROR_ONCE")
        {
            let err = io::Error::new(
                io::ErrorKind::ConnectionReset,
                "injected runtime stream read failure",
            );
            runtime_proxy_log_to_path(
                &response.log_path,
                &format!(
                    "request={} transport=http stream_read_error profile={} chunks={} bytes={} elapsed_ms={} error={}",
                    response.request_id,
                    response.profile_name,
                    chunk_count,
                    total_bytes,
                    started_at.elapsed().as_millis(),
                    err
                ),
            );
            note_runtime_profile_latency_failure(
                &response.shared,
                &response.profile_name,
                RuntimeRouteKind::Responses,
                "stream_read_error",
            );
            return Err(err);
        }
        chunk_count += 1;
        total_bytes += read;
        if chunk_count == 1 {
            // First byte delivered to the client: record TTFB for rotation.
            runtime_proxy_log_to_path(
                &response.log_path,
                &format!(
                    "request={} transport=http first_local_chunk profile={} bytes={} elapsed_ms={}",
                    response.request_id,
                    response.profile_name,
                    read,
                    started_at.elapsed().as_millis()
                ),
            );
            note_runtime_profile_latency_observation(
                &response.shared,
                &response.profile_name,
                RuntimeRouteKind::Responses,
                "ttfb",
                started_at.elapsed().as_millis() as u64,
            );
        }
        // Chunked encoding: hex size line, payload, CRLF.
        write!(writer, "{:X}\r\n", read).map_err(|err| {
            log_writer_error("chunk_size", chunk_count, total_bytes, &err);
            err
        })?;
        writer.write_all(&buffer[..read]).map_err(|err| {
            log_writer_error("chunk_body", chunk_count, total_bytes, &err);
            err
        })?;
        writer.write_all(b"\r\n").map_err(|err| {
            log_writer_error("chunk_suffix", chunk_count, total_bytes, &err);
            err
        })?;
        if flush_each_chunk || chunk_count == 1 {
            writer.flush().map_err(|err| {
                log_writer_error("chunk_flush", chunk_count, total_bytes, &err);
                err
            })?;
        }
    }
    // Zero-length chunk terminates the chunked body.
    writer.write_all(b"0\r\n\r\n").map_err(|err| {
        log_writer_error("trailer", chunk_count, total_bytes, &err);
        err
    })?;
    writer.flush().map_err(|err| {
        log_writer_error("trailer_flush", chunk_count, total_bytes, &err);
        err
    })?;
    runtime_proxy_log_to_path(
        &response.log_path,
        &format!(
            "request={} transport=http stream_complete profile={} chunks={} bytes={} elapsed_ms={}",
            response.request_id,
            response.profile_name,
            chunk_count,
            total_bytes,
            started_at.elapsed().as_millis()
        ),
    );
    note_runtime_profile_latency_observation(
        &response.shared,
        &response.profile_name,
        RuntimeRouteKind::Responses,
        "stream_complete",
        started_at.elapsed().as_millis() as u64,
    );
    Ok(())
}
/// Builds buffered response parts for a plain-text reply with the given
/// status code and UTF-8 message body.
fn build_runtime_proxy_text_response_parts(
    status: u16,
    message: &str,
) -> RuntimeBufferedResponseParts {
    let headers = vec![(
        "Content-Type".to_string(),
        b"text/plain; charset=utf-8".to_vec(),
    )];
    RuntimeBufferedResponseParts {
        status,
        headers,
        body: Vec::from(message.as_bytes()),
    }
}
/// Convenience wrapper: plain-text parts converted straight into a
/// tiny_http response.
fn build_runtime_proxy_text_response(status: u16, message: &str) -> tiny_http::ResponseBox {
    let parts = build_runtime_proxy_text_response_parts(status, message);
    build_runtime_proxy_response_from_parts(parts)
}
/// Reads `name` from a reqwest header map, returning the trimmed value as an
/// owned String; non-UTF-8 or (after trimming) empty values yield `None`.
fn runtime_proxy_header_value(headers: &reqwest::header::HeaderMap, name: &str) -> Option<String> {
    let raw = headers.get(name)?;
    let trimmed = raw.to_str().ok()?.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// Tungstenite counterpart of `runtime_proxy_header_value`: trimmed owned
/// value for `name`, or `None` for missing/non-UTF-8/empty headers.
fn runtime_proxy_tungstenite_header_value(
    headers: &tungstenite::http::HeaderMap,
    name: &str,
) -> Option<String> {
    let raw = headers.get(name)?;
    let trimmed = raw.to_str().ok()?.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
/// True when `err` maps to a recognized transport-level failure kind
/// (timeouts, resets, broken pipes, …) rather than an application error.
fn is_runtime_proxy_transport_failure(err: &anyhow::Error) -> bool {
    runtime_proxy_transport_failure_kind(err).is_some()
}
/// Builds buffered parts for a JSON error reply shaped as
/// `{"error":{"code":…,"message":…}}`.
fn build_runtime_proxy_json_error_parts(
    status: u16,
    code: &str,
    message: &str,
) -> RuntimeBufferedResponseParts {
    let payload = serde_json::json!({
        "error": {
            "code": code,
            "message": message,
        }
    });
    RuntimeBufferedResponseParts {
        status,
        headers: vec![("Content-Type".to_string(), b"application/json".to_vec())],
        body: payload.to_string().into_bytes(),
    }
}
/// Convenience wrapper: JSON error parts converted straight into a
/// tiny_http response.
fn build_runtime_proxy_json_error_response(
    status: u16,
    code: &str,
    message: &str,
) -> tiny_http::ResponseBox {
    let parts = build_runtime_proxy_json_error_parts(status, code, message);
    build_runtime_proxy_response_from_parts(parts)
}
/// A fully-buffered HTTP response ready to be converted into a tiny_http
/// response: status code, raw header pairs, and complete body bytes.
struct RuntimeBufferedResponseParts {
    // HTTP status code to relay.
    status: u16,
    // Header name/value pairs; values kept as raw bytes (may be non-UTF-8).
    headers: Vec<(String, Vec<u8>)>,
    // Complete response body.
    body: Vec<u8>,
}
/// True when the (mount-normalized) request path targets the Codex
/// `/codex/responses` route.
fn is_runtime_responses_path(path_and_query: &str) -> bool {
    let normalized = runtime_proxy_normalize_openai_path(path_and_query);
    let path = path_without_query(normalized.as_ref());
    path.ends_with("/codex/responses")
}
/// True when the raw request path targets the Anthropic messages route.
/// NOTE(review): deliberately checked without OpenAI mount normalization.
fn is_runtime_anthropic_messages_path(path_and_query: &str) -> bool {
    let path = path_without_query(path_and_query);
    path.ends_with(RUNTIME_PROXY_ANTHROPIC_MESSAGES_PATH)
}
/// True when the (mount-normalized) request path targets the
/// `/responses/compact` route.
fn is_runtime_compact_path(path_and_query: &str) -> bool {
    let normalized = runtime_proxy_normalize_openai_path(path_and_query);
    let path = path_without_query(normalized.as_ref());
    path.ends_with("/responses/compact")
}
/// Strips the query string (everything from the first `?`) from a
/// path+query string, returning a borrowed path slice.
fn path_without_query(path_and_query: &str) -> &str {
    match path_and_query.find('?') {
        Some(idx) => &path_and_query[..idx],
        None => path_and_query,
    }
}
fn runtime_proxy_openai_suffix(path: &str) -> Option<&str> {
if let Some(suffix) = path.strip_prefix(LEGACY_RUNTIME_PROXY_OPENAI_MOUNT_PATH_PREFIX)
&& let Some(version_suffix_index) = suffix.find('/')
{
return Some(&suffix[version_suffix_index..]);
}
if let Some(suffix) = path.strip_prefix(RUNTIME_PROXY_OPENAI_MOUNT_PATH)
&& (suffix.is_empty() || suffix.starts_with('/'))
{
return Some(suffix);
}
None
}
/// Rewrites a path mounted under the local OpenAI proxy mount into the
/// upstream path, preserving the query string. Paths outside the mount are
/// returned borrowed and unchanged (hence the `Cow`).
fn runtime_proxy_normalize_openai_path(path_and_query: &str) -> Cow<'_, str> {
    let (path, query) = match path_and_query.split_once('?') {
        Some((path, query)) => (path, Some(query)),
        None => (path_and_query, None),
    };
    match runtime_proxy_openai_suffix(path) {
        None => Cow::Borrowed(path_and_query),
        Some(suffix) => {
            let mut rewritten = String::with_capacity(
                path_and_query.len() + RUNTIME_PROXY_OPENAI_UPSTREAM_PATH.len(),
            );
            rewritten.push_str(RUNTIME_PROXY_OPENAI_UPSTREAM_PATH);
            rewritten.push_str(suffix);
            if let Some(query) = query {
                rewritten.push('?');
                rewritten.push_str(query);
            }
            Cow::Owned(rewritten)
        }
    }
}
impl RuntimePrefetchStream {
    /// Starts a background tokio task that pumps the upstream response body
    /// into a bounded sync channel, and returns a handle for lookahead
    /// consumption. The task is aborted when the handle (or the reader made
    /// from it) is dropped.
    fn spawn(
        response: reqwest::Response,
        async_runtime: Arc<TokioRuntime>,
        log_path: PathBuf,
        request_id: u64,
    ) -> Self {
        let (sender, receiver) =
            mpsc::sync_channel::<RuntimePrefetchChunk>(RUNTIME_PROXY_PREFETCH_QUEUE_CAPACITY);
        let shared = Arc::new(RuntimePrefetchSharedState::default());
        let worker_shared = Arc::clone(&shared);
        let worker = async_runtime.spawn(async move {
            runtime_prefetch_response_chunks(response, sender, worker_shared, log_path, request_id)
                .await;
        });
        let worker_abort = worker.abort_handle();
        Self {
            receiver: Some(receiver),
            shared,
            backlog: VecDeque::new(),
            worker_abort: Some(worker_abort),
        }
    }
    /// Receives the next chunk, preferring chunks previously pushed back via
    /// `push_backlog`. Byte accounting is released only for chunks freshly
    /// pulled from the channel (backlog chunks were released on first pull).
    fn recv_timeout(
        &mut self,
        timeout: Duration,
    ) -> std::result::Result<RuntimePrefetchChunk, RecvTimeoutError> {
        if let Some(chunk) = self.backlog.pop_front() {
            return Ok(chunk);
        }
        let chunk = self
            .receiver
            .as_ref()
            .expect("runtime prefetch receiver should remain available")
            .recv_timeout(timeout)?;
        if let RuntimePrefetchChunk::Data(bytes) = &chunk {
            runtime_prefetch_release_queued_bytes(&self.shared, bytes.len());
        }
        Ok(chunk)
    }
    /// Returns a chunk to the front-of-line queue (e.g. an error seen during
    /// lookahead that must still reach the eventual reader).
    fn push_backlog(&mut self, chunk: RuntimePrefetchChunk) {
        self.backlog.push_back(chunk);
    }
    /// Converts the stream into a blocking `Read` adapter. `prelude` holds
    /// bytes already consumed during lookahead, replayed before live chunks.
    /// Ownership of the receiver and abort handle moves to the reader (the
    /// `Option` fields keep this handle's `Drop` from aborting the worker).
    fn into_reader(mut self, prelude: Vec<u8>) -> RuntimePrefetchReader {
        RuntimePrefetchReader {
            receiver: self
                .receiver
                .take()
                .expect("runtime prefetch receiver should remain available"),
            shared: Arc::clone(&self.shared),
            backlog: std::mem::take(&mut self.backlog),
            pending: Cursor::new(prelude),
            finished: false,
            worker_abort: self
                .worker_abort
                .take()
                .expect("runtime prefetch abort handle should remain available"),
        }
    }
}
impl Drop for RuntimePrefetchStream {
    // Abort the pump task only if the abort handle was not handed off to a
    // RuntimePrefetchReader via `into_reader` (which takes the Option).
    fn drop(&mut self) {
        if let Some(worker_abort) = self.worker_abort.take() {
            worker_abort.abort();
        }
    }
}
/// Records the first terminal error for a prefetch stream; later calls are
/// no-ops so the original failure is preserved. A poisoned lock is recovered
/// rather than propagated.
fn runtime_prefetch_set_terminal_error(
    shared: &RuntimePrefetchSharedState,
    kind: io::ErrorKind,
    message: impl Into<String>,
) {
    let mut slot = shared
        .terminal_error
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    // get_or_insert_with only writes when the slot is still empty.
    let _ = slot.get_or_insert_with(|| (kind, message.into()));
}
/// Returns a clone of the recorded terminal error for a prefetch stream, if
/// any. A poisoned lock is recovered rather than propagated.
fn runtime_prefetch_terminal_error(
    shared: &RuntimePrefetchSharedState,
) -> Option<(io::ErrorKind, String)> {
    let guard = shared
        .terminal_error
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    guard.as_ref().cloned()
}
/// Releases backpressure accounting for bytes that left the bounded
/// prefetch queue; zero-byte releases are skipped.
fn runtime_prefetch_release_queued_bytes(shared: &RuntimePrefetchSharedState, bytes: usize) {
    if bytes == 0 {
        return;
    }
    shared.queued_bytes.fetch_sub(bytes, Ordering::SeqCst);
}
/// Pushes one data chunk into the bounded prefetch channel, retrying with
/// sleeps while either the byte budget or the channel capacity is exhausted.
///
/// Two independent limits apply: a total buffered-bytes budget (checked
/// before each send attempt) and the channel's slot capacity (surfaced via
/// `TrySendError::Full`). Both give up once the overall backpressure timeout
/// elapses; a disconnected receiver aborts immediately.
async fn runtime_prefetch_send_with_wait(
    sender: &SyncSender<RuntimePrefetchChunk>,
    shared: &RuntimePrefetchSharedState,
    chunk: Vec<u8>,
) -> RuntimePrefetchSendOutcome {
    let started_at = Instant::now();
    let retry_delay = Duration::from_millis(runtime_proxy_prefetch_backpressure_retry_ms());
    let timeout = Duration::from_millis(runtime_proxy_prefetch_backpressure_timeout_ms());
    // .max(1) guards against a misconfigured zero budget.
    let buffered_limit = runtime_proxy_prefetch_max_buffered_bytes().max(1);
    let mut pending = RuntimePrefetchChunk::Data(chunk);
    let mut retries = 0usize;
    loop {
        // Only Data chunks count against the byte budget.
        let chunk_bytes = match &pending {
            RuntimePrefetchChunk::Data(bytes) => bytes.len(),
            RuntimePrefetchChunk::End | RuntimePrefetchChunk::Error(_, _) => 0,
        };
        let queued_bytes = shared.queued_bytes.load(Ordering::SeqCst);
        if queued_bytes.saturating_add(chunk_bytes) > buffered_limit {
            // Byte budget exceeded: wait for the reader to drain, up to the
            // overall backpressure timeout.
            if started_at.elapsed() >= timeout {
                return RuntimePrefetchSendOutcome::TimedOut {
                    message: format!(
                        "runtime prefetch buffered bytes exceeded safe limit ({} > {})",
                        queued_bytes.saturating_add(chunk_bytes),
                        buffered_limit
                    ),
                };
            }
            retries = retries.saturating_add(1);
            let remaining = timeout.saturating_sub(started_at.elapsed());
            let sleep_for = retry_delay.min(remaining);
            if !sleep_for.is_zero() {
                tokio::time::sleep(sleep_for).await;
            }
            continue;
        }
        match sender.try_send(pending) {
            Ok(()) => {
                // Charge the byte budget only after a successful send.
                if chunk_bytes > 0 {
                    shared.queued_bytes.fetch_add(chunk_bytes, Ordering::SeqCst);
                }
                return RuntimePrefetchSendOutcome::Sent {
                    wait_ms: started_at.elapsed().as_millis(),
                    retries,
                };
            }
            Err(TrySendError::Disconnected(_)) => {
                return RuntimePrefetchSendOutcome::Disconnected;
            }
            Err(TrySendError::Full(returned)) => {
                // Channel slots exhausted: reclaim the chunk and retry until
                // the timeout.
                if started_at.elapsed() >= timeout {
                    return RuntimePrefetchSendOutcome::TimedOut {
                        message: format!(
                            "runtime prefetch backlog exceeded bounded capacity ({})",
                            RUNTIME_PROXY_PREFETCH_QUEUE_CAPACITY
                        ),
                    };
                }
                pending = returned;
                retries = retries.saturating_add(1);
                let remaining = timeout.saturating_sub(started_at.elapsed());
                let sleep_for = retry_delay.min(remaining);
                if !sleep_for.is_zero() {
                    tokio::time::sleep(sleep_for).await;
                }
            }
        }
    }
}
/// Background pump: reads the upstream response body chunk by chunk and
/// forwards it over the bounded channel until EOF, an oversized chunk, a
/// backpressure timeout, receiver disconnect, or an upstream read error.
///
/// Terminal failures are both recorded in the shared state (so the reader
/// can surface them after a disconnect) and, best-effort, sent as an Error
/// chunk. End/Error sends use `try_send` deliberately: if the queue is full
/// the sender is simply dropped and the reader treats disconnect as EOF
/// (or as the recorded terminal error).
async fn runtime_prefetch_response_chunks(
    mut response: reqwest::Response,
    sender: SyncSender<RuntimePrefetchChunk>,
    shared: Arc<RuntimePrefetchSharedState>,
    log_path: PathBuf,
    request_id: u64,
) {
    let mut saw_data = false;
    loop {
        match response.chunk().await {
            Ok(None) => {
                // Upstream EOF.
                runtime_proxy_log_to_path(
                    &log_path,
                    &format!(
                        "request={request_id} transport=http upstream_stream_end saw_data={saw_data}"
                    ),
                );
                let _ = sender.try_send(RuntimePrefetchChunk::End);
                break;
            }
            Ok(Some(chunk)) => {
                if !saw_data {
                    saw_data = true;
                    runtime_proxy_log_to_path(
                        &log_path,
                        &format!(
                            "request={request_id} transport=http first_upstream_chunk bytes={}",
                            chunk.len()
                        ),
                    );
                }
                // Reject pathologically large chunks instead of buffering them.
                if chunk.len() > RUNTIME_PROXY_PREFETCH_MAX_CHUNK_BYTES {
                    let message = format!(
                        "runtime upstream chunk exceeded prefetch limit ({} > {})",
                        chunk.len(),
                        RUNTIME_PROXY_PREFETCH_MAX_CHUNK_BYTES
                    );
                    runtime_prefetch_set_terminal_error(
                        &shared,
                        io::ErrorKind::InvalidData,
                        message.clone(),
                    );
                    runtime_proxy_log_to_path(
                        &log_path,
                        &format!(
                            "request={request_id} transport=http prefetch_chunk_too_large bytes={} limit={} error={message}",
                            chunk.len(),
                            RUNTIME_PROXY_PREFETCH_MAX_CHUNK_BYTES,
                        ),
                    );
                    let _ = sender.try_send(RuntimePrefetchChunk::Error(
                        io::ErrorKind::InvalidData,
                        message,
                    ));
                    break;
                }
                let chunk_bytes = chunk.len();
                // Forward under backpressure; see outcome variants below.
                match runtime_prefetch_send_with_wait(&sender, &shared, chunk.to_vec()).await {
                    RuntimePrefetchSendOutcome::Sent { wait_ms, retries } => {
                        if retries > 0 {
                            runtime_proxy_log_to_path(
                                &log_path,
                                &format!(
                                    "request={request_id} transport=http prefetch_backpressure_recovered bytes={chunk_bytes} retries={retries} wait_ms={wait_ms}",
                                ),
                            );
                        }
                    }
                    RuntimePrefetchSendOutcome::TimedOut { message } => {
                        runtime_prefetch_set_terminal_error(
                            &shared,
                            io::ErrorKind::WouldBlock,
                            message.clone(),
                        );
                        runtime_proxy_log_to_path(
                            &log_path,
                            &format!(
                                "request={request_id} transport=http prefetch_backpressure_timeout bytes={chunk_bytes} capacity={} error={message}",
                                RUNTIME_PROXY_PREFETCH_QUEUE_CAPACITY,
                            ),
                        );
                        break;
                    }
                    RuntimePrefetchSendOutcome::Disconnected => {
                        // Reader gone; nothing left to do.
                        runtime_proxy_log_to_path(
                            &log_path,
                            &format!(
                                "request={request_id} transport=http prefetch_receiver_disconnected"
                            ),
                        );
                        break;
                    }
                }
            }
            Err(err) => {
                // Upstream read failure: classify into an io::ErrorKind for
                // the reader and record it as the terminal error.
                let kind = runtime_reqwest_error_kind(&err);
                runtime_prefetch_set_terminal_error(&shared, kind, err.to_string());
                runtime_proxy_log_to_path(
                    &log_path,
                    &format!(
                        "request={request_id} transport=http upstream_stream_error kind={kind:?} error={err}"
                    ),
                );
                let _ = sender.try_send(RuntimePrefetchChunk::Error(kind, err.to_string()));
                break;
            }
        }
    }
}
/// Consumes a bounded prefix of the SSE stream (capped in both bytes and
/// wall time) to classify it before any bytes reach the client.
///
/// Returns Commit (stream looks committable, with any response ids already
/// seen), QuotaBlocked, or PreviousResponseNotFound; in every case the bytes
/// consumed are returned as the prelude so they can be replayed. Exhausting
/// the lookahead budget without a retryable signal commits by default.
fn inspect_runtime_sse_lookahead(
    prefetch: &mut RuntimePrefetchStream,
    log_path: &Path,
    request_id: u64,
) -> Result<RuntimeSseInspection> {
    let deadline = Instant::now() + Duration::from_millis(runtime_proxy_sse_lookahead_timeout_ms());
    let mut buffered = Vec::new();
    loop {
        // Byte budget for the lookahead window.
        if buffered.len() >= RUNTIME_PROXY_SSE_LOOKAHEAD_BYTES {
            break;
        }
        let now = Instant::now();
        if now >= deadline {
            break;
        }
        let remaining = deadline.saturating_duration_since(now);
        match prefetch.recv_timeout(remaining) {
            Ok(RuntimePrefetchChunk::Data(chunk)) => {
                buffered.extend_from_slice(&chunk);
                // Re-inspect the whole buffer after each chunk.
                match inspect_runtime_sse_buffer(&buffered)? {
                    RuntimeSseInspectionProgress::Commit { response_ids } => {
                        runtime_proxy_log_to_path(
                            log_path,
                            &format!(
                                "request={request_id} transport=http lookahead_commit bytes={} response_ids={}",
                                buffered.len(),
                                response_ids.len()
                            ),
                        );
                        return Ok(RuntimeSseInspection::Commit {
                            prelude: buffered,
                            response_ids,
                        });
                    }
                    // Hold = only pre-commit events seen so far; keep reading.
                    RuntimeSseInspectionProgress::Hold { .. } => {}
                    RuntimeSseInspectionProgress::QuotaBlocked => {
                        runtime_proxy_log_to_path(
                            log_path,
                            &format!(
                                "request={request_id} transport=http lookahead_retryable_signal bytes={}",
                                buffered.len()
                            ),
                        );
                        return Ok(RuntimeSseInspection::QuotaBlocked(buffered));
                    }
                    RuntimeSseInspectionProgress::PreviousResponseNotFound => {
                        runtime_proxy_log_to_path(
                            log_path,
                            &format!(
                                "request={request_id} transport=http lookahead_retryable_signal bytes={}",
                                buffered.len()
                            ),
                        );
                        return Ok(RuntimeSseInspection::PreviousResponseNotFound(buffered));
                    }
                }
            }
            Ok(RuntimePrefetchChunk::End) => break,
            Ok(RuntimePrefetchChunk::Error(kind, message)) => {
                if buffered.is_empty() {
                    // Failed before any bytes arrived: surface it now so the
                    // caller can retry the whole request.
                    runtime_proxy_log_to_path(
                        log_path,
                        &format!(
                            "request={request_id} transport=http lookahead_error_before_bytes kind={kind:?} error={message}"
                        ),
                    );
                    return Err(anyhow::Error::new(io::Error::new(kind, message))
                        .context("failed to inspect runtime auto-rotate SSE stream"));
                }
                // Bytes already consumed: defer the error to the eventual
                // reader by pushing it back into the stream.
                prefetch.push_backlog(RuntimePrefetchChunk::Error(kind, message));
                break;
            }
            Err(RecvTimeoutError::Timeout) => {
                runtime_proxy_log_to_path(
                    log_path,
                    &format!(
                        "request={request_id} transport=http lookahead_timeout bytes={}",
                        buffered.len()
                    ),
                );
                break;
            }
            Err(RecvTimeoutError::Disconnected) => {
                runtime_proxy_log_to_path(
                    log_path,
                    &format!(
                        "request={request_id} transport=http lookahead_channel_disconnected bytes={}",
                        buffered.len()
                    ),
                );
                break;
            }
        }
    }
    // Budget exhausted (or stream ended) without a decisive retryable
    // signal: classify whatever we have, treating Hold as Commit.
    match inspect_runtime_sse_buffer(&buffered)? {
        RuntimeSseInspectionProgress::Commit { response_ids }
        | RuntimeSseInspectionProgress::Hold { response_ids } => {
            if !buffered.is_empty() {
                runtime_proxy_log_to_path(
                    log_path,
                    &format!(
                        "request={request_id} transport=http lookahead_budget_exhausted bytes={} response_ids={}",
                        buffered.len(),
                        response_ids.len()
                    ),
                );
            }
            Ok(RuntimeSseInspection::Commit {
                prelude: buffered,
                response_ids,
            })
        }
        RuntimeSseInspectionProgress::QuotaBlocked => {
            Ok(RuntimeSseInspection::QuotaBlocked(buffered))
        }
        RuntimeSseInspectionProgress::PreviousResponseNotFound => {
            Ok(RuntimeSseInspection::PreviousResponseNotFound(buffered))
        }
    }
}
/// Parses a buffered SSE prefix into a lookahead verdict.
///
/// Events are delimited by blank lines; each completed event's `data:` lines
/// are parsed together. Quota-blocked or previous-response-not-found signals
/// short-circuit immediately. Otherwise the result is Commit once at least
/// one complete non-"hold" event was seen, else Hold; both carry all
/// response ids collected so far (de-duplicated and ordered via BTreeSet).
fn inspect_runtime_sse_buffer(buffered: &[u8]) -> Result<RuntimeSseInspectionProgress> {
    let mut line = Vec::new();
    let mut data_lines = Vec::new();
    let mut response_ids = BTreeSet::new();
    let mut saw_commit_ready_event = false;
    for byte in buffered {
        line.push(*byte);
        if *byte != b'\n' {
            continue;
        }
        let line_text = String::from_utf8_lossy(&line);
        let trimmed = line_text.trim_end_matches(['\r', '\n']);
        if trimmed.is_empty() {
            // Blank line terminates one SSE event: classify it.
            let event = parse_runtime_sse_event(&data_lines);
            if event.quota_blocked {
                return Ok(RuntimeSseInspectionProgress::QuotaBlocked);
            }
            if event.previous_response_not_found {
                return Ok(RuntimeSseInspectionProgress::PreviousResponseNotFound);
            }
            response_ids.extend(event.response_ids);
            // A complete event whose type is not a pre-commit "hold" kind
            // makes the stream committable.
            if !data_lines.is_empty()
                && !event
                    .event_type
                    .as_deref()
                    .is_some_and(runtime_proxy_precommit_hold_event_kind)
            {
                saw_commit_ready_event = true;
            }
            data_lines.clear();
            line.clear();
            continue;
        }
        if let Some(payload) = trimmed.strip_prefix("data:") {
            data_lines.push(payload.trim_start().to_string());
        }
        line.clear();
    }
    if saw_commit_ready_event {
        Ok(RuntimeSseInspectionProgress::Commit {
            response_ids: response_ids.into_iter().collect(),
        })
    } else {
        Ok(RuntimeSseInspectionProgress::Hold {
            response_ids: response_ids.into_iter().collect(),
        })
    }
}
/// Synchronously drains an upstream response into buffered parts, prepending
/// `prelude` bytes already consumed elsewhere (e.g. during SSE lookahead).
///
/// Transport/framing headers are dropped; header values are kept as raw
/// bytes. The body is capped at RUNTIME_PROXY_BUFFERED_RESPONSE_MAX_BYTES
/// and the whole read is driven to completion via `block_on` on the shared
/// tokio runtime.
/// NOTE(review): assumes the caller is not already inside that runtime's
/// context (block_on would panic there) — confirm at call sites.
fn buffer_runtime_proxy_async_response_parts(
    shared: &RuntimeRotationProxyShared,
    mut response: reqwest::Response,
    prelude: Vec<u8>,
) -> Result<RuntimeBufferedResponseParts> {
    let status = response.status().as_u16();
    let mut headers = Vec::new();
    for (name, value) in response.headers() {
        if should_skip_runtime_response_header(name.as_str()) {
            continue;
        }
        headers.push((name.as_str().to_string(), value.as_bytes().to_vec()));
    }
    let body = shared.async_runtime.block_on(async move {
        let mut body = prelude;
        loop {
            let next = response
                .chunk()
                .await
                .context("failed to read upstream runtime response body chunk")?;
            let Some(chunk) = next else {
                break;
            };
            // Enforce the size cap before growing the buffer.
            if body.len().saturating_add(chunk.len()) > RUNTIME_PROXY_BUFFERED_RESPONSE_MAX_BYTES {
                return Err(anyhow::Error::new(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "runtime buffered response exceeded safe size limit ({})",
                        RUNTIME_PROXY_BUFFERED_RESPONSE_MAX_BYTES
                    ),
                )));
            }
            body.extend_from_slice(&chunk);
        }
        Ok::<Vec<u8>, anyhow::Error>(body)
    })?;
    Ok(RuntimeBufferedResponseParts {
        status,
        headers,
        body,
    })
}
fn runtime_reqwest_error_kind(err: &reqwest::Error) -> io::ErrorKind {
match runtime_transport_failure_kind_from_reqwest(err) {
Some(
RuntimeTransportFailureKind::ConnectTimeout | RuntimeTransportFailureKind::ReadTimeout,
) => io::ErrorKind::TimedOut,
Some(RuntimeTransportFailureKind::ConnectRefused) => io::ErrorKind::ConnectionRefused,
Some(RuntimeTransportFailureKind::ConnectReset) => io::ErrorKind::ConnectionReset,
Some(RuntimeTransportFailureKind::ConnectionAborted) => io::ErrorKind::ConnectionAborted,
Some(RuntimeTransportFailureKind::BrokenPipe) => io::ErrorKind::BrokenPipe,
Some(RuntimeTransportFailureKind::UnexpectedEof) => io::ErrorKind::UnexpectedEof,
_ => io::ErrorKind::Other,
}
}
/// Re-assemble buffered response parts into a boxed tiny_http response.
///
/// Headers that fail to parse as tiny_http headers are silently dropped.
fn build_runtime_proxy_response_from_parts(
    parts: RuntimeBufferedResponseParts,
) -> tiny_http::ResponseBox {
    let mut headers = Vec::with_capacity(parts.headers.len());
    for (name, value) in parts.headers {
        if let Ok(header) = TinyHeader::from_bytes(name.as_bytes(), value) {
            headers.push(header);
        }
    }
    let body_len = parts.body.len();
    let response = TinyResponse::new(
        TinyStatusCode(parts.status),
        headers,
        Box::new(Cursor::new(parts.body)),
        Some(body_len),
        None,
    );
    response.boxed()
}
/// Return the first non-empty, valid-UTF-8 `Content-Type` header value.
///
/// Headers whose value is not UTF-8 or is blank after trimming are skipped,
/// allowing a later duplicate header to win.
fn runtime_buffered_response_content_type(parts: &RuntimeBufferedResponseParts) -> Option<&str> {
    for (name, value) in &parts.headers {
        if !name.eq_ignore_ascii_case("content-type") {
            continue;
        }
        if let Ok(text) = std::str::from_utf8(value) {
            let text = text.trim();
            if !text.is_empty() {
                return Some(text);
            }
        }
    }
    None
}
/// Generate an Anthropic-style message id: `msg_` plus a random token with
/// all dashes stripped.
fn runtime_anthropic_message_id() -> String {
    let token = runtime_random_token("claude");
    let mut id = String::with_capacity(4 + token.len());
    id.push_str("msg_");
    id.extend(token.chars().filter(|ch| *ch != '-'));
    id
}
/// Map an HTTP status code onto the Anthropic error-type string.
///
/// Known client errors map individually; common server/overload statuses map
/// to `overloaded_error`; everything else is a generic `api_error`.
fn runtime_anthropic_error_type_for_status(status: u16) -> &'static str {
    const CLIENT_ERRORS: [(u16, &str); 5] = [
        (400, "invalid_request_error"),
        (401, "authentication_error"),
        (403, "permission_error"),
        (404, "not_found_error"),
        (429, "rate_limit_error"),
    ];
    if let Some(&(_, kind)) = CLIENT_ERRORS.iter().find(|(code, _)| *code == status) {
        kind
    } else if matches!(status, 500 | 502 | 503 | 504 | 529) {
        "overloaded_error"
    } else {
        "api_error"
    }
}
/// Extract a human-readable error message from a buffered upstream response.
///
/// Preference order: JSON `error.message`, then top-level `message`, then the
/// raw (lossy UTF-8) body text, then a generic fallback sentence.
fn runtime_anthropic_error_message_from_parts(parts: &RuntimeBufferedResponseParts) -> String {
    if let Ok(value) = serde_json::from_slice::<serde_json::Value>(&parts.body) {
        let nested = value
            .get("error")
            .and_then(|error| error.get("message"))
            .and_then(serde_json::Value::as_str);
        let top_level = value.get("message").and_then(serde_json::Value::as_str);
        for candidate in [nested, top_level].into_iter().flatten() {
            let candidate = candidate.trim();
            if !candidate.is_empty() {
                return candidate.to_string();
            }
        }
    }
    let fallback = String::from_utf8_lossy(&parts.body).trim().to_string();
    if fallback.is_empty() {
        "Upstream runtime proxy request failed.".to_string()
    } else {
        fallback
    }
}
/// Build an Anthropic-shaped JSON error response with the given status,
/// error type, and message.
fn build_runtime_anthropic_error_parts(
    status: u16,
    error_type: &str,
    message: &str,
) -> RuntimeBufferedResponseParts {
    let envelope = serde_json::json!({
        "type": "error",
        "error": {
            "type": error_type,
            "message": message,
        }
    });
    RuntimeBufferedResponseParts {
        status,
        headers: vec![("Content-Type".to_string(), b"application/json".to_vec())],
        body: envelope.to_string().into_bytes(),
    }
}
/// Convert an arbitrary upstream failure response into an Anthropic error
/// envelope, preserving the upstream status code.
fn runtime_anthropic_error_from_upstream_parts(
    parts: RuntimeBufferedResponseParts,
) -> RuntimeBufferedResponseParts {
    let message = runtime_anthropic_error_message_from_parts(&parts);
    let error_type = runtime_anthropic_error_type_for_status(parts.status);
    build_runtime_anthropic_error_parts(parts.status, error_type, &message)
}
/// Read `(input_tokens, output_tokens, cached_tokens)` from a Responses JSON
/// value. Usage may live at the top level or nested under `response`; missing
/// counters default to 0 (cached tokens stay `None` when absent).
fn runtime_anthropic_usage_from_value(value: &serde_json::Value) -> (u64, u64, Option<u64>) {
    let usage = match value.get("usage") {
        Some(usage) => Some(usage),
        None => value
            .get("response")
            .and_then(|response| response.get("usage")),
    };
    let counter = |key: &str| {
        usage
            .and_then(|usage| usage.get(key))
            .and_then(serde_json::Value::as_u64)
    };
    let input_tokens = counter("input_tokens").unwrap_or(0);
    let output_tokens = counter("output_tokens").unwrap_or(0);
    let cached_tokens = usage
        .and_then(|usage| usage.get("input_tokens_details"))
        .and_then(|details| details.get("cached_tokens"))
        .and_then(serde_json::Value::as_u64);
    (input_tokens, output_tokens, cached_tokens)
}
/// Parse tool-call arguments into a JSON object; anything that is not valid
/// JSON or not an object becomes an empty object.
fn runtime_anthropic_tool_input_from_arguments(arguments: &str) -> serde_json::Value {
    match serde_json::from_str::<serde_json::Value>(arguments) {
        Ok(value) if value.is_object() => value,
        _ => serde_json::Value::Object(serde_json::Map::new()),
    }
}
/// Join the `text` of every entry in a reasoning item's `summary` array with
/// newlines; returns an empty string when there is no summary array.
///
/// Fix: the previous implementation had a dead `or_else` fallback that, after
/// checking `type == "summary_text"`, re-read the very same `text` field the
/// first `get("text")` had already tried — it could never yield a value, so
/// it has been removed. Behavior is unchanged.
fn runtime_anthropic_reasoning_summary_text(item: &serde_json::Value) -> String {
    item.get("summary")
        .and_then(serde_json::Value::as_array)
        .map(|summary| {
            summary
                .iter()
                .filter_map(|entry| entry.get("text").and_then(serde_json::Value::as_str))
                .collect::<Vec<_>>()
                .join("\n")
        })
        .unwrap_or_default()
}
/// Collect the first title seen for each annotated URL across all message
/// content parts. Both the flat shape (`url`/`title` on the annotation) and
/// the nested `url_citation` shape are accepted; blank values are ignored.
fn runtime_anthropic_message_annotation_titles_by_url(
    output: &[serde_json::Value],
) -> BTreeMap<String, String> {
    // Read `key` directly from the annotation, falling back to
    // `url_citation.key`; trims and rejects empty strings.
    fn annotation_field<'a>(annotation: &'a serde_json::Value, key: &str) -> Option<&'a str> {
        annotation
            .get(key)
            .and_then(serde_json::Value::as_str)
            .or_else(|| {
                annotation
                    .get("url_citation")
                    .and_then(|nested| nested.get(key))
                    .and_then(serde_json::Value::as_str)
            })
            .map(str::trim)
            .filter(|text| !text.is_empty())
    }
    let mut titles = BTreeMap::new();
    let parts = output
        .iter()
        .filter_map(|item| item.get("content").and_then(serde_json::Value::as_array))
        .flatten();
    for part in parts {
        let annotations = part
            .get("annotations")
            .and_then(serde_json::Value::as_array)
            .into_iter()
            .flatten();
        for annotation in annotations {
            let url = annotation_field(annotation, "url");
            let title = annotation_field(annotation, "title");
            if let (Some(url), Some(title)) = (url, title) {
                // First title wins for a given URL.
                titles
                    .entry(url.to_string())
                    .or_insert_with(|| title.to_string());
            }
        }
    }
    titles
}
/// Translate one Responses `web_search_call` output item into the Anthropic
/// block pair: a `server_tool_use` block followed by a matching
/// `web_search_tool_result` block.
///
/// Results come from `action.sources` when present; otherwise they fall back
/// to URL/title pairs harvested from message annotations.
fn runtime_anthropic_web_search_blocks_from_output_item(
    item: &serde_json::Value,
    annotation_titles_by_url: &BTreeMap<String, String>,
) -> Vec<serde_json::Value> {
    // Reuse the upstream item id so the two emitted blocks pair up.
    let call_id = item
        .get("id")
        .and_then(serde_json::Value::as_str)
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .unwrap_or("web_search_call")
        .to_string();
    // Upstream may report a single `query` or a `queries` list; take the first.
    let query = item
        .get("action")
        .and_then(|action| action.get("query"))
        .and_then(serde_json::Value::as_str)
        .or_else(|| {
            item.get("action")
                .and_then(|action| action.get("queries"))
                .and_then(serde_json::Value::as_array)
                .and_then(|queries| queries.first())
                .and_then(serde_json::Value::as_str)
        })
        .map(str::trim)
        .filter(|value| !value.is_empty())
        .unwrap_or_default()
        .to_string();
    let mut seen_urls = BTreeSet::new();
    let mut results = Vec::new();
    if let Some(sources) = item
        .get("action")
        .and_then(|action| action.get("sources"))
        .and_then(serde_json::Value::as_array)
    {
        for source in sources {
            let Some(url) = source
                .get("url")
                .and_then(serde_json::Value::as_str)
                .map(str::trim)
                .filter(|value| !value.is_empty())
            else {
                continue;
            };
            // Deduplicate by URL; the first occurrence wins.
            if !seen_urls.insert(url.to_string()) {
                continue;
            }
            let mut result = serde_json::Map::new();
            result.insert(
                "type".to_string(),
                serde_json::Value::String("web_search_result".to_string()),
            );
            result.insert(
                "url".to_string(),
                serde_json::Value::String(url.to_string()),
            );
            // Prefer the source's own title, then any annotation title for
            // the same URL.
            if let Some(title) = source
                .get("title")
                .and_then(serde_json::Value::as_str)
                .map(str::trim)
                .filter(|value| !value.is_empty())
                .or_else(|| annotation_titles_by_url.get(url).map(String::as_str))
            {
                result.insert(
                    "title".to_string(),
                    serde_json::Value::String(title.to_string()),
                );
            }
            // Optional passthrough fields, copied only when non-empty strings.
            for key in ["encrypted_content", "page_age"] {
                if let Some(value) = source
                    .get(key)
                    .and_then(serde_json::Value::as_str)
                    .map(str::trim)
                    .filter(|value| !value.is_empty())
                {
                    result.insert(
                        key.to_string(),
                        serde_json::Value::String(value.to_string()),
                    );
                }
            }
            results.push(serde_json::Value::Object(result));
        }
    }
    // No sources listed: synthesize results from the annotation map instead.
    if results.is_empty() {
        for (url, title) in annotation_titles_by_url {
            if !seen_urls.insert(url.clone()) {
                continue;
            }
            results.push(serde_json::json!({
                "type": "web_search_result",
                "url": url,
                "title": title,
            }));
        }
    }
    vec![
        serde_json::json!({
            "type": "server_tool_use",
            "id": call_id,
            "name": "web_search",
            "input": {
                "query": query,
            },
        }),
        serde_json::json!({
            "type": "web_search_tool_result",
            "tool_use_id": call_id,
            "content": results,
        }),
    ]
}
/// Map Responses `output` items onto Anthropic content blocks.
///
/// Returns the block list plus a flag telling whether any `function_call`
/// item was seen (callers use it to choose the `stop_reason`). A guaranteed
/// non-empty list is returned: an empty text block is appended when nothing
/// else was produced.
fn runtime_anthropic_output_blocks_from_json(
    output: &[serde_json::Value],
    want_thinking: bool,
) -> (Vec<serde_json::Value>, bool) {
    let mut content = Vec::new();
    let mut has_tool_calls = false;
    let annotation_titles_by_url = runtime_anthropic_message_annotation_titles_by_url(output);
    for item in output {
        match item.get("type").and_then(serde_json::Value::as_str) {
            // Reasoning summaries become `thinking` blocks only when requested.
            Some("reasoning") if want_thinking => {
                let thinking = runtime_anthropic_reasoning_summary_text(item);
                if !thinking.is_empty() {
                    content.push(serde_json::json!({
                        "type": "thinking",
                        "thinking": thinking,
                    }));
                }
            }
            // All textual parts of a message collapse into one text block.
            Some("message") => {
                if let Some(parts) = item.get("content").and_then(serde_json::Value::as_array) {
                    let mut text = String::new();
                    for part in parts {
                        if part
                            .get("type")
                            .and_then(serde_json::Value::as_str)
                            .is_some_and(|part_type| matches!(part_type, "output_text" | "text"))
                            && let Some(part_text) =
                                part.get("text").and_then(serde_json::Value::as_str)
                        {
                            text.push_str(part_text);
                        }
                    }
                    if !text.is_empty() {
                        content.push(serde_json::json!({
                            "type": "text",
                            "text": text,
                        }));
                    }
                }
            }
            // Web search calls expand to a server_tool_use/result pair.
            Some("web_search_call") => {
                content.extend(runtime_anthropic_web_search_blocks_from_output_item(
                    item,
                    &annotation_titles_by_url,
                ));
            }
            Some("function_call") => {
                has_tool_calls = true;
                content.push(serde_json::json!({
                    "type": "tool_use",
                    "id": item
                        .get("call_id")
                        .and_then(serde_json::Value::as_str)
                        .unwrap_or("tool_call"),
                    "name": item
                        .get("name")
                        .and_then(serde_json::Value::as_str)
                        .unwrap_or("tool"),
                    "input": runtime_anthropic_tool_input_from_arguments(
                        item.get("arguments")
                            .and_then(serde_json::Value::as_str)
                            .unwrap_or("{}"),
                    ),
                }));
            }
            // Unknown item types are dropped.
            _ => {}
        }
    }
    if content.is_empty() {
        content.push(serde_json::json!({
            "type": "text",
            "text": "",
        }));
    }
    (content, has_tool_calls)
}
/// Convert a complete (non-streaming) Responses JSON payload into an
/// Anthropic message object for the requested model.
///
/// `stop_reason` is `tool_use` when any function call appeared, otherwise
/// `end_turn`; the cache counter is only included when upstream reported one.
fn runtime_anthropic_response_from_json_value(
    value: &serde_json::Value,
    requested_model: &str,
    want_thinking: bool,
) -> serde_json::Value {
    let (input_tokens, output_tokens, cached_tokens) = runtime_anthropic_usage_from_value(value);
    let output = value
        .get("output")
        .and_then(serde_json::Value::as_array)
        .cloned()
        .unwrap_or_default();
    let (content, has_tool_calls) =
        runtime_anthropic_output_blocks_from_json(&output, want_thinking);
    let stop_reason = if has_tool_calls { "tool_use" } else { "end_turn" };
    let mut usage = serde_json::json!({
        "input_tokens": input_tokens,
        "output_tokens": output_tokens,
    });
    if let Some(cached_tokens) = cached_tokens {
        usage["cache_read_input_tokens"] = serde_json::Value::Number(cached_tokens.into());
    }
    serde_json::json!({
        "id": runtime_anthropic_message_id(),
        "type": "message",
        "role": "assistant",
        "content": content,
        "model": requested_model,
        "stop_reason": stop_reason,
        "stop_sequence": serde_json::Value::Null,
        "usage": usage,
    })
}
/// Accumulator for one in-flight `function_call` output item while collecting
/// a buffered Responses stream into a single message.
#[derive(Debug, Clone, Default)]
struct RuntimeAnthropicCollectedToolUse {
    // Upstream call id (defaults to "tool_call" when absent).
    call_id: String,
    // Tool name reported by upstream (defaults to "tool").
    name: String,
    // JSON-encoded argument text, accumulated from deltas or taken whole.
    arguments: String,
    // True once any arguments delta was applied; guards against the `done`
    // payload overwriting already-streamed arguments.
    saw_delta: bool,
}
/// State machine that folds a buffered Responses SSE stream into one
/// Anthropic message value (see `observe_event` / `into_response`).
#[derive(Debug, Clone, Default)]
struct RuntimeAnthropicCollectedResponse {
    // Finished Anthropic content blocks, in arrival order.
    content: Vec<serde_json::Value>,
    // Plain text accumulated but not yet flushed into a block.
    pending_text: String,
    // Reasoning text accumulated but not yet flushed into a block.
    pending_thinking: String,
    // Tool call currently being assembled, if any.
    active_tool_use: Option<RuntimeAnthropicCollectedToolUse>,
    // Usage counters captured from `response.completed`.
    input_tokens: u64,
    output_tokens: u64,
    cached_tokens: Option<u64>,
    // True once any tool_use block was produced (drives stop_reason).
    has_tool_calls: bool,
    // Whether reasoning deltas should be collected as thinking blocks.
    want_thinking: bool,
}
impl RuntimeAnthropicCollectedResponse {
    /// Flush buffered plain text into a completed `text` content block.
    fn flush_text(&mut self) {
        if self.pending_text.is_empty() {
            return;
        }
        self.content.push(serde_json::json!({
            "type": "text",
            "text": std::mem::take(&mut self.pending_text),
        }));
    }
    /// Flush buffered reasoning text into a completed `thinking` block.
    fn flush_thinking(&mut self) {
        if self.pending_thinking.is_empty() {
            return;
        }
        self.content.push(serde_json::json!({
            "type": "thinking",
            "thinking": std::mem::take(&mut self.pending_thinking),
        }));
    }
    /// Flush both pending buffers: thinking first, then text.
    fn flush_pending_textual_content(&mut self) {
        self.flush_thinking();
        self.flush_text();
    }
    /// Convert the in-flight tool call (if any) into a `tool_use` block and
    /// mark the response as containing tool calls.
    fn close_active_tool_use(&mut self) {
        let Some(active_tool_use) = self.active_tool_use.take() else {
            return;
        };
        self.has_tool_calls = true;
        self.content.push(serde_json::json!({
            "type": "tool_use",
            "id": active_tool_use.call_id,
            "name": active_tool_use.name,
            "input": runtime_anthropic_tool_input_from_arguments(&active_tool_use.arguments),
        }));
    }
    /// Apply one parsed Responses SSE event to the accumulated state.
    ///
    /// Only upstream `error` / `response.failed` events produce an `Err`;
    /// unknown event types are ignored.
    fn observe_event(&mut self, value: &serde_json::Value) -> Result<()> {
        match value.get("type").and_then(serde_json::Value::as_str) {
            // Reasoning deltas are collected only when thinking was requested;
            // switching stream kind flushes the other pending buffer first.
            Some("response.reasoning_summary_text.delta") if self.want_thinking => {
                self.flush_text();
                if let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str) {
                    self.pending_thinking.push_str(delta);
                }
            }
            Some("response.output_text.delta") => {
                self.flush_thinking();
                if let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str) {
                    self.pending_text.push_str(delta);
                }
            }
            // A new function_call item opens a fresh tool-use accumulator.
            Some("response.output_item.added") => {
                if value
                    .get("item")
                    .and_then(|item| item.get("type"))
                    .and_then(serde_json::Value::as_str)
                    == Some("function_call")
                {
                    self.flush_pending_textual_content();
                    self.active_tool_use = Some(RuntimeAnthropicCollectedToolUse {
                        call_id: value
                            .get("item")
                            .and_then(|item| item.get("call_id"))
                            .and_then(serde_json::Value::as_str)
                            .unwrap_or("tool_call")
                            .to_string(),
                        name: value
                            .get("item")
                            .and_then(|item| item.get("name"))
                            .and_then(serde_json::Value::as_str)
                            .unwrap_or("tool")
                            .to_string(),
                        ..RuntimeAnthropicCollectedToolUse::default()
                    });
                }
            }
            Some("response.function_call_arguments.delta") => {
                if let Some(active_tool_use) = self.active_tool_use.as_mut()
                    && let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str)
                {
                    active_tool_use.saw_delta = true;
                    active_tool_use.arguments.push_str(delta);
                }
            }
            // The `done` payload only backfills arguments when no deltas arrived.
            Some("response.function_call_arguments.done") => {
                if let Some(active_tool_use) = self.active_tool_use.as_mut()
                    && let Some(arguments) =
                        value.get("arguments").and_then(serde_json::Value::as_str)
                    && !active_tool_use.saw_delta
                {
                    active_tool_use.arguments = arguments.to_string();
                }
            }
            Some("response.output_item.done") => {
                if value
                    .get("item")
                    .and_then(|item| item.get("type"))
                    .and_then(serde_json::Value::as_str)
                    == Some("function_call")
                {
                    if let Some(active_tool_use) = self.active_tool_use.as_mut() {
                        // Prefer the item's final arguments (when nothing was
                        // streamed) and final name.
                        if let Some(arguments) = value
                            .get("item")
                            .and_then(|item| item.get("arguments"))
                            .and_then(serde_json::Value::as_str)
                            && !active_tool_use.saw_delta
                        {
                            active_tool_use.arguments = arguments.to_string();
                        }
                        if let Some(name) = value
                            .get("item")
                            .and_then(|item| item.get("name"))
                            .and_then(serde_json::Value::as_str)
                        {
                            active_tool_use.name = name.to_string();
                        }
                    }
                    self.close_active_tool_use();
                }
            }
            // Final usage numbers arrive with the completion event.
            Some("response.completed") => {
                let (input_tokens, output_tokens, cached_tokens) =
                    runtime_anthropic_usage_from_value(value);
                self.input_tokens = input_tokens;
                self.output_tokens = output_tokens;
                self.cached_tokens = cached_tokens;
            }
            Some("error" | "response.failed") => {
                let message = value
                    .get("error")
                    .and_then(|error| error.get("message"))
                    .and_then(serde_json::Value::as_str)
                    .unwrap_or("Codex returned an error.");
                bail!(message.to_string());
            }
            _ => {}
        }
        Ok(())
    }
    /// Finalize accumulation into an Anthropic message JSON value.
    fn into_response(mut self, requested_model: &str) -> serde_json::Value {
        self.close_active_tool_use();
        self.flush_pending_textual_content();
        // Anthropic messages always carry at least one content block.
        if self.content.is_empty() {
            self.content.push(serde_json::json!({
                "type": "text",
                "text": "",
            }));
        }
        let mut usage = serde_json::Map::new();
        usage.insert(
            "input_tokens".to_string(),
            serde_json::Value::Number(self.input_tokens.into()),
        );
        usage.insert(
            "output_tokens".to_string(),
            serde_json::Value::Number(self.output_tokens.into()),
        );
        if let Some(cached_tokens) = self.cached_tokens {
            usage.insert(
                "cache_read_input_tokens".to_string(),
                serde_json::Value::Number(cached_tokens.into()),
            );
        }
        serde_json::json!({
            "id": runtime_anthropic_message_id(),
            "type": "message",
            "role": "assistant",
            "content": self.content,
            "model": requested_model,
            "stop_reason": if self.has_tool_calls { "tool_use" } else { "end_turn" },
            "stop_sequence": serde_json::Value::Null,
            "usage": usage,
        })
    }
}
fn runtime_anthropic_response_from_sse_bytes(
body: &[u8],
requested_model: &str,
want_thinking: bool,
) -> Result<serde_json::Value> {
let mut collected = RuntimeAnthropicCollectedResponse {
want_thinking,
..RuntimeAnthropicCollectedResponse::default()
};
let mut line = Vec::new();
let mut data_lines = Vec::new();
let mut process_event = |data_lines: &mut Vec<String>| -> Result<()> {
if data_lines.is_empty() {
return Ok(());
}
let payload = data_lines.join("\n");
let value = serde_json::from_str::<serde_json::Value>(&payload)
.context("failed to parse buffered Responses SSE payload")?;
collected.observe_event(&value)?;
data_lines.clear();
Ok(())
};
for byte in body {
line.push(*byte);
if *byte != b'\n' {
continue;
}
let line_text = String::from_utf8_lossy(&line);
let trimmed = line_text.trim_end_matches(['\r', '\n']);
if trimmed.is_empty() {
process_event(&mut data_lines)?;
line.clear();
continue;
}
if let Some(payload) = trimmed.strip_prefix("data:") {
data_lines.push(payload.trim_start().to_string());
}
line.clear();
}
if !line.is_empty() {
let line_text = String::from_utf8_lossy(&line);
let trimmed = line_text.trim_end_matches(['\r', '\n']);
if let Some(payload) = trimmed.strip_prefix("data:") {
data_lines.push(payload.trim_start().to_string());
}
}
process_event(&mut data_lines)?;
Ok(collected.into_response(requested_model))
}
/// Wrap a JSON value as a buffered 200 response; serialization failures
/// degrade to an empty object body rather than an error.
fn runtime_anthropic_json_response_parts(value: serde_json::Value) -> RuntimeBufferedResponseParts {
    let body = serde_json::to_vec(&value).unwrap_or_else(|_| b"{}".to_vec());
    RuntimeBufferedResponseParts {
        status: 200,
        headers: vec![("Content-Type".to_string(), b"application/json".to_vec())],
        body,
    }
}
/// Serialize one SSE frame (`event: …\ndata: …\n\n`) as bytes; an
/// unserializable payload degrades to `{}`.
fn runtime_anthropic_sse_event_bytes(event_type: &str, data: serde_json::Value) -> Vec<u8> {
    let payload = serde_json::to_string(&data).unwrap_or_else(|_| "{}".to_string());
    let mut frame = String::with_capacity(event_type.len() + payload.len() + 16);
    frame.push_str("event: ");
    frame.push_str(event_type);
    frame.push_str("\ndata: ");
    frame.push_str(&payload);
    frame.push_str("\n\n");
    frame.into_bytes()
}
/// Replay a complete Anthropic message value as a synthetic SSE stream:
/// `message_start`, then start/delta/stop frames per content block, then
/// `message_delta` (stop reason + usage) and `message_stop`.
fn runtime_anthropic_sse_response_parts_from_message_value(
    value: serde_json::Value,
) -> RuntimeBufferedResponseParts {
    let mut body = Vec::new();
    let message_id = value
        .get("id")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("msg_prodex")
        .to_string();
    let model = value
        .get("model")
        .and_then(serde_json::Value::as_str)
        .unwrap_or("claude-sonnet-4-6")
        .to_string();
    let stop_reason = value
        .get("stop_reason")
        .cloned()
        .unwrap_or(serde_json::Value::Null);
    let stop_sequence = value
        .get("stop_sequence")
        .cloned()
        .unwrap_or(serde_json::Value::Null);
    let usage = value
        .get("usage")
        .cloned()
        .unwrap_or_else(|| serde_json::json!({}));
    // Stop data and real usage are deferred to the trailing message_delta.
    body.extend(runtime_anthropic_sse_event_bytes(
        "message_start",
        serde_json::json!({
            "type": "message_start",
            "message": {
                "id": message_id,
                "type": "message",
                "role": "assistant",
                "content": [],
                "model": model,
                "stop_reason": serde_json::Value::Null,
                "stop_sequence": serde_json::Value::Null,
                "usage": {
                    "input_tokens": 0,
                    "output_tokens": 0,
                }
            }
        }),
    ));
    for (index, block) in value
        .get("content")
        .and_then(serde_json::Value::as_array)
        .into_iter()
        .flatten()
        .enumerate()
    {
        let index_value = serde_json::Value::Number((index as u64).into());
        match block.get("type").and_then(serde_json::Value::as_str) {
            // Thinking blocks replay as one start + one full-text delta.
            Some("thinking") => {
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_start",
                    serde_json::json!({
                        "type": "content_block_start",
                        "index": index_value,
                        "content_block": {
                            "type": "thinking",
                            "thinking": "",
                        }
                    }),
                ));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {
                            "type": "thinking_delta",
                            "thinking": block
                                .get("thinking")
                                .and_then(serde_json::Value::as_str)
                                .unwrap_or(""),
                        }
                    }),
                ));
            }
            // Tool use: the whole input is replayed as one input_json_delta.
            Some("tool_use") => {
                let input_json = block
                    .get("input")
                    .cloned()
                    .unwrap_or_else(|| serde_json::json!({}));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_start",
                    serde_json::json!({
                        "type": "content_block_start",
                        "index": index_value,
                        "content_block": {
                            "type": "tool_use",
                            "id": block.get("id").cloned().unwrap_or(serde_json::Value::String("tool_use".to_string())),
                            "name": block.get("name").cloned().unwrap_or(serde_json::Value::String("tool".to_string())),
                            "input": serde_json::json!({}),
                        }
                    }),
                ));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {
                            "type": "input_json_delta",
                            "partial_json": serde_json::to_string(&input_json)
                                .unwrap_or_else(|_| "{}".to_string()),
                        }
                    }),
                ));
            }
            // Server-side tool use (e.g. web_search) replays the same way.
            Some("server_tool_use") => {
                let input_json = block
                    .get("input")
                    .cloned()
                    .unwrap_or_else(|| serde_json::json!({}));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_start",
                    serde_json::json!({
                        "type": "content_block_start",
                        "index": index_value,
                        "content_block": {
                            "type": "server_tool_use",
                            "id": block.get("id").cloned().unwrap_or(serde_json::Value::String("server_tool_use".to_string())),
                            "name": block.get("name").cloned().unwrap_or(serde_json::Value::String("web_search".to_string())),
                            "input": serde_json::json!({}),
                        }
                    }),
                ));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {
                            "type": "input_json_delta",
                            "partial_json": serde_json::to_string(&input_json)
                                .unwrap_or_else(|_| "{}".to_string()),
                        }
                    }),
                ));
            }
            // Search results carry their full content in the start frame.
            Some("web_search_tool_result") => {
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_start",
                    serde_json::json!({
                        "type": "content_block_start",
                        "index": index_value,
                        "content_block": {
                            "type": "web_search_tool_result",
                            "tool_use_id": block.get("tool_use_id").cloned().unwrap_or(serde_json::Value::String("web_search_call".to_string())),
                            "content": block.get("content").cloned().unwrap_or_else(|| serde_json::Value::Array(Vec::new())),
                        }
                    }),
                ));
            }
            // Any other block type degrades to a text block.
            _ => {
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_start",
                    serde_json::json!({
                        "type": "content_block_start",
                        "index": index_value,
                        "content_block": {
                            "type": "text",
                            "text": "",
                        }
                    }),
                ));
                body.extend(runtime_anthropic_sse_event_bytes(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": index,
                        "delta": {
                            "type": "text_delta",
                            "text": block.get("text").and_then(serde_json::Value::as_str).unwrap_or(""),
                        }
                    }),
                ));
            }
        }
        body.extend(runtime_anthropic_sse_event_bytes(
            "content_block_stop",
            serde_json::json!({
                "type": "content_block_stop",
                "index": index,
            }),
        ));
    }
    body.extend(runtime_anthropic_sse_event_bytes(
        "message_delta",
        serde_json::json!({
            "type": "message_delta",
            "delta": {
                "stop_reason": stop_reason,
                "stop_sequence": stop_sequence,
            },
            "usage": usage,
        }),
    ));
    body.extend(runtime_anthropic_sse_event_bytes(
        "message_stop",
        serde_json::json!({
            "type": "message_stop",
        }),
    ));
    RuntimeBufferedResponseParts {
        status: 200,
        headers: vec![("Content-Type".to_string(), b"text/event-stream".to_vec())],
        body,
    }
}
/// Heuristically detect an SSE body: after skipping leading ASCII whitespace,
/// it must begin with an `event:` or `data:` field prefix.
fn runtime_response_body_looks_like_sse(body: &[u8]) -> bool {
    let first_significant = body
        .iter()
        .position(|byte| !byte.is_ascii_whitespace())
        .unwrap_or(body.len());
    let payload = &body[first_significant..];
    payload.starts_with(b"event:") || payload.starts_with(b"data:")
}
/// Translate an already-buffered Responses SSE body into the equivalent
/// Anthropic SSE byte stream by driving the streaming reader to completion.
fn runtime_anthropic_sse_response_parts_from_responses_sse_bytes(
    body: &[u8],
    requested_model: &str,
    want_thinking: bool,
) -> Result<RuntimeBufferedResponseParts> {
    let mut translated = Vec::new();
    RuntimeAnthropicSseReader::new(
        Box::new(Cursor::new(body.to_vec())),
        requested_model.to_string(),
        want_thinking,
    )
    .read_to_end(&mut translated)
    .context("failed to translate buffered Responses SSE body")?;
    Ok(RuntimeBufferedResponseParts {
        status: 200,
        headers: vec![("Content-Type".to_string(), b"text/event-stream".to_vec())],
        body: translated,
    })
}
/// Drain a streaming runtime response fully into memory, converting header
/// values to raw bytes along the way.
fn buffer_runtime_streaming_response_parts(
    response: RuntimeStreamingResponse,
) -> Result<RuntimeBufferedResponseParts> {
    let RuntimeStreamingResponse {
        status,
        headers,
        mut body,
        ..
    } = response;
    let mut buffered_body = Vec::new();
    body.read_to_end(&mut buffered_body)
        .context("failed to buffer streaming runtime response")?;
    let headers = headers
        .into_iter()
        .map(|(name, value)| (name, value.into_bytes()))
        .collect();
    Ok(RuntimeBufferedResponseParts {
        status,
        headers,
        body: buffered_body,
    })
}
/// In-flight `function_call` state for the streaming SSE translator.
#[derive(Debug, Clone, Default)]
struct RuntimeAnthropicStreamToolUse {
    // Upstream call id (defaults to "tool_call" when absent).
    call_id: String,
    // Tool name (defaults to "tool" when absent).
    name: String,
    // Accumulated JSON argument text.
    arguments: String,
    // True once any arguments delta was forwarded; prevents the `done`
    // payload from being emitted again as a duplicate delta.
    saw_delta: bool,
}
/// Adapter that consumes an upstream Responses SSE stream and exposes it as
/// an Anthropic-style SSE byte stream (the `Read` impl is defined elsewhere).
struct RuntimeAnthropicSseReader {
    // Upstream Responses SSE byte source.
    inner: Box<dyn Read + Send>,
    // Translated bytes queued for the caller to read.
    pending: VecDeque<u8>,
    // Partially-accumulated upstream line — presumably bytes up to the next
    // newline; the read loop is outside this view (TODO confirm).
    upstream_line: Vec<u8>,
    // `data:` payload lines of the upstream event currently being assembled
    // (naming mirrors the buffered parsers above; read loop not shown here).
    upstream_data_lines: Vec<String>,
    // Synthetic Anthropic message id emitted in `message_start`.
    message_id: String,
    // Model name echoed back to the client in `message_start`.
    model: String,
    // Whether reasoning deltas are surfaced as thinking blocks.
    want_thinking: bool,
    // Index of the current Anthropic content block.
    content_index: usize,
    // True while a thinking block is open (started, not yet stopped).
    thinking_block_open: bool,
    // True while a text block is open.
    text_block_open: bool,
    // True once any tool_use block was emitted (drives stop_reason).
    has_tool_calls: bool,
    // True once any content block was produced.
    has_content: bool,
    // Usage totals captured from `response.completed`.
    input_tokens: u64,
    output_tokens: u64,
    cached_tokens: Option<u64>,
    // Tool call currently being streamed, if any.
    active_tool_use: Option<RuntimeAnthropicStreamToolUse>,
    // True after terminal frames (message_stop / error) have been queued.
    terminal_sent: bool,
    // Set alongside terminal frames; presumably tells the read loop to stop
    // consuming `inner` (loop not visible here — confirm).
    inner_finished: bool,
}
impl RuntimeAnthropicSseReader {
/// Build the translator and immediately queue the `message_start` frame so it
/// precedes any translated upstream data. Stop data and real usage are
/// deferred to the later `message_delta` frame.
fn new(inner: Box<dyn Read + Send>, model: String, want_thinking: bool) -> Self {
    let mut reader = Self {
        inner,
        pending: VecDeque::new(),
        upstream_line: Vec::new(),
        upstream_data_lines: Vec::new(),
        message_id: runtime_anthropic_message_id(),
        model,
        want_thinking,
        content_index: 0,
        thinking_block_open: false,
        text_block_open: false,
        has_tool_calls: false,
        has_content: false,
        input_tokens: 0,
        output_tokens: 0,
        cached_tokens: None,
        active_tool_use: None,
        terminal_sent: false,
        inner_finished: false,
    };
    reader.push_event(
        "message_start",
        serde_json::json!({
            "type": "message_start",
            "message": {
                "id": reader.message_id.clone(),
                "type": "message",
                "role": "assistant",
                "content": [],
                "model": reader.model.clone(),
                "stop_reason": serde_json::Value::Null,
                "stop_sequence": serde_json::Value::Null,
                "usage": {
                    "input_tokens": 0,
                    "output_tokens": 0,
                }
            }
        }),
    );
    reader
}
/// Serialize one Anthropic SSE frame and queue its bytes for the caller;
/// an unserializable payload degrades to `{}`.
fn push_event(&mut self, event_type: &str, data: serde_json::Value) {
    let payload = serde_json::to_string(&data).unwrap_or_else(|_| "{}".to_string());
    let frame = format!("event: {event_type}\ndata: {payload}\n\n");
    self.pending.extend(frame.into_bytes());
}
/// Emit `content_block_stop` for an open thinking block and advance the
/// content index; no-op when no thinking block is open.
fn close_thinking_block(&mut self) {
    if !self.thinking_block_open {
        return;
    }
    self.push_event(
        "content_block_stop",
        serde_json::json!({
            "type": "content_block_stop",
            "index": self.content_index,
        }),
    );
    self.content_index += 1;
    self.thinking_block_open = false;
}
/// Emit `content_block_stop` for an open text block and advance the content
/// index; no-op when no text block is open.
fn close_text_block(&mut self) {
    if !self.text_block_open {
        return;
    }
    self.push_event(
        "content_block_stop",
        serde_json::json!({
            "type": "content_block_stop",
            "index": self.content_index,
        }),
    );
    self.content_index += 1;
    self.text_block_open = false;
}
/// Open a text content block (emit `content_block_start`) if none is open.
fn ensure_text_block(&mut self) {
    if self.text_block_open {
        return;
    }
    self.push_event(
        "content_block_start",
        serde_json::json!({
            "type": "content_block_start",
            "index": self.content_index,
            "content_block": {
                "type": "text",
                "text": "",
            }
        }),
    );
    self.text_block_open = true;
}
/// Open a thinking content block (emit `content_block_start`) if none is open.
fn ensure_thinking_block(&mut self) {
    if self.thinking_block_open {
        return;
    }
    self.push_event(
        "content_block_start",
        serde_json::json!({
            "type": "content_block_start",
            "index": self.content_index,
            "content_block": {
                "type": "thinking",
                "thinking": "",
            }
        }),
    );
    self.thinking_block_open = true;
}
/// Close any open textual blocks, then open a `tool_use` content block for
/// the given call and begin tracking it as the active tool use.
fn start_tool_use_block(&mut self, call_id: &str, name: &str) {
    self.close_thinking_block();
    self.close_text_block();
    self.push_event(
        "content_block_start",
        serde_json::json!({
            "type": "content_block_start",
            "index": self.content_index,
            "content_block": {
                "type": "tool_use",
                "id": call_id,
                "name": name,
                "input": {},
            }
        }),
    );
    self.active_tool_use = Some(RuntimeAnthropicStreamToolUse {
        call_id: call_id.to_string(),
        name: name.to_string(),
        ..RuntimeAnthropicStreamToolUse::default()
    });
    self.has_content = true;
    self.has_tool_calls = true;
}
/// Close the in-flight tool_use block, if any.
///
/// Overrides replace the recorded call id and name; the arguments override is
/// honored only when no deltas were streamed. When nothing was streamed
/// incrementally, the whole argument payload is emitted as a single
/// `input_json_delta` before the block is stopped.
fn finish_active_tool_use(
    &mut self,
    arguments_override: Option<&str>,
    name_override: Option<&str>,
    call_id_override: Option<&str>,
) {
    let Some(mut active_tool_use) = self.active_tool_use.take() else {
        return;
    };
    if let Some(name) = name_override {
        active_tool_use.name = name.to_string();
    }
    if let Some(call_id) = call_id_override {
        active_tool_use.call_id = call_id.to_string();
    }
    if let Some(arguments) = arguments_override
        && !active_tool_use.saw_delta
    {
        active_tool_use.arguments = arguments.to_string();
    }
    if !active_tool_use.saw_delta && !active_tool_use.arguments.is_empty() {
        self.push_event(
            "content_block_delta",
            serde_json::json!({
                "type": "content_block_delta",
                "index": self.content_index,
                "delta": {
                    "type": "input_json_delta",
                    "partial_json": active_tool_use.arguments,
                }
            }),
        );
    }
    self.push_event(
        "content_block_stop",
        serde_json::json!({
            "type": "content_block_stop",
            "index": self.content_index,
        }),
    );
    self.content_index += 1;
}
/// Queue the successful terminal frames, exactly once: close everything that
/// is open, then emit `message_delta` (stop reason + usage) and
/// `message_stop`.
fn finish_success(&mut self) {
    if self.terminal_sent {
        return;
    }
    self.finish_active_tool_use(None, None, None);
    self.close_thinking_block();
    self.close_text_block();
    // Guarantee at least one (possibly empty) content block was emitted.
    if !self.has_content {
        self.ensure_text_block();
        self.close_text_block();
    }
    let mut usage = serde_json::Map::new();
    usage.insert(
        "input_tokens".to_string(),
        serde_json::Value::Number(self.input_tokens.into()),
    );
    usage.insert(
        "output_tokens".to_string(),
        serde_json::Value::Number(self.output_tokens.into()),
    );
    if let Some(cached_tokens) = self.cached_tokens {
        usage.insert(
            "cache_read_input_tokens".to_string(),
            serde_json::Value::Number(cached_tokens.into()),
        );
    }
    self.push_event(
        "message_delta",
        serde_json::json!({
            "type": "message_delta",
            "delta": {
                "stop_reason": if self.has_tool_calls { "tool_use" } else { "end_turn" },
            },
            "usage": usage,
        }),
    );
    self.push_event(
        "message_stop",
        serde_json::json!({
            "type": "message_stop",
        }),
    );
    self.terminal_sent = true;
    self.inner_finished = true;
}
/// Queue error terminal frames, exactly once: the message is surfaced both as
/// a visible "[Error] …" text block and as an `error` event, followed by
/// `message_stop`.
fn finish_error(&mut self, message: &str) {
    if self.terminal_sent {
        return;
    }
    self.finish_active_tool_use(None, None, None);
    self.close_thinking_block();
    self.close_text_block();
    self.ensure_text_block();
    self.push_event(
        "content_block_delta",
        serde_json::json!({
            "type": "content_block_delta",
            "index": self.content_index,
            "delta": {
                "type": "text_delta",
                "text": format!("[Error] {message}"),
            }
        }),
    );
    self.has_content = true;
    self.close_text_block();
    self.push_event(
        "error",
        serde_json::json!({
            "type": "error",
            "error": {
                "type": "api_error",
                "message": message,
            }
        }),
    );
    self.push_event(
        "message_stop",
        serde_json::json!({
            "type": "message_stop",
        }),
    );
    self.terminal_sent = true;
    self.inner_finished = true;
}
/// Translates one upstream OpenAI Responses SSE event into zero or more
/// Anthropic Messages stream events, updating block/tool bookkeeping as it
/// goes. Event types not listed here are silently ignored.
fn observe_upstream_event(&mut self, value: &serde_json::Value) {
    match value.get("type").and_then(serde_json::Value::as_str) {
        // Reasoning summaries map to Anthropic "thinking" deltas, but only
        // when the client asked for thinking output.
        Some("response.reasoning_summary_text.delta") if self.want_thinking => {
            self.close_text_block();
            self.ensure_thinking_block();
            if let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str) {
                self.push_event(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": self.content_index,
                        "delta": {
                            "type": "thinking_delta",
                            "thinking": delta,
                        }
                    }),
                );
                self.has_content = true;
            }
        }
        // Plain assistant text becomes a text delta.
        Some("response.output_text.delta") => {
            self.close_thinking_block();
            self.ensure_text_block();
            if let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str) {
                self.push_event(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": self.content_index,
                        "delta": {
                            "type": "text_delta",
                            "text": delta,
                        }
                    }),
                );
                self.has_content = true;
            }
        }
        // A new function-call item opens an Anthropic tool_use block.
        Some("response.output_item.added") => {
            if value
                .get("item")
                .and_then(|item| item.get("type"))
                .and_then(serde_json::Value::as_str)
                == Some("function_call")
            {
                let call_id = value
                    .get("item")
                    .and_then(|item| item.get("call_id"))
                    .and_then(serde_json::Value::as_str)
                    .unwrap_or("tool_call");
                let name = value
                    .get("item")
                    .and_then(|item| item.get("name"))
                    .and_then(serde_json::Value::as_str)
                    .unwrap_or("tool");
                self.start_tool_use_block(call_id, name);
            }
        }
        // Streaming tool-call arguments become input_json deltas.
        Some("response.function_call_arguments.delta") => {
            if let Some(delta) = value.get("delta").and_then(serde_json::Value::as_str) {
                if let Some(active_tool_use) = self.active_tool_use.as_mut() {
                    // Remember that deltas arrived so the later "done" event
                    // does not overwrite the accumulated arguments.
                    active_tool_use.saw_delta = true;
                    active_tool_use.arguments.push_str(delta);
                }
                self.push_event(
                    "content_block_delta",
                    serde_json::json!({
                        "type": "content_block_delta",
                        "index": self.content_index,
                        "delta": {
                            "type": "input_json_delta",
                            "partial_json": delta,
                        }
                    }),
                );
            }
        }
        // The final arguments payload is only adopted when no deltas were
        // streamed for this call.
        Some("response.function_call_arguments.done") => {
            if let Some(active_tool_use) = self.active_tool_use.as_mut()
                && let Some(arguments) =
                    value.get("arguments").and_then(serde_json::Value::as_str)
                && !active_tool_use.saw_delta
            {
                active_tool_use.arguments = arguments.to_string();
            }
        }
        // A finished function call closes the tool_use block (opening one
        // first if the corresponding "added" event was never observed).
        Some("response.output_item.done") => {
            if value
                .get("item")
                .and_then(|item| item.get("type"))
                .and_then(serde_json::Value::as_str)
                == Some("function_call")
            {
                if self.active_tool_use.is_none() {
                    let call_id = value
                        .get("item")
                        .and_then(|item| item.get("call_id"))
                        .and_then(serde_json::Value::as_str)
                        .unwrap_or("tool_call");
                    let name = value
                        .get("item")
                        .and_then(|item| item.get("name"))
                        .and_then(serde_json::Value::as_str)
                        .unwrap_or("tool");
                    self.start_tool_use_block(call_id, name);
                }
                let arguments = value
                    .get("item")
                    .and_then(|item| item.get("arguments"))
                    .and_then(serde_json::Value::as_str);
                let name = value
                    .get("item")
                    .and_then(|item| item.get("name"))
                    .and_then(serde_json::Value::as_str);
                let call_id = value
                    .get("item")
                    .and_then(|item| item.get("call_id"))
                    .and_then(serde_json::Value::as_str);
                self.finish_active_tool_use(arguments, name, call_id);
            }
        }
        // Completion carries the usage totals and ends the stream cleanly.
        Some("response.completed") => {
            let (input_tokens, output_tokens, cached_tokens) =
                runtime_anthropic_usage_from_value(value);
            self.input_tokens = input_tokens;
            self.output_tokens = output_tokens;
            self.cached_tokens = cached_tokens;
            self.finish_success();
        }
        // Upstream failure ends the stream with the error tail.
        Some("error" | "response.failed") => {
            let message = value
                .get("error")
                .and_then(|error| error.get("message"))
                .and_then(serde_json::Value::as_str)
                .unwrap_or("Codex returned an error.");
            self.finish_error(message);
        }
        _ => {}
    }
}
/// Called at each SSE event boundary (blank line): the buffered `data:` lines
/// form one JSON payload, which is parsed and fed to the event translator.
/// An unparsable payload is reported as `InvalidData`.
fn process_upstream_event(&mut self) -> io::Result<()> {
    if self.upstream_data_lines.is_empty() {
        return Ok(());
    }
    // Drain (rather than join + clear) so the buffer is empty even when the
    // payload turns out to be malformed.
    let payload = self
        .upstream_data_lines
        .drain(..)
        .collect::<Vec<_>>()
        .join("\n");
    let value: serde_json::Value = serde_json::from_str(&payload).map_err(|err| {
        io::Error::new(
            io::ErrorKind::InvalidData,
            format!("failed to parse runtime Responses SSE payload: {err}"),
        )
    })?;
    self.observe_upstream_event(&value);
    Ok(())
}
/// Feeds raw upstream bytes through a line-oriented SSE parser: `data:` line
/// payloads are buffered, and a blank line dispatches the buffered event.
fn observe_upstream_bytes(&mut self, chunk: &[u8]) -> io::Result<()> {
    for byte in chunk {
        self.upstream_line.push(*byte);
        // Accumulate until a complete line is available.
        if *byte != b'\n' {
            continue;
        }
        let line_text = String::from_utf8_lossy(&self.upstream_line);
        let trimmed = line_text.trim_end_matches(['\r', '\n']);
        if trimmed.is_empty() {
            // Blank line = SSE event boundary: parse and translate the event.
            self.process_upstream_event()?;
            self.upstream_line.clear();
            // Once a terminal event has been produced, ignore the remainder
            // of this chunk.
            if self.inner_finished {
                break;
            }
            continue;
        }
        // Only `data:` fields matter here; other SSE fields are dropped.
        if let Some(payload) = trimmed.strip_prefix("data:") {
            self.upstream_data_lines
                .push(payload.trim_start().to_string());
        }
        self.upstream_line.clear();
    }
    Ok(())
}
}
impl Read for RuntimeAnthropicSseReader {
    /// Pump loop: serve already-translated bytes from `pending` first; when
    /// that runs dry, pull more upstream bytes and translate them. Upstream
    /// EOF without a terminal event is treated as a successful completion.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        loop {
            let read = buf.len().min(self.pending.len());
            if read > 0 {
                // Copy out as much buffered output as the caller can take.
                for (index, byte) in self.pending.drain(..read).enumerate() {
                    buf[index] = byte;
                }
                return Ok(read);
            }
            if self.inner_finished {
                // Terminal events already emitted and drained: clean EOF.
                return Ok(0);
            }
            let mut upstream_buffer = [0_u8; 8192];
            let read = self.inner.read(&mut upstream_buffer)?;
            if read == 0 {
                // Upstream closed early; synthesize the success tail so the
                // client still receives message_stop.
                self.finish_success();
                continue;
            }
            // NOTE(review): a zero-length `buf` makes this loop consume
            // upstream to completion before returning 0 — confirm callers
            // never pass an empty buffer.
            self.observe_upstream_bytes(&upstream_buffer[..read])?;
        }
    }
}
/// Converts a fully buffered upstream Responses reply into the Anthropic
/// wire shape the client requested (SSE when `request.stream`, plain JSON
/// otherwise). Upstream errors (HTTP >= 400, or an `error` object inside a
/// 2xx JSON body) are passed through as Anthropic error parts.
fn translate_runtime_buffered_responses_reply_to_anthropic(
    parts: RuntimeBufferedResponseParts,
    request: &RuntimeAnthropicMessagesRequest,
) -> Result<RuntimeResponsesReply> {
    if parts.status >= 400 {
        return Ok(RuntimeResponsesReply::Buffered(
            runtime_anthropic_error_from_upstream_parts(parts),
        ));
    }
    // The upstream body may be SSE even without the right Content-Type, so
    // sniff the body itself as well.
    let content_type = runtime_buffered_response_content_type(&parts)
        .unwrap_or_default()
        .to_ascii_lowercase();
    let looks_like_sse = content_type.contains("text/event-stream")
        || runtime_response_body_looks_like_sse(&parts.body);
    // SSE in, SSE out: translate event-by-event without materializing a
    // whole message value.
    if request.stream && looks_like_sse {
        return Ok(RuntimeResponsesReply::Buffered(
            runtime_anthropic_sse_response_parts_from_responses_sse_bytes(
                &parts.body,
                &request.requested_model,
                request.want_thinking,
            )?,
        ));
    }
    // Otherwise build a complete Anthropic message value first...
    let response = if looks_like_sse {
        runtime_anthropic_response_from_sse_bytes(
            &parts.body,
            &request.requested_model,
            request.want_thinking,
        )?
    } else {
        let value = serde_json::from_slice::<serde_json::Value>(&parts.body)
            .context("failed to parse buffered Responses JSON body")?;
        // A 2xx body can still carry an error object.
        if value.get("error").is_some() {
            return Ok(RuntimeResponsesReply::Buffered(
                runtime_anthropic_error_from_upstream_parts(parts),
            ));
        }
        runtime_anthropic_response_from_json_value(
            &value,
            &request.requested_model,
            request.want_thinking,
        )
    };
    // ...then serialize it as a replayed SSE stream or as plain JSON.
    if request.stream {
        return Ok(RuntimeResponsesReply::Buffered(
            runtime_anthropic_sse_response_parts_from_message_value(response),
        ));
    }
    Ok(RuntimeResponsesReply::Buffered(
        runtime_anthropic_json_response_parts(response),
    ))
}
/// Entry point for Responses→Anthropic translation. Streams are translated
/// on the fly unless server-side tools require the full body up front (or
/// the client did not ask for a stream), in which case the reply is buffered
/// first and translated as a whole.
fn translate_runtime_responses_reply_to_anthropic(
    response: RuntimeResponsesReply,
    request: &RuntimeAnthropicMessagesRequest,
) -> Result<RuntimeResponsesReply> {
    // Server tools must inspect the complete reply: force-buffer streams.
    if request.server_tools.needs_buffered_translation() {
        let parts = match response {
            RuntimeResponsesReply::Buffered(parts) => parts,
            RuntimeResponsesReply::Streaming(response) => {
                buffer_runtime_streaming_response_parts(response)?
            }
        };
        return translate_runtime_buffered_responses_reply_to_anthropic(parts, request);
    }
    match response {
        RuntimeResponsesReply::Buffered(parts) => {
            translate_runtime_buffered_responses_reply_to_anthropic(parts, request)
        }
        RuntimeResponsesReply::Streaming(response) => {
            // Non-streaming client: buffer the upstream stream and translate
            // it like a buffered reply.
            if !request.stream {
                let parts = buffer_runtime_streaming_response_parts(response)?;
                return translate_runtime_buffered_responses_reply_to_anthropic(parts, request);
            }
            // Re-label the response as SSE and wrap the body in the
            // on-the-fly Responses→Anthropic SSE translator.
            let mut headers = response.headers;
            headers.retain(|(name, _)| !name.eq_ignore_ascii_case("content-type"));
            headers.push(("Content-Type".to_string(), "text/event-stream".to_string()));
            Ok(RuntimeResponsesReply::Streaming(RuntimeStreamingResponse {
                status: response.status,
                headers,
                body: Box::new(RuntimeAnthropicSseReader::new(
                    response.body,
                    request.requested_model.clone(),
                    request.want_thinking,
                )),
                request_id: response.request_id,
                profile_name: response.profile_name,
                log_path: response.log_path,
                shared: response.shared,
                _inflight_guard: response._inflight_guard,
            }))
        }
    }
}
/// Joins an SSE event's `data:` lines into one payload and parses it as
/// JSON; returns None for an empty event or unparsable JSON.
fn parse_runtime_sse_payload(data_lines: &[String]) -> Option<serde_json::Value> {
    match data_lines {
        [] => None,
        lines => serde_json::from_str(&lines.join("\n")).ok(),
    }
}
/// Parses one SSE event and extracts everything the proxy cares about in a
/// single pass: quota/previous-response error markers, response ids, and the
/// event type. Unparsable payloads yield the default (all-empty) summary.
fn parse_runtime_sse_event(data_lines: &[String]) -> RuntimeParsedSseEvent {
    match parse_runtime_sse_payload(data_lines) {
        None => RuntimeParsedSseEvent::default(),
        Some(value) => RuntimeParsedSseEvent {
            quota_blocked: extract_runtime_proxy_quota_message_from_value(&value).is_some(),
            previous_response_not_found:
                extract_runtime_proxy_previous_response_message_from_value(&value).is_some(),
            response_ids: extract_runtime_response_ids_from_value(&value),
            event_type: runtime_response_event_type_from_value(&value),
        },
    }
}
/// Collects every response id carried by one SSE event; an empty or
/// unparsable event yields no ids.
fn extract_runtime_response_ids_from_sse(data_lines: &[String]) -> Vec<String> {
    match parse_runtime_sse_payload(data_lines) {
        Some(value) => extract_runtime_response_ids_from_value(&value),
        None => Vec::new(),
    }
}
/// Looks for a quota-exhaustion message in a response body: structured JSON
/// detection first, then a plain-text scan of the (lossily decoded) bytes.
fn extract_runtime_proxy_quota_message(body: &[u8]) -> Option<String> {
    if let Ok(value) = serde_json::from_slice::<serde_json::Value>(body) {
        if let Some(message) = extract_runtime_proxy_quota_message_from_value(&value) {
            return Some(message);
        }
    }
    extract_runtime_proxy_quota_message_from_text(&String::from_utf8_lossy(body))
}
/// Quota detection on a reply. Only buffered bodies can be inspected;
/// streaming replies would have to be consumed, so they report nothing.
fn extract_runtime_proxy_quota_message_from_response_reply(
    response: &RuntimeResponsesReply,
) -> Option<String> {
    if let RuntimeResponsesReply::Buffered(parts) = response {
        extract_runtime_proxy_quota_message(&parts.body)
    } else {
        None
    }
}
/// Quota detection on a websocket error frame, dispatching on the payload
/// encoding: text frames get the text scan, binary frames the byte scan.
fn extract_runtime_proxy_quota_message_from_websocket_payload(
    payload: &RuntimeWebsocketErrorPayload,
) -> Option<String> {
    match payload {
        RuntimeWebsocketErrorPayload::Empty => None,
        RuntimeWebsocketErrorPayload::Text(text) => {
            extract_runtime_proxy_quota_message_from_text(text)
        }
        RuntimeWebsocketErrorPayload::Binary(bytes) => {
            extract_runtime_proxy_quota_message(bytes)
        }
    }
}
fn extract_runtime_proxy_overload_message_from_websocket_payload(
payload: &RuntimeWebsocketErrorPayload,
) -> Option<String> {
match payload {
RuntimeWebsocketErrorPayload::Text(text) => {
if let Ok(value) = serde_json::from_str::<serde_json::Value>(text)
&& let Some(message) = extract_runtime_proxy_overload_message_from_value(&value)
{
return Some(message);
}
extract_runtime_proxy_overload_message_from_text(text)
}
RuntimeWebsocketErrorPayload::Binary(bytes) => {
if let Ok(value) = serde_json::from_slice::<serde_json::Value>(bytes)
&& let Some(message) = extract_runtime_proxy_overload_message_from_value(&value)
{
return Some(message);
}
extract_runtime_proxy_overload_message_from_text(&String::from_utf8_lossy(bytes))
}
RuntimeWebsocketErrorPayload::Empty => None,
}
}
/// Detects the structured `previous_response_not_found` error in a JSON
/// body; non-JSON bodies cannot carry it.
fn extract_runtime_proxy_previous_response_message(body: &[u8]) -> Option<String> {
    let value = serde_json::from_slice::<serde_json::Value>(body).ok()?;
    extract_runtime_proxy_previous_response_message_from_value(&value)
}
/// Overload detection on an HTTP reply, in three tiers:
/// 1. structured JSON error with an overload code/message,
/// 2. overload-sounding text on a retryable status,
/// 3. any bare 500 (with a generic fallback message when the body is empty).
fn extract_runtime_proxy_overload_message(status: u16, body: &[u8]) -> Option<String> {
    if let Ok(value) = serde_json::from_slice::<serde_json::Value>(body) {
        if let Some(message) = extract_runtime_proxy_overload_message_from_value(&value) {
            return Some(message);
        }
    }
    let body_text = String::from_utf8_lossy(body).trim().to_string();
    let retryable_status = matches!(status, 429 | 500 | 502 | 503 | 504 | 529);
    if retryable_status {
        if let Some(message) = extract_runtime_proxy_overload_message_from_text(&body_text) {
            return Some(message);
        }
    }
    if status != 500 {
        return None;
    }
    if body_text.is_empty() {
        Some("Upstream Codex backend is currently experiencing high demand.".to_string())
    } else {
        Some(body_text)
    }
}
/// Recursively searches a JSON value for an overload error: `error` /
/// `response.error` objects at this level are examined first, then every
/// nested array element and object field.
fn extract_runtime_proxy_overload_message_from_value(value: &serde_json::Value) -> Option<String> {
    let local_errors = [
        value.get("error"),
        value.get("response").and_then(|response| response.get("error")),
    ];
    for error in local_errors.into_iter().flatten() {
        let code = error.get("code").and_then(serde_json::Value::as_str);
        let message = error
            .get("message")
            .and_then(serde_json::Value::as_str)
            .or_else(|| error.get("detail").and_then(serde_json::Value::as_str));
        // Known overload codes win regardless of the message text.
        if matches!(code, Some("server_is_overloaded" | "slow_down")) {
            let text = message.unwrap_or("Upstream Codex backend is currently overloaded.");
            return Some(text.to_string());
        }
        // Otherwise accept any message that reads like an overload notice.
        if let Some(message) = message {
            if runtime_proxy_overload_message(message) {
                return Some(message.to_string());
            }
        }
    }
    // Recurse into nested containers.
    match value {
        serde_json::Value::Array(items) => items
            .iter()
            .find_map(extract_runtime_proxy_overload_message_from_value),
        serde_json::Value::Object(fields) => fields
            .values()
            .find_map(extract_runtime_proxy_overload_message_from_value),
        _ => None,
    }
}
/// Returns the trimmed text when it reads like an overload notice.
fn extract_runtime_proxy_overload_message_from_text(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if trimmed.is_empty() || !runtime_proxy_overload_message(trimmed) {
        return None;
    }
    Some(trimmed.to_string())
}
/// Depth-first search for a quota-exhaustion message: test this node as a
/// candidate first, then recurse into every nested value.
fn extract_runtime_proxy_quota_message_from_value(value: &serde_json::Value) -> Option<String> {
    extract_runtime_proxy_quota_message_candidate(value).or_else(|| match value {
        serde_json::Value::Array(items) => items
            .iter()
            .find_map(extract_runtime_proxy_quota_message_from_value),
        serde_json::Value::Object(fields) => fields
            .values()
            .find_map(extract_runtime_proxy_quota_message_from_value),
        _ => None,
    })
}
/// Tests a single JSON node for a quota-exhaustion marker. A bare string
/// qualifies only when it reads like a usage-limit notice; an object
/// qualifies via a known code, the `usage_limit_reached` type, or a
/// usage-limit-sounding message/detail/error field.
fn extract_runtime_proxy_quota_message_candidate(value: &serde_json::Value) -> Option<String> {
    match value {
        serde_json::Value::String(message) => {
            if runtime_proxy_usage_limit_message(message) {
                Some(message.clone())
            } else {
                None
            }
        }
        serde_json::Value::Object(map) => {
            let as_str = |key: &str| map.get(key).and_then(serde_json::Value::as_str);
            let message = as_str("message")
                .or_else(|| as_str("detail"))
                .or_else(|| as_str("error"));
            let quota_code = matches!(
                as_str("code"),
                Some("insufficient_quota" | "rate_limit_exceeded")
            );
            let quota_type = as_str("type") == Some("usage_limit_reached");
            let quota_text = message.is_some_and(runtime_proxy_usage_limit_message);
            if quota_code || quota_type || quota_text {
                Some(
                    message
                        .unwrap_or("Upstream Codex account quota was exhausted.")
                        .to_string(),
                )
            } else {
                None
            }
        }
        _ => None,
    }
}
/// Returns the trimmed text when it reads like a usage-limit notice or
/// mentions one of the machine-readable quota markers.
fn extract_runtime_proxy_quota_message_from_text(text: &str) -> Option<String> {
    let trimmed = text.trim();
    if trimmed.is_empty() {
        return None;
    }
    let lower = trimmed.to_ascii_lowercase();
    let has_marker = [
        "usage_limit_reached",
        "insufficient_quota",
        "rate_limit_exceeded",
    ]
    .iter()
    .any(|marker| lower.contains(marker));
    (runtime_proxy_usage_limit_message(trimmed) || has_marker).then(|| trimmed.to_string())
}
/// Heuristic: does `message` read like an upstream usage-limit notice?
///
/// Matches the known phrasings directly; a generic "usage limit" mention
/// only counts when a corroborating phrase appears alongside it.
fn runtime_proxy_usage_limit_message(message: &str) -> bool {
    let lower = message.to_ascii_lowercase();
    lower.contains("you've hit your usage limit")
        || lower.contains("you have hit your usage limit")
        // "the usage limit has been reached" is subsumed by the next clause;
        // kept for readability of the known phrasings.
        || lower.contains("the usage limit has been reached")
        || lower.contains("usage limit has been reached")
        // Explicit parentheses: `&&` binds tighter than `||`, and the
        // original formatting made the intended grouping easy to misread.
        || (lower.contains("usage limit")
            && (lower.contains("try again at")
                || lower.contains("request to your admin")
                || lower.contains("more access now")))
}
/// Heuristic: does `message` read like an upstream overload/capacity notice?
fn runtime_proxy_overload_message(message: &str) -> bool {
    let lower = message.to_ascii_lowercase();
    let has = |needle: &str| lower.contains(needle);
    // "selected model is at capacity" is unambiguous on its own.
    if has("selected model is at capacity") {
        return true;
    }
    // A generic capacity mention needs a retry/alternative hint next to it.
    if has("model is at capacity") && (has("try a different model") || has("please try again")) {
        return true;
    }
    has("backend under high demand")
        || has("experiencing high demand")
        || has("server is overloaded")
        || has("currently overloaded")
}
/// Produces a short single-line log snippet of a response body: whitespace
/// runs are collapsed to single spaces, an empty body becomes "-", and text
/// longer than `max_chars` characters is truncated with a "..." suffix.
fn runtime_proxy_body_snippet(body: &[u8], max_chars: usize) -> String {
    let normalized = String::from_utf8_lossy(body)
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ");
    if normalized.is_empty() {
        return "-".to_string();
    }
    // Single pass instead of taking `max_chars` chars and then re-counting
    // the whole string: `nth(max_chars)` yields the byte offset of the char
    // AFTER the keep-window, which exists iff truncation is needed.
    match normalized.char_indices().nth(max_chars) {
        Some((cut, _)) => format!("{}...", &normalized[..cut]),
        None => normalized,
    }
}
fn extract_runtime_proxy_previous_response_message_from_value(
value: &serde_json::Value,
) -> Option<String> {
let direct_error = value.get("error");
let response_error = value
.get("response")
.and_then(|response| response.get("error"));
for error in [direct_error, response_error].into_iter().flatten() {
let code = error.get("code").and_then(serde_json::Value::as_str)?;
if code != "previous_response_not_found" {
continue;
}
return Some(
error
.get("message")
.and_then(serde_json::Value::as_str)
.unwrap_or("Previous response could not be found on the selected Codex account.")
.to_string(),
);
}
None
}
/// Test helper: parses a raw JSON payload string and collects every response
/// id found in it; unparsable payloads yield no ids.
#[cfg(test)]
fn extract_runtime_response_ids_from_payload(payload: &str) -> Vec<String> {
    match serde_json::from_str::<serde_json::Value>(payload) {
        Ok(value) => extract_runtime_response_ids_from_value(&value),
        Err(_) => Vec::new(),
    }
}
/// Collects every response id found in a JSON body; non-JSON bodies yield
/// no ids.
fn extract_runtime_response_ids_from_body_bytes(body: &[u8]) -> Vec<String> {
    match serde_json::from_slice::<serde_json::Value>(body) {
        Ok(value) => extract_runtime_response_ids_from_value(&value),
        Err(_) => Vec::new(),
    }
}
/// Appends `id` to the list only when it is present and not already
/// recorded (order-preserving de-duplication).
fn push_runtime_response_id(response_ids: &mut Vec<String>, id: Option<&str>) {
    let Some(id) = id else {
        return;
    };
    if response_ids.iter().all(|existing| existing != id) {
        response_ids.push(id.to_string());
    }
}
/// Collects (deduplicated, in discovery order) every response id a JSON
/// value can carry: `response.id` from event envelopes, a top-level
/// `response_id`, and the `id` of a bare response object.
fn extract_runtime_response_ids_from_value(value: &serde_json::Value) -> Vec<String> {
    let mut ids = Vec::new();
    // `response.id` from event envelopes.
    let nested_id = value
        .get("response")
        .and_then(|response| response.get("id"))
        .and_then(serde_json::Value::as_str);
    push_runtime_response_id(&mut ids, nested_id);
    // Top-level `response_id` field.
    push_runtime_response_id(
        &mut ids,
        value.get("response_id").and_then(serde_json::Value::as_str),
    );
    // Bare response objects: "object" is "response" or "*.response".
    let is_response_object = value
        .get("object")
        .and_then(serde_json::Value::as_str)
        .is_some_and(|object| object == "response" || object.ends_with(".response"));
    if is_response_object {
        push_runtime_response_id(&mut ids, value.get("id").and_then(serde_json::Value::as_str));
    }
    ids
}
/// Finds the turn-state header in a JSON value, preferring headers nested
/// under `response.headers` over a top-level `headers` object.
fn extract_runtime_turn_state_from_value(value: &serde_json::Value) -> Option<String> {
    let nested_headers = value
        .get("response")
        .and_then(|response| response.get("headers"));
    [nested_headers, value.get("headers")]
        .into_iter()
        .flatten()
        .find_map(extract_runtime_turn_state_from_headers_value)
}
/// Extracts the `x-codex-turn-state` header (name matched
/// case-insensitively) from a JSON header map. Header values may be encoded
/// as a plain string or an array whose first string element wins.
fn extract_runtime_turn_state_from_headers_value(value: &serde_json::Value) -> Option<String> {
    let first_string = |header_value: &serde_json::Value| match header_value {
        serde_json::Value::String(text) => Some(text.clone()),
        serde_json::Value::Array(items) => {
            items.iter().find_map(|item| item.as_str().map(str::to_owned))
        }
        _ => None,
    };
    value.as_object()?.iter().find_map(|(name, header_value)| {
        name.eq_ignore_ascii_case("x-codex-turn-state")
            .then(|| first_string(header_value))
            .flatten()
    })
}
/// Spawns `binary` with the given arguments and a dedicated CODEX_HOME,
/// optionally registering a runtime-broker lease for the child's pid, and
/// waits for the child to exit.
///
/// The lease is held in a binding that lives until after `wait()` returns,
/// so its `Drop` (lease-file removal) only runs once the child is gone. If
/// the lease cannot be created, the child is killed and reaped before the
/// error is returned.
fn run_child(
    binary: &OsString,
    args: &[OsString],
    codex_home: &Path,
    extra_env: &[(&str, OsString)],
    removed_env: &[&str],
    runtime_proxy: Option<&RuntimeProxyEndpoint>,
) -> Result<ExitStatus> {
    let mut command = Command::new(binary);
    command.args(args).env("CODEX_HOME", codex_home);
    // Removals are applied before additions, so an extra_env entry can
    // re-introduce a removed key.
    for key in removed_env {
        command.env_remove(key);
    }
    for (key, value) in extra_env {
        command.env(key, value);
    }
    let mut child = command
        .spawn()
        .with_context(|| format!("failed to execute {}", binary.to_string_lossy()))?;
    let _child_runtime_broker_lease = match runtime_proxy {
        Some(proxy) => match proxy.create_child_lease(child.id()) {
            Ok(lease) => Some(lease),
            Err(err) => {
                // Without a lease the child must not keep running;
                // best-effort kill + reap to avoid leaving a zombie.
                let _ = child.kill();
                let _ = child.wait();
                return Err(err);
            }
        },
        None => None,
    };
    let status = child
        .wait()
        .with_context(|| format!("failed to wait for {}", binary.to_string_lossy()))?;
    Ok(status)
}
/// Terminates the process, mirroring the child's exit code; a child killed
/// by a signal (no code) maps to a generic 1. `exit` never returns — the
/// `Result` return type only exists to fit caller signatures.
fn exit_with_status(status: ExitStatus) -> Result<()> {
    let code = status.code().unwrap_or(1);
    std::process::exit(code);
}
/// Probes the named profiles in parallel and returns one report per known
/// profile, preserving the caller's ordering via `order_index`. Unknown
/// names are skipped; profiles whose auth mode cannot answer a usage probe
/// report an error string instead.
fn collect_run_profile_reports(
    state: &AppState,
    profile_names: Vec<String>,
    base_url: Option<&str>,
) -> Vec<RunProfileProbeReport> {
    // Build one probe job per known profile, remembering input order.
    let mut jobs = Vec::with_capacity(profile_names.len());
    for (order_index, name) in profile_names.into_iter().enumerate() {
        if let Some(profile) = state.profiles.get(&name) {
            jobs.push(RunProfileProbeJob {
                name,
                order_index,
                codex_home: profile.codex_home.clone(),
            });
        }
    }
    // Owned copy so the worker closure can be shared across threads.
    let base_url = base_url.map(str::to_owned);
    map_parallel(jobs, |job| {
        let auth = read_auth_summary(&job.codex_home);
        let result = if auth.quota_compatible {
            fetch_usage(&job.codex_home, base_url.as_deref()).map_err(|err| err.to_string())
        } else {
            Err("auth mode is not quota-compatible".to_string())
        };
        RunProfileProbeReport {
            name: job.name,
            order_index: job.order_index,
            auth,
            result,
        }
    })
}
/// Probes a single profile by name: reads its auth summary and, when the
/// auth mode is quota-compatible, fetches live usage. Fails when the name is
/// not a known profile.
fn probe_run_profile(
    state: &AppState,
    profile_name: &str,
    order_index: usize,
    base_url: Option<&str>,
) -> Result<RunProfileProbeReport> {
    let profile = state
        .profiles
        .get(profile_name)
        .with_context(|| format!("profile '{}' is missing", profile_name))?;
    let auth = read_auth_summary(&profile.codex_home);
    let result = match auth.quota_compatible {
        true => fetch_usage(&profile.codex_home, base_url).map_err(|err| err.to_string()),
        false => Err("auth mode is not quota-compatible".to_string()),
    };
    Ok(RunProfileProbeReport {
        name: profile_name.to_owned(),
        order_index,
        auth,
        result,
    })
}
/// A probe is "ready" when it succeeded and no rate-limit window is blocked.
fn run_profile_probe_is_ready(report: &RunProfileProbeReport, include_code_review: bool) -> bool {
    report
        .result
        .as_ref()
        .is_ok_and(|usage| collect_blocked_limits(usage, include_code_review).is_empty())
}
/// Builds the preflight report list with the already-probed current profile
/// first; every other profile is probed in rotation order, with its
/// order_index shifted one slot down to account for the prepended report.
fn run_preflight_reports_with_current_first(
    state: &AppState,
    current_profile: &str,
    current_report: RunProfileProbeReport,
    base_url: Option<&str>,
) -> Vec<RunProfileProbeReport> {
    let rotation = profile_rotation_order(state, current_profile);
    let mut reports = Vec::with_capacity(state.profiles.len());
    reports.push(current_report);
    for mut report in collect_run_profile_reports(state, rotation, base_url) {
        report.order_index += 1;
        reports.push(report);
    }
    reports
}
/// Builds the list of profiles that can take the next run: quota-compatible
/// profiles whose usage shows no blocked limits. Usage comes from the live
/// probe when it succeeded, otherwise from a persisted snapshot — but only
/// when a snapshot exists and is still usable. Survivors are handed to the
/// scheduler for ordering.
fn ready_profile_candidates(
    reports: &[RunProfileProbeReport],
    include_code_review: bool,
    preferred_profile: Option<&str>,
    state: &AppState,
    persisted_usage_snapshots: Option<&BTreeMap<String, RuntimeProfileUsageSnapshot>>,
) -> Vec<ReadyProfileCandidate> {
    let candidates = reports
        .iter()
        .filter_map(|report| {
            if !report.auth.quota_compatible {
                return None;
            }
            // Live usage preferred; persisted snapshot as fallback.
            let (usage, quota_source) = match report.result.as_ref() {
                Ok(usage) => (usage.clone(), RuntimeQuotaSource::LiveProbe),
                Err(_) => {
                    let snapshot = persisted_usage_snapshots
                        .and_then(|snapshots| snapshots.get(&report.name))?;
                    let now = Local::now().timestamp();
                    if !runtime_usage_snapshot_is_usable(snapshot, now) {
                        return None;
                    }
                    (
                        usage_from_runtime_usage_snapshot(snapshot),
                        RuntimeQuotaSource::PersistedSnapshot,
                    )
                }
            };
            // Any blocked limit disqualifies the profile entirely.
            if !collect_blocked_limits(&usage, include_code_review).is_empty() {
                return None;
            }
            Some(ReadyProfileCandidate {
                name: report.name.clone(),
                usage,
                order_index: report.order_index,
                preferred: preferred_profile == Some(report.name.as_str()),
                quota_source,
            })
        })
        .collect::<Vec<_>>();
    schedule_ready_profile_candidates(candidates, state, preferred_profile)
}
/// Orders ready candidates for selection: a runtime-aware sort (near-optimal
/// band, cooldowns, recency, then static pressure scores), followed by a
/// hysteresis pass that moves the preferred profile back to the front when
/// its score is within RUN_SELECTION_HYSTERESIS_BPS of the sorted winner —
/// avoiding profile flapping on near-ties.
fn schedule_ready_profile_candidates(
    mut candidates: Vec<ReadyProfileCandidate>,
    state: &AppState,
    preferred_profile: Option<&str>,
) -> Vec<ReadyProfileCandidate> {
    // Zero or one candidate: nothing to order.
    if candidates.len() <= 1 {
        return candidates;
    }
    let now = Local::now().timestamp();
    // The best (lowest) total pressure anchors the "near-optimal" band used
    // by the runtime sort key.
    let best_total_pressure = candidates
        .iter()
        .map(|candidate| ready_profile_score(candidate).total_pressure)
        .min()
        .unwrap_or(i64::MAX);
    candidates.sort_by_key(|candidate| {
        ready_profile_runtime_sort_key(candidate, state, best_total_pressure, now)
    });
    // Hysteresis: keep the previously-preferred profile on top when it is
    // not in cooldown and scores close enough to the sorted winner.
    if let Some(preferred_name) = preferred_profile {
        if let Some(preferred_index) = candidates.iter().position(|candidate| {
            candidate.name == preferred_name
                && !profile_in_run_selection_cooldown(state, &candidate.name, now)
        }) {
            let preferred_score = ready_profile_score(&candidates[preferred_index]).total_pressure;
            let selected_score = ready_profile_score(&candidates[0]).total_pressure;
            if preferred_index > 0
                && score_within_bps(
                    preferred_score,
                    selected_score,
                    RUN_SELECTION_HYSTERESIS_BPS,
                )
            {
                let preferred_candidate = candidates.remove(preferred_index);
                candidates.insert(0, preferred_candidate);
            }
        }
    }
    candidates
}
/// Sort key layering runtime state on top of the static score (lower tuples
/// sort first): candidates near the optimal pressure band come first, with
/// recently-selected ones demoted (cooldown) and then ordered
/// least-recently-selected first; remaining ties fall through to
/// `ready_profile_sort_key`.
fn ready_profile_runtime_sort_key(
    candidate: &ReadyProfileCandidate,
    state: &AppState,
    best_total_pressure: i64,
    now: i64,
) -> (
    usize,
    usize,
    i64,
    (
        i64,
        i64,
        i64,
        Reverse<i64>,
        Reverse<i64>,
        Reverse<i64>,
        i64,
        i64,
        usize,
        usize,
        usize,
    ),
) {
    let score = ready_profile_score(candidate);
    let near_optimal = score_within_bps(
        score.total_pressure,
        best_total_pressure,
        RUN_SELECTION_NEAR_OPTIMAL_BPS,
    );
    // Cooldown and recency only matter inside the near-optimal band; outside
    // it the static score dominates anyway.
    let recently_used =
        near_optimal && profile_in_run_selection_cooldown(state, &candidate.name, now);
    let last_selected_at = if near_optimal {
        state
            .last_run_selected_at
            .get(&candidate.name)
            .copied()
            .unwrap_or(i64::MIN)
    } else {
        i64::MIN
    };
    (
        // Near-optimal candidates (0) sort before the rest (1).
        if near_optimal { 0usize } else { 1usize },
        // Within the band, candidates not in cooldown (0) come first.
        if recently_used { 1usize } else { 0usize },
        // Least-recently-selected first.
        last_selected_at,
        ready_profile_sort_key(candidate),
    )
}
/// Static tie-breaker tuple (lower sorts first): pressure scores ascending,
/// remaining capacity descending (via `Reverse`), earlier resets first, then
/// quota-source quality, explicit preference (0 = preferred), and finally
/// the original probe order.
fn ready_profile_sort_key(
    candidate: &ReadyProfileCandidate,
) -> (
    i64,
    i64,
    i64,
    Reverse<i64>,
    Reverse<i64>,
    Reverse<i64>,
    i64,
    i64,
    usize,
    usize,
    usize,
) {
    let score = ready_profile_score(candidate);
    (
        score.total_pressure,
        score.weekly_pressure,
        score.five_hour_pressure,
        Reverse(score.reserve_floor),
        Reverse(score.weekly_remaining),
        Reverse(score.five_hour_remaining),
        score.weekly_reset_at,
        score.five_hour_reset_at,
        runtime_quota_source_sort_key(RuntimeRouteKind::Responses, candidate.quota_source),
        if candidate.preferred { 0usize } else { 1usize },
        candidate.order_index,
    )
}
/// Scores a candidate's usage for the default (Responses) route.
fn ready_profile_score(candidate: &ReadyProfileCandidate) -> ReadyProfileScore {
    ready_profile_score_for_route(&candidate.usage, RuntimeRouteKind::Responses)
}
/// Computes the pressure score of one usage report for a given route.
///
/// Missing windows are worst case (MAX pressure, 0 remaining). The weekly
/// window is weighted more heavily than the 5h one (10x on Responses/
/// Websocket routes, 8x on Compact/Standard), and the route's quota band
/// adds a reserve bias so thin/critical profiles sort behind healthy ones;
/// exhausted/unknown bands use i64::MAX / 4 so they sort last while the
/// saturating adds keep the total from overflowing.
fn ready_profile_score_for_route(
    usage: &UsageResponse,
    route_kind: RuntimeRouteKind,
) -> ReadyProfileScore {
    let weekly = required_main_window_snapshot(usage, "weekly");
    let five_hour = required_main_window_snapshot(usage, "5h");
    let weekly_pressure = weekly.map_or(i64::MAX, |window| window.pressure_score);
    let five_hour_pressure = five_hour.map_or(i64::MAX, |window| window.pressure_score);
    let weekly_remaining = weekly.map_or(0, |window| window.remaining_percent);
    let five_hour_remaining = five_hour.map_or(0, |window| window.remaining_percent);
    let weekly_weight = match route_kind {
        RuntimeRouteKind::Responses | RuntimeRouteKind::Websocket => 10,
        RuntimeRouteKind::Compact | RuntimeRouteKind::Standard => 8,
    };
    let reserve_bias = match runtime_quota_pressure_band_for_route(usage, route_kind) {
        RuntimeQuotaPressureBand::Healthy => 0,
        RuntimeQuotaPressureBand::Thin => 250_000,
        RuntimeQuotaPressureBand::Critical => 1_000_000,
        RuntimeQuotaPressureBand::Exhausted | RuntimeQuotaPressureBand::Unknown => i64::MAX / 4,
    };
    ReadyProfileScore {
        total_pressure: reserve_bias
            .saturating_add(weekly_pressure.saturating_mul(weekly_weight))
            .saturating_add(five_hour_pressure),
        weekly_pressure,
        five_hour_pressure,
        // The tighter of the two windows bounds how much headroom is left.
        reserve_floor: weekly_remaining.min(five_hour_remaining),
        weekly_remaining,
        five_hour_remaining,
        weekly_reset_at: weekly.map_or(i64::MAX, |window| window.reset_at),
        five_hour_reset_at: five_hour.map_or(i64::MAX, |window| window.reset_at),
    }
}
/// A profile is in cooldown when it was selected for a run less than
/// RUN_SELECTION_COOLDOWN_SECONDS ago; never-selected profiles are not.
fn profile_in_run_selection_cooldown(state: &AppState, profile_name: &str, now: i64) -> bool {
    state
        .last_run_selected_at
        .get(profile_name)
        .copied()
        .is_some_and(|last| now.saturating_sub(last) < RUN_SELECTION_COOLDOWN_SECONDS)
}
/// True when `candidate_score` is no worse than `best_score` plus a
/// tolerance of `bps` basis points. Comparison is done in i128 so i64
/// scores cannot overflow the multiplication.
fn score_within_bps(candidate_score: i64, best_score: i64, bps: i64) -> bool {
    if candidate_score <= best_score {
        return true;
    }
    let scaled_candidate = i128::from(candidate_score) * 10_000;
    let tolerance_ceiling = i128::from(best_score) * i128::from(10_000 + bps);
    scaled_candidate <= tolerance_ceiling
}
/// Summarizes one rate-limit window ("weekly" or "5h") into remaining
/// percent, reset time, and a pressure score; None when the usage report
/// has no rate-limit data or no window with that label.
///
/// Pressure = seconds-until-reset * 1000 / remaining_percent (the divisor
/// floored at 1): the longer the wait and the thinner the remaining quota,
/// the higher the pressure. A window without a reset timestamp uses
/// i64::MAX as a "never resets" sentinel and keeps MAX seconds-until-reset.
fn required_main_window_snapshot(usage: &UsageResponse, label: &str) -> Option<MainWindowSnapshot> {
    let window = find_main_window(usage.rate_limit.as_ref()?, label)?;
    let remaining_percent = remaining_percent(window.used_percent);
    let reset_at = window.reset_at.unwrap_or(i64::MAX);
    let seconds_until_reset = if reset_at == i64::MAX {
        i64::MAX
    } else {
        // A reset time in the past counts as zero wait, not negative.
        (reset_at - Local::now().timestamp()).max(0)
    };
    let pressure_score = seconds_until_reset
        .saturating_mul(1_000)
        .checked_div(remaining_percent.max(1))
        .unwrap_or(i64::MAX);
    Some(MainWindowSnapshot {
        remaining_percent,
        reset_at,
        pressure_score,
    })
}
/// Selection order with the current profile first, followed by the others
/// in rotation order.
fn active_profile_selection_order(state: &AppState, current_profile: &str) -> Vec<String> {
    let mut order = vec![current_profile.to_string()];
    order.extend(profile_rotation_order(state, current_profile));
    order
}
/// Applies `func` to every input, running the calls on scoped threads (one
/// per input) when there are at least two inputs; results come back in
/// input order. A panicking worker propagates as a panic here.
fn map_parallel<I, O, F>(inputs: Vec<I>, func: F) -> Vec<O>
where
    I: Send,
    O: Send,
    F: Fn(I) -> O + Sync,
{
    // Nothing to parallelize for zero or one input: run inline.
    if inputs.len() < 2 {
        return inputs.into_iter().map(func).collect();
    }
    thread::scope(|scope| {
        let func = &func;
        // Collect forces every spawn to happen before the first join.
        let handles: Vec<_> = inputs
            .into_iter()
            .map(|input| scope.spawn(move || func(input)))
            .collect();
        handles
            .into_iter()
            .map(|handle| handle.join().expect("parallel worker panicked"))
            .collect()
    })
}
/// Probes every profile other than the current one (in rotation order) and
/// returns, in scheduling order, the names of those ready to take a run.
fn find_ready_profiles(
    state: &AppState,
    current_profile: &str,
    base_url: Option<&str>,
    include_code_review: bool,
) -> Vec<String> {
    let reports = collect_run_profile_reports(
        state,
        profile_rotation_order(state, current_profile),
        base_url,
    );
    ready_profile_candidates(&reports, include_code_review, None, state, None)
        .into_iter()
        .map(|candidate| candidate.name)
        .collect()
}
/// Rotation order starting after `current_profile`: the profiles following
/// it (in map-key order) first, then the ones before it; the current
/// profile itself is excluded. When the current profile is not a managed
/// profile, every name is returned in map-key order.
fn profile_rotation_order(state: &AppState, current_profile: &str) -> Vec<String> {
    let names: Vec<String> = state.profiles.keys().cloned().collect();
    match names.iter().position(|name| name == current_profile) {
        // Rotate around the current profile's position.
        Some(index) => names
            .iter()
            .skip(index + 1)
            .chain(names.iter().take(index))
            .cloned()
            .collect(),
        // Current profile not present: no name can equal it, so the old
        // `filter(|name| name != current_profile)` here was dead code —
        // return the list as-is.
        None => names,
    }
}
/// True when any CLI argument is exactly the `review` subcommand.
fn is_review_invocation(args: &[OsString]) -> bool {
    args.contains(&OsString::from("review"))
}
/// Compares two paths by their canonicalized forms so different spellings
/// (symlinks, `.`/`..` components) of the same location compare equal;
/// paths that cannot be canonicalized (e.g. do not exist) are compared
/// verbatim.
fn same_path(left: &Path, right: &Path) -> bool {
    let canon = |path: &Path| fs::canonicalize(path).unwrap_or_else(|_| path.to_path_buf());
    canon(left) == canon(right)
}
/// Canonicalizes a path for comparison, falling back to the path unchanged
/// when canonicalization fails (e.g. the path does not exist).
fn normalize_path_for_compare(path: &Path) -> PathBuf {
    match fs::canonicalize(path) {
        Ok(resolved) => resolved,
        Err(_) => path.to_path_buf(),
    }
}
/// Makes a path absolute: relative paths are resolved against the current
/// working directory, absolute paths pass through untouched.
fn absolutize(path: PathBuf) -> Result<PathBuf> {
    if path.is_relative() {
        let cwd = env::current_dir().context("failed to determine current directory")?;
        Ok(cwd.join(path))
    } else {
        Ok(path)
    }
}
/// Pre-shared-root default CODEX_HOME: `<home dir>/<DEFAULT_CODEX_DIR>`.
fn legacy_default_codex_home() -> Result<PathBuf> {
    let home = home_dir().context("failed to determine home directory")?;
    Ok(home.join(DEFAULT_CODEX_DIR))
}
/// Default shared CODEX_HOME location: `<prodex root>/<DEFAULT_CODEX_DIR>`.
fn prodex_default_shared_codex_root(root: &Path) -> PathBuf {
    root.join(DEFAULT_CODEX_DIR)
}
/// Resolves a shared-CODEX_HOME override: relative overrides are anchored
/// at the prodex root, absolute ones win as-is.
fn resolve_shared_codex_root(root: &Path, path: PathBuf) -> PathBuf {
    if path.is_relative() {
        root.join(path)
    } else {
        path
    }
}
/// Chooses the default CODEX_HOME. The legacy location is honored only when
/// the caller did not force the shared root, the shared root has not been
/// created yet, and the legacy directory actually exists; in every other
/// case the shared root wins (same predicate as the original, De Morgan'd).
fn select_default_codex_home(
    shared_codex_root: &Path,
    legacy_codex_home: &Path,
    override_active: bool,
) -> PathBuf {
    let keep_legacy =
        !override_active && !shared_codex_root.exists() && legacy_codex_home.exists();
    if keep_legacy {
        legacy_codex_home.to_path_buf()
    } else {
        shared_codex_root.to_path_buf()
    }
}
/// Default CODEX_HOME for this installation; a PRODEX_SHARED_CODEX_HOME
/// override always forces the shared root.
fn default_codex_home(paths: &AppPaths) -> Result<PathBuf> {
    let legacy = legacy_default_codex_home()?;
    let override_active = env::var_os("PRODEX_SHARED_CODEX_HOME").is_some();
    Ok(select_default_codex_home(
        &paths.shared_codex_root,
        &legacy,
        override_active,
    ))
}
impl AppPaths {
    /// Resolves every filesystem location prodex uses, honoring the
    /// PRODEX_HOME and PRODEX_SHARED_CODEX_HOME environment overrides. A
    /// relative PRODEX_HOME is made absolute against the current directory;
    /// a relative PRODEX_SHARED_CODEX_HOME is resolved against the prodex
    /// root.
    fn discover() -> Result<Self> {
        let root = match env::var_os("PRODEX_HOME") {
            Some(path) => absolutize(PathBuf::from(path))?,
            None => home_dir()
                .context("failed to determine home directory")?
                .join(DEFAULT_PRODEX_DIR),
        };
        Ok(Self {
            state_file: root.join("state.json"),
            managed_profiles_root: root.join("profiles"),
            shared_codex_root: match env::var_os("PRODEX_SHARED_CODEX_HOME") {
                Some(path) => resolve_shared_codex_root(&root, PathBuf::from(path)),
                None => prodex_default_shared_codex_root(&root),
            },
            legacy_shared_codex_root: root.join("shared"),
            // `root` is initialized last so the joins above can borrow it
            // before it is moved into the struct.
            root,
        })
    }
}
impl AppState {
    /// Loads state from disk, falling back to the last-good backup file and
    /// reporting whether that fallback was used. When neither file exists, a
    /// default state is returned. The loaded state is compacted against the
    /// current time.
    fn load_with_recovery(paths: &AppPaths) -> Result<RecoveredLoad<Self>> {
        cleanup_stale_login_dirs(paths);
        if !paths.state_file.exists() && !state_last_good_file_path(paths).exists() {
            return Ok(RecoveredLoad {
                value: Self::default(),
                recovered_from_backup: false,
            });
        }
        let loaded = load_json_file_with_backup::<Self>(
            &paths.state_file,
            &state_last_good_file_path(paths),
        )?;
        Ok(RecoveredLoad {
            value: compact_app_state(loaded.value, Local::now().timestamp()),
            recovered_from_backup: loaded.recovered_from_backup,
        })
    }
    /// Loads state, discarding the backup-recovery flag.
    fn load(paths: &AppPaths) -> Result<Self> {
        Ok(Self::load_with_recovery(paths)?.value)
    }
    /// Persists this state via a read-merge-write cycle under the state-file
    /// lock: on-disk state is re-read and merged with `self` so concurrent
    /// writers do not clobber each other's changes, then the merged,
    /// compacted result is written atomically.
    fn save(&self, paths: &AppPaths) -> Result<()> {
        cleanup_stale_login_dirs(paths);
        // The lock must be held across the re-load so the read-merge-write
        // cannot interleave with another process's save.
        let _lock = acquire_state_file_lock(paths)?;
        let existing = Self::load(paths)?;
        let merged = compact_app_state(
            merge_app_state_for_save(existing, self),
            Local::now().timestamp(),
        );
        let json =
            serde_json::to_string_pretty(&merged).context("failed to serialize prodex state")?;
        write_state_json_atomic(paths, &json)?;
        Ok(())
    }
}
/// Codex executable to launch; overridable via PRODEX_CODEX_BIN.
fn codex_bin() -> OsString {
    match env::var_os("PRODEX_CODEX_BIN") {
        Some(bin) => bin,
        None => OsString::from("codex"),
    }
}
/// Claude executable to launch; overridable via PRODEX_CLAUDE_BIN.
fn claude_bin() -> OsString {
    match env::var_os("PRODEX_CLAUDE_BIN") {
        Some(bin) => bin,
        None => OsString::from("claude"),
    }
}
impl Drop for RuntimeBrokerLease {
    /// Best-effort removal of the lease file when the lease is released; a
    /// failed delete is deliberately ignored (Drop cannot propagate errors).
    fn drop(&mut self) {
        let _ = fs::remove_file(&self.path);
    }
}
impl RuntimeProxyEndpoint {
    /// Creates a broker lease for child process `pid` inside this endpoint's
    /// lease directory (thin wrapper over
    /// `create_runtime_broker_lease_in_dir_for_pid`).
    fn create_child_lease(&self, pid: u32) -> Result<RuntimeBrokerLease> {
        create_runtime_broker_lease_in_dir_for_pid(&self.lease_dir, pid)
    }
}
/// Derives an identifier for one broker configuration by hashing the
/// upstream base URL, the code-review flag, and the OpenAI mount path.
///
/// NOTE(review): `DefaultHasher` output is only guaranteed stable within a
/// single Rust release; if these keys are persisted across upgrades, two
/// binaries may disagree on the key for the same configuration — confirm
/// that is acceptable.
fn runtime_broker_key(upstream_base_url: &str, include_code_review: bool) -> String {
    let mut hasher = DefaultHasher::new();
    upstream_base_url.hash(&mut hasher);
    include_code_review.hash(&mut hasher);
    RUNTIME_PROXY_OPENAI_MOUNT_PATH.hash(&mut hasher);
    // 16 hex digits = the full zero-padded 64-bit hash.
    format!("{:016x}", hasher.finish())
}
fn runtime_process_pid_alive(pid: u32) -> bool {
let proc_dir = PathBuf::from(format!("/proc/{pid}"));
if proc_dir.exists() {
return true;
}
collect_process_rows().into_iter().any(|row| row.pid == pid)
}
/// Builds a `{prefix}-{pid}-{nanos}-{seq}` token that is unique enough for
/// lease/instance identification: it mixes the process id, a nanosecond
/// wall-clock reading, and a process-wide monotonically increasing counter.
/// Not cryptographically random.
fn runtime_random_token(prefix: &str) -> String {
    let now_nanos = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_nanos())
        .unwrap_or(0);
    let sequence = STATE_SAVE_SEQUENCE.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();
    format!("{prefix}-{pid}-{now_nanos:x}-{sequence:x}")
}
/// Seconds a freshly spawned broker is given before it can be considered
/// stale: the ready timeout (rounded up to whole seconds) plus one, but
/// never less than the configured idle grace period.
fn runtime_broker_startup_grace_seconds() -> i64 {
    let ready_seconds = runtime_broker_ready_timeout_ms().div_ceil(1_000) as i64;
    let padded = ready_seconds.saturating_add(1);
    if padded > RUNTIME_BROKER_IDLE_GRACE_SECONDS {
        padded
    } else {
        RUNTIME_BROKER_IDLE_GRACE_SECONDS
    }
}
/// Reads the broker registry for `broker_key` from disk.
///
/// Returns `Ok(None)` when neither the primary registry file nor its
/// last-good backup exists; otherwise loads through the backup-aware JSON
/// reader and returns the recovered value.
fn load_runtime_broker_registry(
    paths: &AppPaths,
    broker_key: &str,
) -> Result<Option<RuntimeBrokerRegistry>> {
    let primary = runtime_broker_registry_file_path(paths, broker_key);
    let backup = runtime_broker_registry_last_good_file_path(paths, broker_key);
    if !primary.exists() && !backup.exists() {
        return Ok(None);
    }
    load_json_file_with_backup::<RuntimeBrokerRegistry>(&primary, &backup)
        .map(|loaded| Some(loaded.value))
}
/// Serializes `registry` and writes it to the per-key registry file,
/// maintaining the last-good backup alongside it.
///
/// The parent directory is created on demand, and the written JSON is
/// re-parsed by the validation callback before the backup is rotated.
fn save_runtime_broker_registry(
    paths: &AppPaths,
    broker_key: &str,
    registry: &RuntimeBrokerRegistry,
) -> Result<()> {
    let target = runtime_broker_registry_file_path(paths, broker_key);
    if let Some(dir) = target.parent() {
        fs::create_dir_all(dir)
            .with_context(|| format!("failed to create {}", dir.display()))?;
    }
    let serialized = serde_json::to_string_pretty(registry)
        .context("failed to serialize runtime broker registry")?;
    let backup = runtime_broker_registry_last_good_file_path(paths, broker_key);
    write_json_file_with_backup(&target, &backup, &serialized, |candidate| {
        // Round-trip parse so a corrupt write never replaces the backup.
        serde_json::from_str::<RuntimeBrokerRegistry>(candidate)
            .map(|_| ())
            .context("failed to validate runtime broker registry")
    })
}
/// Deletes the registry files for `broker_key`, but only when the on-disk
/// entry still carries `instance_token`.
///
/// The token guard prevents a cleanup racing with a newer broker from
/// deleting that broker's freshly written registry. Best-effort: load and
/// removal failures are silently ignored.
fn remove_runtime_broker_registry_if_token_matches(
    paths: &AppPaths,
    broker_key: &str,
    instance_token: &str,
) {
    let token_matches = match load_runtime_broker_registry(paths, broker_key) {
        Ok(Some(registry)) => registry.instance_token == instance_token,
        _ => false,
    };
    if !token_matches {
        return;
    }
    let primary = runtime_broker_registry_file_path(paths, broker_key);
    let backup = runtime_broker_registry_last_good_file_path(paths, broker_key);
    let _ = fs::remove_file(primary);
    let _ = fs::remove_file(backup);
}
/// Builds the short-timeout HTTP client used for broker control-plane calls
/// (health, metrics, activation). Timeouts come from the tunables so probes
/// against a dead broker fail fast instead of hanging the CLI.
fn runtime_broker_client() -> Result<Client> {
    let connect_timeout = Duration::from_millis(runtime_broker_health_connect_timeout_ms());
    let read_timeout = Duration::from_millis(runtime_broker_health_read_timeout_ms());
    Client::builder()
        .connect_timeout(connect_timeout)
        .timeout(read_timeout)
        .build()
        .context("failed to build runtime broker control client")
}
/// Control-plane health endpoint of the broker described by `registry`.
fn runtime_broker_health_url(registry: &RuntimeBrokerRegistry) -> String {
    let mut url = String::from("http://");
    url.push_str(&registry.listen_addr);
    url.push_str("/__prodex/runtime/health");
    url
}
/// Control-plane JSON metrics endpoint of the broker described by `registry`.
fn runtime_broker_metrics_url(registry: &RuntimeBrokerRegistry) -> String {
    let mut url = String::from("http://");
    url.push_str(&registry.listen_addr);
    url.push_str("/__prodex/runtime/metrics");
    url
}
/// Prometheus-format metrics endpoint of the broker described by `registry`.
fn runtime_broker_metrics_prometheus_url(registry: &RuntimeBrokerRegistry) -> String {
    let mut url = String::from("http://");
    url.push_str(&registry.listen_addr);
    url.push_str("/__prodex/runtime/metrics/prometheus");
    url
}
/// Profile-activation endpoint of the broker described by `registry`.
fn runtime_broker_activate_url(registry: &RuntimeBrokerRegistry) -> String {
    let mut url = String::from("http://");
    url.push_str(&registry.listen_addr);
    url.push_str("/__prodex/runtime/activate");
    url
}
/// Reconstructs the versioned OpenAI mount path used by older prodex builds
/// (legacy prefix + the build's version string).
fn legacy_runtime_proxy_openai_mount_path(version: &str) -> String {
    let mut mount_path = LEGACY_RUNTIME_PROXY_OPENAI_MOUNT_PATH_PREFIX.to_string();
    mount_path.push_str(version);
    mount_path
}
/// Extracts the version from `prodex --version` output.
///
/// Expects the first whitespace-separated token to be exactly `prodex` and
/// returns the second token as the version; any other shape yields `None`.
/// Trailing tokens are ignored.
fn parse_prodex_version_output(output: &str) -> Option<String> {
    let mut tokens = output.split_whitespace();
    match (tokens.next(), tokens.next()) {
        (Some("prodex"), Some(version)) if !version.is_empty() => Some(version.to_string()),
        _ => None,
    }
}
/// Runs `<executable> --version` and parses the reported prodex version.
///
/// Fails when the process cannot be launched, exits unsuccessfully, or its
/// stdout does not match the `prodex <version>` shape. Stdin and stderr are
/// detached so the probe can never block on the terminal.
fn read_prodex_version_from_executable(executable: &Path) -> Result<String> {
    let output = Command::new(executable)
        .arg("--version")
        .stdin(Stdio::null())
        .stderr(Stdio::null())
        .output()
        .with_context(|| format!("failed to run {} --version", executable.display()))?;
    if !output.status.success() {
        // Report the numeric exit code, or "signal" for signal termination.
        let status_text = match output.status.code() {
            Some(code) => code.to_string(),
            None => "signal".to_string(),
        };
        bail!(
            "{} --version exited with status {}",
            executable.display(),
            status_text
        );
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    parse_prodex_version_output(&stdout).with_context(|| {
        format!(
            "failed to parse prodex version output from {}",
            executable.display()
        )
    })
}
/// Best-effort resolution of the executable path behind `pid`.
///
/// Scans the process table and, as a heuristic, picks the LAST argv entry
/// that exists as a filesystem path — NOTE(review): presumably this targets
/// wrapper invocations where the real binary appears late in argv; an
/// unrelated existing path among the arguments would be picked up too —
/// confirm against `collect_process_rows` callers. Falls back to
/// `/proc/<pid>/exe` (Linux) when the argv scan yields nothing.
fn runtime_process_executable_path(pid: u32) -> Option<PathBuf> {
    collect_process_rows()
        .into_iter()
        .find(|row| row.pid == pid)
        .and_then(|row| {
            row.args
                .into_iter()
                .filter(|arg| Path::new(arg).exists())
                .last()
        })
        .map(PathBuf::from)
        .or_else(|| fs::read_link(format!("/proc/{pid}/exe")).ok())
}
/// Determines the OpenAI mount path served by the broker in `registry`.
///
/// Modern registries record the path directly. For older registries that
/// predate the field, the broker binary is located via its pid and asked for
/// its version, from which the legacy versioned mount path is reconstructed.
fn runtime_broker_openai_mount_path(registry: &RuntimeBrokerRegistry) -> Result<String> {
    if let Some(recorded) = &registry.openai_mount_path {
        return Ok(recorded.clone());
    }
    let pid = registry.pid;
    let executable = runtime_process_executable_path(pid)
        .with_context(|| format!("failed to resolve executable for runtime broker pid {pid}"))?;
    let version = read_prodex_version_from_executable(&executable)?;
    Ok(legacy_runtime_proxy_openai_mount_path(&version))
}
/// Turns a broker registry entry into a usable proxy endpoint.
///
/// A lease is taken for the current process first, so the broker counts this
/// caller as a live client for the lifetime of the returned endpoint.
fn runtime_proxy_endpoint_from_registry(
    paths: &AppPaths,
    broker_key: &str,
    registry: &RuntimeBrokerRegistry,
) -> Result<RuntimeProxyEndpoint> {
    let lease = create_runtime_broker_lease(paths, broker_key)?;
    let lease_dir = runtime_broker_lease_dir(paths, broker_key);
    let addr_text = &registry.listen_addr;
    let listen_addr = addr_text
        .parse()
        .with_context(|| format!("invalid runtime broker listen address {addr_text}"))?;
    Ok(RuntimeProxyEndpoint {
        listen_addr,
        openai_mount_path: runtime_broker_openai_mount_path(registry)?,
        lease_dir,
        _lease: Some(lease),
    })
}
/// Assembles the argv for spawning a `__runtime-broker` child process.
///
/// Argument order is fixed: subcommand, profile pair, upstream pair, the
/// optional `--include-code-review` flag, the key/token pairs, and finally
/// the optional `--listen-addr` pair.
fn runtime_broker_process_args(
    current_profile: &str,
    upstream_base_url: &str,
    include_code_review: bool,
    broker_key: &str,
    instance_token: &str,
    admin_token: &str,
    listen_addr: Option<&str>,
) -> Vec<OsString> {
    let mut plain: Vec<&str> = vec![
        "__runtime-broker",
        "--current-profile",
        current_profile,
        "--upstream-base-url",
        upstream_base_url,
    ];
    if include_code_review {
        plain.push("--include-code-review");
    }
    plain.extend([
        "--broker-key",
        broker_key,
        "--instance-token",
        instance_token,
        "--admin-token",
        admin_token,
    ]);
    if let Some(addr) = listen_addr {
        plain.extend(["--listen-addr", addr]);
    }
    plain.into_iter().map(OsString::from).collect()
}
/// Probes the broker's health endpoint.
///
/// Returns `Ok(None)` for transport failures or non-2xx responses (the
/// broker is treated as unavailable, not as an error); returns `Err` only
/// when a successful response cannot be decoded.
fn probe_runtime_broker_health(
    client: &Client,
    registry: &RuntimeBrokerRegistry,
) -> Result<Option<RuntimeBrokerHealth>> {
    let request = client
        .get(runtime_broker_health_url(registry))
        .header("X-Prodex-Admin-Token", &registry.admin_token);
    let Ok(response) = request.send() else {
        return Ok(None);
    };
    if !response.status().is_success() {
        return Ok(None);
    }
    response
        .json::<RuntimeBrokerHealth>()
        .context("failed to decode runtime broker health response")
        .map(Some)
}
/// Probes the broker's JSON metrics endpoint.
///
/// Mirrors `probe_runtime_broker_health`: transport failures and non-2xx
/// responses map to `Ok(None)`; only a malformed successful response is an
/// error.
fn probe_runtime_broker_metrics(
    client: &Client,
    registry: &RuntimeBrokerRegistry,
) -> Result<Option<RuntimeBrokerMetrics>> {
    let request = client
        .get(runtime_broker_metrics_url(registry))
        .header("X-Prodex-Admin-Token", &registry.admin_token);
    let Ok(response) = request.send() else {
        return Ok(None);
    };
    if !response.status().is_success() {
        return Ok(None);
    }
    response
        .json::<RuntimeBrokerMetrics>()
        .context("failed to decode runtime broker metrics response")
        .map(Some)
}
/// Gathers metrics snapshots from every registered broker that is both
/// alive (by pid) and answering its metrics endpoint. Best-effort: brokers
/// that cannot be loaded, are dead, or do not respond are skipped silently.
fn collect_live_runtime_broker_observations(paths: &AppPaths) -> Vec<RuntimeBrokerObservation> {
    let client = match runtime_broker_client() {
        Ok(client) => client,
        Err(_) => return Vec::new(),
    };
    runtime_broker_registry_keys(paths)
        .into_iter()
        .filter_map(|broker_key| {
            let registry = load_runtime_broker_registry(paths, &broker_key).ok()??;
            if !runtime_process_pid_alive(registry.pid) {
                return None;
            }
            let metrics = probe_runtime_broker_metrics(&client, &registry).ok()??;
            Some(RuntimeBrokerObservation {
                broker_key,
                listen_addr: registry.listen_addr,
                metrics,
            })
        })
        .collect()
}
/// Lists Prometheus scrape URLs for every registered broker whose process is
/// still alive. Registries that fail to load are skipped silently.
fn collect_runtime_broker_metrics_targets(paths: &AppPaths) -> Vec<String> {
    runtime_broker_registry_keys(paths)
        .into_iter()
        .filter_map(|broker_key| load_runtime_broker_registry(paths, &broker_key).ok().flatten())
        .filter(|registry| runtime_process_pid_alive(registry.pid))
        .map(|registry| runtime_broker_metrics_prometheus_url(&registry))
        .collect()
}
/// Renders a target list for display: `-` when empty, the single target
/// verbatim, or the first target plus a `(+N more)` suffix.
fn format_runtime_broker_metrics_targets(targets: &[String]) -> String {
    match targets.len() {
        0 => "-".to_string(),
        1 => targets[0].clone(),
        n => format!("{} (+{} more)", targets[0], n - 1),
    }
}
/// Tells a running broker which profile to route traffic for.
///
/// Posts `{"current_profile": ...}` to the broker's activation endpoint with
/// the admin token; any non-2xx response becomes an error that includes the
/// status and, when present, the response body.
fn activate_runtime_broker_profile(
    client: &Client,
    registry: &RuntimeBrokerRegistry,
    current_profile: &str,
) -> Result<()> {
    let payload = serde_json::json!({
        "current_profile": current_profile,
    });
    let response = client
        .post(runtime_broker_activate_url(registry))
        .header("X-Prodex-Admin-Token", &registry.admin_token)
        .json(&payload)
        .send()
        .context("failed to send runtime broker activation request")?;
    let status = response.status();
    if status.is_success() {
        return Ok(());
    }
    let body = response.text().unwrap_or_default();
    let detail = if body.is_empty() {
        String::new()
    } else {
        format!(": {body}")
    };
    bail!("runtime broker activation failed with HTTP {status}{detail}")
}
/// Creates a lease for the CURRENT process in the broker's lease directory,
/// marking this process as a live client of the broker for `broker_key`.
fn create_runtime_broker_lease(paths: &AppPaths, broker_key: &str) -> Result<RuntimeBrokerLease> {
    let lease_dir = runtime_broker_lease_dir(paths, broker_key);
    create_runtime_broker_lease_in_dir_for_pid(&lease_dir, std::process::id())
}
/// Writes a lease file `<pid>-<token>.lease` into `lease_dir` on behalf of
/// `pid`. The leading pid in the file name is what the stale-lease sweep
/// parses to decide whether the lease owner is still alive; the file body is
/// informational.
fn create_runtime_broker_lease_in_dir_for_pid(
    lease_dir: &Path,
    pid: u32,
) -> Result<RuntimeBrokerLease> {
    fs::create_dir_all(lease_dir)
        .with_context(|| format!("failed to create {}", lease_dir.display()))?;
    let file_name = format!("{}-{}.lease", pid, runtime_random_token("lease"));
    let path = lease_dir.join(file_name);
    let contents = format!("pid={pid}\n");
    fs::write(&path, contents)
        .with_context(|| format!("failed to write {}", path.display()))?;
    Ok(RuntimeBrokerLease { path })
}
fn cleanup_runtime_broker_stale_leases(paths: &AppPaths, broker_key: &str) -> usize {
let lease_dir = runtime_broker_lease_dir(paths, broker_key);
let Ok(entries) = fs::read_dir(&lease_dir) else {
return 0;
};
let mut live = 0usize;
for entry in entries.flatten() {
let path = entry.path();
let Some(file_name) = path.file_name().and_then(|name| name.to_str()) else {
continue;
};
let pid = file_name
.split('-')
.next()
.and_then(|value| value.parse::<u32>().ok());
if pid.is_some_and(runtime_process_pid_alive) {
live += 1;
} else {
let _ = fs::remove_file(path);
}
}
live
}
/// Polls an already-registered broker until it recovers or is ruled out.
///
/// Returns `Some(registry)` as soon as the registered broker was started for
/// the same upstream configuration and answers its health probe with a
/// matching instance token. Returns `None` when no registry entry exists,
/// when the registered process is dead (its registry entry is removed), or
/// when the ready timeout elapses without a healthy response.
fn wait_for_existing_runtime_broker_recovery_or_exit(
    client: &Client,
    paths: &AppPaths,
    broker_key: &str,
    upstream_base_url: &str,
    include_code_review: bool,
) -> Result<Option<RuntimeBrokerRegistry>> {
    let started_at = Instant::now();
    let poll_interval = Duration::from_millis(RUNTIME_BROKER_POLL_INTERVAL_MS);
    while started_at.elapsed() < Duration::from_millis(runtime_broker_ready_timeout_ms()) {
        let Some(existing) = load_runtime_broker_registry(paths, broker_key)? else {
            // Nothing registered (any more): the caller should spawn afresh.
            return Ok(None);
        };
        // Healthy broker for the same configuration — reuse it.
        if existing.upstream_base_url == upstream_base_url
            && existing.include_code_review == include_code_review
            && let Some(health) = probe_runtime_broker_health(client, &existing)?
            && health.instance_token == existing.instance_token
        {
            return Ok(Some(existing));
        }
        if !runtime_process_pid_alive(existing.pid) {
            // Registered broker died: clear its entry. The token guard means
            // we never delete a newer broker's freshly written registry.
            remove_runtime_broker_registry_if_token_matches(
                paths,
                broker_key,
                &existing.instance_token,
            );
            return Ok(None);
        }
        // Process is alive but not (yet) healthy or config-compatible — poll.
        thread::sleep(poll_interval);
    }
    Ok(None)
}
/// Searches all registered brokers (except `excluded_broker_key`) for a live,
/// healthy one serving the same upstream configuration.
///
/// Dead brokers encountered during the scan have their registry entries
/// removed (token-guarded). Returns the first key/registry pair whose health
/// probe confirms the registered instance token.
fn find_compatible_runtime_broker_registry(
    client: &Client,
    paths: &AppPaths,
    excluded_broker_key: &str,
    upstream_base_url: &str,
    include_code_review: bool,
) -> Result<Option<(String, RuntimeBrokerRegistry)>> {
    for broker_key in runtime_broker_registry_keys(paths) {
        if broker_key == excluded_broker_key {
            continue;
        }
        let registry = match load_runtime_broker_registry(paths, &broker_key)? {
            Some(registry) => registry,
            None => continue,
        };
        let config_matches = registry.upstream_base_url == upstream_base_url
            && registry.include_code_review == include_code_review;
        if !config_matches {
            continue;
        }
        if !runtime_process_pid_alive(registry.pid) {
            remove_runtime_broker_registry_if_token_matches(
                paths,
                &broker_key,
                &registry.instance_token,
            );
            continue;
        }
        if let Some(health) = probe_runtime_broker_health(client, &registry)? {
            if health.instance_token == registry.instance_token {
                return Ok(Some((broker_key, registry)));
            }
        }
    }
    Ok(None)
}
/// Blocks until the broker spawned with `expected_instance_token` has both
/// registered itself on disk and answered its health probe, or fails after
/// the ready timeout elapses.
///
/// The instance token is checked in the registry AND in the health response,
/// so a concurrently started broker cannot be mistaken for ours.
fn wait_for_runtime_broker_ready(
    client: &Client,
    paths: &AppPaths,
    broker_key: &str,
    expected_instance_token: &str,
) -> Result<RuntimeBrokerRegistry> {
    let started_at = Instant::now();
    let poll_interval = Duration::from_millis(RUNTIME_BROKER_POLL_INTERVAL_MS);
    while started_at.elapsed() < Duration::from_millis(runtime_broker_ready_timeout_ms()) {
        if let Some(registry) = load_runtime_broker_registry(paths, broker_key)? {
            if registry.instance_token == expected_instance_token
                && let Some(health) = probe_runtime_broker_health(client, &registry)?
                && health.instance_token == expected_instance_token
            {
                return Ok(registry);
            }
        }
        thread::sleep(poll_interval);
    }
    bail!("timed out waiting for runtime broker readiness");
}
/// Spawns a detached `__runtime-broker` child using the current prodex
/// binary, with all stdio detached so it outlives the CLI invocation.
/// `PRODEX_HOME` is propagated explicitly so the child uses the same root.
/// The child is intentionally not waited on.
fn spawn_runtime_broker_process(
    paths: &AppPaths,
    current_profile: &str,
    upstream_base_url: &str,
    include_code_review: bool,
    broker_key: &str,
    instance_token: &str,
    admin_token: &str,
    listen_addr: Option<&str>,
) -> Result<()> {
    let prodex_exe = env::current_exe().context("failed to locate current prodex binary")?;
    let args = runtime_broker_process_args(
        current_profile,
        upstream_base_url,
        include_code_review,
        broker_key,
        instance_token,
        admin_token,
        listen_addr,
    );
    let mut command = Command::new(prodex_exe);
    command
        .args(args)
        .env("PRODEX_HOME", &paths.root)
        .stdin(Stdio::null())
        .stdout(Stdio::null())
        .stderr(Stdio::null());
    command
        .spawn()
        .context("failed to spawn runtime broker process")?;
    Ok(())
}
/// Returns the listen address of a DEAD registered broker for `broker_key`,
/// if any — letting a respawned broker try to reclaim the same port. Yields
/// `None` when nothing is registered or the registered broker is still alive.
fn preferred_runtime_broker_listen_addr(
    paths: &AppPaths,
    broker_key: &str,
) -> Result<Option<String>> {
    let preferred = match load_runtime_broker_registry(paths, broker_key)? {
        Some(registry) if !runtime_process_pid_alive(registry.pid) => Some(registry.listen_addr),
        _ => None,
    };
    Ok(preferred)
}
/// Returns an endpoint for a runtime rotation proxy (broker), starting one
/// only when no compatible broker is already running.
///
/// Resolution order, under a per-broker-key ensure lock so concurrent
/// callers do not race:
///   1. an already-registered broker for this key that recovers within the
///      ready timeout;
///   2. a healthy registered broker for this key found on a direct re-check;
///   3. a healthy broker registered under a DIFFERENT key but serving the
///      same upstream configuration;
///   4. a freshly spawned broker process (reusing a dead predecessor's
///      listen address when one was recorded).
/// Whichever broker is chosen is activated for `current_profile` before the
/// endpoint is returned.
fn ensure_runtime_rotation_proxy_endpoint(
    paths: &AppPaths,
    current_profile: &str,
    upstream_base_url: &str,
    include_code_review: bool,
) -> Result<RuntimeProxyEndpoint> {
    let broker_key = runtime_broker_key(upstream_base_url, include_code_review);
    let ensure_lock_path = runtime_broker_ensure_lock_path(paths, &broker_key);
    let _ensure_lock = acquire_json_file_lock(&ensure_lock_path)?;
    // Capture the dead predecessor's address (if any) BEFORE the recovery
    // path below may delete its registry entry.
    let preferred_listen_addr = preferred_runtime_broker_listen_addr(paths, &broker_key)?;
    let broker_client = runtime_broker_client()?;
    if let Some(existing) = wait_for_existing_runtime_broker_recovery_or_exit(
        &broker_client,
        paths,
        &broker_key,
        upstream_base_url,
        include_code_review,
    )? {
        activate_runtime_broker_profile(&broker_client, &existing, current_profile)?;
        return runtime_proxy_endpoint_from_registry(paths, &broker_key, &existing);
    }
    // Direct re-check: the registry may have changed while we waited.
    if let Some(existing) = load_runtime_broker_registry(paths, &broker_key)? {
        if !runtime_process_pid_alive(existing.pid) {
            remove_runtime_broker_registry_if_token_matches(
                paths,
                &broker_key,
                &existing.instance_token,
            );
        } else if existing.upstream_base_url == upstream_base_url
            && existing.include_code_review == include_code_review
            && let Some(health) = probe_runtime_broker_health(&broker_client, &existing)?
            && health.instance_token == existing.instance_token
        {
            activate_runtime_broker_profile(&broker_client, &existing, current_profile)?;
            return runtime_proxy_endpoint_from_registry(paths, &broker_key, &existing);
        }
    }
    // Same upstream config registered under another key (key derivation can
    // differ across builds): reuse rather than spawning a duplicate.
    if let Some((existing_broker_key, existing)) = find_compatible_runtime_broker_registry(
        &broker_client,
        paths,
        &broker_key,
        upstream_base_url,
        include_code_review,
    )? {
        activate_runtime_broker_profile(&broker_client, &existing, current_profile)?;
        return runtime_proxy_endpoint_from_registry(paths, &existing_broker_key, &existing);
    }
    // No usable broker: spawn one and wait for it to come up. The instance
    // token ties the readiness/registry checks to exactly this spawn.
    let instance_token = runtime_random_token("broker");
    let admin_token = runtime_random_token("admin");
    spawn_runtime_broker_process(
        paths,
        current_profile,
        upstream_base_url,
        include_code_review,
        &broker_key,
        &instance_token,
        &admin_token,
        preferred_listen_addr.as_deref(),
    )?;
    let registry =
        wait_for_runtime_broker_ready(&broker_client, paths, &broker_key, &instance_token)?;
    activate_runtime_broker_profile(&broker_client, &registry, current_profile)?;
    runtime_proxy_endpoint_from_registry(paths, &broker_key, &registry)
}
#[cfg(test)]
#[path = "../tests/support/main_internal_harness.rs"]
mod main_internal_tests;