use std::sync::Arc;
use std::sync::Mutex;
use crate::product::otel::OtelManager;
use crate::product::protocol::ThreadId;
use crate::product::protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::product::protocol::models::TranscriptItem;
use crate::product::protocol::openai_models::ModelInfo;
use crate::product::protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use crate::product::protocol::protocol::SessionSource;
use lha_llm::RuntimeBuildSpec;
use lha_llm::RuntimeCapabilities;
use lha_llm::RuntimeClient;
use lha_llm::RuntimeClientFactory;
use lha_llm::RuntimeSession;
use lha_llm::TurnRequest;
pub use lha_llm::WEB_SEARCH_ELIGIBLE_HEADER;
use crate::product::agent::AuthManager;
use crate::product::agent::config::Config;
use crate::product::agent::default_client::ReqwestClientOptions;
use crate::product::agent::default_client::build_reqwest_client_with_options;
use crate::product::agent::dynamic_context_window::DynamicContextWindowFailure;
use crate::product::agent::dynamic_context_window::DynamicContextWindowState;
use crate::product::agent::dynamic_context_window::DynamicContextWindowSuccess;
use crate::product::agent::error::Result;
use crate::product::agent::features::Feature;
use crate::product::agent::input_slimming::InputSlimmingWireApi;
use crate::product::agent::models_manager::manager::ModelsManager;
use lha_llm::RuntimeEndpoint;
/// State held behind the `Arc` inside [`TurnRuntime`]; every clone of a
/// `TurnRuntime` points at the same instance.
struct TurnRuntimeState {
/// Configuration handed out by `TurnRuntime::config`.
config: Arc<Config>,
/// Optional auth manager; carried over unchanged by `derive_runtime`.
auth_manager: Option<Arc<AuthManager>>,
/// Factory that built `runtime`; kept so `derive_runtime` can build another client.
#[cfg_attr(not(test), allow(dead_code))]
runtime_factory: Arc<dyn RuntimeClientFactory>,
/// Conversation identifier; its string form is the `session_id` of the build spec.
#[cfg_attr(not(test), allow(dead_code))]
conversation_id: ThreadId,
/// Model metadata as supplied at construction time (before any dynamic
/// context-window value is filled in).
model_info: ModelInfo,
/// Optional shared dynamic context-window tracker, guarded by a mutex.
dynamic_context_window: Option<Arc<Mutex<DynamicContextWindowState>>>,
/// Telemetry manager; a clone is also passed to the runtime client as `telemetry`.
otel_manager: OtelManager,
/// Endpoint description after the constructor's realtime-streaming adjustment.
endpoint: RuntimeEndpoint,
/// Reasoning effort passed to the runtime client, if any.
effort: Option<ReasoningEffortConfig>,
/// Reasoning summary setting passed to the runtime client.
summary: ReasoningSummaryConfig,
/// Origin of the session; also mapped to the client's `origin_tag`.
session_source: SessionSource,
/// Runtime client produced by `runtime_factory` for this configuration.
runtime: Arc<dyn RuntimeClient>,
}
/// Snapshot of the dynamic context-window tracker, as produced by
/// `TurnRuntime::dynamic_context_window_status`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct DynamicContextWindowStatus {
/// Value reported by the tracker's `current_context_window()` at snapshot time.
pub(crate) current_context_window: i64,
/// Value reported by the tracker's `is_locked()` at snapshot time.
pub(crate) locked: bool,
}
/// Cloneable handle to the per-turn runtime; cloning only copies the `Arc`,
/// so all clones share one `TurnRuntimeState`.
#[derive(Clone)]
pub(crate) struct TurnRuntime {
/// Shared state backing this handle.
state: Arc<TurnRuntimeState>,
}
#[allow(clippy::too_many_arguments)]
impl TurnRuntime {
/// Builds a `TurnRuntime` together with its underlying runtime client.
///
/// Realtime turn streaming is switched off on `endpoint` when the
/// `ResponsesWebsockets` feature is not enabled in `config`. The adjusted
/// endpoint is used both for the client build spec and for the stored state.
/// The HTTP client is restricted to HTTP/1 when `ForceHttp1Streaming` is enabled.
pub(crate) fn new_with_dynamic_context_window(
    config: Arc<Config>,
    auth_manager: Option<Arc<AuthManager>>,
    runtime_factory: Arc<dyn RuntimeClientFactory>,
    model_info: ModelInfo,
    dynamic_context_window: Option<Arc<Mutex<DynamicContextWindowState>>>,
    otel_manager: OtelManager,
    endpoint: RuntimeEndpoint,
    effort: Option<ReasoningEffortConfig>,
    summary: ReasoningSummaryConfig,
    conversation_id: ThreadId,
    session_source: SessionSource,
) -> Self {
    let websockets_enabled = config.features.enabled(Feature::ResponsesWebsockets);
    let endpoint = {
        let mut adjusted = endpoint;
        if !websockets_enabled {
            adjusted.set_realtime_turn_streaming_enabled(false);
        }
        adjusted
    };

    let http1_only = config.features.enabled(Feature::ForceHttp1Streaming);
    let http_client = build_reqwest_client_with_options(ReqwestClientOptions { http1_only });

    let build_spec = RuntimeBuildSpec {
        endpoint_id: config.model_provider_id.clone(),
        http_client,
        model_info: model_info.clone().into(),
        telemetry: Arc::new(otel_manager.clone()),
        endpoint: endpoint.clone(),
        effort,
        summary,
        session_id: conversation_id.to_string(),
        origin_tag: Some(session_source_to_origin_tag(&session_source)),
        show_raw_agent_reasoning: config.show_raw_agent_reasoning,
        model_verbosity: config.model_verbosity,
        web_search_mode: config.web_search_mode,
        // Only features that have an experimental-menu description and are
        // currently enabled contribute their key.
        experimental_beta_feature_keys: crate::product::agent::features::FEATURES
            .iter()
            .filter_map(|spec| {
                let advertised = spec.stage.experimental_menu_description().is_some()
                    && config.features.enabled(spec.id);
                advertised.then(|| spec.key.to_string())
            })
            .collect(),
        sse_fixture_path: (*crate::product::agent::flags::CODEX_RS_SSE_FIXTURE)
            .map(str::to_string),
    };
    let runtime = runtime_factory.build_client(build_spec);

    let state = TurnRuntimeState {
        config,
        auth_manager,
        runtime_factory,
        conversation_id,
        model_info,
        dynamic_context_window,
        otel_manager,
        endpoint,
        effort,
        summary,
        session_source,
        runtime,
    };
    Self {
        state: Arc::new(state),
    }
}
/// Opens a new session on the underlying runtime client.
pub(crate) fn new_session(&self) -> Box<dyn RuntimeSession> {
    let runtime = &self.state.runtime;
    runtime.new_session()
}
fn effective_model_info(&self) -> ModelInfo {
let mut model_info = self.state.model_info.clone();
if let Some(dynamic_context_window) = &self.state.dynamic_context_window
&& model_info.context_window.is_none()
{
let dynamic_context_window = dynamic_context_window
.lock()
.unwrap_or_else(std::sync::PoisonError::into_inner);
model_info.context_window = Some(dynamic_context_window.current_context_window());
}
model_info
}
/// Returns the effective context window scaled by
/// `effective_context_window_percent` (integer percent, saturating multiply),
/// or `None` when no context window is known.
pub fn get_model_context_window(&self) -> Option<i64> {
    let info = self.effective_model_info();
    let percent = info.effective_context_window_percent;
    let window = info.context_window?;
    Some(window.saturating_mul(percent) / 100)
}
/// Returns another handle to the shared configuration.
pub fn config(&self) -> Arc<Config> {
    let shared = &self.state.config;
    Arc::clone(shared)
}
pub fn endpoint(&self) -> RuntimeEndpoint {
self.state.endpoint.clone()
}
pub fn get_otel_manager(&self) -> OtelManager {
self.state.otel_manager.clone()
}
/// Returns another handle to the auth manager, if one is set.
pub(crate) fn auth_manager(&self) -> Option<Arc<AuthManager>> {
    self.state.auth_manager.as_ref().map(Arc::clone)
}
/// Borrows the name of the stored endpoint.
pub(crate) fn endpoint_name(&self) -> &str {
    let endpoint = &self.state.endpoint;
    endpoint.name.as_str()
}
pub fn get_session_source(&self) -> SessionSource {
self.state.session_source.clone()
}
/// Returns the slug of the stored model info as an owned string.
pub fn get_model(&self) -> String {
    let info = &self.state.model_info;
    info.slug.clone()
}
pub fn get_model_info(&self) -> ModelInfo {
self.effective_model_info()
}
/// Maps the endpoint's wire API to the matching `InputSlimmingWireApi`.
///
/// The Responses API is checked first, then Chat Completions; anything else
/// is treated as the Messages API (verified by a debug assertion only).
pub(crate) fn input_slimming_wire_api(&self) -> InputSlimmingWireApi {
    let endpoint = &self.state.endpoint;
    if endpoint.uses_responses_api() {
        return InputSlimmingWireApi::Responses;
    }
    if endpoint.uses_chat_completions_api() {
        return InputSlimmingWireApi::Chat;
    }
    debug_assert!(endpoint.uses_messages_api());
    InputSlimmingWireApi::Messages
}
/// Returns the reasoning effort this runtime was configured with, if any.
pub fn get_reasoning_effort(&self) -> Option<ReasoningEffortConfig> {
    let state = &self.state;
    state.effort
}
/// Returns the reasoning summary setting this runtime was configured with.
pub fn get_reasoning_summary(&self) -> ReasoningSummaryConfig {
    let state = &self.state;
    state.summary
}
/// Returns the capabilities reported by the underlying runtime client.
pub fn runtime_capabilities(&self) -> RuntimeCapabilities {
    let runtime = &self.state.runtime;
    runtime.capabilities()
}
/// Builds a new `TurnRuntime` from the supplied settings while reusing this
/// runtime's auth manager, runtime factory, endpoint and conversation id.
#[allow(clippy::too_many_arguments)]
#[cfg_attr(not(test), allow(dead_code))]
pub(crate) fn derive_runtime(
    &self,
    config: Arc<Config>,
    model_info: ModelInfo,
    dynamic_context_window: Option<Arc<Mutex<DynamicContextWindowState>>>,
    otel_manager: OtelManager,
    effort: Option<ReasoningEffortConfig>,
    summary: ReasoningSummaryConfig,
    session_source: SessionSource,
) -> Self {
    // Pieces inherited from the current runtime.
    let state = &self.state;
    let auth_manager = state.auth_manager.clone();
    let runtime_factory = Arc::clone(&state.runtime_factory);
    let endpoint = state.endpoint.clone();
    let conversation_id = state.conversation_id;

    Self::new_with_dynamic_context_window(
        config,
        auth_manager,
        runtime_factory,
        model_info,
        dynamic_context_window,
        otel_manager,
        endpoint,
        effort,
        summary,
        conversation_id,
        session_source,
    )
}
/// Looks up `model` through `models_manager` using the current config and
/// derives a runtime for it, carrying over this runtime's dynamic
/// context-window tracker, telemetry manager, reasoning settings and
/// session source.
pub(crate) async fn derive_runtime_for_model(
    &self,
    models_manager: &ModelsManager,
    model: &str,
) -> Self {
    let config = self.config();
    let model_info = models_manager.get_model_info(model, &config).await;

    let dynamic_context_window = self.dynamic_context_window();
    let otel_manager = self.get_otel_manager();
    let effort = self.get_reasoning_effort();
    let summary = self.get_reasoning_summary();
    let session_source = self.get_session_source();

    self.derive_runtime(
        config,
        model_info,
        dynamic_context_window,
        otel_manager,
        effort,
        summary,
        session_source,
    )
}
/// Forwards a successful request's `input_tokens` to the dynamic
/// context-window tracker, along with the stored model's
/// `effective_context_window_percent`.
///
/// Returns `None` when no tracker is configured; otherwise returns whatever
/// the tracker's `record_success` returns.
pub(crate) fn record_dynamic_context_window_success(
    &self,
    input_tokens: i64,
) -> Option<DynamicContextWindowSuccess> {
    let tracker = self.state.dynamic_context_window.as_ref()?;
    let percent = self.state.model_info.effective_context_window_percent;
    tracker
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .record_success(input_tokens, percent)
}
/// Returns `true` only when a dynamic context-window tracker exists and it
/// reports that it is not locked.
pub(crate) fn should_defer_auto_compact_until_after_dynamic_probe(&self) -> bool {
    match self.state.dynamic_context_window.as_ref() {
        None => false,
        Some(tracker) => {
            let guard = tracker
                .lock()
                .unwrap_or_else(std::sync::PoisonError::into_inner);
            !guard.is_locked()
        }
    }
}
/// Returns the scaled model context window, but only when a dynamic
/// context-window tracker is configured; otherwise `None`.
pub(crate) fn dynamic_context_window_auto_compact_limit(&self) -> Option<i64> {
    if self.state.dynamic_context_window.is_none() {
        return None;
    }
    self.get_model_context_window()
}
/// Takes a snapshot of the dynamic context-window tracker (current window
/// and locked flag) under a single lock, or `None` when no tracker exists.
pub(crate) fn dynamic_context_window_status(&self) -> Option<DynamicContextWindowStatus> {
    let tracker = self.state.dynamic_context_window.as_ref()?;
    let guard = tracker
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner);
    let current_context_window = guard.current_context_window();
    let locked = guard.is_locked();
    Some(DynamicContextWindowStatus {
        current_context_window,
        locked,
    })
}
/// Asks the dynamic context-window tracker whether a request with
/// `input_tokens` should be compacted before sending. Returns `false`
/// when no tracker is configured.
pub(crate) fn should_preflight_dynamic_context_window_compact(
    &self,
    input_tokens: i64,
) -> bool {
    let Some(tracker) = self.state.dynamic_context_window.as_ref() else {
        return false;
    };
    let percent = self.state.model_info.effective_context_window_percent;
    tracker
        .lock()
        .unwrap_or_else(std::sync::PoisonError::into_inner)
        .should_preflight_compact(input_tokens, percent)
}
/// Reports a failed probe for `turn_id` with `input_tokens` to the dynamic
/// context-window tracker and returns its verdict.
///
/// Without a tracker the result is a failure with `should_retry: false` and
/// no learned context window.
pub(crate) fn record_dynamic_context_window_probe_failure(
    &self,
    turn_id: &str,
    input_tokens: i64,
) -> DynamicContextWindowFailure {
    let percent = self.state.model_info.effective_context_window_percent;
    match self.state.dynamic_context_window.as_ref() {
        Some(tracker) => tracker
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner)
            .record_probe_failure(turn_id, input_tokens, percent),
        None => DynamicContextWindowFailure {
            should_retry: false,
            learned_context_window: None,
        },
    }
}
/// Returns another handle to the shared dynamic context-window tracker, if any.
pub(crate) fn dynamic_context_window(&self) -> Option<Arc<Mutex<DynamicContextWindowState>>> {
    self.state.dynamic_context_window.as_ref().map(Arc::clone)
}
/// Asks the runtime client for its input-token estimate for `request`.
pub(crate) fn estimated_input_tokens_for_turn_request(
    &self,
    request: &TurnRequest,
) -> Option<i64> {
    let runtime = &self.state.runtime;
    runtime.estimated_input_tokens(request)
}
/// Has the runtime client compact the conversation history for `request`,
/// converting the runtime's error into this crate's error type.
pub async fn compact_turn_request(&self, request: &TurnRequest) -> Result<Vec<TranscriptItem>> {
    let runtime = &self.state.runtime;
    let compacted = runtime.compact_conversation_history(request).await;
    compacted.map_err(Into::into)
}
}
/// Maps a session source to the lowercase origin tag sent to the runtime client.
fn session_source_to_origin_tag(session_source: &SessionSource) -> String {
    let tag = match session_source {
        SessionSource::Cli => "cli",
        SessionSource::VSCode => "vscode",
        SessionSource::Exec => "exec",
        SessionSource::Mcp => "mcp",
        SessionSource::Agent => "agent",
        SessionSource::Unknown => "unknown",
    };
    tag.to_string()
}
/// Compact debug output: only the model slug and endpoint name are shown.
impl std::fmt::Debug for TurnRuntime {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let state = &self.state;
        let mut builder = f.debug_struct("TurnRuntime");
        builder.field("model", &state.model_info.slug);
        builder.field("endpoint", &state.endpoint.name);
        builder.finish()
    }
}