pub mod error;
pub mod openai_compatible;
pub mod rate_limiter;
pub mod simple_model;
pub mod simulated;
pub mod span;
pub mod strategies;
pub mod stub;
pub use error::LlmError;
pub use openai_compatible::OpenAICompatibleModel;
pub use rate_limiter::RateLimiter;
pub use simple_model::SimpleOpenAIModel;
pub use span::LlmRequestSpan;
pub use stub::StubModel;
use crate::agents::AgentConfig;
use async_openai::types::{
ChatCompletionRequestMessage, ChatCompletionTool, ChatCompletionToolChoiceOption,
CreateChatCompletionResponse,
};
use async_trait::async_trait;
use dyn_clone::DynClone;
use std::fmt::Debug;
/// Per-call inputs for a chat completion request.
///
/// Passed by value to [`AiModel::chat_completion`] and by reference to
/// [`ChatStrategy::prepare_request`].
#[derive(Debug, Clone)]
pub struct RequestConfig {
/// Messages to send with the request.
pub messages: Vec<ChatCompletionRequestMessage>,
/// Tool definitions for the request; `None` when no tools are supplied.
pub tools: Option<Vec<ChatCompletionTool>>,
/// Tool-choice option for the request, if any.
pub tool_choice: Option<ChatCompletionToolChoiceOption>,
/// Presence penalty, if any.
/// NOTE(review): presumably forwarded as the OpenAI-style
/// `presence_penalty` request parameter — confirm in the strategies.
pub presence_penalty: Option<f32>,
}
/// Successful outcome of [`AiModel::chat_completion`]: the completion
/// response together with metadata about the call that produced it.
pub struct ChatCompletionResult {
/// The chat completion response.
pub response: CreateChatCompletionResponse,
/// The request in string form.
/// NOTE(review): presumably the serialized request body that was sent —
/// confirm against the model implementations.
pub raw_request: String,
/// Timing measurements for the call.
pub timing: TimingMetadata,
/// Optional backend identifier.
/// NOTE(review): presumably names the upstream provider that served the
/// request — confirm where this is populated.
pub provider_backend: Option<String>,
/// Optional [`ShrinkInfo`] for the call; `None` when not recorded.
pub shrink_info: Option<ShrinkInfo>,
}
/// Optional latency measurements attached to a chat completion result.
///
/// Each field is `None` when the corresponding measurement is not available;
/// [`Default`] yields a value with both fields unset.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TimingMetadata {
    /// NOTE(review): presumably time-to-first-token in milliseconds — confirm
    /// against the code that populates it.
    pub ttft_ms: Option<u64>,
    /// NOTE(review): presumably the generation duration in milliseconds —
    /// confirm against the code that populates it.
    pub generation_ms: Option<u64>,
}
/// Numbers recorded alongside a completion result (see
/// `ChatCompletionResult::shrink_info`).
///
/// NOTE(review): from the field names this appears to describe how a requested
/// output-token limit was fitted into the model's context window; the exact
/// semantics and units of each field are not established in this file —
/// confirm against the code that constructs this value.
#[derive(Debug, Clone)]
pub struct ShrinkInfo {
/// NOTE(review): presumably `true` when `floor` was applied — confirm.
pub floor_used: bool,
/// NOTE(review): presumably the space left after the input — confirm.
pub available_space: u32,
/// NOTE(review): presumably the originally requested maximum — confirm.
pub requested_max: u32,
/// NOTE(review): presumably the configured lower bound — confirm.
pub floor: u32,
/// NOTE(review): presumably the estimated input size — confirm.
pub estimated_input: u32,
/// NOTE(review): presumably the model's context window size — confirm.
pub context_window: u32,
}
/// A chat-completion backend.
///
/// Implementors must be `Send + Sync`, `Debug`, and cloneable through
/// `DynClone` so the trait can be used as a trait object.
#[async_trait]
pub trait AiModel: Send + Sync + DynClone + Debug {
/// Runs one chat completion for `agent` using `request_config`.
///
/// # Errors
///
/// Returns an [`LlmError`] when the completion cannot be produced.
async fn chat_completion(
&self,
agent: &AgentConfig,
request_config: RequestConfig,
) -> Result<ChatCompletionResult, LlmError>;
}
// Implements `Clone` for `Box<dyn AiModel>` in terms of `DynClone`.
dyn_clone::clone_trait_object!(AiModel);
/// Optional overrides handed to [`ChatStrategy::prepare_request`].
///
/// The derived [`Default`] leaves every override unset.
#[derive(Debug, Clone, Default)]
pub struct RequestOverrides {
/// Replacement `max_tokens` value; `None` means no override.
/// NOTE(review): what value applies when this is `None` is decided by the
/// strategy implementation — confirm there.
pub max_tokens: Option<u32>,
}
/// Builds the request body and parses the response body for a chat
/// completion call.
///
/// Implementors must supply `prepare_request` and `parse_response`; the
/// remaining methods have defaults that can be overridden.
#[async_trait]
pub trait ChatStrategy: Send + Sync {
/// Builds the JSON request body from the agent, the request configuration
/// and any overrides.
///
/// # Errors
///
/// Returns an [`LlmError`] when the body cannot be built.
async fn prepare_request(
&self,
agent: &AgentConfig,
request: &RequestConfig,
overrides: &RequestOverrides,
) -> Result<serde_json::Value, LlmError>;
/// Converts a response body given as text into a
/// `CreateChatCompletionResponse`.
///
/// # Errors
///
/// Returns an [`LlmError`] when the body cannot be converted.
async fn parse_response(
&self,
response_body: &str,
) -> Result<CreateChatCompletionResponse, LlmError>;
/// Endpoint suffix for this strategy; defaults to `"/chat/completions"`.
/// NOTE(review): presumably appended to the provider base URL — confirm
/// at the call site.
fn endpoint_suffix(&self) -> &str {
"/chat/completions"
}
/// Whether this strategy supports streaming; defaults to `true`.
fn supports_streaming(&self) -> bool {
true
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Bare-bones [`ChatStrategy`] that supplies only the required methods,
    /// so the trait's provided methods are observed unmodified.
    #[derive(Debug)]
    struct DefaultStrategy;

    #[async_trait]
    impl ChatStrategy for DefaultStrategy {
        async fn prepare_request(
            &self,
            _agent: &AgentConfig,
            _request: &RequestConfig,
            _overrides: &RequestOverrides,
        ) -> Result<serde_json::Value, LlmError> {
            // An empty JSON object is enough; these tests never send it.
            let empty_body = serde_json::json!({});
            Ok(empty_body)
        }

        async fn parse_response(
            &self,
            _response_body: &str,
        ) -> Result<CreateChatCompletionResponse, LlmError> {
            Err(LlmError::Other("not implemented".into()))
        }
    }

    /// The provided `endpoint_suffix` is the chat-completions path.
    #[test]
    fn chat_strategy_default_endpoint_suffix() {
        assert_eq!(DefaultStrategy.endpoint_suffix(), "/chat/completions");
    }

    /// The provided `supports_streaming` reports `true`.
    #[test]
    fn chat_strategy_default_supports_streaming() {
        assert!(DefaultStrategy.supports_streaming());
    }

    /// A defaulted `RequestOverrides` carries no `max_tokens` override.
    #[test]
    fn request_overrides_default() {
        assert!(RequestOverrides::default().max_tokens.is_none());
    }
}