use std::time::Duration;
use reqwest::blocking::Client as HttpClient;
use crate::config::ResolvedAuth;
use crate::inference::redact;
use super::error::ChatError;
use super::types::{
ChatCompletionOutput, ChatCompletionRequest, ChatCompletionResponse, ChatMessage,
ReasoningEffort,
};
/// Request timeout used by the `ChatClient` constructors that do not take an
/// explicit timeout (`ChatClient::new` and `ChatClient::with_max_tokens`).
pub const DEFAULT_CHAT_TIMEOUT: Duration = Duration::from_secs(30);
/// Blocking client for a `/chat/completions` HTTP endpoint.
///
/// Holds the per-client request defaults (model, token limit, reasoning
/// effort, template kwargs), the credentials attached to every request and
/// the underlying HTTP client.
#[derive(Debug, Clone)]
pub struct ChatClient {
    /// Base URL of the API; `/chat/completions` is appended for each request
    /// after any trailing `/` is removed.
    base_url: String,
    /// Model identifier sent in the `model` field of every request.
    model: String,
    /// Optional `max_tokens` value copied into every request.
    max_tokens: Option<u32>,
    /// Optional reasoning effort copied into every request.
    reasoning_effort: Option<ReasoningEffort>,
    /// Optional JSON value copied into the request's `chat_template_kwargs`.
    chat_template_kwargs: Option<serde_json::Value>,
    /// Credentials: an optional bearer API key plus extra headers, all of
    /// which are attached to every request.
    auth: ResolvedAuth,
    /// Underlying blocking HTTP client; its timeout is fixed at construction.
    http: HttpClient,
}
impl ChatClient {
    /// Creates a client for `model` served at `base_url`, using
    /// [`DEFAULT_CHAT_TIMEOUT`], `max_tokens` unset and the default
    /// [`ResolvedAuth`].
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn new(base_url: impl Into<String>, model: impl Into<String>) -> Result<Self, ChatError> {
        Self::with_timeout(base_url, model, DEFAULT_CHAT_TIMEOUT)
    }

    /// Creates a client with an explicit request `timeout`, `max_tokens`
    /// unset and the default [`ResolvedAuth`].
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn with_timeout(
        base_url: impl Into<String>,
        model: impl Into<String>,
        timeout: Duration,
    ) -> Result<Self, ChatError> {
        Self::with_timeout_and_max_tokens(base_url, model, timeout, None)
    }

    /// Creates a client with the given `max_tokens`, using
    /// [`DEFAULT_CHAT_TIMEOUT`] and the default [`ResolvedAuth`].
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn with_max_tokens(
        base_url: impl Into<String>,
        model: impl Into<String>,
        max_tokens: Option<u32>,
    ) -> Result<Self, ChatError> {
        Self::with_timeout_and_max_tokens(base_url, model, DEFAULT_CHAT_TIMEOUT, max_tokens)
    }

    /// Creates a client with an explicit `timeout` and `max_tokens`, using the
    /// default [`ResolvedAuth`].
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn with_timeout_and_max_tokens(
        base_url: impl Into<String>,
        model: impl Into<String>,
        timeout: Duration,
        max_tokens: Option<u32>,
    ) -> Result<Self, ChatError> {
        Self::with_timeout_max_tokens_and_auth(
            base_url,
            model,
            timeout,
            max_tokens,
            ResolvedAuth::default(),
        )
    }

    /// Creates a client with an explicit `timeout`, `max_tokens` and `auth`.
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn with_timeout_max_tokens_and_auth(
        base_url: impl Into<String>,
        model: impl Into<String>,
        timeout: Duration,
        max_tokens: Option<u32>,
        auth: ResolvedAuth,
    ) -> Result<Self, ChatError> {
        Self::with_optional_timeout_max_tokens_and_auth(
            base_url,
            model,
            Some(timeout),
            max_tokens,
            auth,
        )
    }

    /// Creates a client whose requests never time out, with the given
    /// `max_tokens` and the default [`ResolvedAuth`].
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] if the underlying HTTP client cannot be built.
    pub fn with_no_timeout_and_max_tokens(
        base_url: impl Into<String>,
        model: impl Into<String>,
        max_tokens: Option<u32>,
    ) -> Result<Self, ChatError> {
        Self::with_optional_timeout_max_tokens_and_auth(
            base_url,
            model,
            None,
            max_tokens,
            ResolvedAuth::default(),
        )
    }

    /// Shared constructor behind every public `with_*` constructor.
    ///
    /// `timeout` of `Some(d)` bounds each request by `d`; `None` disables the
    /// request timeout entirely. Reasoning effort and chat-template kwargs
    /// start out unset.
    ///
    /// # Errors
    ///
    /// Returns [`ChatError::Build`] (with the message passed through `redact`)
    /// if the underlying HTTP client cannot be built.
    fn with_optional_timeout_max_tokens_and_auth(
        base_url: impl Into<String>,
        model: impl Into<String>,
        timeout: Option<Duration>,
        max_tokens: Option<u32>,
        auth: ResolvedAuth,
    ) -> Result<Self, ChatError> {
        // The `Option` is forwarded as-is on purpose: reqwest's blocking
        // client applies its own 30-second default when `timeout` is never
        // called, so merely skipping the call for `None` would NOT yield an
        // unbounded client. Passing `None` explicitly disables the timeout.
        let http = HttpClient::builder()
            .timeout(timeout)
            .build()
            .map_err(|err| ChatError::Build {
                message: redact(&err.to_string()),
            })?;

        Ok(Self {
            base_url: base_url.into(),
            model: model.into(),
            max_tokens,
            reasoning_effort: None,
            chat_template_kwargs: None,
            auth,
            http,
        })
    }

    /// Returns the client with `effort` sent as the reasoning effort of every
    /// subsequent request.
    #[must_use]
    pub const fn with_reasoning_effort(mut self, effort: ReasoningEffort) -> Self {
        self.reasoning_effort = Some(effort);
        self
    }

    /// Returns the client with `value` sent as `chat_template_kwargs` in every
    /// subsequent request.
    #[must_use]
    pub fn with_chat_template_kwargs(mut self, value: serde_json::Value) -> Self {
        self.chat_template_kwargs = Some(value);
        self
    }

    /// Model identifier this client sends with each request.
    #[must_use]
    pub fn model(&self) -> &str {
        &self.model
    }

    /// Base URL this client was constructed with (returned unmodified).
    #[must_use]
    pub fn base_url(&self) -> &str {
        &self.base_url
    }

    /// Sends `messages` and returns only the text content of the first choice.
    ///
    /// Thin wrapper around [`ChatClient::complete_raw`] that discards the
    /// reasoning content and the raw response body.
    ///
    /// # Errors
    ///
    /// Propagates every error of [`ChatClient::complete_raw`].
    pub fn complete(
        &self,
        messages: Vec<ChatMessage>,
        temperature: f32,
    ) -> Result<String, ChatError> {
        self.complete_raw(messages, temperature)
            .map(|output| output.content)
    }

    /// Posts a chat-completion request to `{base_url}/chat/completions` and
    /// returns the first choice together with the raw response body.
    ///
    /// The API key (if any) is sent as a bearer token, followed by every
    /// configured extra header.
    ///
    /// # Errors
    ///
    /// * [`ChatError::Http`] with `status: None` when the request could not be
    ///   sent; `timed_out` reflects whether the failure was a timeout.
    /// * [`ChatError::Http`] with `status: Some(code)` when the server answers
    ///   with a non-success status; `message` is the response body passed
    ///   through `redact` (empty if the body could not be read).
    /// * [`ChatError::MalformedResponse`] when the body cannot be read, does
    ///   not deserialize into a `ChatCompletionResponse`, contains no choices,
    ///   or the first choice has missing or whitespace-only content.
    pub fn complete_raw(
        &self,
        messages: Vec<ChatMessage>,
        temperature: f32,
    ) -> Result<ChatCompletionOutput, ChatError> {
        // Tolerate a base URL configured with or without a trailing slash.
        let url = format!("{}/chat/completions", self.base_url.trim_end_matches('/'));
        let body = ChatCompletionRequest {
            model: self.model.clone(),
            messages,
            max_tokens: self.max_tokens,
            reasoning_effort: self.reasoning_effort,
            temperature,
            chat_template_kwargs: self.chat_template_kwargs.clone(),
        };

        let mut request = self.http.post(&url).json(&body);
        if let Some(key) = &self.auth.api_key {
            request = request.bearer_auth(key);
        }
        for (name, value) in &self.auth.extra_headers {
            request = request.header(name.as_str(), value.as_str());
        }

        let response = request.send().map_err(|err| ChatError::Http {
            status: None,
            message: redact(&err.to_string()),
            timed_out: err.is_timeout(),
        })?;

        let status = response.status();
        if !status.is_success() {
            // Best effort: the status code is the primary signal, so an
            // unreadable error body degrades to an empty message.
            let snippet = response.text().unwrap_or_default();
            return Err(ChatError::Http {
                status: Some(status.as_u16()),
                message: redact(&snippet),
                timed_out: false,
            });
        }

        // Keep the body as text so it can be returned verbatim in the output.
        let text = response.text().map_err(|_| ChatError::MalformedResponse)?;
        let completion: ChatCompletionResponse =
            serde_json::from_str(&text).map_err(|_| ChatError::MalformedResponse)?;

        // Only the first choice is ever consulted.
        let message = completion
            .choices
            .first()
            .map(|choice| &choice.message)
            .ok_or(ChatError::MalformedResponse)?;

        // Validate before cloning so rejected content is never copied.
        let content = message
            .content
            .as_ref()
            .filter(|content| !content.trim().is_empty())
            .cloned()
            .ok_or(ChatError::MalformedResponse)?;

        Ok(ChatCompletionOutput {
            content,
            reasoning_content: message.reasoning_content.clone(),
            raw_response: text,
        })
    }
}
/// Removes Markdown code fences around `content` and, when the remainder
/// contains a brace-delimited span, returns only that span.
///
/// Steps, in order:
/// 1. Trim surrounding whitespace.
/// 2. Strip every leading "```json" marker, then every leading "```" marker,
///    then every trailing "```" marker, and trim again.
/// 3. If the first `{` occurs before the last `}`, return the text from that
///    `{` through that `}` inclusive; otherwise return the trimmed text as is.
#[must_use]
pub fn strip_code_fences(content: &str) -> String {
    const FENCE: &str = "```";

    let unfenced = content.trim();
    let unfenced = unfenced.trim_start_matches("```json");
    let unfenced = unfenced.trim_start_matches(FENCE);
    let unfenced = unfenced.trim_end_matches(FENCE).trim();

    // Both delimiters are single-byte ASCII, so the inclusive slice below
    // always lands on char boundaries.
    unfenced
        .find('{')
        .zip(unfenced.rfind('}'))
        .filter(|(open, close)| close > open)
        .map_or(unfenced, |(open, close)| &unfenced[open..=close])
        .to_owned()
}
#[cfg(test)]
mod tests;