1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
use std::sync::Arc;
use std::time::Duration;
use secrecy::SecretString;
use crate::auth::CredentialProvider;
#[cfg(any(feature = "native-http", feature = "wasm-http"))]
use crate::error::{LiterLlmError, Result};
#[cfg(feature = "tower")]
use crate::tower::{BudgetConfig, CacheConfig, CacheStore, LlmHook, RateLimitConfig};
/// Configuration for an LLM client.
///
/// Create one with [`ClientConfig::new`] or assemble it step by step with
/// [`ClientConfigBuilder`].
///
/// `api_key` is stored as a [`SecretString`] so it is zeroed on drop and never
/// printed accidentally. Access it via [`secrecy::ExposeSecret`].
#[derive(Clone)]
pub struct ClientConfig {
/// API key for authentication (stored as a secret).
pub api_key: SecretString,
/// Override base URL. When set, all requests go here regardless of model
/// name, and provider auto-detection is skipped.
///
/// [`ClientConfig::new`] leaves this as `None`.
pub base_url: Option<String>,
/// Request timeout. [`ClientConfig::new`] sets it to 60 seconds.
pub timeout: Duration,
/// Maximum number of retries on 429 / 5xx responses.
/// [`ClientConfig::new`] sets it to 3.
pub max_retries: u32,
/// Extra headers sent on every request.
///
/// Use `Vec<(String, String)>` rather than `HashMap` to preserve insertion
/// order and avoid non-deterministic iteration when building the reqwest
/// `HeaderMap`.
///
/// The field is `pub(crate)`: code outside this crate reads it through
/// [`ClientConfig::headers`] and appends to it through
/// [`ClientConfigBuilder::header`], which validates each pair first.
pub(crate) extra_headers: Vec<(String, String)>,
/// Optional dynamic credential provider for token-based auth
/// (Azure AD, Vertex OAuth2) or refreshable credentials (AWS STS).
///
/// When set, the client calls `resolve()` before each request to obtain
/// a fresh credential. When `None`, the static `api_key` is used.
pub credential_provider: Option<Arc<dyn CredentialProvider>>,
/// Configuration for the response cache Tower middleware layer.
///
/// When set, bindings and advanced Rust users can use this to construct
/// a [`CacheLayer`](crate::tower::CacheLayer) in their Tower stack.
///
/// Only present with the `tower` feature.
#[cfg(feature = "tower")]
pub cache_config: Option<CacheConfig>,
/// Custom cache store backend for the cache Tower middleware layer.
///
/// When set alongside `cache_config`, the cache layer will use this
/// store instead of the default in-memory LRU.
///
/// Only present with the `tower` feature.
#[cfg(feature = "tower")]
pub cache_store: Option<Arc<dyn CacheStore>>,
/// Configuration for the budget enforcement Tower middleware layer.
///
/// When set, bindings and advanced Rust users can use this to construct
/// a [`BudgetLayer`](crate::tower::BudgetLayer) in their Tower stack.
///
/// Only present with the `tower` feature.
#[cfg(feature = "tower")]
pub budget_config: Option<BudgetConfig>,
/// User-defined hooks for the hooks Tower middleware layer.
///
/// These hooks are invoked at request lifecycle points (pre-request,
/// post-response, on-error) when a
/// [`HooksLayer`](crate::tower::HooksLayer) is constructed from this
/// config.
///
/// Only present with the `tower` feature; empty by default.
#[cfg(feature = "tower")]
pub hooks: Vec<Arc<dyn LlmHook>>,
/// Cooldown duration after transient errors (rate limit, timeout, server error).
/// When set, the client rejects requests with `ServiceUnavailable` during cooldown.
///
/// Only present with the `tower` feature; `None` by default.
#[cfg(feature = "tower")]
pub cooldown_duration: Option<Duration>,
/// Per-model rate limiting configuration (RPM/TPM).
///
/// Only present with the `tower` feature; `None` by default.
#[cfg(feature = "tower")]
pub rate_limit_config: Option<RateLimitConfig>,
/// Background health check interval. When set, periodically probes the provider
/// and rejects requests when the provider is unhealthy.
///
/// Only present with the `tower` feature; `None` by default.
#[cfg(feature = "tower")]
pub health_check_interval: Option<Duration>,
/// Enable per-request cost tracking. Costs are accumulated atomically and
/// logged via `tracing::info`. Disabled by default.
// NOTE(review): the docs on `ClientConfigBuilder::cost_tracking` instead
// describe the cost being recorded on the tracing span as
// `gen_ai.usage.cost` — confirm which description matches the middleware.
#[cfg(feature = "tower")]
pub enable_cost_tracking: bool,
/// Enable OpenTelemetry-compatible tracing spans for every request.
/// Disabled by default.
#[cfg(feature = "tower")]
pub enable_tracing: bool,
/// Automatically load the API key from the provider's environment variable
/// when no explicit key is provided.
///
/// When `true` (the default) and `api_key` is empty, [`DefaultClient::new`]
/// reads the provider's designated environment variable (e.g.
/// `OPENAI_API_KEY` for OpenAI). Set to `false` to suppress this behaviour
/// and require the caller to supply the key explicitly.
///
/// Has no effect on WASM targets, where `std::env::var` is unavailable.
pub load_env: bool,
}
impl ClientConfig {
    /// Build a configuration around `api_key`, leaving every other option at
    /// its default.
    ///
    /// Defaults: no base-URL override, a 60-second timeout, 3 retries, no
    /// extra headers, no credential provider, and `load_env` enabled. With
    /// the `tower` feature every middleware option starts unset, empty, or
    /// disabled.
    pub fn new(api_key: impl Into<String>) -> Self {
        const DEFAULT_TIMEOUT: Duration = Duration::from_secs(60);
        const DEFAULT_MAX_RETRIES: u32 = 3;

        // Wrap the key straight away so the plain string is not kept around.
        let secret_key = SecretString::from(api_key.into());

        Self {
            api_key: secret_key,
            base_url: None,
            credential_provider: None,
            extra_headers: Vec::new(),
            timeout: DEFAULT_TIMEOUT,
            max_retries: DEFAULT_MAX_RETRIES,
            load_env: true,
            #[cfg(feature = "tower")]
            cache_config: None,
            #[cfg(feature = "tower")]
            cache_store: None,
            #[cfg(feature = "tower")]
            budget_config: None,
            #[cfg(feature = "tower")]
            rate_limit_config: None,
            #[cfg(feature = "tower")]
            cooldown_duration: None,
            #[cfg(feature = "tower")]
            health_check_interval: None,
            #[cfg(feature = "tower")]
            hooks: Vec::new(),
            #[cfg(feature = "tower")]
            enable_cost_tracking: false,
            #[cfg(feature = "tower")]
            enable_tracing: false,
        }
    }

    /// Borrow the extra headers as `(name, value)` pairs, in the order they
    /// were added.
    pub fn headers(&self) -> &[(String, String)] {
        self.extra_headers.as_slice()
    }
}
/// Note: `Debug` is implemented by hand rather than derived so the secret key
/// is never accidentally logged via `{:?}`: the API key and every extra header
/// value are printed as `[redacted]`.
impl std::fmt::Debug for ClientConfig {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Placeholders printed in place of secrets and of trait objects that
        // are only reported as present or absent.
        const REDACTED: &str = "[redacted]";
        const CONFIGURED: &str = "[configured]";

        // Header values may carry API keys or other secrets, so only the
        // header names are shown.
        let mut header_names: Vec<(&str, &str)> = Vec::with_capacity(self.extra_headers.len());
        for (name, _value) in &self.extra_headers {
            header_names.push((name.as_str(), REDACTED));
        }
        let provider_state = self.credential_provider.as_ref().map(|_| CONFIGURED);

        let mut out = f.debug_struct("ClientConfig");
        out.field("api_key", &REDACTED);
        out.field("base_url", &self.base_url);
        out.field("timeout", &self.timeout);
        out.field("max_retries", &self.max_retries);
        out.field("extra_headers", &header_names);
        out.field("load_env", &self.load_env);
        out.field("credential_provider", &provider_state);

        #[cfg(feature = "tower")]
        {
            let store_state = self.cache_store.as_ref().map(|_| CONFIGURED);
            // Hooks are summarised by their count only.
            let hook_total = self.hooks.len();

            out.field("cache_config", &self.cache_config);
            out.field("cache_store", &store_state);
            out.field("budget_config", &self.budget_config);
            out.field("hooks_count", &hook_total);
            out.field("cooldown_duration", &self.cooldown_duration);
            out.field("rate_limit_config", &self.rate_limit_config);
            out.field("health_check_interval", &self.health_check_interval);
            out.field("enable_cost_tracking", &self.enable_cost_tracking);
            out.field("enable_tracing", &self.enable_tracing);
        }

        out.finish()
    }
}
/// Builder for [`ClientConfig`].
///
/// Construct with [`ClientConfigBuilder::new`] (explicit API key) or
/// [`ClientConfigBuilder::from_env`] (empty key), call builder methods to
/// customise the configuration, then call [`ClientConfigBuilder::build`] to
/// obtain a [`ClientConfig`].
///
/// The type is `#[must_use]`: every setter takes the builder by value and
/// returns the updated builder, so discarding a setter's return value
/// discards the change.
#[must_use]
pub struct ClientConfigBuilder {
/// The configuration being assembled; `build` returns it unchanged.
pub(crate) config: ClientConfig,
}
impl ClientConfigBuilder {
    /// Create a new builder with the given API key and sensible defaults.
    ///
    /// The defaults are those of [`ClientConfig::new`].
    pub fn new(api_key: impl Into<String>) -> Self {
        Self {
            config: ClientConfig::new(api_key),
        }
    }

    /// Create a builder with no explicit API key.
    ///
    /// `load_env` is `true` by default, so the key will be read from the
    /// provider's environment variable (e.g. `OPENAI_API_KEY`) at client
    /// construction time. Call `.load_env(false)` to opt out.
    pub fn from_env() -> Self {
        // An empty key is the marker for "no explicit key supplied".
        Self {
            config: ClientConfig::new(""),
        }
    }

    /// Enable or disable automatic API key loading from environment variables.
    ///
    /// When `true` (the default) and no explicit `api_key` was provided,
    /// [`DefaultClient::new`] reads the provider's designated environment
    /// variable. Set to `false` to require an explicit key.
    ///
    /// Has no effect on WASM targets.
    pub fn load_env(mut self, enabled: bool) -> Self {
        self.config.load_env = enabled;
        self
    }

    /// Override the provider base URL for all requests.
    pub fn base_url(mut self, url: impl Into<String>) -> Self {
        self.config.base_url = Some(url.into());
        self
    }

    /// Set the per-request timeout (default: 60 s).
    pub fn timeout(mut self, timeout: Duration) -> Self {
        self.config.timeout = timeout;
        self
    }

    /// Set the maximum number of retries on 429 / 5xx responses (default: 3).
    pub fn max_retries(mut self, retries: u32) -> Self {
        self.config.max_retries = retries;
        self
    }

    /// Set a dynamic credential provider for token-based or refreshable auth.
    ///
    /// When configured, the client calls `resolve()` before each request
    /// instead of using the static `api_key` for authentication.
    pub fn credential_provider(mut self, provider: Arc<dyn CredentialProvider>) -> Self {
        self.config.credential_provider = Some(provider);
        self
    }

    /// Add a custom header sent on every request.
    ///
    /// Headers are appended in call order; an earlier header with the same
    /// name is not replaced.
    ///
    /// This method is available when either the `native-http` or the
    /// `wasm-http` feature is enabled, because header validation relies on
    /// `reqwest`'s header types.
    ///
    /// # Errors
    ///
    /// Returns [`LiterLlmError::InvalidHeader`] if `key` is not a valid HTTP
    /// header name or `value` is not a valid HTTP header value. In both cases
    /// the error's `name` field carries the header name.
    #[cfg(any(feature = "native-http", feature = "wasm-http"))]
    pub fn header(mut self, key: impl Into<String>, value: impl Into<String>) -> Result<Self> {
        let key = key.into();
        let value = value.into();

        // Both validation failures are reported against the header name.
        let invalid = |reason: String| LiterLlmError::InvalidHeader {
            name: key.clone(),
            reason,
        };

        // Validate header name.
        reqwest::header::HeaderName::from_bytes(key.as_bytes()).map_err(|e| invalid(e.to_string()))?;
        // Validate header value.
        reqwest::header::HeaderValue::from_str(&value).map_err(|e| invalid(e.to_string()))?;

        self.config.extra_headers.push((key, value));
        Ok(self)
    }

    /// Set the response cache configuration for the Tower middleware stack.
    ///
    /// When set, bindings and advanced Rust users can read this from the
    /// built [`ClientConfig`] to construct a
    /// [`CacheLayer`](crate::tower::CacheLayer).
    #[cfg(feature = "tower")]
    pub fn cache(mut self, config: CacheConfig) -> Self {
        self.config.cache_config = Some(config);
        self
    }

    /// Set a custom cache store backend for the Tower cache middleware.
    ///
    /// When set alongside [`cache`](Self::cache), the cache layer will use
    /// this store instead of the default in-memory LRU.
    #[cfg(feature = "tower")]
    pub fn cache_store(mut self, store: Arc<dyn CacheStore>) -> Self {
        self.config.cache_store = Some(store);
        self
    }

    /// Set the budget enforcement configuration for the Tower middleware stack.
    ///
    /// When set, bindings and advanced Rust users can read this from the
    /// built [`ClientConfig`] to construct a
    /// [`BudgetLayer`](crate::tower::BudgetLayer).
    #[cfg(feature = "tower")]
    pub fn budget(mut self, config: BudgetConfig) -> Self {
        self.config.budget_config = Some(config);
        self
    }

    /// Add a single hook to the Tower hooks middleware stack.
    ///
    /// Hooks are invoked sequentially in registration order at request
    /// lifecycle points (pre-request, post-response, on-error).
    #[cfg(feature = "tower")]
    pub fn hook(mut self, hook: Arc<dyn LlmHook>) -> Self {
        self.config.hooks.push(hook);
        self
    }

    /// Set the full list of hooks for the Tower hooks middleware stack,
    /// replacing any previously registered hooks.
    ///
    /// Hooks are invoked sequentially in registration order.
    #[cfg(feature = "tower")]
    pub fn hooks(mut self, hooks: Vec<Arc<dyn LlmHook>>) -> Self {
        self.config.hooks = hooks;
        self
    }

    /// Set the cooldown duration after transient errors.
    ///
    /// When set, the client rejects requests with `ServiceUnavailable` for
    /// the given duration after a transient error (rate limit, timeout,
    /// server error).
    #[cfg(feature = "tower")]
    pub fn cooldown(mut self, duration: Duration) -> Self {
        self.config.cooldown_duration = Some(duration);
        self
    }

    /// Set per-model rate limiting configuration.
    ///
    /// When set, requests exceeding the configured RPM or TPM limits are
    /// rejected with [`LiterLlmError::RateLimited`](crate::error::LiterLlmError::RateLimited).
    #[cfg(feature = "tower")]
    pub fn rate_limit(mut self, config: RateLimitConfig) -> Self {
        self.config.rate_limit_config = Some(config);
        self
    }

    /// Set the background health check interval.
    ///
    /// When set, the client periodically probes the provider and rejects
    /// requests when the provider is unhealthy.
    #[cfg(feature = "tower")]
    pub fn health_check(mut self, interval: Duration) -> Self {
        self.config.health_check_interval = Some(interval);
        self
    }

    /// Enable or disable per-request cost tracking (default: disabled).
    ///
    /// When enabled, estimated USD cost is recorded on the current tracing
    /// span as `gen_ai.usage.cost`.
    #[cfg(feature = "tower")]
    pub fn cost_tracking(mut self, enabled: bool) -> Self {
        self.config.enable_cost_tracking = enabled;
        self
    }

    /// Enable or disable OpenTelemetry-compatible tracing spans
    /// (default: disabled).
    ///
    /// When enabled, every request is wrapped in a `gen_ai` tracing span
    /// with semantic convention attributes.
    #[cfg(feature = "tower")]
    pub fn tracing(mut self, enabled: bool) -> Self {
        self.config.enable_tracing = enabled;
        self
    }

    /// Consume the builder and return the completed [`ClientConfig`].
    #[must_use]
    pub fn build(self) -> ClientConfig {
        self.config
    }
}