//! Builder for configuring and constructing the Ollama provider client.
use crate::providers::ollama::config::OllamaParams;
use crate::retry_api::RetryOptions;
use crate::{CommonParams, HttpConfig, LlmBuilder, LlmError};
/// Fluent builder that collects Ollama settings and produces an `OllamaClient`.
///
/// Setters consume the builder and return it, so calls can be chained and
/// finished with `build()`.
///
/// Retry: call `.with_retry(RetryOptions::backoff())` to enable unified retry
/// for chat operations.
pub struct OllamaBuilder {
    /// Generic builder this one wraps; `build()` asks it for the HTTP client.
    pub(crate) base: LlmBuilder,
    /// Explicit server URL; `build()` falls back to `http://localhost:11434`
    /// when this is `None`.
    base_url: Option<String>,
    /// Model name forwarded to the provider configuration when set.
    model: Option<String>,
    /// Provider-independent generation parameters (temperature, max tokens, top-p).
    common_params: CommonParams,
    /// Parameters that only exist for Ollama (keep-alive, raw mode, GPU options, ...).
    ollama_params: OllamaParams,
    /// HTTP settings handed to the provider configuration.
    http_config: HttpConfig,
    /// Tracing setup; when present, `build()` initializes tracing with it.
    tracing_config: Option<crate::tracing::TracingConfig>,
    /// Retry policy passed to the client by `build()`.
    retry_options: Option<RetryOptions>,
}
impl OllamaBuilder {
/// Create a new Ollama builder
pub fn new(base: LlmBuilder) -> Self {
Self {
base,
base_url: None,
model: None,
common_params: CommonParams::default(),
ollama_params: OllamaParams::default(),
http_config: HttpConfig::default(),
tracing_config: None,
retry_options: None,
}
}
/// Set the base URL for Ollama API
///
/// # Arguments
/// * `url` - The base URL (e.g., "<http://localhost:11434>")
pub fn base_url<S: Into<String>>(mut self, url: S) -> Self {
self.base_url = Some(url.into());
self
}
/// Set the model to use
///
/// # Arguments
/// * `model` - The model name (e.g., "llama3.2", "mistral:7b")
pub fn model<S: Into<String>>(mut self, model: S) -> Self {
self.model = Some(model.into());
self
}
/// Set the temperature for generation
///
/// # Arguments
/// * `temperature` - Temperature value (0.0 to 2.0)
pub const fn temperature(mut self, temperature: f32) -> Self {
self.common_params.temperature = Some(temperature);
self
}
/// Set the maximum number of tokens to generate
///
/// # Arguments
/// * `max_tokens` - Maximum tokens to generate
pub const fn max_tokens(mut self, max_tokens: u32) -> Self {
self.common_params.max_tokens = Some(max_tokens);
self
}
/// Set the top-p value for nucleus sampling
///
/// # Arguments
/// * `top_p` - Top-p value (0.0 to 1.0)
pub const fn top_p(mut self, top_p: f32) -> Self {
self.common_params.top_p = Some(top_p);
self
}
/// Set how long to keep the model loaded in memory
///
/// # Arguments
/// * `duration` - Duration string (e.g., "5m", "1h", "30s")
pub fn keep_alive<S: Into<String>>(mut self, duration: S) -> Self {
self.ollama_params.keep_alive = Some(duration.into());
self
}
/// Enable or disable raw mode (bypass templating)
///
/// # Arguments
/// * `raw` - Whether to enable raw mode
pub const fn raw(mut self, raw: bool) -> Self {
self.ollama_params.raw = Some(raw);
self
}
/// Set the output format
///
/// # Arguments
/// * `format` - Format string ("json" or JSON schema)
pub fn format<S: Into<String>>(mut self, format: S) -> Self {
self.ollama_params.format = Some(format.into());
self
}
/// Add a model option
///
/// # Arguments
/// * `key` - Option key
/// * `value` - Option value
pub fn option<K: Into<String>>(mut self, key: K, value: serde_json::Value) -> Self {
let mut options = self.ollama_params.options.unwrap_or_default();
options.insert(key.into(), value);
self.ollama_params.options = Some(options);
self
}
/// Set multiple model options at once
///
/// # Arguments
/// * `options` - `HashMap` of options
pub fn options(
mut self,
options: std::collections::HashMap<String, serde_json::Value>,
) -> Self {
self.ollama_params.options = Some(options);
self
}
/// Enable or disable NUMA support
///
/// # Arguments
/// * `numa` - Whether to enable NUMA support
pub const fn numa(mut self, numa: bool) -> Self {
self.ollama_params.numa = Some(numa);
self
}
/// Set the context window size
///
/// # Arguments
/// * `num_ctx` - Context window size
pub const fn num_ctx(mut self, num_ctx: u32) -> Self {
self.ollama_params.num_ctx = Some(num_ctx);
self
}
/// Set the number of GPU layers to use
///
/// # Arguments
/// * `num_gpu` - Number of GPU layers
pub const fn num_gpu(mut self, num_gpu: u32) -> Self {
self.ollama_params.num_gpu = Some(num_gpu);
self
}
/// Set the batch size for processing
///
/// # Arguments
/// * `num_batch` - Batch size
pub const fn num_batch(mut self, num_batch: u32) -> Self {
self.ollama_params.num_batch = Some(num_batch);
self
}
/// Set the main GPU to use
///
/// # Arguments
/// * `main_gpu` - Main GPU index
pub const fn main_gpu(mut self, main_gpu: u32) -> Self {
self.ollama_params.main_gpu = Some(main_gpu);
self
}
/// Enable or disable memory mapping
///
/// # Arguments
/// * `use_mmap` - Whether to use memory mapping
pub const fn use_mmap(mut self, use_mmap: bool) -> Self {
self.ollama_params.use_mmap = Some(use_mmap);
self
}
/// Set the number of threads to use
///
/// # Arguments
/// * `num_thread` - Number of threads
pub const fn num_thread(mut self, num_thread: u32) -> Self {
self.ollama_params.num_thread = Some(num_thread);
self
}
/// Enable reasoning mode for reasoning models
///
/// # Arguments
/// * `enabled` - Whether to enable reasoning mode
pub const fn reasoning(mut self, enabled: bool) -> Self {
self.ollama_params.think = Some(enabled);
self
}
/// Enable thinking mode for thinking models (alias for reasoning)
///
/// # Arguments
/// * `think` - Whether to enable thinking mode
///
/// # Deprecated
/// Use `reasoning()` instead for consistency with other providers
#[deprecated(since = "0.7.1", note = "Use `reasoning()` instead for consistency")]
pub const fn think(self, think: bool) -> Self {
self.reasoning(think)
}
// === Tracing Configuration ===
/// Set custom tracing configuration
pub fn tracing(mut self, config: crate::tracing::TracingConfig) -> Self {
self.tracing_config = Some(config);
self
}
/// Enable debug tracing (development-friendly configuration)
pub fn debug_tracing(self) -> Self {
self.tracing(crate::tracing::TracingConfig::development())
}
/// Enable minimal tracing (info level, LLM only)
pub fn minimal_tracing(self) -> Self {
self.tracing(crate::tracing::TracingConfig::minimal())
}
/// Enable production-ready JSON tracing
pub fn json_tracing(self) -> Self {
self.tracing(crate::tracing::TracingConfig::json_production())
}
/// Enable pretty-printed formatting for JSON bodies and headers in tracing
pub fn pretty_json(mut self, pretty: bool) -> Self {
let config = self
.tracing_config
.take()
.unwrap_or_else(crate::tracing::TracingConfig::development)
.with_pretty_json(pretty);
self.tracing_config = Some(config);
self
}
/// Control masking of sensitive values (API keys, tokens) in tracing logs
pub fn mask_sensitive_values(mut self, mask: bool) -> Self {
let config = self
.tracing_config
.take()
.unwrap_or_else(crate::tracing::TracingConfig::development)
.with_mask_sensitive_values(mask);
self.tracing_config = Some(config);
self
}
/// Set unified retry options for chat operations
pub fn with_retry(mut self, options: RetryOptions) -> Self {
self.retry_options = Some(options);
self
}
/// Build the Ollama client
pub async fn build(self) -> Result<crate::providers::ollama::OllamaClient, LlmError> {
let base_url = self
.base_url
.unwrap_or_else(|| "http://localhost:11434".to_string());
// Initialize tracing if configured
let _tracing_guard = if let Some(ref tracing_config) = self.tracing_config {
crate::tracing::init_tracing(tracing_config.clone())?
} else {
None
};
let mut config = crate::providers::ollama::OllamaConfig::builder()
.base_url(base_url)
.common_params(self.common_params)
.http_config(self.http_config)
.ollama_params(self.ollama_params);
if let Some(model) = self.model {
config = config.model(model);
}
let config = config.build()?;
let http_client = self.base.build_http_client()?;
let mut client = crate::providers::ollama::OllamaClient::new(config, http_client);
client.set_tracing_guard(_tracing_guard);
client.set_tracing_config(self.tracing_config);
client.set_retry_options(self.retry_options.clone());
Ok(client)
}
}