use zeph_llm::any::AnyProvider;
use zeph_llm::claude::ClaudeProvider;
use zeph_llm::compatible::CompatibleProvider;
use zeph_llm::gemini::GeminiProvider;
use zeph_llm::http::llm_client;
use zeph_llm::ollama::OllamaProvider;
use zeph_llm::openai::OpenAiProvider;
use zeph_llm::router::cascade::ClassifierMode;
use zeph_llm::router::{CascadeRouterConfig, RouterProvider};

use crate::config::{Config, ProviderKind};

#[derive(Debug, thiserror::Error)]
pub enum BootstrapError {
    #[error("config error: {0}")]
    Config(#[from] crate::config::ConfigError),
    #[error("provider error: {0}")]
    Provider(String),
    #[error("memory error: {0}")]
    Memory(String),
    #[error("vault init error: {0}")]
    VaultInit(crate::vault::AgeVaultError),
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
}

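/// Builds the top-level [`AnyProvider`] selected by `llm.provider` in the
/// loaded [`Config`], wiring in router, cascade, orchestrator, or candle
/// backends as configured.
///
/// A minimal usage sketch (`load_config` is a hypothetical helper; adapt to
/// however `Config` is constructed in this crate):
///
/// ```ignore
/// let config = load_config()?;
/// let provider = create_provider(&config)?;
/// ```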
pub fn create_provider(config: &Config) -> Result<AnyProvider, BootstrapError> {
    match config.llm.provider {
        ProviderKind::Ollama | ProviderKind::Claude => {
            create_named_provider(config.llm.provider.as_str(), config)
        }
        ProviderKind::OpenAi => create_named_provider("openai", config),
        ProviderKind::Gemini => create_named_provider("gemini", config),
        ProviderKind::Compatible => create_named_provider("compatible", config),
        #[cfg(feature = "candle")]
        ProviderKind::Candle => {
            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.candle config section required for candle provider".into(),
                )
            })?;
            let source = match candle_cfg.source.as_str() {
                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
                    path: std::path::PathBuf::from(&candle_cfg.local_path),
                },
                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
                    repo_id: config.llm.model.clone(),
                    filename: candle_cfg.filename.clone(),
                },
            };
            build_candle_provider(source, candle_cfg, &candle_cfg.device)
        }
        ProviderKind::Orchestrator => {
            let orch = build_orchestrator(config)?;
            Ok(AnyProvider::Orchestrator(Box::new(orch)))
        }
        ProviderKind::Router => {
            let router_cfg = config.llm.router.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.router config section required for router provider".into(),
                )
            })?;

            let mut providers = Vec::new();
            for name in &router_cfg.chain {
                match create_named_provider(name, config) {
                    Ok(p) => providers.push(p),
                    Err(e) => {
                        tracing::warn!(
                            provider = name.as_str(),
                            error = %e,
                            "skipping router chain provider (will initialize on demand if needed)"
                        );
                    }
                }
            }
            if providers.is_empty() {
                return Err(BootstrapError::Provider(format!(
                    "router chain is empty: none of [{}] could be initialized",
                    router_cfg.chain.join(", ")
                )));
            }
            let router = if router_cfg.strategy == crate::config::RouterStrategyConfig::Thompson {
                let state_path = router_cfg
                    .thompson_state_path
                    .as_deref()
                    .map(std::path::Path::new);
                RouterProvider::new(providers).with_thompson(state_path)
            } else if router_cfg.strategy == crate::config::RouterStrategyConfig::Cascade {
                let cascade_cfg = router_cfg.cascade.clone().unwrap_or_default();
                let router_cascade_cfg = build_cascade_router_config(&cascade_cfg, config);
                RouterProvider::new(providers).with_cascade(router_cascade_cfg)
            } else if config.llm.router_ema_enabled {
                let raw_alpha = config.llm.router_ema_alpha;
                let alpha = raw_alpha.clamp(f64::MIN_POSITIVE, 1.0);
                if (alpha - raw_alpha).abs() > f64::EPSILON {
                    tracing::warn!(
                        raw_alpha,
                        clamped = alpha,
                        "router_ema_alpha out of range [MIN_POSITIVE, 1.0], clamped"
                    );
                }
                RouterProvider::new(providers).with_ema(alpha, config.llm.router_reorder_interval)
            } else {
                RouterProvider::new(providers)
            };
            Ok(AnyProvider::Router(Box::new(router)))
        }
        #[cfg(not(feature = "candle"))]
        ProviderKind::Candle => Err(BootstrapError::Provider(
            "candle feature is not enabled".into(),
        )),
    }
}

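/// Translates the cascade section of the router config into a
/// [`CascadeRouterConfig`], clamping out-of-range values and building an
/// optional judge provider from `llm.summary_model`.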
fn build_cascade_router_config(
    cascade_cfg: &crate::config::CascadeConfig,
    config: &Config,
) -> CascadeRouterConfig {
    let classifier_mode = match cascade_cfg.classifier_mode {
        crate::config::CascadeClassifierMode::Heuristic => ClassifierMode::Heuristic,
        crate::config::CascadeClassifierMode::Judge => ClassifierMode::Judge,
    };
    let raw_threshold = cascade_cfg.quality_threshold;
    let quality_threshold = if raw_threshold.is_finite() {
        let clamped = raw_threshold.clamp(0.0, 1.0);
        if (clamped - raw_threshold).abs() > f64::EPSILON {
            tracing::warn!(
                raw_threshold,
                clamped,
                "cascade quality_threshold out of range [0.0, 1.0], clamped"
            );
        }
        clamped
    } else {
        tracing::warn!(
            raw_threshold,
            "cascade quality_threshold is non-finite, defaulting to 0.5"
        );
        0.5
    };
    let window_size = cascade_cfg.window_size.max(1);
    if window_size != cascade_cfg.window_size {
        tracing::warn!(
            raw = cascade_cfg.window_size,
            "cascade window_size=0 is invalid, clamped to 1"
        );
    }
    let summary_provider = if classifier_mode == ClassifierMode::Judge {
        if let Some(model_spec) = config.llm.summary_model.as_deref() {
            match create_summary_provider(model_spec, config) {
                Ok(p) => Some(p),
                Err(e) => {
                    tracing::warn!(
                        error = %e,
                        "cascade: failed to build judge provider, falling back to heuristic"
                    );
                    None
                }
            }
        } else {
            tracing::warn!(
                "cascade: classifier_mode=judge requires [llm] summary_model to \
                 be configured; falling back to heuristic"
            );
            None
        }
    } else {
        None
    };
    CascadeRouterConfig {
        quality_threshold,
        max_escalations: cascade_cfg.max_escalations,
        classifier_mode,
        window_size,
        max_cascade_tokens: cascade_cfg.max_cascade_tokens,
        summary_provider,
        cost_tiers: cascade_cfg.cost_tiers.clone(),
    }
}

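/// Builds an Ollama provider from the top-level `llm` settings (base URL,
/// model, embedding model), plus optional tool use and vision model.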
fn named_ollama(config: &Config) -> AnyProvider {
    let tool_use = config.llm.ollama.as_ref().is_some_and(|c| c.tool_use);
    let mut provider = OllamaProvider::new(
        &config.llm.base_url,
        config.llm.model.clone(),
        config.llm.embedding_model.clone(),
    )
    .with_tool_use(tool_use);
    if let Some(ref vm) = config.llm.vision_model {
        provider = provider.with_vision_model(vm.clone());
    }
    AnyProvider::Ollama(provider)
}

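/// Builds a Claude provider from `llm.cloud`; requires `ZEPH_CLAUDE_API_KEY`
/// in the vault.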
fn named_claude(config: &Config) -> Result<AnyProvider, BootstrapError> {
    let cloud = config.llm.cloud.as_ref().ok_or_else(|| {
        BootstrapError::Provider("llm.cloud config section required for Claude provider".into())
    })?;
    let api_key = config
        .secrets
        .claude_api_key
        .as_ref()
        .ok_or_else(|| BootstrapError::Provider("ZEPH_CLAUDE_API_KEY not found in vault".into()))?
        .expose()
        .to_owned();
    let provider = ClaudeProvider::new(api_key, cloud.model.clone(), cloud.max_tokens)
        .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
        .with_extended_context(cloud.enable_extended_context)
        .with_thinking_opt(cloud.thinking.clone())
        .map_err(|e| BootstrapError::Provider(format!("invalid thinking config: {e}")))?
        .with_server_compaction(cloud.server_compaction);
    Ok(AnyProvider::Claude(provider))
}

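/// Builds an OpenAI provider from `llm.openai`; requires `ZEPH_OPENAI_API_KEY`
/// in the vault.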
fn named_openai(config: &Config) -> Result<AnyProvider, BootstrapError> {
    let openai_cfg = config.llm.openai.as_ref().ok_or_else(|| {
        BootstrapError::Provider("llm.openai config section required for OpenAI provider".into())
    })?;
    let api_key = config
        .secrets
        .openai_api_key
        .as_ref()
        .ok_or_else(|| BootstrapError::Provider("ZEPH_OPENAI_API_KEY not found in vault".into()))?
        .expose()
        .to_owned();
    Ok(AnyProvider::OpenAi(
        OpenAiProvider::new(
            api_key,
            openai_cfg.base_url.clone(),
            openai_cfg.model.clone(),
            openai_cfg.max_tokens,
            openai_cfg.embedding_model.clone(),
            openai_cfg.reasoning_effort.clone(),
        )
        .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
    ))
}

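/// Builds a Gemini provider from `llm.gemini`; requires `ZEPH_GEMINI_API_KEY`
/// in the vault.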
fn named_gemini(config: &Config) -> Result<AnyProvider, BootstrapError> {
    let gemini_cfg = config.llm.gemini.as_ref().ok_or_else(|| {
        BootstrapError::Provider("llm.gemini config section required for Gemini provider".into())
    })?;
    let api_key = config
        .secrets
        .gemini_api_key
        .as_ref()
        .ok_or_else(|| BootstrapError::Provider("ZEPH_GEMINI_API_KEY not found in vault".into()))?
        .expose()
        .to_owned();
    let mut provider =
        GeminiProvider::new(api_key, gemini_cfg.model.clone(), gemini_cfg.max_tokens)
            .with_base_url(gemini_cfg.base_url.clone())
            .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
    if let Some(ref em) = gemini_cfg.embedding_model {
        provider = provider.with_embedding_model(em.clone());
    }
    if let Some(level) = gemini_cfg.thinking_level {
        provider = provider.with_thinking_level(level);
    }
    if let Some(budget) = gemini_cfg.thinking_budget {
        provider = provider
            .with_thinking_budget(budget)
            .map_err(|e| BootstrapError::Provider(e.to_string()))?;
    }
    if let Some(include) = gemini_cfg.include_thoughts {
        provider = provider.with_include_thoughts(include);
    }
    Ok(AnyProvider::Gemini(provider))
}

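/// Resolves a provider by name: `"ollama"`, `"claude"`, `"openai"`,
/// `"gemini"`, or an OpenAI-compatible entry. `"compatible"` selects the
/// first `llm.compatible` entry; any other name must match an entry's `name`
/// field. Compatible entries need an API key unless the endpoint is local.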
pub fn create_named_provider(name: &str, config: &Config) -> Result<AnyProvider, BootstrapError> {
    match name {
        "ollama" => Ok(named_ollama(config)),
        "claude" => named_claude(config),
        "openai" => named_openai(config),
        "gemini" => named_gemini(config),
        other => {
            if let Some(entries) = &config.llm.compatible {
                let entry = if other == "compatible" {
                    entries.first()
                } else {
                    entries.iter().find(|e| e.name == other)
                };
                if let Some(entry) = entry {
                    let has_key = entry.api_key.is_some()
                        || config.secrets.compatible_api_keys.contains_key(&entry.name)
                        || is_local_endpoint(&entry.base_url);
                    if !has_key {
                        return Err(BootstrapError::Provider(format!(
                            "ZEPH_COMPATIBLE_{}_API_KEY required for '{}' \
                             (set api_key in config, vault secret, or use a local endpoint)",
                            entry.name.to_uppercase(),
                            entry.name
                        )));
                    }
                    let api_key = entry.api_key.clone().unwrap_or_else(|| {
                        config
                            .secrets
                            .compatible_api_keys
                            .get(&entry.name)
                            .map(|s| s.expose().to_owned())
                            .unwrap_or_default()
                    });
                    return Ok(AnyProvider::Compatible(CompatibleProvider::new(
                        entry.name.clone(),
                        api_key,
                        entry.base_url.clone(),
                        entry.model.clone(),
                        entry.max_tokens,
                        entry.embedding_model.clone(),
                    )));
                }
            }
            Err(BootstrapError::Provider(format!(
                "unknown provider: {other}"
            )))
        }
    }
}

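/// Builds a lightweight provider for summarization from a `backend[/model]`
/// spec: `ollama/<model>`, `claude[/<model>]`, `openai[/<model>]`,
/// `gemini[/<model>]`, `compatible/<name>`, or `candle` (feature-gated).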
pub fn create_summary_provider(
    model_spec: &str,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let (backend, model_override) = if let Some((b, m)) = model_spec.split_once('/') {
        (b, Some(m))
    } else {
        (model_spec, None)
    };

    match backend {
        "ollama" => {
            let model = model_override.ok_or_else(|| {
                BootstrapError::Provider(
                    "ollama summary_model requires format 'ollama/<model>'".into(),
                )
            })?;
            Ok(AnyProvider::Ollama(OllamaProvider::new(
                &config.llm.base_url,
                model.to_owned(),
                String::new(),
            )))
        }
        "claude" => summary_claude(model_override, config),
        "openai" => summary_openai(model_override, config),
        "gemini" => summary_gemini(model_override, config),
        "compatible" => {
            let name = model_override.ok_or_else(|| {
                BootstrapError::Provider(
                    "compatible summary_model requires format 'compatible/<name>'".into(),
                )
            })?;
            create_named_provider(name, config)
        }
        #[cfg(feature = "candle")]
        "candle" => {
            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.candle config section required for candle summary provider".into(),
                )
            })?;
            let source = match candle_cfg.source.as_str() {
                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
                    path: std::path::PathBuf::from(&candle_cfg.local_path),
                },
                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
                    repo_id: config.llm.model.clone(),
                    filename: candle_cfg.filename.clone(),
                },
            };
            build_candle_provider(source, candle_cfg, &candle_cfg.device)
        }
        _ => Err(BootstrapError::Provider(format!(
            "unsupported summary_model format: '{model_spec}'. \
             Supported: ollama/<model>, claude[/<model>], openai[/<model>], \
             gemini[/<model>], compatible/<name>{candle}",
            candle = if cfg!(feature = "candle") {
                ", candle"
            } else {
                ""
            }
        ))),
    }
}

fn summary_claude(
    model_override: Option<&str>,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .claude_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider(
                "ZEPH_CLAUDE_API_KEY required for claude summary provider".into(),
            )
        })?
        .expose()
        .to_owned();
    let cloud = config.llm.cloud.as_ref();
    let model = model_override
        .map(str::to_owned)
        .or_else(|| cloud.map(|c| c.model.clone()))
        .unwrap_or_else(|| "claude-haiku-4-5-20251001".to_owned());
    let max_tokens = cloud.map_or(4096, |c| c.max_tokens.min(4096));
    let provider = ClaudeProvider::new(api_key, model, max_tokens)
        .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
    Ok(AnyProvider::Claude(provider))
}

fn summary_openai(
    model_override: Option<&str>,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .openai_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider(
                "ZEPH_OPENAI_API_KEY required for openai summary provider".into(),
            )
        })?
        .expose()
        .to_owned();
    let openai_cfg = config.llm.openai.as_ref();
    let base_url = openai_cfg.map_or_else(
        || "https://api.openai.com/v1".to_owned(),
        |c| c.base_url.clone(),
    );
    let model = model_override
        .map(str::to_owned)
        .or_else(|| openai_cfg.map(|c| c.model.clone()))
        .unwrap_or_else(|| "gpt-4o-mini".to_owned());
    let max_tokens = openai_cfg.map_or(4096, |c| c.max_tokens);
    Ok(AnyProvider::OpenAi(
        OpenAiProvider::new(api_key, base_url, model, max_tokens, None, None)
            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
    ))
}

fn summary_gemini(
    model_override: Option<&str>,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .gemini_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider(
                "ZEPH_GEMINI_API_KEY required for gemini summary provider".into(),
            )
        })?
        .expose()
        .to_owned();
    let gemini_cfg = config.llm.gemini.as_ref();
    let model = model_override
        .map(str::to_owned)
        .or_else(|| gemini_cfg.map(|c| c.model.clone()))
        .unwrap_or_else(|| "gemini-2.0-flash".to_owned());
    let max_tokens = gemini_cfg.map_or(4096, |c| c.max_tokens.min(4096));
    let base_url = gemini_cfg.map_or_else(
        || "https://generativelanguage.googleapis.com".to_owned(),
        |c| c.base_url.clone(),
    );
    Ok(AnyProvider::Gemini(
        GeminiProvider::new(api_key, model, max_tokens)
            .with_base_url(base_url)
            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
    ))
}

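/// Selects a candle compute device from a preference string: `"metal"` and
/// `"cuda"` require the matching cargo feature, `"auto"` probes Metal then
/// CUDA before falling back to CPU, and anything else means CPU.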
#[cfg(feature = "candle")]
pub fn select_device(
    preference: &str,
) -> Result<zeph_llm::candle_provider::Device, BootstrapError> {
    match preference {
        "metal" => {
            #[cfg(feature = "metal")]
            return zeph_llm::candle_provider::Device::new_metal(0)
                .map_err(|e| BootstrapError::Provider(e.to_string()));
            #[cfg(not(feature = "metal"))]
            return Err(BootstrapError::Provider(
                "candle compiled without metal feature".into(),
            ));
        }
        "cuda" => {
            #[cfg(feature = "cuda")]
            return zeph_llm::candle_provider::Device::new_cuda(0)
                .map_err(|e| BootstrapError::Provider(e.to_string()));
            #[cfg(not(feature = "cuda"))]
            return Err(BootstrapError::Provider(
                "candle compiled without cuda feature".into(),
            ));
        }
        "auto" => {
            #[cfg(feature = "metal")]
            if let Ok(device) = zeph_llm::candle_provider::Device::new_metal(0) {
                return Ok(device);
            }
            #[cfg(feature = "cuda")]
            if let Ok(device) = zeph_llm::candle_provider::Device::new_cuda(0) {
                return Ok(device);
            }
            Ok(zeph_llm::candle_provider::Device::Cpu)
        }
        _ => Ok(zeph_llm::candle_provider::Device::Cpu),
    }
}

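/// Assembles a [`CandleProvider`](zeph_llm::candle_provider::CandleProvider)
/// from a model source, the `llm.candle` config section, and a device
/// preference string.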
#[cfg(feature = "candle")]
fn build_candle_provider(
    source: zeph_llm::candle_provider::loader::ModelSource,
    candle_cfg: &crate::config::CandleConfig,
    device_pref: &str,
) -> Result<AnyProvider, BootstrapError> {
    let template =
        zeph_llm::candle_provider::template::ChatTemplate::parse_str(&candle_cfg.chat_template);
    let gen_config = zeph_llm::candle_provider::generate::GenerationConfig {
        temperature: candle_cfg.generation.temperature,
        top_p: candle_cfg.generation.top_p,
        top_k: candle_cfg.generation.top_k,
        max_tokens: candle_cfg.generation.capped_max_tokens(),
        seed: candle_cfg.generation.seed,
        repeat_penalty: candle_cfg.generation.repeat_penalty,
        repeat_last_n: candle_cfg.generation.repeat_last_n,
    };
    let device = select_device(device_pref)?;
    zeph_llm::candle_provider::CandleProvider::new(
        &source,
        template,
        gen_config,
        candle_cfg.embedding_repo.as_deref(),
        device,
    )
    .map(AnyProvider::Candle)
    .map_err(|e| BootstrapError::Provider(e.to_string()))
}

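/// Builds a standalone [`AnyProvider`] from an orchestrator-style provider
/// entry, falling back to the top-level `llm` settings for anything the
/// entry leaves unset.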
pub fn create_provider_from_config(
    pcfg: &crate::config::OrchestratorProviderConfig,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    match pcfg.provider_type.as_str() {
        "ollama" => {
            let base_url = pcfg.base_url.as_deref().unwrap_or(&config.llm.base_url);
            let model = pcfg.model.as_deref().unwrap_or(&config.llm.model);
            let embed = pcfg
                .embedding_model
                .clone()
                .unwrap_or_else(|| config.llm.embedding_model.clone());
            Ok(AnyProvider::Ollama(OllamaProvider::new(
                base_url,
                model.to_owned(),
                embed,
            )))
        }
        "claude" => pcfg_claude(pcfg, config),
        "openai" => pcfg_openai(pcfg, config),
        "gemini" => pcfg_gemini(pcfg, config),
        "compatible" => {
            let name = pcfg.model.as_deref().ok_or_else(|| {
                BootstrapError::Provider(
                    "compatible provider requires 'model' set to the entry name".into(),
                )
            })?;
            create_named_provider(name, config)
        }
        #[cfg(feature = "candle")]
        "candle" => {
            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.candle config section required for candle provider".into(),
                )
            })?;
            let source = match candle_cfg.source.as_str() {
                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
                    path: std::path::PathBuf::from(&candle_cfg.local_path),
                },
                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
                    repo_id: pcfg
                        .model
                        .clone()
                        .unwrap_or_else(|| config.llm.model.clone()),
                    filename: candle_cfg.filename.clone(),
                },
            };
            let device_pref = pcfg.device.as_deref().unwrap_or(&candle_cfg.device);
            build_candle_provider(source, candle_cfg, device_pref)
        }
        other => Err(BootstrapError::Provider(format!(
            "unknown provider type: '{other}'"
        ))),
    }
}

fn pcfg_claude(
    pcfg: &crate::config::OrchestratorProviderConfig,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .claude_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider("ZEPH_CLAUDE_API_KEY required for claude provider".into())
        })?
        .expose()
        .to_owned();
    let cloud = config.llm.cloud.as_ref();
    let model = pcfg
        .model
        .as_deref()
        .or_else(|| cloud.map(|c| c.model.as_str()))
        .unwrap_or("claude-haiku-4-5-20251001");
    let max_tokens = cloud.map_or(4096, |c| c.max_tokens);
    let enable_extended_context = cloud.is_some_and(|c| c.enable_extended_context);
    let provider = ClaudeProvider::new(api_key, model.to_owned(), max_tokens)
        .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
        .with_extended_context(enable_extended_context);
    Ok(AnyProvider::Claude(provider))
}

fn pcfg_openai(
    pcfg: &crate::config::OrchestratorProviderConfig,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .openai_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider("ZEPH_OPENAI_API_KEY required for openai provider".into())
        })?
        .expose()
        .to_owned();
    let openai_cfg = config.llm.openai.as_ref();
    let base_url = pcfg
        .base_url
        .clone()
        .or_else(|| openai_cfg.map(|c| c.base_url.clone()))
        .unwrap_or_else(|| "https://api.openai.com/v1".to_owned());
    let model = pcfg
        .model
        .as_deref()
        .or_else(|| openai_cfg.map(|c| c.model.as_str()))
        .unwrap_or("gpt-4o-mini");
    let max_tokens = openai_cfg.map_or(4096, |c| c.max_tokens);
    let embed = pcfg
        .embedding_model
        .clone()
        .or_else(|| openai_cfg.and_then(|c| c.embedding_model.clone()));
    Ok(AnyProvider::OpenAi(
        OpenAiProvider::new(api_key, base_url, model.to_owned(), max_tokens, embed, None)
            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
    ))
}

fn pcfg_gemini(
    pcfg: &crate::config::OrchestratorProviderConfig,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    let api_key = config
        .secrets
        .gemini_api_key
        .as_ref()
        .ok_or_else(|| {
            BootstrapError::Provider("ZEPH_GEMINI_API_KEY required for gemini provider".into())
        })?
        .expose()
        .to_owned();
    let gemini_cfg = config.llm.gemini.as_ref();
    let model = pcfg
        .model
        .as_deref()
        .or_else(|| gemini_cfg.map(|c| c.model.as_str()))
        .unwrap_or("gemini-2.0-flash");
    let max_tokens = gemini_cfg.map_or(4096, |c| c.max_tokens);
    let base_url = gemini_cfg.map_or_else(
        || "https://generativelanguage.googleapis.com".to_owned(),
        |c| c.base_url.clone(),
    );
    let mut provider = GeminiProvider::new(api_key, model.to_owned(), max_tokens)
        .with_base_url(base_url)
        .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
    if let Some(em) = gemini_cfg.and_then(|c| c.embedding_model.as_deref()) {
        provider = provider.with_embedding_model(em);
    }
    if let Some(level) = gemini_cfg.and_then(|c| c.thinking_level) {
        provider = provider.with_thinking_level(level);
    }
    if let Some(budget) = gemini_cfg.and_then(|c| c.thinking_budget) {
        provider = provider
            .with_thinking_budget(budget)
            .map_err(|e| BootstrapError::Provider(e.to_string()))?;
    }
    if let Some(include) = gemini_cfg.and_then(|c| c.include_thoughts) {
        provider = provider.with_include_thoughts(include);
    }
    Ok(AnyProvider::Gemini(provider))
}

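/// Builds an orchestrator [`SubProvider`](zeph_llm::orchestrator::SubProvider)
/// from a provider entry; mirrors [`create_provider_from_config`] but returns
/// the orchestrator-specific wrapper type.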
#[allow(clippy::too_many_lines)]
fn build_sub_provider(
    pcfg: &crate::config::OrchestratorProviderConfig,
    config: &Config,
) -> Result<zeph_llm::orchestrator::SubProvider, BootstrapError> {
    use zeph_llm::orchestrator::SubProvider;
    match pcfg.provider_type.as_str() {
        "ollama" => {
            let base_url = pcfg.base_url.as_deref().unwrap_or(&config.llm.base_url);
            let model = pcfg.model.as_deref().unwrap_or(&config.llm.model);
            let embed = pcfg
                .embedding_model
                .clone()
                .unwrap_or_else(|| config.llm.embedding_model.clone());
            Ok(SubProvider::Ollama(OllamaProvider::new(
                base_url,
                model.to_owned(),
                embed,
            )))
        }
        "claude" => {
            let cloud = config.llm.cloud.as_ref().ok_or_else(|| {
                BootstrapError::Provider("llm.cloud config required for claude sub-provider".into())
            })?;
            let api_key = config
                .secrets
                .claude_api_key
                .as_ref()
                .ok_or_else(|| {
                    BootstrapError::Provider(
                        "ZEPH_CLAUDE_API_KEY required for claude sub-provider".into(),
                    )
                })?
                .expose()
                .to_owned();
            let model = pcfg.model.as_deref().unwrap_or(&cloud.model);
            let sub = ClaudeProvider::new(api_key, model.to_owned(), cloud.max_tokens)
                .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
                .with_extended_context(cloud.enable_extended_context)
                .with_thinking_opt(cloud.thinking.clone())
                .map_err(|e| BootstrapError::Provider(format!("invalid thinking config: {e}")))?
                .with_server_compaction(cloud.server_compaction);
            Ok(SubProvider::Claude(sub))
        }
        "openai" => {
            let openai_cfg = config.llm.openai.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.openai config required for openai sub-provider".into(),
                )
            })?;
            let api_key = config
                .secrets
                .openai_api_key
                .as_ref()
                .ok_or_else(|| {
                    BootstrapError::Provider(
                        "ZEPH_OPENAI_API_KEY required for openai sub-provider".into(),
                    )
                })?
                .expose()
                .to_owned();
            let base_url = pcfg
                .base_url
                .clone()
                .unwrap_or_else(|| openai_cfg.base_url.clone());
            let model = pcfg.model.as_deref().unwrap_or(&openai_cfg.model);
            let embed = pcfg
                .embedding_model
                .clone()
                .or_else(|| openai_cfg.embedding_model.clone());
            Ok(SubProvider::OpenAi(
                OpenAiProvider::new(
                    api_key,
                    base_url,
                    model.to_owned(),
                    openai_cfg.max_tokens,
                    embed,
                    openai_cfg.reasoning_effort.clone(),
                )
                .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
            ))
        }
        "gemini" => {
            let api_key = config
                .secrets
                .gemini_api_key
                .as_ref()
                .ok_or_else(|| {
                    BootstrapError::Provider(
                        "ZEPH_GEMINI_API_KEY required for gemini sub-provider".into(),
                    )
                })?
                .expose()
                .to_owned();
            let gemini_cfg = config.llm.gemini.as_ref();
            let model = pcfg
                .model
                .as_deref()
                .or_else(|| gemini_cfg.map(|c| c.model.as_str()))
                .unwrap_or("gemini-2.0-flash");
            let max_tokens = gemini_cfg.map_or(8192, |c| c.max_tokens);
            let base_url = gemini_cfg.map_or_else(
                || "https://generativelanguage.googleapis.com".to_owned(),
                |c| c.base_url.clone(),
            );
            let mut provider = GeminiProvider::new(api_key, model.to_owned(), max_tokens)
                .with_base_url(base_url)
                .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
            if let Some(level) = gemini_cfg.and_then(|c| c.thinking_level) {
                provider = provider.with_thinking_level(level);
            }
            if let Some(budget) = gemini_cfg.and_then(|c| c.thinking_budget) {
                provider = provider.with_thinking_budget(budget).map_err(|e| {
                    BootstrapError::Provider(format!("invalid thinking_budget: {e}"))
                })?;
            }
            if let Some(include) = gemini_cfg.and_then(|c| c.include_thoughts) {
                provider = provider.with_include_thoughts(include);
            }
            Ok(SubProvider::Gemini(provider))
        }
        #[cfg(feature = "candle")]
        "candle" => {
            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
                BootstrapError::Provider(
                    "llm.candle config required for candle sub-provider".into(),
                )
            })?;
            let source = match candle_cfg.source.as_str() {
                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
                    path: std::path::PathBuf::from(&candle_cfg.local_path),
                },
                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
                    repo_id: pcfg
                        .model
                        .clone()
                        .unwrap_or_else(|| config.llm.model.clone()),
                    filename: candle_cfg.filename.clone(),
                },
            };
            let device_pref = pcfg.device.as_deref().unwrap_or(&candle_cfg.device);
            let any = build_candle_provider(source, candle_cfg, device_pref)?;
            if let AnyProvider::Candle(p) = any {
                Ok(SubProvider::Candle(p))
            } else {
                unreachable!("build_candle_provider always returns AnyProvider::Candle")
            }
        }
        other => Err(BootstrapError::Provider(format!(
            "unknown orchestrator sub-provider type: {other}"
        ))),
    }
}

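/// Assembles a [`ModelOrchestrator`](zeph_llm::orchestrator::ModelOrchestrator)
/// from the `llm.orchestrator` section: one sub-provider per named entry and a
/// task-type to provider-chain routing table.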
pub fn build_orchestrator(
    config: &Config,
) -> Result<zeph_llm::orchestrator::ModelOrchestrator, BootstrapError> {
    use std::collections::HashMap;
    use zeph_llm::orchestrator::{ModelOrchestrator, TaskType};

    let orch_cfg = config.llm.orchestrator.as_ref().ok_or_else(|| {
        BootstrapError::Provider(
            "llm.orchestrator config section required for orchestrator provider".into(),
        )
    })?;

    let mut providers = HashMap::new();
    for (name, pcfg) in &orch_cfg.providers {
        let provider = build_sub_provider(pcfg, config)?;
        providers.insert(name.clone(), provider);
    }

    let mut routes = HashMap::new();
    for (task_str, chain) in &orch_cfg.routes {
        let task = TaskType::parse_str(task_str);
        routes.insert(task, chain.clone());
    }

    ModelOrchestrator::new(
        routes,
        providers,
        orch_cfg.default.clone(),
        orch_cfg.embed.clone(),
    )
    .map_err(|e| BootstrapError::Provider(e.to_string()))
}

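/// Heuristically decides whether a base URL points at a local endpoint:
/// localhost, IPv4/IPv6 loopback, RFC 1918 private or link-local IPv4
/// addresses, and hosts ending in `.local` or `.internal` all skip the
/// API-key requirement for compatible providers.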
fn is_local_endpoint(base_url: &str) -> bool {
    let after_scheme = base_url
        .strip_prefix("https://")
        .or_else(|| base_url.strip_prefix("http://"))
        .unwrap_or(base_url);
    let host_port = after_scheme.split('/').next().unwrap_or(after_scheme);
    // Bracketed IPv6 literals ("[::1]:8080") keep their colons inside the
    // brackets, so strip the brackets before splitting off the port; naively
    // splitting on ':' would truncate the host to "[".
    let host = if let Some(rest) = host_port.strip_prefix('[') {
        rest.split(']').next().unwrap_or(rest)
    } else {
        host_port.split(':').next().unwrap_or(host_port)
    };

    if host.eq_ignore_ascii_case("localhost") || host == "127.0.0.1" || host == "::1" {
        return true;
    }
    if let Ok(ip) = host.parse::<std::net::IpAddr>() {
        return match ip {
            std::net::IpAddr::V4(v4) => v4.is_loopback() || v4.is_private() || v4.is_link_local(),
            std::net::IpAddr::V6(v6) => v6.is_loopback(),
        };
    }
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    {
        host.ends_with(".local") || host.ends_with(".internal")
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn local_endpoints_detected() {
        assert!(is_local_endpoint("http://localhost:11434/v1"));
        assert!(is_local_endpoint("http://127.0.0.1:8080"));
        assert!(is_local_endpoint("https://localhost/api"));
        assert!(is_local_endpoint("http://192.168.1.100:11434/v1"));
        assert!(is_local_endpoint("http://10.0.0.5:8000"));
        assert!(is_local_endpoint("http://172.16.0.1:9090"));
        assert!(is_local_endpoint("http://myhost.local:11434"));
        assert!(is_local_endpoint("http://service.internal:8080"));
    }
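
    // Exercises the bracketed-IPv6 handling in `is_local_endpoint`: the port
    // split must not cut "[::1]:8080" at the first colon inside the brackets.
    #[test]
    fn bracketed_ipv6_loopback_detected() {
        assert!(is_local_endpoint("http://[::1]:8080"));
        assert!(is_local_endpoint("https://[::1]/api"));
    }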

    #[test]
    fn remote_endpoints_not_local() {
        assert!(!is_local_endpoint("https://api.openai.com/v1"));
        assert!(!is_local_endpoint("https://api.anthropic.com"));
        assert!(!is_local_endpoint("http://8.8.8.8:11434"));
        assert!(!is_local_endpoint("https://my-server.example.com/v1"));
    }
}
979}