// zeph_core/bootstrap/provider.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4use zeph_llm::any::AnyProvider;
5
6/// Error type for bootstrap / provider construction failures.
7///
8/// String-based variants flatten the error chain intentionally: bootstrap errors are
9/// terminal (the application exits), so downcasting is not needed at this stage.
10/// If a future phase requires programmatic retry on specific failures, expand these
11/// variants into typed sub-errors.
12#[derive(Debug, thiserror::Error)]
13pub enum BootstrapError {
14    #[error("config error: {0}")]
15    Config(#[from] crate::config::ConfigError),
16    #[error("provider error: {0}")]
17    Provider(String),
18    #[error("memory error: {0}")]
19    Memory(String),
20    #[error("vault init error: {0}")]
21    VaultInit(crate::vault::AgeVaultError),
22    #[error("I/O error: {0}")]
23    Io(#[from] std::io::Error),
24}
25use zeph_llm::claude::ClaudeProvider;
26use zeph_llm::compatible::CompatibleProvider;
27use zeph_llm::gemini::GeminiProvider;
28use zeph_llm::http::llm_client;
29use zeph_llm::ollama::OllamaProvider;
30use zeph_llm::openai::OpenAiProvider;
31use zeph_llm::router::cascade::ClassifierMode;
32use zeph_llm::router::{CascadeRouterConfig, RouterProvider};
33
34use crate::config::{Config, ProviderKind};
35
36pub fn create_provider(config: &Config) -> Result<AnyProvider, BootstrapError> {
37    match config.llm.provider {
38        ProviderKind::Ollama | ProviderKind::Claude => {
39            create_named_provider(config.llm.provider.as_str(), config)
40        }
41        ProviderKind::OpenAi => create_named_provider("openai", config),
42        ProviderKind::Gemini => create_named_provider("gemini", config),
43        ProviderKind::Compatible => create_named_provider("compatible", config),
44        #[cfg(feature = "candle")]
45        ProviderKind::Candle => {
46            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
47                BootstrapError::Provider(
48                    "llm.candle config section required for candle provider".into(),
49                )
50            })?;
51            let source = match candle_cfg.source.as_str() {
52                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
53                    path: std::path::PathBuf::from(&candle_cfg.local_path),
54                },
55                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
56                    repo_id: config.llm.model.clone(),
57                    filename: candle_cfg.filename.clone(),
58                },
59            };
60            build_candle_provider(source, candle_cfg, &candle_cfg.device)
61        }
62        ProviderKind::Orchestrator => {
63            let orch = build_orchestrator(config)?;
64            Ok(AnyProvider::Orchestrator(Box::new(orch)))
65        }
66        ProviderKind::Router => {
67            let router_cfg = config.llm.router.as_ref().ok_or_else(|| {
68                BootstrapError::Provider(
69                    "llm.router config section required for router provider".into(),
70                )
71            })?;
72
73            let mut providers = Vec::new();
74            for name in &router_cfg.chain {
75                match create_named_provider(name, config) {
76                    Ok(p) => providers.push(p),
77                    Err(e) => {
78                        tracing::warn!(
79                            provider = name.as_str(),
80                            error = %e,
81                            "skipping router chain provider (will initialize on demand if needed)"
82                        );
83                    }
84                }
85            }
86            if providers.is_empty() {
87                return Err(BootstrapError::Provider(format!(
88                    "router chain is empty: none of [{}] could be initialized",
89                    router_cfg.chain.join(", ")
90                )));
91            }
92            let router = if router_cfg.strategy == crate::config::RouterStrategyConfig::Thompson {
93                let state_path = router_cfg
94                    .thompson_state_path
95                    .as_deref()
96                    .map(std::path::Path::new);
97                RouterProvider::new(providers).with_thompson(state_path)
98            } else if router_cfg.strategy == crate::config::RouterStrategyConfig::Cascade {
99                let cascade_cfg = router_cfg.cascade.clone().unwrap_or_default();
100                let router_cascade_cfg = build_cascade_router_config(&cascade_cfg, config);
101                RouterProvider::new(providers).with_cascade(router_cascade_cfg)
102            } else if config.llm.router_ema_enabled {
103                let raw_alpha = config.llm.router_ema_alpha;
104                let alpha = raw_alpha.clamp(f64::MIN_POSITIVE, 1.0);
105                if (alpha - raw_alpha).abs() > f64::EPSILON {
106                    tracing::warn!(
107                        raw_alpha,
108                        clamped = alpha,
109                        "router_ema_alpha out of range [MIN_POSITIVE, 1.0], clamped"
110                    );
111                }
112                RouterProvider::new(providers).with_ema(alpha, config.llm.router_reorder_interval)
113            } else {
114                RouterProvider::new(providers)
115            };
116            Ok(AnyProvider::Router(Box::new(router)))
117        }
118        #[cfg(not(feature = "candle"))]
119        ProviderKind::Candle => Err(BootstrapError::Provider(
120            "candle feature is not enabled".into(),
121        )),
122    }
123}
124
125fn build_cascade_router_config(
126    cascade_cfg: &crate::config::CascadeConfig,
127    config: &Config,
128) -> CascadeRouterConfig {
129    let classifier_mode = match cascade_cfg.classifier_mode {
130        crate::config::CascadeClassifierMode::Heuristic => ClassifierMode::Heuristic,
131        crate::config::CascadeClassifierMode::Judge => ClassifierMode::Judge,
132    };
133    // SEC-CASCADE-01: clamp quality_threshold to [0.0, 1.0]; reject NaN/Inf.
134    let raw_threshold = cascade_cfg.quality_threshold;
135    let quality_threshold = if raw_threshold.is_finite() {
136        raw_threshold.clamp(0.0, 1.0)
137    } else {
138        tracing::warn!(
139            raw_threshold,
140            "cascade quality_threshold is non-finite, defaulting to 0.5"
141        );
142        0.5
143    };
144    if (quality_threshold - raw_threshold).abs() > f64::EPSILON {
145        tracing::warn!(
146            raw_threshold,
147            clamped = quality_threshold,
148            "cascade quality_threshold out of range [0.0, 1.0], clamped"
149        );
150    }
151    // SEC-CASCADE-02: clamp window_size to minimum 1 to prevent silent no-op tracking.
152    let window_size = cascade_cfg.window_size.max(1);
153    if window_size != cascade_cfg.window_size {
154        tracing::warn!(
155            raw = cascade_cfg.window_size,
156            "cascade window_size=0 is invalid, clamped to 1"
157        );
158    }
159    // Build summary provider for judge mode.
160    let summary_provider = if classifier_mode == ClassifierMode::Judge {
161        if let Some(model_spec) = config.llm.summary_model.as_deref() {
162            match create_summary_provider(model_spec, config) {
163                Ok(p) => Some(p),
164                Err(e) => {
165                    tracing::warn!(
166                        error = %e,
167                        "cascade: failed to build judge provider, falling back to heuristic"
168                    );
169                    None
170                }
171            }
172        } else {
173            tracing::warn!(
174                "cascade: classifier_mode=judge requires [llm] summary_model to \
175                 be configured; falling back to heuristic"
176            );
177            None
178        }
179    } else {
180        None
181    };
182    CascadeRouterConfig {
183        quality_threshold,
184        max_escalations: cascade_cfg.max_escalations,
185        classifier_mode,
186        window_size,
187        max_cascade_tokens: cascade_cfg.max_cascade_tokens,
188        summary_provider,
189        cost_tiers: cascade_cfg.cost_tiers.clone(),
190    }
191}
192
193fn named_ollama(config: &Config) -> AnyProvider {
194    let tool_use = config.llm.ollama.as_ref().is_some_and(|c| c.tool_use);
195    let mut provider = OllamaProvider::new(
196        &config.llm.base_url,
197        config.llm.model.clone(),
198        config.llm.embedding_model.clone(),
199    )
200    .with_tool_use(tool_use);
201    if let Some(ref vm) = config.llm.vision_model {
202        provider = provider.with_vision_model(vm.clone());
203    }
204    AnyProvider::Ollama(provider)
205}
206
207fn named_claude(config: &Config) -> Result<AnyProvider, BootstrapError> {
208    let cloud = config.llm.cloud.as_ref().ok_or_else(|| {
209        BootstrapError::Provider("llm.cloud config section required for Claude provider".into())
210    })?;
211    let api_key = config
212        .secrets
213        .claude_api_key
214        .as_ref()
215        .ok_or_else(|| BootstrapError::Provider("ZEPH_CLAUDE_API_KEY not found in vault".into()))?
216        .expose()
217        .to_owned();
218    let provider = ClaudeProvider::new(api_key, cloud.model.clone(), cloud.max_tokens)
219        .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
220        .with_extended_context(cloud.enable_extended_context)
221        .with_thinking_opt(cloud.thinking.clone())
222        .map_err(|e| BootstrapError::Provider(format!("invalid thinking config: {e}")))?
223        .with_server_compaction(cloud.server_compaction);
224    Ok(AnyProvider::Claude(provider))
225}
226
227fn named_openai(config: &Config) -> Result<AnyProvider, BootstrapError> {
228    let openai_cfg = config.llm.openai.as_ref().ok_or_else(|| {
229        BootstrapError::Provider("llm.openai config section required for OpenAI provider".into())
230    })?;
231    let api_key = config
232        .secrets
233        .openai_api_key
234        .as_ref()
235        .ok_or_else(|| BootstrapError::Provider("ZEPH_OPENAI_API_KEY not found in vault".into()))?
236        .expose()
237        .to_owned();
238    Ok(AnyProvider::OpenAi(
239        OpenAiProvider::new(
240            api_key,
241            openai_cfg.base_url.clone(),
242            openai_cfg.model.clone(),
243            openai_cfg.max_tokens,
244            openai_cfg.embedding_model.clone(),
245            openai_cfg.reasoning_effort.clone(),
246        )
247        .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
248    ))
249}
250
251fn named_gemini(config: &Config) -> Result<AnyProvider, BootstrapError> {
252    let gemini_cfg = config.llm.gemini.as_ref().ok_or_else(|| {
253        BootstrapError::Provider("llm.gemini config section required for Gemini provider".into())
254    })?;
255    let api_key = config
256        .secrets
257        .gemini_api_key
258        .as_ref()
259        .ok_or_else(|| BootstrapError::Provider("ZEPH_GEMINI_API_KEY not found in vault".into()))?
260        .expose()
261        .to_owned();
262    let mut provider =
263        GeminiProvider::new(api_key, gemini_cfg.model.clone(), gemini_cfg.max_tokens)
264            .with_base_url(gemini_cfg.base_url.clone())
265            .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
266    if let Some(ref em) = gemini_cfg.embedding_model {
267        provider = provider.with_embedding_model(em.clone());
268    }
269    if let Some(level) = gemini_cfg.thinking_level {
270        provider = provider.with_thinking_level(level);
271    }
272    if let Some(budget) = gemini_cfg.thinking_budget {
273        provider = provider
274            .with_thinking_budget(budget)
275            .map_err(|e| BootstrapError::Provider(e.to_string()))?;
276    }
277    if let Some(include) = gemini_cfg.include_thoughts {
278        provider = provider.with_include_thoughts(include);
279    }
280    Ok(AnyProvider::Gemini(provider))
281}
282
283pub fn create_named_provider(name: &str, config: &Config) -> Result<AnyProvider, BootstrapError> {
284    match name {
285        "ollama" => Ok(named_ollama(config)),
286        "claude" => named_claude(config),
287        "openai" => named_openai(config),
288        "gemini" => named_gemini(config),
289        other => {
290            if let Some(entries) = &config.llm.compatible {
291                let entry = if other == "compatible" {
292                    entries.first()
293                } else {
294                    entries.iter().find(|e| e.name == other)
295                };
296                if let Some(entry) = entry {
297                    let has_key = entry.api_key.is_some()
298                        || config.secrets.compatible_api_keys.contains_key(&entry.name)
299                        || is_local_endpoint(&entry.base_url);
300                    if !has_key {
301                        return Err(BootstrapError::Provider(format!(
302                            "ZEPH_COMPATIBLE_{}_API_KEY required for '{}' \
303                             (set api_key in config, vault secret, or use a local endpoint)",
304                            entry.name.to_uppercase(),
305                            entry.name
306                        )));
307                    }
308                    // Resolve key: config field > vault secret > empty for local.
309                    let api_key = entry.api_key.clone().unwrap_or_else(|| {
310                        config
311                            .secrets
312                            .compatible_api_keys
313                            .get(&entry.name)
314                            .map(|s| s.expose().to_owned()) // lgtm[rust/cleartext-logging]
315                            .unwrap_or_default()
316                    });
317                    return Ok(AnyProvider::Compatible(CompatibleProvider::new(
318                        entry.name.clone(),
319                        api_key,
320                        entry.base_url.clone(),
321                        entry.model.clone(),
322                        entry.max_tokens,
323                        entry.embedding_model.clone(),
324                    )));
325                }
326            }
327            Err(BootstrapError::Provider(format!(
328                "unknown provider: {other}"
329            )))
330        }
331    }
332}
333
334/// Create an `AnyProvider` for use as the summarization provider.
335///
336/// `model_spec` format (set via `[llm] summary_model`):
337/// - `ollama/<model>` — Ollama at the configured `base_url`, e.g. `ollama/qwen3:1.7b`
338/// - `claude` or `claude/<model>` — Claude API; requires `ZEPH_CLAUDE_API_KEY`
339/// - `openai` or `openai/<model>` — OpenAI-compatible API; requires `ZEPH_OPENAI_API_KEY`
340/// - `compatible/<name>` — named entry from `[[llm.compatible]]`
341/// - `candle` — local candle model (requires `[llm.candle]` config; feature-gated)
342pub fn create_summary_provider(
343    model_spec: &str,
344    config: &Config,
345) -> Result<AnyProvider, BootstrapError> {
346    let (backend, model_override) = if let Some((b, m)) = model_spec.split_once('/') {
347        (b, Some(m))
348    } else {
349        (model_spec, None)
350    };
351
352    match backend {
353        "ollama" => {
354            let model = model_override.ok_or_else(|| {
355                BootstrapError::Provider(
356                    "ollama summary_model requires format 'ollama/<model>'".into(),
357                )
358            })?;
359            Ok(AnyProvider::Ollama(OllamaProvider::new(
360                &config.llm.base_url,
361                model.to_owned(),
362                String::new(),
363            )))
364        }
365        "claude" => summary_claude(model_override, config),
366        "openai" => summary_openai(model_override, config),
367        "gemini" => summary_gemini(model_override, config),
368        "compatible" => {
369            let name = model_override.ok_or_else(|| {
370                BootstrapError::Provider(
371                    "compatible summary_model requires format 'compatible/<name>'".into(),
372                )
373            })?;
374            create_named_provider(name, config)
375        }
376        #[cfg(feature = "candle")]
377        "candle" => {
378            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
379                BootstrapError::Provider(
380                    "llm.candle config section required for candle summary provider".into(),
381                )
382            })?;
383            let source = match candle_cfg.source.as_str() {
384                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
385                    path: std::path::PathBuf::from(&candle_cfg.local_path),
386                },
387                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
388                    repo_id: config.llm.model.clone(),
389                    filename: candle_cfg.filename.clone(),
390                },
391            };
392            build_candle_provider(source, candle_cfg, &candle_cfg.device)
393        }
394        _ => Err(BootstrapError::Provider(format!(
395            "unsupported summary_model format: '{model_spec}'. \
396             Supported: ollama/<model>, claude[/<model>], openai[/<model>], \
397             compatible/<name>{candle}",
398            candle = if cfg!(feature = "candle") {
399                ", candle"
400            } else {
401                ""
402            }
403        ))),
404    }
405}
406
407fn summary_claude(
408    model_override: Option<&str>,
409    config: &Config,
410) -> Result<AnyProvider, BootstrapError> {
411    let api_key = config
412        .secrets
413        .claude_api_key
414        .as_ref()
415        .ok_or_else(|| {
416            BootstrapError::Provider(
417                "ZEPH_CLAUDE_API_KEY required for claude summary provider".into(),
418            )
419        })?
420        .expose()
421        .to_owned();
422    let cloud = config.llm.cloud.as_ref();
423    let model = model_override
424        .map(str::to_owned)
425        .or_else(|| cloud.map(|c| c.model.clone()))
426        .unwrap_or_else(|| "claude-haiku-4-5-20251001".to_owned());
427    // Cap summary max_tokens at 4096 — summaries are short.
428    let max_tokens = cloud.map_or(4096, |c| c.max_tokens.min(4096));
429    // Extended context intentionally skipped: summaries are short by design and the
430    // 1M window adds unnecessary cost.
431    let provider = ClaudeProvider::new(api_key, model, max_tokens)
432        .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
433    Ok(AnyProvider::Claude(provider))
434}
435
436fn summary_openai(
437    model_override: Option<&str>,
438    config: &Config,
439) -> Result<AnyProvider, BootstrapError> {
440    let api_key = config
441        .secrets
442        .openai_api_key
443        .as_ref()
444        .ok_or_else(|| {
445            BootstrapError::Provider(
446                "ZEPH_OPENAI_API_KEY required for openai summary provider".into(),
447            )
448        })?
449        .expose()
450        .to_owned();
451    let openai_cfg = config.llm.openai.as_ref();
452    let base_url = openai_cfg.map_or_else(
453        || "https://api.openai.com/v1".to_owned(),
454        |c| c.base_url.clone(),
455    );
456    let model = model_override
457        .map(str::to_owned)
458        .or_else(|| openai_cfg.map(|c| c.model.clone()))
459        .unwrap_or_else(|| "gpt-4o-mini".to_owned());
460    let max_tokens = openai_cfg.map_or(4096, |c| c.max_tokens);
461    Ok(AnyProvider::OpenAi(
462        OpenAiProvider::new(api_key, base_url, model, max_tokens, None, None)
463            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
464    ))
465}
466
467fn summary_gemini(
468    model_override: Option<&str>,
469    config: &Config,
470) -> Result<AnyProvider, BootstrapError> {
471    let api_key = config
472        .secrets
473        .gemini_api_key
474        .as_ref()
475        .ok_or_else(|| {
476            BootstrapError::Provider(
477                "ZEPH_GEMINI_API_KEY required for gemini summary provider".into(),
478            )
479        })?
480        .expose()
481        .to_owned();
482    let gemini_cfg = config.llm.gemini.as_ref();
483    let model = model_override
484        .map(str::to_owned)
485        .or_else(|| gemini_cfg.map(|c| c.model.clone()))
486        .unwrap_or_else(|| "gemini-2.0-flash".to_owned());
487    let max_tokens = gemini_cfg.map_or(4096, |c| c.max_tokens.min(4096));
488    let base_url = gemini_cfg.map_or_else(
489        || "https://generativelanguage.googleapis.com".to_owned(),
490        |c| c.base_url.clone(),
491    );
492    // thinking_level intentionally not wired here: summary provider uses a
493    // capped max_tokens budget and is not expected to run thinking models.
494    Ok(AnyProvider::Gemini(
495        GeminiProvider::new(api_key, model, max_tokens)
496            .with_base_url(base_url)
497            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
498    ))
499}
500
/// Select a candle compute device from a user preference string.
///
/// Recognized `preference` values:
/// - `"metal"` — Metal device 0; errors if the `metal` feature is compiled out
///   or device initialization fails.
/// - `"cuda"` — CUDA device 0; errors if the `cuda` feature is compiled out
///   or device initialization fails.
/// - `"auto"` — try Metal, then CUDA (whichever features are compiled in),
///   falling back to CPU; never errors.
/// - anything else — CPU.
///
/// # Errors
/// Returns [`BootstrapError::Provider`] only for an explicitly requested
/// backend that is unavailable.
#[cfg(feature = "candle")]
pub fn select_device(
    preference: &str,
) -> Result<zeph_llm::candle_provider::Device, BootstrapError> {
    match preference {
        "metal" => {
            #[cfg(feature = "metal")]
            return zeph_llm::candle_provider::Device::new_metal(0)
                .map_err(|e| BootstrapError::Provider(e.to_string()));
            #[cfg(not(feature = "metal"))]
            return Err(BootstrapError::Provider(
                "candle compiled without metal feature".into(),
            ));
        }
        "cuda" => {
            #[cfg(feature = "cuda")]
            return zeph_llm::candle_provider::Device::new_cuda(0)
                .map_err(|e| BootstrapError::Provider(e.to_string()));
            #[cfg(not(feature = "cuda"))]
            return Err(BootstrapError::Provider(
                "candle compiled without cuda feature".into(),
            ));
        }
        "auto" => {
            // Preference order: Metal, then CUDA, then CPU. Device init
            // failures are intentionally swallowed — "auto" always succeeds.
            #[cfg(feature = "metal")]
            if let Ok(device) = zeph_llm::candle_provider::Device::new_metal(0) {
                return Ok(device);
            }
            #[cfg(feature = "cuda")]
            if let Ok(device) = zeph_llm::candle_provider::Device::new_cuda(0) {
                return Ok(device);
            }
            Ok(zeph_llm::candle_provider::Device::Cpu)
        }
        // Unrecognized preference silently means CPU.
        _ => Ok(zeph_llm::candle_provider::Device::Cpu),
    }
}
538
/// Construct a candle-backed [`AnyProvider`] from a model source, the
/// `[llm.candle]` config section, and a device preference string.
#[cfg(feature = "candle")]
fn build_candle_provider(
    source: zeph_llm::candle_provider::loader::ModelSource,
    candle_cfg: &crate::config::CandleConfig,
    device_pref: &str,
) -> Result<AnyProvider, BootstrapError> {
    let chat_template =
        zeph_llm::candle_provider::template::ChatTemplate::parse_str(&candle_cfg.chat_template);
    let gen = &candle_cfg.generation;
    let gen_config = zeph_llm::candle_provider::generate::GenerationConfig {
        temperature: gen.temperature,
        top_p: gen.top_p,
        top_k: gen.top_k,
        max_tokens: gen.capped_max_tokens(),
        seed: gen.seed,
        repeat_penalty: gen.repeat_penalty,
        repeat_last_n: gen.repeat_last_n,
    };
    let device = select_device(device_pref)?;
    match zeph_llm::candle_provider::CandleProvider::new(
        &source,
        chat_template,
        gen_config,
        candle_cfg.embedding_repo.as_deref(),
        device,
    ) {
        Ok(provider) => Ok(AnyProvider::Candle(provider)),
        Err(e) => Err(BootstrapError::Provider(e.to_string())),
    }
}
567
568/// Create an `AnyProvider` from a structured provider config (`OrchestratorProviderConfig`).
569///
570/// Mirrors the per-entry creation logic in `build_orchestrator` but returns `AnyProvider`
571/// so the result can be used outside the orchestrator context (e.g. as a summary provider).
572pub fn create_provider_from_config(
573    pcfg: &crate::config::OrchestratorProviderConfig,
574    config: &Config,
575) -> Result<AnyProvider, BootstrapError> {
576    match pcfg.provider_type.as_str() {
577        "ollama" => {
578            let base_url = pcfg.base_url.as_deref().unwrap_or(&config.llm.base_url);
579            let model = pcfg.model.as_deref().unwrap_or(&config.llm.model);
580            let embed = pcfg
581                .embedding_model
582                .clone()
583                .unwrap_or_else(|| config.llm.embedding_model.clone());
584            Ok(AnyProvider::Ollama(OllamaProvider::new(
585                base_url,
586                model.to_owned(),
587                embed,
588            )))
589        }
590        "claude" => pcfg_claude(pcfg, config),
591        "openai" => pcfg_openai(pcfg, config),
592        "gemini" => pcfg_gemini(pcfg, config),
593        "compatible" => {
594            let name = pcfg.model.as_deref().ok_or_else(|| {
595                BootstrapError::Provider(
596                    "compatible provider requires 'model' set to the entry name".into(),
597                )
598            })?;
599            create_named_provider(name, config)
600        }
601        #[cfg(feature = "candle")]
602        "candle" => {
603            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
604                BootstrapError::Provider(
605                    "llm.candle config section required for candle provider".into(),
606                )
607            })?;
608            let source = match candle_cfg.source.as_str() {
609                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
610                    path: std::path::PathBuf::from(&candle_cfg.local_path),
611                },
612                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
613                    repo_id: pcfg
614                        .model
615                        .clone()
616                        .unwrap_or_else(|| config.llm.model.clone()),
617                    filename: candle_cfg.filename.clone(),
618                },
619            };
620            let device_pref = pcfg.device.as_deref().unwrap_or(&candle_cfg.device);
621            build_candle_provider(source, candle_cfg, device_pref)
622        }
623        other => Err(BootstrapError::Provider(format!(
624            "unknown provider type: '{other}'"
625        ))),
626    }
627}
628
629fn pcfg_claude(
630    pcfg: &crate::config::OrchestratorProviderConfig,
631    config: &Config,
632) -> Result<AnyProvider, BootstrapError> {
633    let api_key = config
634        .secrets
635        .claude_api_key
636        .as_ref()
637        .ok_or_else(|| {
638            BootstrapError::Provider("ZEPH_CLAUDE_API_KEY required for claude provider".into())
639        })?
640        .expose()
641        .to_owned();
642    let cloud = config.llm.cloud.as_ref();
643    let model = pcfg
644        .model
645        .as_deref()
646        .or_else(|| cloud.map(|c| c.model.as_str()))
647        .unwrap_or("claude-haiku-4-5-20251001");
648    let max_tokens = cloud.map_or(4096, |c| c.max_tokens);
649    let enable_extended_context = cloud.is_some_and(|c| c.enable_extended_context);
650    let provider = ClaudeProvider::new(api_key, model.to_owned(), max_tokens)
651        .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
652        .with_extended_context(enable_extended_context);
653    Ok(AnyProvider::Claude(provider))
654}
655
656fn pcfg_openai(
657    pcfg: &crate::config::OrchestratorProviderConfig,
658    config: &Config,
659) -> Result<AnyProvider, BootstrapError> {
660    let api_key = config
661        .secrets
662        .openai_api_key
663        .as_ref()
664        .ok_or_else(|| {
665            BootstrapError::Provider("ZEPH_OPENAI_API_KEY required for openai provider".into())
666        })?
667        .expose()
668        .to_owned();
669    let openai_cfg = config.llm.openai.as_ref();
670    let base_url = pcfg
671        .base_url
672        .clone()
673        .or_else(|| openai_cfg.map(|c| c.base_url.clone()))
674        .unwrap_or_else(|| "https://api.openai.com/v1".to_owned());
675    let model = pcfg
676        .model
677        .as_deref()
678        .or_else(|| openai_cfg.map(|c| c.model.as_str()))
679        .unwrap_or("gpt-4o-mini");
680    let max_tokens = openai_cfg.map_or(4096, |c| c.max_tokens);
681    let embed = pcfg
682        .embedding_model
683        .clone()
684        .or_else(|| openai_cfg.and_then(|c| c.embedding_model.clone()));
685    Ok(AnyProvider::OpenAi(
686        OpenAiProvider::new(api_key, base_url, model.to_owned(), max_tokens, embed, None)
687            .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
688    ))
689}
690
691fn pcfg_gemini(
692    pcfg: &crate::config::OrchestratorProviderConfig,
693    config: &Config,
694) -> Result<AnyProvider, BootstrapError> {
695    let api_key = config
696        .secrets
697        .gemini_api_key
698        .as_ref()
699        .ok_or_else(|| {
700            BootstrapError::Provider("ZEPH_GEMINI_API_KEY required for gemini provider".into())
701        })?
702        .expose()
703        .to_owned();
704    let gemini_cfg = config.llm.gemini.as_ref();
705    let model = pcfg
706        .model
707        .as_deref()
708        .or_else(|| gemini_cfg.map(|c| c.model.as_str()))
709        .unwrap_or("gemini-2.0-flash");
710    let max_tokens = gemini_cfg.map_or(4096, |c| c.max_tokens);
711    let base_url = gemini_cfg.map_or_else(
712        || "https://generativelanguage.googleapis.com".to_owned(),
713        |c| c.base_url.clone(),
714    );
715    let mut provider = GeminiProvider::new(api_key, model.to_owned(), max_tokens)
716        .with_base_url(base_url)
717        .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
718    if let Some(em) = gemini_cfg.and_then(|c| c.embedding_model.as_deref()) {
719        provider = provider.with_embedding_model(em);
720    }
721    if let Some(level) = gemini_cfg.and_then(|c| c.thinking_level) {
722        provider = provider.with_thinking_level(level);
723    }
724    if let Some(budget) = gemini_cfg.and_then(|c| c.thinking_budget) {
725        provider = provider
726            .with_thinking_budget(budget)
727            .map_err(|e| BootstrapError::Provider(e.to_string()))?;
728    }
729    if let Some(include) = gemini_cfg.and_then(|c| c.include_thoughts) {
730        provider = provider.with_include_thoughts(include);
731    }
732    Ok(AnyProvider::Gemini(provider))
733}
734
735#[allow(clippy::too_many_lines)] // multi-provider match dispatch: one arm per backend, cannot be meaningfully split
736fn build_sub_provider(
737    pcfg: &crate::config::OrchestratorProviderConfig,
738    config: &Config,
739) -> Result<zeph_llm::orchestrator::SubProvider, BootstrapError> {
740    use zeph_llm::orchestrator::SubProvider;
741    match pcfg.provider_type.as_str() {
742        "ollama" => {
743            let base_url = pcfg.base_url.as_deref().unwrap_or(&config.llm.base_url);
744            let model = pcfg.model.as_deref().unwrap_or(&config.llm.model);
745            let embed = pcfg
746                .embedding_model
747                .clone()
748                .unwrap_or_else(|| config.llm.embedding_model.clone());
749            Ok(SubProvider::Ollama(OllamaProvider::new(
750                base_url,
751                model.to_owned(),
752                embed,
753            )))
754        }
755        "claude" => {
756            let cloud = config.llm.cloud.as_ref().ok_or_else(|| {
757                BootstrapError::Provider("llm.cloud config required for claude sub-provider".into())
758            })?;
759            let api_key = config
760                .secrets
761                .claude_api_key
762                .as_ref()
763                .ok_or_else(|| {
764                    BootstrapError::Provider(
765                        "ZEPH_CLAUDE_API_KEY required for claude sub-provider".into(),
766                    )
767                })?
768                .expose()
769                .to_owned();
770            let model = pcfg.model.as_deref().unwrap_or(&cloud.model);
771            let sub = ClaudeProvider::new(api_key, model.to_owned(), cloud.max_tokens)
772                .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
773                .with_extended_context(cloud.enable_extended_context)
774                .with_thinking_opt(cloud.thinking.clone())
775                .map_err(|e| BootstrapError::Provider(format!("invalid thinking config: {e}")))?
776                .with_server_compaction(cloud.server_compaction);
777            Ok(SubProvider::Claude(sub))
778        }
779        "openai" => {
780            let openai_cfg = config.llm.openai.as_ref().ok_or_else(|| {
781                BootstrapError::Provider(
782                    "llm.openai config required for openai sub-provider".into(),
783                )
784            })?;
785            let api_key = config
786                .secrets
787                .openai_api_key
788                .as_ref()
789                .ok_or_else(|| {
790                    BootstrapError::Provider(
791                        "ZEPH_OPENAI_API_KEY required for openai sub-provider".into(),
792                    )
793                })?
794                .expose()
795                .to_owned();
796            let base_url = pcfg
797                .base_url
798                .clone()
799                .unwrap_or_else(|| openai_cfg.base_url.clone());
800            let model = pcfg.model.as_deref().unwrap_or(&openai_cfg.model);
801            let embed = pcfg
802                .embedding_model
803                .clone()
804                .or_else(|| openai_cfg.embedding_model.clone());
805            Ok(SubProvider::OpenAi(
806                OpenAiProvider::new(
807                    api_key,
808                    base_url,
809                    model.to_owned(),
810                    openai_cfg.max_tokens,
811                    embed,
812                    openai_cfg.reasoning_effort.clone(),
813                )
814                .with_client(llm_client(config.timeouts.llm_request_timeout_secs)),
815            ))
816        }
817        "gemini" => {
818            let api_key = config
819                .secrets
820                .gemini_api_key
821                .as_ref()
822                .ok_or_else(|| {
823                    BootstrapError::Provider(
824                        "ZEPH_GEMINI_API_KEY required for gemini sub-provider".into(),
825                    )
826                })?
827                .expose()
828                .to_owned();
829            let gemini_cfg = config.llm.gemini.as_ref();
830            let model = pcfg
831                .model
832                .as_deref()
833                .or_else(|| gemini_cfg.map(|c| c.model.as_str()))
834                .unwrap_or("gemini-2.0-flash");
835            let max_tokens = gemini_cfg.map_or(8192, |c| c.max_tokens);
836            let base_url = gemini_cfg.map_or_else(
837                || "https://generativelanguage.googleapis.com".to_owned(),
838                |c| c.base_url.clone(),
839            );
840            let mut provider = GeminiProvider::new(api_key, model.to_owned(), max_tokens)
841                .with_base_url(base_url)
842                .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
843            if let Some(level) = gemini_cfg.and_then(|c| c.thinking_level) {
844                provider = provider.with_thinking_level(level);
845            }
846            if let Some(budget) = gemini_cfg.and_then(|c| c.thinking_budget) {
847                provider = provider.with_thinking_budget(budget).map_err(|e| {
848                    BootstrapError::Provider(format!("invalid thinking_budget: {e}"))
849                })?;
850            }
851            if let Some(include) = gemini_cfg.and_then(|c| c.include_thoughts) {
852                provider = provider.with_include_thoughts(include);
853            }
854            Ok(SubProvider::Gemini(provider))
855        }
856        #[cfg(feature = "candle")]
857        "candle" => {
858            let candle_cfg = config.llm.candle.as_ref().ok_or_else(|| {
859                BootstrapError::Provider(
860                    "llm.candle config required for candle sub-provider".into(),
861                )
862            })?;
863            let source = match candle_cfg.source.as_str() {
864                "local" => zeph_llm::candle_provider::loader::ModelSource::Local {
865                    path: std::path::PathBuf::from(&candle_cfg.local_path),
866                },
867                _ => zeph_llm::candle_provider::loader::ModelSource::HuggingFace {
868                    repo_id: pcfg
869                        .model
870                        .clone()
871                        .unwrap_or_else(|| config.llm.model.clone()),
872                    filename: candle_cfg.filename.clone(),
873                },
874            };
875            let device_pref = pcfg.device.as_deref().unwrap_or(&candle_cfg.device);
876            let any = build_candle_provider(source, candle_cfg, device_pref)?;
877            if let AnyProvider::Candle(p) = any {
878                Ok(SubProvider::Candle(p))
879            } else {
880                unreachable!("build_candle_provider always returns AnyProvider::Candle")
881            }
882        }
883        other => Err(BootstrapError::Provider(format!(
884            "unknown orchestrator sub-provider type: {other}"
885        ))),
886    }
887}
888
889pub fn build_orchestrator(
890    config: &Config,
891) -> Result<zeph_llm::orchestrator::ModelOrchestrator, BootstrapError> {
892    use std::collections::HashMap;
893    use zeph_llm::orchestrator::{ModelOrchestrator, TaskType};
894
895    let orch_cfg = config.llm.orchestrator.as_ref().ok_or_else(|| {
896        BootstrapError::Provider(
897            "llm.orchestrator config section required for orchestrator provider".into(),
898        )
899    })?;
900
901    let mut providers = HashMap::new();
902    for (name, pcfg) in &orch_cfg.providers {
903        let provider = build_sub_provider(pcfg, config)?;
904        providers.insert(name.clone(), provider);
905    }
906
907    let mut routes = HashMap::new();
908    for (task_str, chain) in &orch_cfg.routes {
909        let task = TaskType::parse_str(task_str);
910        routes.insert(task, chain.clone());
911    }
912
913    ModelOrchestrator::new(
914        routes,
915        providers,
916        orch_cfg.default.clone(),
917        orch_cfg.embed.clone(),
918    )
919    .map_err(|e| BootstrapError::Provider(e.to_string()))
920}
921
/// Returns `true` if `base_url` points to a local or private-network endpoint
/// where an API key is typically unnecessary.
///
/// Recognizes loopback addresses (IPv4 and IPv6, including bracketed literals
/// such as `http://[::1]:8080`), RFC 1918 private and link-local IPv4 ranges,
/// `localhost`, and the `.local` / `.internal` hostname suffixes.
fn is_local_endpoint(base_url: &str) -> bool {
    // Strip scheme (http:// or https://), then isolate the authority
    // (host[:port]) before any path component.
    let after_scheme = base_url
        .strip_prefix("https://")
        .or_else(|| base_url.strip_prefix("http://"))
        .unwrap_or(base_url);
    let authority = after_scheme.split('/').next().unwrap_or(after_scheme);

    // Extract the host. IPv6 literals need care: naively splitting "[::1]:8080"
    // on ':' yields "[", and a bare "::1" yields "" — so handle brackets and
    // multi-colon (portless IPv6) forms before stripping a port.
    let host = if let Some(bracketed) = authority.strip_prefix('[') {
        // Bracketed IPv6 literal, optionally followed by ":port".
        bracketed.split(']').next().unwrap_or(bracketed)
    } else if authority.bytes().filter(|&b| b == b':').count() > 1 {
        // Unbracketed IPv6 literal — every colon belongs to the address.
        authority
    } else {
        authority.split(':').next().unwrap_or(authority)
    };

    if host.eq_ignore_ascii_case("localhost") {
        return true;
    }
    if let Ok(ip) = host.parse::<std::net::IpAddr>() {
        // Loopback covers 127.0.0.0/8 and ::1; private/link-local cover the
        // RFC 1918 and 169.254.0.0/16 IPv4 ranges.
        return match ip {
            std::net::IpAddr::V4(v4) => v4.is_loopback() || v4.is_private() || v4.is_link_local(),
            std::net::IpAddr::V6(v6) => v6.is_loopback(),
        };
    }
    // Hostname suffixes, not file extensions — suppress clippy false positive.
    #[allow(clippy::case_sensitive_file_extension_comparisons)]
    {
        host.ends_with(".local") || host.ends_with(".internal")
    }
}
955
#[cfg(test)]
mod tests {
    use super::*;

    // Loopback, RFC 1918 ranges, and local-network hostname suffixes all
    // classify as local endpoints.
    #[test]
    fn local_endpoints_detected() {
        let local_urls = [
            "http://localhost:11434/v1",
            "http://127.0.0.1:8080",
            "https://localhost/api",
            "http://192.168.1.100:11434/v1",
            "http://10.0.0.5:8000",
            "http://172.16.0.1:9090",
            "http://myhost.local:11434",
            "http://service.internal:8080",
        ];
        for url in local_urls {
            assert!(is_local_endpoint(url));
        }
    }

    // Public APIs and public IPs must never be treated as local.
    #[test]
    fn remote_endpoints_not_local() {
        let remote_urls = [
            "https://api.openai.com/v1",
            "https://api.anthropic.com",
            "http://8.8.8.8:11434",
            "https://my-server.example.com/v1",
        ];
        for url in remote_urls {
            assert!(!is_local_endpoint(url));
        }
    }
}
979}