1use zeph_llm::any::AnyProvider;
12use zeph_llm::claude::ClaudeProvider;
13use zeph_llm::compatible::CompatibleProvider;
14use zeph_llm::gemini::GeminiProvider;
15#[cfg(feature = "gonka")]
16use zeph_llm::gonka::endpoints::{EndpointPool, GonkaEndpoint};
17#[cfg(feature = "gonka")]
18use zeph_llm::gonka::{GonkaProvider, RequestSigner};
19use zeph_llm::http::llm_client;
20use zeph_llm::ollama::OllamaProvider;
21use zeph_llm::openai::OpenAiProvider;
22#[cfg(feature = "gonka")]
23use zeroize::Zeroizing;
24
25use crate::agent::state::ProviderConfigSnapshot;
26use crate::config::{Config, ProviderEntry, ProviderKind};
27
/// Error type returned by the provider-construction functions in this module.
#[derive(Debug, thiserror::Error)]
pub enum BootstrapError {
    /// A configuration error; converted automatically from
    /// [`crate::config::ConfigError`] via the derived `From` impl.
    #[error("config error: {0}")]
    Config(#[from] crate::config::ConfigError),
    /// A provider could not be constructed; the payload is a human-readable reason.
    #[error("provider error: {0}")]
    Provider(String),
    /// A memory-related failure, carried as a message string.
    #[error("memory error: {0}")]
    Memory(String),
    /// Vault initialization failed. No `From` impl is derived for this variant,
    /// so it has to be constructed explicitly.
    #[error("vault init error: {0}")]
    VaultInit(crate::vault::AgeVaultError),
    /// An I/O error; converted automatically from [`std::io::Error`].
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
}
52
53pub fn build_provider_for_switch(
64 entry: &ProviderEntry,
65 snapshot: &ProviderConfigSnapshot,
66) -> Result<AnyProvider, BootstrapError> {
67 use zeph_common::secret::Secret;
68 let mut config = Config::default();
72 config.secrets.claude_api_key = snapshot.claude_api_key.as_deref().map(Secret::new);
73 config.secrets.openai_api_key = snapshot.openai_api_key.as_deref().map(Secret::new);
74 config.secrets.gemini_api_key = snapshot.gemini_api_key.as_deref().map(Secret::new);
75 config.secrets.compatible_api_keys = snapshot
76 .compatible_api_keys
77 .iter()
78 .map(|(k, v)| (k.clone(), Secret::new(v.as_str())))
79 .collect();
80 config.secrets.gonka_private_key = snapshot
81 .gonka_private_key
82 .as_ref()
83 .map(|z| Secret::new(z.as_str()));
84 config.secrets.gonka_address = snapshot.gonka_address.as_deref().map(Secret::new);
85 config.timeouts.llm_request_timeout_secs = snapshot.llm_request_timeout_secs;
86 config
87 .llm
88 .embedding_model
89 .clone_from(&snapshot.embedding_model);
90 build_provider_from_entry(entry, &config)
91}
92
93pub fn build_provider_from_entry(
103 entry: &ProviderEntry,
104 config: &Config,
105) -> Result<AnyProvider, BootstrapError> {
106 match entry.provider_type {
107 ProviderKind::Ollama => Ok(build_ollama_provider(entry, config)),
108 ProviderKind::Claude => build_claude_provider(entry, config),
109 ProviderKind::OpenAi => build_openai_provider(entry, config),
110 ProviderKind::Gemini => build_gemini_provider(entry, config),
111 ProviderKind::Compatible => build_compatible_provider(entry, config),
112 #[cfg(feature = "candle")]
113 ProviderKind::Candle => build_candle_provider(entry, config),
114 #[cfg(not(feature = "candle"))]
115 ProviderKind::Candle => Err(BootstrapError::Provider(
116 "candle feature is not enabled".into(),
117 )),
118 #[cfg(feature = "gonka")]
119 ProviderKind::Gonka => build_gonka_provider(entry, config),
120 #[cfg(not(feature = "gonka"))]
121 ProviderKind::Gonka => Err(BootstrapError::Provider(
122 "gonka feature is not enabled; rebuild with --features gonka".into(),
123 )),
124 }
125}
126
127fn build_ollama_provider(entry: &ProviderEntry, config: &Config) -> AnyProvider {
128 let base_url = entry
129 .base_url
130 .as_deref()
131 .unwrap_or("http://localhost:11434");
132 let model = entry.model.as_deref().unwrap_or("qwen3:8b").to_owned();
133 let embed = entry
134 .embedding_model
135 .clone()
136 .unwrap_or_else(|| config.llm.embedding_model.clone());
137 let mut provider = OllamaProvider::new(base_url, model, embed);
138 if let Some(ref vm) = entry.vision_model {
139 provider = provider.with_vision_model(vm.clone());
140 }
141 if config.mcp.forward_output_schema {
142 tracing::debug!(
143 "mcp.forward_output_schema is enabled but Ollama does not support \
144 output schema forwarding; setting ignored for this provider"
145 );
146 }
147 AnyProvider::Ollama(provider)
148}
149
150fn build_claude_provider(
151 entry: &ProviderEntry,
152 config: &Config,
153) -> Result<AnyProvider, BootstrapError> {
154 let api_key = config
155 .secrets
156 .claude_api_key
157 .as_ref()
158 .ok_or_else(|| BootstrapError::Provider("ZEPH_CLAUDE_API_KEY not found in vault".into()))?
159 .expose()
160 .to_owned();
161 let model = entry
162 .model
163 .clone()
164 .unwrap_or_else(|| "claude-haiku-4-5-20251001".to_owned());
165 let max_tokens = entry.max_tokens.unwrap_or(4096);
166 let provider = ClaudeProvider::new(api_key, model, max_tokens)
167 .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
168 .with_extended_context(entry.enable_extended_context)
169 .with_thinking_opt(entry.thinking.clone())
170 .map_err(|e| BootstrapError::Provider(format!("invalid thinking config: {e}")))?
171 .with_server_compaction(entry.server_compaction)
172 .with_prompt_cache_ttl(entry.prompt_cache_ttl)
173 .with_output_schema_forwarding(
174 config.mcp.forward_output_schema,
175 config.mcp.output_schema_hint_bytes,
176 config.mcp.max_description_bytes,
177 );
178 tracing::info!(
179 forward = config.mcp.forward_output_schema,
180 "mcp.output_schema.forwarding_configured"
181 );
182 Ok(AnyProvider::Claude(provider))
183}
184
185fn build_openai_provider(
186 entry: &ProviderEntry,
187 config: &Config,
188) -> Result<AnyProvider, BootstrapError> {
189 let api_key = config
190 .secrets
191 .openai_api_key
192 .as_ref()
193 .ok_or_else(|| BootstrapError::Provider("ZEPH_OPENAI_API_KEY not found in vault".into()))?
194 .expose()
195 .to_owned();
196 let base_url = entry
197 .base_url
198 .clone()
199 .unwrap_or_else(|| "https://api.openai.com/v1".to_owned());
200 let model = entry
201 .model
202 .clone()
203 .unwrap_or_else(|| "gpt-4o-mini".to_owned());
204 let max_tokens = entry.max_tokens.unwrap_or(4096);
205 Ok(AnyProvider::OpenAi(
206 OpenAiProvider::new(
207 api_key,
208 base_url,
209 model,
210 max_tokens,
211 entry.embedding_model.clone(),
212 entry.reasoning_effort.clone(),
213 )
214 .with_client(llm_client(config.timeouts.llm_request_timeout_secs))
215 .with_output_schema_forwarding(
216 config.mcp.forward_output_schema,
217 config.mcp.output_schema_hint_bytes,
218 config.mcp.max_description_bytes,
219 ),
220 ))
221}
222
223fn build_gemini_provider(
224 entry: &ProviderEntry,
225 config: &Config,
226) -> Result<AnyProvider, BootstrapError> {
227 let api_key = config
228 .secrets
229 .gemini_api_key
230 .as_ref()
231 .ok_or_else(|| BootstrapError::Provider("ZEPH_GEMINI_API_KEY not found in vault".into()))?
232 .expose()
233 .to_owned();
234 let model = entry
235 .model
236 .clone()
237 .unwrap_or_else(|| "gemini-2.0-flash".to_owned());
238 let max_tokens = entry.max_tokens.unwrap_or(8192);
239 let base_url = entry
240 .base_url
241 .clone()
242 .unwrap_or_else(|| "https://generativelanguage.googleapis.com".to_owned());
243 let mut provider = GeminiProvider::new(api_key, model, max_tokens)
244 .with_base_url(base_url)
245 .with_client(llm_client(config.timeouts.llm_request_timeout_secs));
246 if let Some(ref em) = entry.embedding_model {
247 provider = provider.with_embedding_model(em.clone());
248 }
249 if let Some(level) = entry.thinking_level {
250 provider = provider.with_thinking_level(level);
251 }
252 if let Some(budget) = entry.thinking_budget {
253 provider = provider
254 .with_thinking_budget(budget)
255 .map_err(|e| BootstrapError::Provider(e.to_string()))?;
256 }
257 if let Some(include) = entry.include_thoughts {
258 provider = provider.with_include_thoughts(include);
259 }
260 if config.mcp.forward_output_schema {
261 tracing::debug!(
262 "mcp.forward_output_schema is enabled but Gemini does not support \
263 output schema forwarding; setting ignored for this provider"
264 );
265 }
266 Ok(AnyProvider::Gemini(provider))
267}
268
269fn build_compatible_provider(
270 entry: &ProviderEntry,
271 config: &Config,
272) -> Result<AnyProvider, BootstrapError> {
273 let name = entry.name.as_deref().ok_or_else(|| {
274 BootstrapError::Provider(
275 "compatible provider requires 'name' field in [[llm.providers]]".into(),
276 )
277 })?;
278 let base_url = entry.base_url.clone().ok_or_else(|| {
279 BootstrapError::Provider(format!("compatible provider '{name}' requires 'base_url'"))
280 })?;
281 let model = entry.model.clone().unwrap_or_default();
282 let api_key = entry.api_key.clone().unwrap_or_else(|| {
283 config
284 .secrets
285 .compatible_api_keys
286 .get(name)
287 .map(|s| s.expose().to_owned())
288 .unwrap_or_default()
289 });
290 let max_tokens = entry.max_tokens.unwrap_or(4096);
291 let provider = CompatibleProvider::new(
292 name.to_owned(),
293 api_key,
294 base_url,
295 model,
296 max_tokens,
297 entry.embedding_model.clone(),
298 )
299 .with_output_schema_forwarding(
300 config.mcp.forward_output_schema,
301 config.mcp.output_schema_hint_bytes,
302 config.mcp.max_description_bytes,
303 );
304 tracing::info!(
305 forward = config.mcp.forward_output_schema,
306 provider = name,
307 "mcp.output_schema.forwarding_configured"
308 );
309 Ok(AnyProvider::Compatible(provider))
310}
311
/// Builds a Gonka provider from `entry`.
///
/// Steps, in order:
/// 1. read the private key from `config.secrets.gonka_private_key` (the local
///    copy is wrapped in `Zeroizing`);
/// 2. create a `RequestSigner` from the key and the entry's effective chain prefix;
/// 3. if `config.secrets.gonka_address` is set, require it to equal the address
///    derived by the signer (compared after lower-casing both sides);
/// 4. require at least one node in `entry.gonka_nodes` and build the endpoint pool;
/// 5. construct the provider (model defaults to `gpt-4o`, `max_tokens` to 4096).
///
/// # Errors
///
/// Returns [`BootstrapError::Provider`] when the private key is missing or
/// invalid, the configured address does not match, the node list is empty, or
/// the endpoint pool cannot be built.
#[cfg(feature = "gonka")]
fn build_gonka_provider(
    entry: &ProviderEntry,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    use std::sync::Arc;
    use std::time::Duration;

    let _span = tracing::info_span!("core.provider_factory.build_gonka").entered();

    let private_key_hex: Zeroizing<String> = match config.secrets.gonka_private_key.as_ref() {
        Some(secret) => Zeroizing::new(secret.expose().to_owned()),
        None => {
            return Err(BootstrapError::Provider(
                "ZEPH_GONKA_PRIVATE_KEY not found in vault; set it with: zeph vault set ZEPH_GONKA_PRIVATE_KEY <hex>".into(),
            ));
        }
    };

    let chain_prefix = entry.effective_gonka_chain_prefix().to_owned();
    let signer = RequestSigner::from_hex(&private_key_hex, &chain_prefix)
        .map_err(|e| BootstrapError::Provider(format!("invalid Gonka private key: {e}")))?;

    match config.secrets.gonka_address.as_ref() {
        Some(configured_address) => {
            // Compare case-insensitively by lower-casing both addresses.
            let configured = configured_address.expose().to_lowercase();
            let derived = signer.address().to_lowercase();
            if configured != derived {
                return Err(BootstrapError::Provider(format!(
                    "ZEPH_GONKA_ADDRESS does not match address derived from private key \
                     (configured: {configured}, derived: {derived})"
                )));
            }
        }
        None => {
            tracing::info!(
                address = signer.address(),
                "Gonka: using address derived from private key (ZEPH_GONKA_ADDRESS not set)"
            );
        }
    }

    if entry.gonka_nodes.is_empty() {
        return Err(BootstrapError::Provider(
            "Gonka provider entry must have at least one node in gonka_nodes".into(),
        ));
    }

    let mut endpoints: Vec<GonkaEndpoint> = Vec::with_capacity(entry.gonka_nodes.len());
    for node in &entry.gonka_nodes {
        endpoints.push(GonkaEndpoint {
            base_url: node.url.clone(),
            address: node.address.clone(),
        });
    }

    let pool = match EndpointPool::new(endpoints) {
        Ok(pool) => pool,
        Err(e) => {
            return Err(BootstrapError::Provider(format!(
                "failed to build Gonka endpoint pool: {e}"
            )));
        }
    };

    let model = entry.model.as_deref().unwrap_or("gpt-4o").to_owned();
    let max_tokens = entry.max_tokens.unwrap_or(4096);
    let timeout = Duration::from_secs(config.timeouts.llm_request_timeout_secs);

    Ok(AnyProvider::Gonka(GonkaProvider::new(
        Arc::new(signer),
        Arc::new(pool),
        model,
        max_tokens,
        entry.embedding_model.clone(),
        timeout,
    )))
}
387
/// Builds a Candle provider from the `candle` section of `entry`.
///
/// The model source is a local path when `candle.source` is `"local"`; any other
/// value selects a Hugging Face repo, whose id is `entry.model` or, when unset,
/// `config.llm.effective_model()`. The inference timeout is clamped to at least
/// one second.
///
/// # Errors
///
/// Returns [`BootstrapError::Provider`] when the `candle` section is missing,
/// when [`select_device`] fails, or when the provider constructor fails.
#[cfg(feature = "candle")]
fn build_candle_provider(
    entry: &ProviderEntry,
    config: &Config,
) -> Result<AnyProvider, BootstrapError> {
    use zeph_llm::candle_provider::CandleProvider;
    use zeph_llm::candle_provider::generate::GenerationConfig;
    use zeph_llm::candle_provider::loader::ModelSource;
    use zeph_llm::candle_provider::template::ChatTemplate;

    let missing_section = || {
        BootstrapError::Provider(
            "candle provider requires 'candle' section in [[llm.providers]]".into(),
        )
    };
    let candle = entry.candle.as_ref().ok_or_else(missing_section)?;

    let source = if candle.source.as_str() == "local" {
        ModelSource::Local {
            path: std::path::PathBuf::from(&candle.local_path),
        }
    } else {
        let repo_id = entry
            .model
            .as_deref()
            .map_or_else(|| config.llm.effective_model().to_owned(), str::to_owned);
        ModelSource::HuggingFace {
            repo_id,
            filename: candle.filename.clone(),
        }
    };

    let template = ChatTemplate::parse_str(&candle.chat_template);

    let sampling = &candle.generation;
    let gen_config = GenerationConfig {
        temperature: sampling.temperature,
        top_p: sampling.top_p,
        top_k: sampling.top_k,
        max_tokens: sampling.capped_max_tokens(),
        seed: sampling.seed,
        repeat_penalty: sampling.repeat_penalty,
        repeat_last_n: sampling.repeat_last_n,
    };

    let device = select_device(&candle.device)?;
    // A configured timeout of zero is raised to one second.
    let inference_timeout = std::time::Duration::from_secs(candle.inference_timeout_secs.max(1));

    let built = CandleProvider::new_with_timeout(
        &source,
        template,
        gen_config,
        candle.embedding_repo.as_deref(),
        candle.hf_token.as_deref(),
        device,
        inference_timeout,
    );
    match built {
        Ok(provider) => Ok(AnyProvider::Candle(provider)),
        Err(e) => Err(BootstrapError::Provider(e.to_string())),
    }
}
437
/// Picks the Candle compute device for a textual `preference`.
///
/// * `"metal"` / `"cuda"` — device 0 of that backend; an error if the crate was
///   compiled without the matching feature or the device cannot be created.
/// * `"auto"` — the first of Metal, then CUDA, that is compiled in and can be
///   created; otherwise the CPU.
/// * anything else — the CPU.
///
/// # Errors
///
/// Returns [`BootstrapError::Provider`] for an explicitly requested `"metal"` or
/// `"cuda"` device that is unavailable.
#[cfg(feature = "candle")]
pub fn select_device(
    preference: &str,
) -> Result<zeph_llm::candle_provider::Device, BootstrapError> {
    use zeph_llm::candle_provider::Device;

    if preference == "metal" {
        #[cfg(feature = "metal")]
        return Device::new_metal(0).map_err(|e| BootstrapError::Provider(e.to_string()));
        #[cfg(not(feature = "metal"))]
        return Err(BootstrapError::Provider(
            "candle compiled without metal feature".into(),
        ));
    }

    if preference == "cuda" {
        #[cfg(feature = "cuda")]
        return Device::new_cuda(0).map_err(|e| BootstrapError::Provider(e.to_string()));
        #[cfg(not(feature = "cuda"))]
        return Err(BootstrapError::Provider(
            "candle compiled without cuda feature".into(),
        ));
    }

    if preference == "auto" {
        // Probe accelerators in order; a failed probe is not an error here.
        #[cfg(feature = "metal")]
        if let Ok(accelerator) = Device::new_metal(0) {
            return Ok(accelerator);
        }
        #[cfg(feature = "cuda")]
        if let Ok(accelerator) = Device::new_cuda(0) {
            return Ok(accelerator);
        }
    }

    // Reached for "auto" without a usable accelerator and for any other value.
    Ok(Device::Cpu)
}
485
486#[must_use]
493pub fn effective_embedding_model(config: &Config) -> String {
494 if let Some(m) = config
496 .llm
497 .providers
498 .iter()
499 .find(|e| e.embed)
500 .and_then(|e| e.embedding_model.as_ref())
501 {
502 return m.clone();
503 }
504 if let Some(m) = config
506 .llm
507 .providers
508 .first()
509 .and_then(|e| e.embedding_model.as_ref())
510 {
511 return m.clone();
512 }
513 config.llm.embedding_model.clone()
514}
515
516#[must_use]
526pub fn stable_skill_embedding_model(config: &Config) -> String {
527 let embed_entry = config.llm.providers.iter().find(|e| e.embed).or_else(|| {
529 config
530 .llm
531 .providers
532 .iter()
533 .find(|e| e.embedding_model.is_some())
534 });
535
536 if let Some(entry) = embed_entry {
537 if let Some(em) = entry.embedding_model.as_ref().filter(|s| !s.is_empty()) {
539 return em.clone();
540 }
541 if let Some(m) = entry.model.as_ref().filter(|s| !s.is_empty()) {
542 return m.clone();
543 }
544 }
545
546 effective_embedding_model(config)
548}
549
#[cfg(test)]
mod tests {
    #[cfg(feature = "gonka")]
    use super::build_provider_from_entry;
    #[cfg(feature = "candle")]
    use super::select_device;
    use super::{effective_embedding_model, stable_skill_embedding_model};
    use crate::config::{Config, ProviderKind};
    use zeph_config::providers::ProviderEntry;
    #[cfg(feature = "candle")]
    use zeph_llm::candle_provider::Device;

    /// Creates an Ollama provider entry with the given embedding-related fields.
    fn make_provider_entry(
        embed: bool,
        model: Option<&str>,
        embedding_model: Option<&str>,
    ) -> ProviderEntry {
        ProviderEntry {
            provider_type: ProviderKind::Ollama,
            embed,
            model: model.map(String::from),
            embedding_model: embedding_model.map(String::from),
            ..ProviderEntry::default()
        }
    }

    /// Creates a default config whose provider list is `providers`.
    fn config_with_providers(providers: Vec<ProviderEntry>) -> Config {
        let mut config = Config::default();
        config.llm.providers = providers;
        config
    }

    // ---- select_device -------------------------------------------------

    #[cfg(feature = "candle")]
    #[test]
    fn select_device_cpu_default() {
        let chosen = select_device("cpu").unwrap();
        assert!(matches!(chosen, Device::Cpu));
    }

    #[cfg(feature = "candle")]
    #[test]
    fn select_device_unknown_defaults_to_cpu() {
        let chosen = select_device("unknown").unwrap();
        assert!(matches!(chosen, Device::Cpu));
    }

    #[cfg(all(feature = "candle", not(feature = "metal")))]
    #[test]
    fn select_device_metal_without_feature_errors() {
        let message = select_device("metal")
            .expect_err("metal must be rejected without the metal feature")
            .to_string();
        assert!(message.contains("metal feature"));
    }

    #[cfg(all(feature = "candle", not(feature = "cuda")))]
    #[test]
    fn select_device_cuda_without_feature_errors() {
        let message = select_device("cuda")
            .expect_err("cuda must be rejected without the cuda feature")
            .to_string();
        assert!(message.contains("cuda feature"));
    }

    #[cfg(feature = "candle")]
    #[test]
    fn select_device_auto_fallback() {
        // Any device is acceptable; the call only has to succeed.
        let chosen = select_device("auto").unwrap();
        assert!(matches!(
            chosen,
            Device::Cpu | Device::Cuda(_) | Device::Metal(_)
        ));
    }

    // ---- Gonka provider construction -----------------------------------

    #[cfg(feature = "gonka")]
    mod gonka_tests {
        use super::*;
        use zeph_common::secret::Secret;
        use zeph_config::GonkaNode;
        use zeph_llm::LlmProvider;

        const VALID_PRIV_KEY: &str =
            "0000000000000000000000000000000000000000000000000000000000000001";

        fn gonka_entry_with_nodes(nodes: Vec<GonkaNode>) -> ProviderEntry {
            ProviderEntry {
                provider_type: ProviderKind::Gonka,
                name: Some("gonka".into()),
                model: Some("gpt-4o".into()),
                gonka_nodes: nodes,
                ..ProviderEntry::default()
            }
        }

        fn valid_nodes() -> Vec<GonkaNode> {
            let node = GonkaNode {
                url: "https://node1.gonka.ai".into(),
                address: "gonka1w508d6qejxtdg4y5r3zarvary0c5xw7k2gsyg6".into(),
                name: Some("node1".into()),
            };
            vec![node]
        }

        /// Default config with only the Gonka private key set.
        fn config_with_valid_key() -> Config {
            let mut config = Config::default();
            config.secrets.gonka_private_key = Some(Secret::new(VALID_PRIV_KEY));
            config
        }

        #[test]
        fn build_gonka_provider_missing_key_returns_error() {
            let entry = gonka_entry_with_nodes(valid_nodes());
            let msg = build_provider_from_entry(&entry, &Config::default())
                .expect_err("building without a private key must fail")
                .to_string();
            assert!(
                msg.contains("ZEPH_GONKA_PRIVATE_KEY"),
                "error must mention missing key: {msg}"
            );
        }

        #[test]
        fn build_gonka_provider_empty_nodes_returns_error() {
            let entry = gonka_entry_with_nodes(Vec::new());
            let config = config_with_valid_key();
            let msg = build_provider_from_entry(&entry, &config)
                .expect_err("building without nodes must fail")
                .to_string();
            assert!(
                msg.contains("gonka_nodes") || msg.contains("node"),
                "error must mention empty nodes: {msg}"
            );
        }

        #[test]
        fn build_gonka_provider_address_mismatch_returns_error() {
            let entry = gonka_entry_with_nodes(valid_nodes());
            let mut config = config_with_valid_key();
            config.secrets.gonka_address =
                Some(Secret::new("gonka1wrongaddress000000000000000000000000000"));
            let msg = build_provider_from_entry(&entry, &config)
                .expect_err("a mismatching address must fail")
                .to_string();
            assert!(
                msg.contains("does not match"),
                "error must mention address mismatch: {msg}"
            );
        }

        #[test]
        fn build_gonka_provider_happy_path() {
            let entry = gonka_entry_with_nodes(valid_nodes());
            let config = config_with_valid_key();
            let outcome = build_provider_from_entry(&entry, &config);
            assert!(
                outcome.is_ok(),
                "expected Ok, got: {:?}",
                outcome.as_ref().err()
            );
            let provider = outcome.unwrap();
            assert_eq!(provider.name(), "gonka");
        }
    }

    // ---- stable_skill_embedding_model ----------------------------------

    #[test]
    fn stable_skill_embedding_model_prefers_embedding_model_field() {
        let config = config_with_providers(vec![make_provider_entry(
            true,
            Some("chat-model"),
            Some("embed-v2"),
        )]);
        assert_eq!(stable_skill_embedding_model(&config), "embed-v2");
    }

    #[test]
    fn stable_skill_embedding_model_falls_back_to_model_field() {
        let config = config_with_providers(vec![make_provider_entry(
            true,
            Some("nomic-embed-text-v2-moe:latest"),
            None,
        )]);
        assert_eq!(
            stable_skill_embedding_model(&config),
            "nomic-embed-text-v2-moe:latest"
        );
    }

    #[test]
    fn stable_skill_embedding_model_finds_embed_flag_entry() {
        let config = config_with_providers(vec![
            make_provider_entry(false, Some("chat-model"), None),
            make_provider_entry(true, Some("embed-model"), Some("text-embed-3")),
        ]);
        assert_eq!(stable_skill_embedding_model(&config), "text-embed-3");
    }

    #[test]
    fn stable_skill_embedding_model_falls_back_to_effective_when_no_embed_entry() {
        let mut config =
            config_with_providers(vec![make_provider_entry(false, Some("chat"), None)]);
        config.llm.embedding_model = "global-embed-model".to_owned();
        // No entry is flagged `embed` and none sets `embedding_model`, so the
        // result must equal whatever `effective_embedding_model` resolves to.
        assert_eq!(
            stable_skill_embedding_model(&config),
            effective_embedding_model(&config)
        );
    }
}