Skip to main content

zeph_core/bootstrap/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Application bootstrap: config resolution, provider/memory/tool construction.
5
6pub mod config;
7pub mod health;
8pub mod mcp;
9pub mod oauth;
10pub mod provider;
11pub mod skills;
12
13pub use config::{parse_vault_args, resolve_config_path};
14pub use health::{health_check, warmup_provider};
15pub use mcp::{create_mcp_manager, create_mcp_manager_with_vault, create_mcp_registry};
16pub use oauth::VaultCredentialStore;
17#[cfg(feature = "candle")]
18pub use provider::select_device;
19pub use provider::{
20    BootstrapError, build_provider_for_switch, build_provider_from_entry, create_named_provider,
21    create_provider, create_summary_provider,
22};
23pub use skills::{
24    create_embedding_provider, create_skill_matcher, effective_embedding_model, managed_skills_dir,
25};
26
27use std::path::{Path, PathBuf};
28use std::sync::Arc;
29
30use tokio::sync::{RwLock, mpsc, watch};
31use zeph_llm::any::AnyProvider;
32use zeph_llm::provider::LlmProvider;
33use zeph_memory::GraphStore;
34use zeph_memory::QdrantOps;
35use zeph_memory::semantic::SemanticMemory;
36use zeph_skills::loader::SkillMeta;
37use zeph_skills::matcher::SkillMatcherBackend;
38use zeph_skills::registry::SkillRegistry;
39use zeph_skills::watcher::{SkillEvent, SkillWatcher};
40
41use crate::config::{Config, SecretResolver};
42use crate::config_watcher::{ConfigEvent, ConfigWatcher};
43use crate::vault::AgeVaultProvider;
44use crate::vault::{EnvVaultProvider, VaultProvider};
45
46pub struct AppBuilder {
47    config: Config,
48    config_path: PathBuf,
49    vault: Box<dyn VaultProvider>,
50    /// Present when the vault backend is `age`. Used to pass to `create_mcp_manager_with_vault`
51    /// for OAuth credential persistence across sessions.
52    age_vault: Option<Arc<RwLock<AgeVaultProvider>>>,
53    qdrant_ops: Option<QdrantOps>,
54}
55
/// Vault selection arguments consumed by [`AppBuilder::new`].
///
/// Holds only a backend name and filesystem paths (no secret material), so the derived
/// `Debug` output is safe to log.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VaultArgs {
    /// Backend name; `AppBuilder::new` accepts `"env"` and `"age"` and rejects anything else.
    pub backend: String,
    /// Key file path; required when `backend` is `"age"`.
    pub key_path: Option<String>,
    /// Vault file path; required when `backend` is `"age"`.
    pub vault_path: Option<String>,
}
61
62pub struct WatcherBundle {
63    pub skill_watcher: Option<SkillWatcher>,
64    pub skill_reload_rx: mpsc::Receiver<SkillEvent>,
65    pub config_watcher: Option<ConfigWatcher>,
66    pub config_reload_rx: mpsc::Receiver<ConfigEvent>,
67}
68
69impl AppBuilder {
70    /// Resolve config, load it, create vault, resolve secrets.
71    ///
72    /// CLI-provided overrides take priority over environment variables and config.
73    ///
74    /// # Errors
75    ///
76    /// Returns [`BootstrapError`] if config loading, validation, vault construction,
77    /// secret resolution, or Qdrant URL parsing fails.
78    pub async fn new(
79        config_override: Option<&Path>,
80        vault_override: Option<&str>,
81        vault_key_override: Option<&Path>,
82        vault_path_override: Option<&Path>,
83    ) -> Result<Self, BootstrapError> {
84        let config_path = resolve_config_path(config_override);
85        let mut config = Config::load(&config_path)?;
86        config.validate()?;
87        config.llm.check_legacy_format()?;
88
89        let vault_args = parse_vault_args(
90            &config,
91            vault_override,
92            vault_key_override,
93            vault_path_override,
94        );
95        let (vault, age_vault): (
96            Box<dyn VaultProvider>,
97            Option<Arc<RwLock<AgeVaultProvider>>>,
98        ) = match vault_args.backend.as_str() {
99            "env" => (Box::new(EnvVaultProvider), None),
100            "age" => {
101                let key = vault_args.key_path.ok_or_else(|| {
102                    BootstrapError::Provider("--vault-key required for age backend".into())
103                })?;
104                let path = vault_args.vault_path.ok_or_else(|| {
105                    BootstrapError::Provider("--vault-path required for age backend".into())
106                })?;
107                let provider = AgeVaultProvider::new(Path::new(&key), Path::new(&path))
108                    .map_err(BootstrapError::VaultInit)?;
109                let arc = Arc::new(RwLock::new(provider));
110                let boxed: Box<dyn VaultProvider> =
111                    Box::new(crate::vault::ArcAgeVaultProvider(Arc::clone(&arc)));
112                (boxed, Some(arc))
113            }
114            other => {
115                return Err(BootstrapError::Provider(format!(
116                    "unknown vault backend: {other}"
117                )));
118            }
119        };
120
121        config.resolve_secrets(vault.as_ref()).await?;
122
123        let qdrant_ops = match config.memory.vector_backend {
124            crate::config::VectorBackend::Qdrant => {
125                let ops = QdrantOps::new(&config.memory.qdrant_url).map_err(|e| {
126                    BootstrapError::Provider(format!(
127                        "invalid qdrant_url '{}': {e}",
128                        config.memory.qdrant_url
129                    ))
130                })?;
131                Some(ops)
132            }
133            crate::config::VectorBackend::Sqlite => None,
134        };
135
136        Ok(Self {
137            config,
138            config_path,
139            vault,
140            age_vault,
141            qdrant_ops,
142        })
143    }
144
145    pub fn qdrant_ops(&self) -> Option<&QdrantOps> {
146        self.qdrant_ops.as_ref()
147    }
148
149    pub fn config(&self) -> &Config {
150        &self.config
151    }
152
153    pub fn config_mut(&mut self) -> &mut Config {
154        &mut self.config
155    }
156
157    pub fn config_path(&self) -> &Path {
158        &self.config_path
159    }
160
161    /// Returns the vault provider used for secret resolution.
162    ///
163    /// Retained as part of the public `Bootstrap` API for external callers
164    /// that may inspect or override vault behavior at runtime.
165    pub fn vault(&self) -> &dyn VaultProvider {
166        self.vault.as_ref()
167    }
168
169    /// Returns the shared age vault, if the backend is `age`.
170    ///
171    /// Pass this to `create_mcp_manager_with_vault` so OAuth tokens are persisted
172    /// across sessions.
173    pub fn age_vault_arc(&self) -> Option<&Arc<RwLock<AgeVaultProvider>>> {
174        self.age_vault.as_ref()
175    }
176
177    /// # Errors
178    ///
179    /// Returns [`BootstrapError`] if provider creation or health check fails.
180    pub async fn build_provider(
181        &self,
182    ) -> Result<
183        (
184            AnyProvider,
185            tokio::sync::mpsc::UnboundedSender<String>,
186            tokio::sync::mpsc::UnboundedReceiver<String>,
187        ),
188        BootstrapError,
189    > {
190        let mut provider = create_provider(&self.config)?;
191
192        let (status_tx, status_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
193        let status_tx_clone = status_tx.clone();
194        provider.set_status_tx(status_tx);
195
196        health_check(&provider).await;
197
198        if let AnyProvider::Ollama(ref mut ollama) = provider
199            && let Ok(info) = ollama.fetch_model_info().await
200            && let Some(ctx) = info.context_length
201        {
202            ollama.set_context_window(ctx);
203            tracing::info!(context_window = ctx, "detected Ollama model context window");
204        }
205
206        if let AnyProvider::Orchestrator(ref mut orch) = provider {
207            orch.auto_detect_context_window().await;
208        }
209        if let Some(ctx) = provider.context_window()
210            && !matches!(provider, AnyProvider::Ollama(_))
211        {
212            tracing::info!(context_window = ctx, "detected orchestrator context window");
213        }
214
215        Ok((provider, status_tx_clone, status_rx))
216    }
217
218    pub fn auto_budget_tokens(&self, provider: &AnyProvider) -> usize {
219        if self.config.memory.auto_budget && self.config.memory.context_budget_tokens == 0 {
220            if let Some(ctx_size) = provider.context_window() {
221                tracing::info!(model_context = ctx_size, "auto-configured context budget");
222                ctx_size
223            } else {
224                0
225            }
226        } else {
227            self.config.memory.context_budget_tokens
228        }
229    }
230
231    /// # Errors
232    ///
233    /// Returns [`BootstrapError`] if `SQLite` cannot be initialized or if `vector_backend = "Qdrant"`
234    /// but `qdrant_ops` is `None` (invariant violation — should not happen if `AppBuilder::new`
235    /// succeeded).
236    pub async fn build_memory(
237        &self,
238        provider: &AnyProvider,
239    ) -> Result<SemanticMemory, BootstrapError> {
240        let embed_model = self.embedding_model();
241        let mut memory = match self.config.memory.vector_backend {
242            crate::config::VectorBackend::Sqlite => {
243                SemanticMemory::with_sqlite_backend_and_pool_size(
244                    &self.config.memory.sqlite_path,
245                    provider.clone(),
246                    &embed_model,
247                    self.config.memory.semantic.vector_weight,
248                    self.config.memory.semantic.keyword_weight,
249                    self.config.memory.sqlite_pool_size,
250                )
251                .await
252                .map_err(|e| BootstrapError::Memory(e.to_string()))?
253            }
254            crate::config::VectorBackend::Qdrant => {
255                let ops = self
256                    .qdrant_ops
257                    .as_ref()
258                    .ok_or_else(|| {
259                        BootstrapError::Memory(
260                            "qdrant_ops must be Some when vector_backend = Qdrant".into(),
261                        )
262                    })?
263                    .clone();
264                SemanticMemory::with_qdrant_ops(
265                    &self.config.memory.sqlite_path,
266                    ops,
267                    provider.clone(),
268                    &embed_model,
269                    self.config.memory.semantic.vector_weight,
270                    self.config.memory.semantic.keyword_weight,
271                    self.config.memory.sqlite_pool_size,
272                )
273                .await
274                .map_err(|e| BootstrapError::Memory(e.to_string()))?
275            }
276        };
277
278        memory = memory.with_ranking_options(
279            self.config.memory.semantic.temporal_decay_enabled,
280            self.config.memory.semantic.temporal_decay_half_life_days,
281            self.config.memory.semantic.mmr_enabled,
282            self.config.memory.semantic.mmr_lambda,
283        );
284
285        memory = memory.with_importance_options(
286            self.config.memory.semantic.importance_enabled,
287            self.config.memory.semantic.importance_weight,
288        );
289
290        if self.config.memory.semantic.enabled && memory.is_vector_store_connected().await {
291            tracing::info!("semantic memory enabled, vector store connected");
292            match memory.embed_missing().await {
293                Ok(n) if n > 0 => tracing::info!("backfilled {n} missing embedding(s)"),
294                Ok(_) => {}
295                Err(e) => tracing::warn!("embed_missing failed: {e:#}"),
296            }
297        }
298
299        if self.config.memory.graph.enabled {
300            let pool = memory.sqlite().pool().clone();
301            let store = Arc::new(GraphStore::new(pool));
302            memory = memory.with_graph_store(store);
303            tracing::info!("graph memory enabled, GraphStore attached");
304        }
305
306        Ok(memory)
307    }
308
309    pub async fn build_skill_matcher(
310        &self,
311        provider: &AnyProvider,
312        meta: &[&SkillMeta],
313        memory: &SemanticMemory,
314    ) -> Option<SkillMatcherBackend> {
315        let embed_model = self.embedding_model();
316        create_skill_matcher(
317            &self.config,
318            provider,
319            meta,
320            memory,
321            &embed_model,
322            self.qdrant_ops.as_ref(),
323        )
324        .await
325    }
326
327    pub fn build_registry(&self) -> SkillRegistry {
328        #[cfg(feature = "bundled-skills")]
329        {
330            let managed = managed_skills_dir();
331            match zeph_skills::bundled::provision_bundled_skills(&managed) {
332                Ok(report) => {
333                    if !report.installed.is_empty() {
334                        tracing::info!(
335                            skills = ?report.installed,
336                            "provisioned new bundled skills"
337                        );
338                    }
339                    if !report.updated.is_empty() {
340                        tracing::info!(
341                            skills = ?report.updated,
342                            "updated bundled skills"
343                        );
344                    }
345                    for (name, err) in &report.failed {
346                        tracing::warn!(skill = %name, error = %err, "failed to provision bundled skill");
347                    }
348                }
349                Err(e) => {
350                    tracing::warn!(error = %e, "bundled skill provisioning failed");
351                }
352            }
353        }
354
355        let skill_paths = self.skill_paths();
356        let registry = SkillRegistry::load(&skill_paths);
357
358        if self.config.skills.trust.scan_on_load {
359            let findings = registry.scan_loaded();
360            if findings.is_empty() {
361                tracing::debug!("skill content scan: no injection patterns found");
362            } else {
363                tracing::warn!(
364                    count = findings.len(),
365                    "skill content scan complete: {} skill(s) with potential injection patterns",
366                    findings.len()
367                );
368            }
369        }
370
371        registry
372    }
373
374    pub fn skill_paths(&self) -> Vec<PathBuf> {
375        let mut paths: Vec<PathBuf> = self.config.skills.paths.iter().map(PathBuf::from).collect();
376        let managed_dir = managed_skills_dir();
377        if !paths.contains(&managed_dir) {
378            paths.push(managed_dir);
379        }
380        paths
381    }
382
383    pub fn managed_skills_dir() -> PathBuf {
384        managed_skills_dir()
385    }
386
387    pub fn build_watchers(&self) -> WatcherBundle {
388        let skill_paths = self.skill_paths();
389        let (reload_tx, skill_reload_rx) = mpsc::channel(4);
390        let skill_watcher = match SkillWatcher::start(&skill_paths, reload_tx) {
391            Ok(w) => {
392                tracing::info!("skill watcher started");
393                Some(w)
394            }
395            Err(e) => {
396                tracing::warn!("skill watcher unavailable: {e:#}");
397                None
398            }
399        };
400
401        let (config_reload_tx, config_reload_rx) = mpsc::channel(4);
402        let config_watcher = match ConfigWatcher::start(&self.config_path, config_reload_tx) {
403            Ok(w) => {
404                tracing::info!("config watcher started");
405                Some(w)
406            }
407            Err(e) => {
408                tracing::warn!("config watcher unavailable: {e:#}");
409                None
410            }
411        };
412
413        WatcherBundle {
414            skill_watcher,
415            skill_reload_rx,
416            config_watcher,
417            config_reload_rx,
418        }
419    }
420
421    pub fn build_shutdown() -> (watch::Sender<bool>, watch::Receiver<bool>) {
422        watch::channel(false)
423    }
424
425    pub fn embedding_model(&self) -> String {
426        effective_embedding_model(&self.config)
427    }
428
429    pub fn build_summary_provider(&self) -> Option<AnyProvider> {
430        // Structured config takes precedence over the string-based summary_model.
431        if let Some(ref entry) = self.config.llm.summary_provider {
432            return match build_provider_from_entry(entry, &self.config) {
433                Ok(sp) => {
434                    tracing::info!(
435                        provider_type = ?entry.provider_type,
436                        model = ?entry.model,
437                        "summary provider configured via [llm.summary_provider]"
438                    );
439                    Some(sp)
440                }
441                Err(e) => {
442                    tracing::warn!("failed to create summary provider: {e:#}, using primary");
443                    None
444                }
445            };
446        }
447        self.config.llm.summary_model.as_ref().and_then(
448            |model_spec| match create_summary_provider(model_spec, &self.config) {
449                Ok(sp) => {
450                    tracing::info!(model = %model_spec, "summary provider configured via llm.summary_model");
451                    Some(sp)
452                }
453                Err(e) => {
454                    tracing::warn!("failed to create summary provider: {e:#}, using primary");
455                    None
456                }
457            },
458        )
459    }
460
461    /// Build the quarantine summarizer provider when `security.content_isolation.quarantine.enabled = true`.
462    ///
463    /// Returns `None` when quarantine is disabled or provider resolution fails.
464    /// Emits a `tracing::warn` on resolution failure (quarantine silently disabled).
465    pub fn build_quarantine_provider(
466        &self,
467    ) -> Option<(AnyProvider, zeph_sanitizer::QuarantineConfig)> {
468        let qc = &self.config.security.content_isolation.quarantine;
469        if !qc.enabled {
470            return None;
471        }
472        match create_named_provider(&qc.model, &self.config) {
473            Ok(p) => {
474                tracing::info!(model = %qc.model, "quarantine provider configured");
475                Some((p, qc.clone()))
476            }
477            Err(e) => {
478                tracing::warn!(
479                    model = %qc.model,
480                    error = %e,
481                    "quarantine provider resolution failed, quarantine disabled"
482                );
483                None
484            }
485        }
486    }
487
488    /// Build the guardrail filter when `security.guardrail.enabled = true`.
489    ///
490    /// Returns `None` when guardrail is disabled or provider resolution fails.
491    /// Emits a `tracing::warn` on resolution failure (guardrail silently disabled).
492    #[cfg(feature = "guardrail")]
493    pub fn build_guardrail_filter(&self) -> Option<zeph_sanitizer::guardrail::GuardrailFilter> {
494        let (provider, config) = self.build_guardrail_provider()?;
495        match zeph_sanitizer::guardrail::GuardrailFilter::new(provider, &config) {
496            Ok(filter) => Some(filter),
497            Err(e) => {
498                tracing::warn!(error = %e, "guardrail filter construction failed, guardrail disabled");
499                None
500            }
501        }
502    }
503
504    /// Build the guardrail provider and config pair for use in multi-session contexts.
505    ///
506    /// Returns `None` when guardrail is disabled or provider resolution fails.
507    #[cfg(feature = "guardrail")]
508    pub fn build_guardrail_provider(
509        &self,
510    ) -> Option<(AnyProvider, zeph_sanitizer::guardrail::GuardrailConfig)> {
511        let gc = &self.config.security.guardrail;
512        if !gc.enabled {
513            return None;
514        }
515        let provider_name = gc.provider.as_deref().unwrap_or("ollama");
516        match create_named_provider(provider_name, &self.config) {
517            Ok(p) => {
518                tracing::info!(
519                    provider = %provider_name,
520                    model = ?gc.model,
521                    "guardrail provider configured"
522                );
523                Some((p, gc.clone()))
524            }
525            Err(e) => {
526                tracing::warn!(
527                    provider = %provider_name,
528                    error = %e,
529                    "guardrail provider resolution failed, guardrail disabled"
530                );
531                None
532            }
533        }
534    }
535
536    /// Build a dedicated provider for the judge detector when `detector_mode = judge`.
537    ///
538    /// Returns `None` when mode is `Regex` or `judge_model` is empty (primary provider used).
539    /// Emits a `tracing::warn` when mode is `Judge` but no model is specified.
540    pub fn build_judge_provider(&self) -> Option<AnyProvider> {
541        use crate::config::DetectorMode;
542        let learning = &self.config.skills.learning;
543        if learning.detector_mode != DetectorMode::Judge {
544            return None;
545        }
546        if learning.judge_model.is_empty() {
547            tracing::warn!(
548                "detector_mode=judge but judge_model is empty — primary provider will be used for judging"
549            );
550            return None;
551        }
552        match create_named_provider(&learning.judge_model, &self.config) {
553            Ok(jp) => {
554                tracing::info!(model = %learning.judge_model, "judge provider configured");
555                Some(jp)
556            }
557            Err(e) => {
558                tracing::warn!("failed to create judge provider: {e:#}, using primary");
559                None
560            }
561        }
562    }
563
564    /// Build an `LlmClassifier` for `detector_mode = "model"` feedback detection.
565    ///
566    /// Resolves `feedback_provider` from `[[llm.providers]]` registry.
567    /// Pass the session's primary provider as `primary` for fallback when `feedback_provider`
568    /// is empty. Returns `None` with a warning on resolution failure — never fails startup.
569    pub fn build_feedback_classifier(
570        &self,
571        primary: &AnyProvider,
572    ) -> Option<zeph_llm::classifier::llm::LlmClassifier> {
573        use crate::config::DetectorMode;
574        let learning = &self.config.skills.learning;
575        if learning.detector_mode != DetectorMode::Model {
576            return None;
577        }
578        let provider = if learning.feedback_provider.is_empty() {
579            tracing::debug!("feedback_provider empty — using primary provider for LlmClassifier");
580            Some(primary.clone())
581        } else {
582            match crate::bootstrap::provider::create_named_provider(
583                &learning.feedback_provider,
584                &self.config,
585            ) {
586                Ok(p) => {
587                    tracing::info!(
588                        provider = %learning.feedback_provider,
589                        "LlmClassifier feedback provider configured"
590                    );
591                    Some(p)
592                }
593                Err(e) => {
594                    tracing::warn!(
595                        provider = %learning.feedback_provider,
596                        error = %e,
597                        "feedback_provider not found in registry, degrading to regex-only"
598                    );
599                    None
600                }
601            }
602        };
603        if let Some(p) = provider {
604            Some(zeph_llm::classifier::llm::LlmClassifier::new(
605                std::sync::Arc::new(p),
606            ))
607        } else {
608            tracing::warn!(
609                "detector_mode=model but no provider available, degrading to regex-only"
610            );
611            None
612        }
613    }
614
615    /// Build a dedicated provider for compaction probe LLM calls.
616    ///
617    /// Returns `None` when `probe_provider` is empty (falls back to summary provider at call site).
618    /// Emits a `tracing::warn` on resolution failure (summary/primary provider used as fallback).
619    pub fn build_probe_provider(&self) -> Option<AnyProvider> {
620        let name = &self.config.memory.compression.probe.probe_provider;
621        if name.is_empty() {
622            return None;
623        }
624        match create_named_provider(name, &self.config) {
625            Ok(p) => {
626                tracing::info!(provider = %name, "compaction probe provider configured");
627                Some(p)
628            }
629            Err(e) => {
630                tracing::warn!(
631                    provider = %name,
632                    error = %e,
633                    "probe provider resolution failed — summary/primary provider will be used"
634                );
635                None
636            }
637        }
638    }
639
640    /// Build a dedicated provider for orchestration planner LLM calls.
641    ///
642    /// Returns `None` when `planner_provider` is empty (falls back to primary provider at call site).
643    ///
644    /// # Errors (logged, not propagated)
645    ///
646    /// Emits a `tracing::warn` on resolution failure; primary provider is used as fallback.
647    pub fn build_planner_provider(&self) -> Option<AnyProvider> {
648        let name = &self.config.orchestration.planner_provider;
649        if name.is_empty() {
650            return None;
651        }
652        match create_named_provider(name, &self.config) {
653            Ok(p) => {
654                tracing::info!(provider = %name, "planner provider configured");
655                Some(p)
656            }
657            Err(e) => {
658                tracing::warn!(
659                    provider = %name,
660                    error = %e,
661                    "planner provider resolution failed — primary provider will be used"
662                );
663                None
664            }
665        }
666    }
667
668    #[cfg(feature = "experiments")]
669    pub fn build_eval_provider(&self) -> Option<AnyProvider> {
670        let model_spec = self.config.experiments.eval_model.as_deref()?;
671        match create_summary_provider(model_spec, &self.config) {
672            Ok(p) => {
673                tracing::info!(eval_model = %model_spec, "experiment eval provider configured");
674                Some(p)
675            }
676            Err(e) => {
677                tracing::warn!(
678                    eval_model = %model_spec,
679                    error = %e,
680                    "failed to create eval provider — primary provider will be used as judge"
681                );
682                None
683            }
684        }
685    }
686}
687
688#[cfg(test)]
689mod tests;