Skip to main content

zeph_core/bootstrap/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Application bootstrap: config resolution, provider/memory/tool construction.
5
6pub mod config;
7pub mod health;
8pub mod mcp;
9pub mod oauth;
10pub mod provider;
11pub mod skills;
12
13pub use config::{parse_vault_args, resolve_config_path};
14pub use health::{health_check, warmup_provider};
15pub use mcp::{
16    create_mcp_manager, create_mcp_manager_with_vault, create_mcp_registry, wire_trust_calibration,
17};
18pub use oauth::VaultCredentialStore;
19#[cfg(feature = "candle")]
20pub use provider::select_device;
21pub use provider::{
22    BootstrapError, build_provider_for_switch, build_provider_from_entry, create_named_provider,
23    create_provider, create_summary_provider,
24};
25pub use skills::{
26    create_embedding_provider, create_skill_matcher, effective_embedding_model, managed_skills_dir,
27};
28
29use std::path::{Path, PathBuf};
30use std::sync::Arc;
31
32use tokio::sync::{RwLock, mpsc, watch};
33use zeph_llm::any::AnyProvider;
34use zeph_llm::provider::LlmProvider;
35use zeph_memory::GraphStore;
36use zeph_memory::QdrantOps;
37use zeph_memory::semantic::SemanticMemory;
38use zeph_skills::loader::SkillMeta;
39use zeph_skills::matcher::SkillMatcherBackend;
40use zeph_skills::registry::SkillRegistry;
41use zeph_skills::watcher::{SkillEvent, SkillWatcher};
42
43use crate::config::{Config, SecretResolver};
44use crate::config_watcher::{ConfigEvent, ConfigWatcher};
45use crate::vault::AgeVaultProvider;
46use crate::vault::{EnvVaultProvider, VaultProvider};
47
48pub struct AppBuilder {
49    config: Config,
50    config_path: PathBuf,
51    vault: Box<dyn VaultProvider>,
52    /// Present when the vault backend is `age`. Used to pass to `create_mcp_manager_with_vault`
53    /// for OAuth credential persistence across sessions.
54    age_vault: Option<Arc<RwLock<AgeVaultProvider>>>,
55    qdrant_ops: Option<QdrantOps>,
56}
57
/// Vault selection resolved from CLI overrides, environment and config.
///
/// `key_path` and `vault_path` are required by the `age` backend and unused by `env`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct VaultArgs {
    /// Name of the vault backend (`"env"` or `"age"`).
    pub backend: String,
    /// Path to the age key file, if one was supplied.
    pub key_path: Option<String>,
    /// Path to the age vault file, if one was supplied.
    pub vault_path: Option<String>,
}
63
64pub struct WatcherBundle {
65    pub skill_watcher: Option<SkillWatcher>,
66    pub skill_reload_rx: mpsc::Receiver<SkillEvent>,
67    pub config_watcher: Option<ConfigWatcher>,
68    pub config_reload_rx: mpsc::Receiver<ConfigEvent>,
69}
70
71impl AppBuilder {
72    /// Resolve config, load it, create vault, resolve secrets.
73    ///
74    /// CLI-provided overrides take priority over environment variables and config.
75    ///
76    /// # Errors
77    ///
78    /// Returns [`BootstrapError`] if config loading, validation, vault construction,
79    /// secret resolution, or Qdrant URL parsing fails.
80    pub async fn new(
81        config_override: Option<&Path>,
82        vault_override: Option<&str>,
83        vault_key_override: Option<&Path>,
84        vault_path_override: Option<&Path>,
85    ) -> Result<Self, BootstrapError> {
86        let config_path = resolve_config_path(config_override);
87        let mut config = Config::load(&config_path)?;
88        config.validate()?;
89        config.llm.check_legacy_format()?;
90
91        let vault_args = parse_vault_args(
92            &config,
93            vault_override,
94            vault_key_override,
95            vault_path_override,
96        );
97        let (vault, age_vault): (
98            Box<dyn VaultProvider>,
99            Option<Arc<RwLock<AgeVaultProvider>>>,
100        ) = match vault_args.backend.as_str() {
101            "env" => (Box::new(EnvVaultProvider), None),
102            "age" => {
103                let key = vault_args.key_path.ok_or_else(|| {
104                    BootstrapError::Provider("--vault-key required for age backend".into())
105                })?;
106                let path = vault_args.vault_path.ok_or_else(|| {
107                    BootstrapError::Provider("--vault-path required for age backend".into())
108                })?;
109                let provider = AgeVaultProvider::new(Path::new(&key), Path::new(&path))
110                    .map_err(BootstrapError::VaultInit)?;
111                let arc = Arc::new(RwLock::new(provider));
112                let boxed: Box<dyn VaultProvider> =
113                    Box::new(crate::vault::ArcAgeVaultProvider(Arc::clone(&arc)));
114                (boxed, Some(arc))
115            }
116            other => {
117                return Err(BootstrapError::Provider(format!(
118                    "unknown vault backend: {other}"
119                )));
120            }
121        };
122
123        config.resolve_secrets(vault.as_ref()).await?;
124
125        let qdrant_ops = match config.memory.vector_backend {
126            crate::config::VectorBackend::Qdrant => {
127                let ops = QdrantOps::new(&config.memory.qdrant_url).map_err(|e| {
128                    BootstrapError::Provider(format!(
129                        "invalid qdrant_url '{}': {e}",
130                        config.memory.qdrant_url
131                    ))
132                })?;
133                Some(ops)
134            }
135            crate::config::VectorBackend::Sqlite => None,
136        };
137
138        Ok(Self {
139            config,
140            config_path,
141            vault,
142            age_vault,
143            qdrant_ops,
144        })
145    }
146
147    pub fn qdrant_ops(&self) -> Option<&QdrantOps> {
148        self.qdrant_ops.as_ref()
149    }
150
151    pub fn config(&self) -> &Config {
152        &self.config
153    }
154
155    pub fn config_mut(&mut self) -> &mut Config {
156        &mut self.config
157    }
158
159    pub fn config_path(&self) -> &Path {
160        &self.config_path
161    }
162
163    /// Returns the vault provider used for secret resolution.
164    ///
165    /// Retained as part of the public `Bootstrap` API for external callers
166    /// that may inspect or override vault behavior at runtime.
167    pub fn vault(&self) -> &dyn VaultProvider {
168        self.vault.as_ref()
169    }
170
171    /// Returns the shared age vault, if the backend is `age`.
172    ///
173    /// Pass this to `create_mcp_manager_with_vault` so OAuth tokens are persisted
174    /// across sessions.
175    pub fn age_vault_arc(&self) -> Option<&Arc<RwLock<AgeVaultProvider>>> {
176        self.age_vault.as_ref()
177    }
178
179    /// # Errors
180    ///
181    /// Returns [`BootstrapError`] if provider creation or health check fails.
182    pub async fn build_provider(
183        &self,
184    ) -> Result<
185        (
186            AnyProvider,
187            tokio::sync::mpsc::UnboundedSender<String>,
188            tokio::sync::mpsc::UnboundedReceiver<String>,
189        ),
190        BootstrapError,
191    > {
192        let mut provider = create_provider(&self.config)?;
193
194        let (status_tx, status_rx) = tokio::sync::mpsc::unbounded_channel::<String>();
195        let status_tx_clone = status_tx.clone();
196        provider.set_status_tx(status_tx);
197
198        health_check(&provider).await;
199
200        if let AnyProvider::Ollama(ref mut ollama) = provider
201            && let Ok(info) = ollama.fetch_model_info().await
202            && let Some(ctx) = info.context_length
203        {
204            ollama.set_context_window(ctx);
205            tracing::info!(context_window = ctx, "detected Ollama model context window");
206        }
207
208        if let Some(ctx) = provider.context_window()
209            && !matches!(provider, AnyProvider::Ollama(_))
210        {
211            tracing::info!(context_window = ctx, "detected provider context window");
212        }
213
214        Ok((provider, status_tx_clone, status_rx))
215    }
216
217    pub fn auto_budget_tokens(&self, provider: &AnyProvider) -> usize {
218        if self.config.memory.auto_budget && self.config.memory.context_budget_tokens == 0 {
219            if let Some(ctx_size) = provider.context_window() {
220                tracing::info!(model_context = ctx_size, "auto-configured context budget");
221                ctx_size
222            } else {
223                0
224            }
225        } else {
226            self.config.memory.context_budget_tokens
227        }
228    }
229
230    /// # Errors
231    ///
232    /// Returns [`BootstrapError`] if `SQLite` cannot be initialized or if `vector_backend = "Qdrant"`
233    /// but `qdrant_ops` is `None` (invariant violation — should not happen if `AppBuilder::new`
234    /// succeeded).
235    pub async fn build_memory(
236        &self,
237        provider: &AnyProvider,
238    ) -> Result<SemanticMemory, BootstrapError> {
239        let embed_model = self.embedding_model();
240        // Resolve the database path: prefer database_url (PostgreSQL) over sqlite_path.
241        let db_path: &str = self
242            .config
243            .memory
244            .database_url
245            .as_deref()
246            .unwrap_or(&self.config.memory.sqlite_path);
247
248        if zeph_db::is_postgres_url(db_path) {
249            return Err(BootstrapError::Memory(
250                "database_url points to PostgreSQL but binary was compiled with the \
251                 sqlite feature. Recompile with --features postgres."
252                    .to_string(),
253            ));
254        }
255
256        let mut memory = match self.config.memory.vector_backend {
257            crate::config::VectorBackend::Sqlite => {
258                SemanticMemory::with_sqlite_backend_and_pool_size(
259                    db_path,
260                    provider.clone(),
261                    &embed_model,
262                    self.config.memory.semantic.vector_weight,
263                    self.config.memory.semantic.keyword_weight,
264                    self.config.memory.sqlite_pool_size,
265                )
266                .await
267                .map_err(|e| BootstrapError::Memory(e.to_string()))?
268            }
269            crate::config::VectorBackend::Qdrant => {
270                let ops = self
271                    .qdrant_ops
272                    .as_ref()
273                    .ok_or_else(|| {
274                        BootstrapError::Memory(
275                            "qdrant_ops must be Some when vector_backend = Qdrant".into(),
276                        )
277                    })?
278                    .clone();
279                SemanticMemory::with_qdrant_ops(
280                    db_path,
281                    ops,
282                    provider.clone(),
283                    &embed_model,
284                    self.config.memory.semantic.vector_weight,
285                    self.config.memory.semantic.keyword_weight,
286                    self.config.memory.sqlite_pool_size,
287                )
288                .await
289                .map_err(|e| BootstrapError::Memory(e.to_string()))?
290            }
291        };
292
293        memory = memory.with_ranking_options(
294            self.config.memory.semantic.temporal_decay_enabled,
295            self.config.memory.semantic.temporal_decay_half_life_days,
296            self.config.memory.semantic.mmr_enabled,
297            self.config.memory.semantic.mmr_lambda,
298        );
299
300        memory = memory.with_importance_options(
301            self.config.memory.semantic.importance_enabled,
302            self.config.memory.semantic.importance_weight,
303        );
304
305        if self.config.memory.semantic.enabled && memory.is_vector_store_connected().await {
306            tracing::info!("semantic memory enabled, vector store connected");
307        }
308
309        if self.config.memory.graph.enabled {
310            // Open a dedicated pool for graph operations to prevent pool starvation.
311            // Community detection and spreading activation can saturate the shared message pool
312            // (pool_size=5), causing pool.acquire() cancellation and semaphore drift in sqlx 0.8.
313            let graph_pool = zeph_db::DbConfig {
314                url: db_path.to_string(),
315                max_connections: self.config.memory.graph.pool_size,
316                pool_size: self.config.memory.graph.pool_size,
317            }
318            .connect()
319            .await
320            .map_err(|e| BootstrapError::Memory(e.to_string()))?;
321            let store = Arc::new(GraphStore::new(graph_pool));
322            memory = memory.with_graph_store(store);
323            tracing::info!(
324                pool_size = self.config.memory.graph.pool_size,
325                "graph memory enabled, GraphStore attached with dedicated pool"
326            );
327        }
328
329        if self.config.memory.admission.enabled {
330            memory = memory.with_admission_control(self.build_admission_control(provider));
331        }
332
333        if let Some(ep) = self.build_memory_embed_provider() {
334            memory = memory.with_embed_provider(ep);
335        }
336
337        Ok(memory)
338    }
339
340    fn build_memory_embed_provider(&self) -> Option<AnyProvider> {
341        let name = self
342            .config
343            .memory
344            .semantic
345            .embed_provider
346            .as_deref()
347            .filter(|s| !s.is_empty())?;
348
349        match create_named_provider(name, &self.config) {
350            Ok(ep) => {
351                tracing::info!(provider = %name, "Using dedicated embed provider for memory backfill");
352                Some(ep)
353            }
354            Err(e) => {
355                tracing::warn!(
356                    provider = %name,
357                    error = %e,
358                    "Memory embed_provider resolution failed — main provider will be used"
359                );
360                None
361            }
362        }
363    }
364}
365
366/// Spawn a background task that backfills missing embeddings.
367///
368/// Fire-and-forget: the caller does not need to await the returned handle.
369/// The task runs for at most `timeout_secs` seconds.
370///
371/// # Errors
372///
373/// The returned `JoinHandle` resolves to `()` — errors are logged internally.
374pub fn spawn_embed_backfill(
375    memory: Arc<SemanticMemory>,
376    timeout_secs: u64,
377) -> tokio::task::JoinHandle<()> {
378    tokio::spawn(async move {
379        let result = tokio::time::timeout(
380            std::time::Duration::from_secs(timeout_secs),
381            memory.embed_missing(),
382        )
383        .await;
384        match result {
385            Ok(Ok(n)) if n > 0 => tracing::info!("backfilled {n} missing embedding(s)"),
386            Ok(Ok(_)) => {}
387            Ok(Err(e)) => tracing::warn!("embed_missing failed: {e:#}"),
388            Err(_) => tracing::warn!("embed_missing timed out after {timeout_secs}s"),
389        }
390    })
391}
392
393impl AppBuilder {
394    fn build_admission_control(
395        &self,
396        fallback_provider: &AnyProvider,
397    ) -> zeph_memory::AdmissionControl {
398        let admission_provider = if self.config.memory.admission.admission_provider.is_empty() {
399            fallback_provider.clone()
400        } else {
401            match create_named_provider(
402                &self.config.memory.admission.admission_provider,
403                &self.config,
404            ) {
405                Ok(p) => {
406                    tracing::info!(
407                        provider = %self.config.memory.admission.admission_provider,
408                        "A-MAC admission provider configured"
409                    );
410                    p
411                }
412                Err(e) => {
413                    tracing::warn!(
414                        provider = %self.config.memory.admission.admission_provider,
415                        error = %e,
416                        "A-MAC admission provider resolution failed — primary provider will be used"
417                    );
418                    fallback_provider.clone()
419                }
420            }
421        };
422        let w = &self.config.memory.admission.weights;
423        let weights = zeph_memory::AdmissionWeights {
424            future_utility: w.future_utility,
425            factual_confidence: w.factual_confidence,
426            semantic_novelty: w.semantic_novelty,
427            temporal_recency: w.temporal_recency,
428            content_type_prior: w.content_type_prior,
429            goal_utility: w.goal_utility,
430        };
431        let mut control = zeph_memory::AdmissionControl::new(
432            self.config.memory.admission.threshold,
433            self.config.memory.admission.fast_path_margin,
434            weights,
435        )
436        .with_provider(admission_provider);
437
438        if self.config.memory.admission.goal_conditioned_write {
439            let goal_provider = if self
440                .config
441                .memory
442                .admission
443                .goal_utility_provider
444                .is_empty()
445            {
446                None
447            } else {
448                match create_named_provider(
449                    &self.config.memory.admission.goal_utility_provider,
450                    &self.config,
451                ) {
452                    Ok(p) => Some(p),
453                    Err(e) => {
454                        tracing::warn!(
455                            provider = %self.config.memory.admission.goal_utility_provider,
456                            error = %e,
457                            "goal_utility_provider not found, LLM refinement disabled"
458                        );
459                        None
460                    }
461                }
462            };
463            control = control.with_goal_gate(zeph_memory::GoalGateConfig {
464                threshold: self.config.memory.admission.goal_utility_threshold,
465                provider: goal_provider,
466                weight: self.config.memory.admission.goal_utility_weight,
467            });
468            tracing::info!(
469                threshold = self.config.memory.admission.goal_utility_threshold,
470                weight = self.config.memory.admission.goal_utility_weight,
471                "A-MAC: goal-conditioned write gate enabled"
472            );
473        }
474
475        if self.config.memory.admission.admission_strategy == zeph_config::AdmissionStrategy::Rl {
476            tracing::warn!(
477                "admission_strategy = \"rl\" is configured but the RL model is not yet wired \
478                 into the admission path — falling back to heuristic. See #2416."
479            );
480        }
481
482        tracing::info!(
483            threshold = self.config.memory.admission.threshold,
484            "A-MAC admission control enabled"
485        );
486        control
487    }
488
489    pub async fn build_skill_matcher(
490        &self,
491        provider: &AnyProvider,
492        meta: &[&SkillMeta],
493        memory: &SemanticMemory,
494    ) -> Option<SkillMatcherBackend> {
495        let embed_model = self.embedding_model();
496        create_skill_matcher(
497            &self.config,
498            provider,
499            meta,
500            memory,
501            &embed_model,
502            self.qdrant_ops.as_ref(),
503        )
504        .await
505    }
506
507    pub fn build_registry(&self) -> SkillRegistry {
508        {
509            let managed = managed_skills_dir();
510            match zeph_skills::bundled::provision_bundled_skills(&managed) {
511                Ok(report) => {
512                    if !report.installed.is_empty() {
513                        tracing::info!(
514                            skills = ?report.installed,
515                            "provisioned new bundled skills"
516                        );
517                    }
518                    if !report.updated.is_empty() {
519                        tracing::info!(
520                            skills = ?report.updated,
521                            "updated bundled skills"
522                        );
523                    }
524                    for (name, err) in &report.failed {
525                        tracing::warn!(skill = %name, error = %err, "failed to provision bundled skill");
526                    }
527                }
528                Err(e) => {
529                    tracing::warn!(error = %e, "bundled skill provisioning failed");
530                }
531            }
532        }
533
534        let skill_paths = self.skill_paths();
535        let registry = SkillRegistry::load(&skill_paths);
536
537        if self.config.skills.trust.scan_on_load {
538            let findings = registry.scan_loaded();
539            if findings.is_empty() {
540                tracing::debug!("skill content scan: no injection patterns found");
541            } else {
542                tracing::warn!(
543                    count = findings.len(),
544                    "skill content scan complete: {} skill(s) with potential injection patterns",
545                    findings.len()
546                );
547            }
548        }
549
550        if self.config.skills.trust.scanner.capability_escalation_check {
551            // Build a trust-level mapping from all loaded skill metas.
552            // Skills without a trust record default to the configured default_level.
553            let default_level = self.config.skills.trust.default_level;
554            let trust_levels: Vec<(String, zeph_tools::SkillTrustLevel)> = registry
555                .all_meta()
556                .iter()
557                .map(|meta| (meta.name.clone(), default_level))
558                .collect();
559
560            let violations = registry.check_escalations(&trust_levels);
561            for v in &violations {
562                tracing::warn!(
563                    skill = %v.skill_name,
564                    denied_tools = ?v.denied_tools,
565                    "capability escalation: skill declares tools exceeding its trust level"
566                );
567            }
568            if violations.is_empty() {
569                tracing::debug!("capability escalation check: no violations found");
570            }
571        }
572
573        registry
574    }
575
576    pub fn skill_paths(&self) -> Vec<PathBuf> {
577        let mut paths: Vec<PathBuf> = self.config.skills.paths.iter().map(PathBuf::from).collect();
578        let managed_dir = managed_skills_dir();
579        if !paths.contains(&managed_dir) {
580            paths.push(managed_dir);
581        }
582        paths
583    }
584
585    pub fn managed_skills_dir() -> PathBuf {
586        managed_skills_dir()
587    }
588
589    pub fn build_watchers(&self) -> WatcherBundle {
590        let skill_paths = self.skill_paths();
591        let (reload_tx, skill_reload_rx) = mpsc::channel(4);
592        let skill_watcher = match SkillWatcher::start(&skill_paths, reload_tx) {
593            Ok(w) => {
594                tracing::info!("skill watcher started");
595                Some(w)
596            }
597            Err(e) => {
598                tracing::warn!("skill watcher unavailable: {e:#}");
599                None
600            }
601        };
602
603        let (config_reload_tx, config_reload_rx) = mpsc::channel(4);
604        let config_watcher = match ConfigWatcher::start(&self.config_path, config_reload_tx) {
605            Ok(w) => {
606                tracing::info!("config watcher started");
607                Some(w)
608            }
609            Err(e) => {
610                tracing::warn!("config watcher unavailable: {e:#}");
611                None
612            }
613        };
614
615        WatcherBundle {
616            skill_watcher,
617            skill_reload_rx,
618            config_watcher,
619            config_reload_rx,
620        }
621    }
622
623    pub fn build_shutdown() -> (watch::Sender<bool>, watch::Receiver<bool>) {
624        watch::channel(false)
625    }
626
627    pub fn embedding_model(&self) -> String {
628        effective_embedding_model(&self.config)
629    }
630
631    pub fn build_summary_provider(&self) -> Option<AnyProvider> {
632        // Structured config takes precedence over the string-based summary_model.
633        if let Some(ref entry) = self.config.llm.summary_provider {
634            return match build_provider_from_entry(entry, &self.config) {
635                Ok(sp) => {
636                    tracing::info!(
637                        provider_type = ?entry.provider_type,
638                        model = ?entry.model,
639                        "summary provider configured via [llm.summary_provider]"
640                    );
641                    Some(sp)
642                }
643                Err(e) => {
644                    tracing::warn!("failed to create summary provider: {e:#}, using primary");
645                    None
646                }
647            };
648        }
649        self.config.llm.summary_model.as_ref().and_then(
650            |model_spec| match create_summary_provider(model_spec, &self.config) {
651                Ok(sp) => {
652                    tracing::info!(model = %model_spec, "summary provider configured via llm.summary_model");
653                    Some(sp)
654                }
655                Err(e) => {
656                    tracing::warn!("failed to create summary provider: {e:#}, using primary");
657                    None
658                }
659            },
660        )
661    }
662
663    /// Build the quarantine summarizer provider when `security.content_isolation.quarantine.enabled = true`.
664    ///
665    /// Returns `None` when quarantine is disabled or provider resolution fails.
666    /// Emits a `tracing::warn` on resolution failure (quarantine silently disabled).
667    pub fn build_quarantine_provider(
668        &self,
669    ) -> Option<(AnyProvider, zeph_sanitizer::QuarantineConfig)> {
670        let ci = &self.config.security.content_isolation;
671        let qc = &ci.quarantine;
672        if !qc.enabled {
673            if ci.mcp_to_acp_boundary {
674                tracing::warn!(
675                    "mcp_to_acp_boundary is enabled but quarantine is disabled — \
676                     cross-boundary MCP tool results in ACP sessions will be \
677                     spotlighted but NOT quarantine-summarized; enable \
678                     [security.content_isolation.quarantine] for full protection"
679                );
680            }
681            return None;
682        }
683        match create_named_provider(&qc.model, &self.config) {
684            Ok(p) => {
685                tracing::info!(model = %qc.model, "quarantine provider configured");
686                Some((p, qc.clone()))
687            }
688            Err(e) => {
689                tracing::warn!(
690                    model = %qc.model,
691                    error = %e,
692                    "quarantine provider resolution failed, quarantine disabled"
693                );
694                None
695            }
696        }
697    }
698
699    /// Build the guardrail filter when `security.guardrail.enabled = true`.
700    ///
701    /// Returns `None` when guardrail is disabled or provider resolution fails.
702    /// Emits a `tracing::warn` on resolution failure (guardrail silently disabled).
703    pub fn build_guardrail_filter(&self) -> Option<zeph_sanitizer::guardrail::GuardrailFilter> {
704        let (provider, config) = self.build_guardrail_provider()?;
705        match zeph_sanitizer::guardrail::GuardrailFilter::new(provider, &config) {
706            Ok(filter) => Some(filter),
707            Err(e) => {
708                tracing::warn!(error = %e, "guardrail filter construction failed, guardrail disabled");
709                None
710            }
711        }
712    }
713
714    /// Build the guardrail provider and config pair for use in multi-session contexts.
715    ///
716    /// Returns `None` when guardrail is disabled or provider resolution fails.
717    pub fn build_guardrail_provider(
718        &self,
719    ) -> Option<(AnyProvider, zeph_sanitizer::guardrail::GuardrailConfig)> {
720        let gc = &self.config.security.guardrail;
721        if !gc.enabled {
722            return None;
723        }
724        let provider_name = gc.provider.as_deref().unwrap_or("ollama");
725        match create_named_provider(provider_name, &self.config) {
726            Ok(p) => {
727                tracing::info!(
728                    provider = %provider_name,
729                    model = ?gc.model,
730                    "guardrail provider configured"
731                );
732                Some((p, gc.clone()))
733            }
734            Err(e) => {
735                tracing::warn!(
736                    provider = %provider_name,
737                    error = %e,
738                    "guardrail provider resolution failed, guardrail disabled"
739                );
740                None
741            }
742        }
743    }
744
745    /// Build a dedicated provider for the judge detector when `detector_mode = judge`.
746    ///
747    /// Returns `None` when mode is `Regex` or `judge_model` is empty (primary provider used).
748    /// Emits a `tracing::warn` when mode is `Judge` but no model is specified.
749    pub fn build_judge_provider(&self) -> Option<AnyProvider> {
750        use crate::config::DetectorMode;
751        let learning = &self.config.skills.learning;
752        if learning.detector_mode != DetectorMode::Judge {
753            return None;
754        }
755        if learning.judge_model.is_empty() {
756            tracing::warn!(
757                "detector_mode=judge but judge_model is empty — primary provider will be used for judging"
758            );
759            return None;
760        }
761        match create_named_provider(&learning.judge_model, &self.config) {
762            Ok(jp) => {
763                tracing::info!(model = %learning.judge_model, "judge provider configured");
764                Some(jp)
765            }
766            Err(e) => {
767                tracing::warn!("failed to create judge provider: {e:#}, using primary");
768                None
769            }
770        }
771    }
772
773    /// Build an `LlmClassifier` for `detector_mode = "model"` feedback detection.
774    ///
775    /// Resolves `feedback_provider` from `[[llm.providers]]` registry.
776    /// Pass the session's primary provider as `primary` for fallback when `feedback_provider`
777    /// is empty. Returns `None` with a warning on resolution failure — never fails startup.
778    pub fn build_feedback_classifier(
779        &self,
780        primary: &AnyProvider,
781    ) -> Option<zeph_llm::classifier::llm::LlmClassifier> {
782        use crate::config::DetectorMode;
783        let learning = &self.config.skills.learning;
784        if learning.detector_mode != DetectorMode::Model {
785            return None;
786        }
787        let provider = if learning.feedback_provider.is_empty() {
788            tracing::debug!("feedback_provider empty — using primary provider for LlmClassifier");
789            Some(primary.clone())
790        } else {
791            match crate::bootstrap::provider::create_named_provider(
792                &learning.feedback_provider,
793                &self.config,
794            ) {
795                Ok(p) => {
796                    tracing::info!(
797                        provider = %learning.feedback_provider,
798                        "LlmClassifier feedback provider configured"
799                    );
800                    Some(p)
801                }
802                Err(e) => {
803                    tracing::warn!(
804                        provider = %learning.feedback_provider,
805                        error = %e,
806                        "feedback_provider not found in registry, degrading to regex-only"
807                    );
808                    None
809                }
810            }
811        };
812        if let Some(p) = provider {
813            Some(zeph_llm::classifier::llm::LlmClassifier::new(
814                std::sync::Arc::new(p),
815            ))
816        } else {
817            tracing::warn!(
818                "detector_mode=model but no provider available, degrading to regex-only"
819            );
820            None
821        }
822    }
823
824    /// Build a dedicated provider for compaction probe LLM calls.
825    ///
826    /// Returns `None` when `probe_provider` is empty (falls back to summary provider at call site).
827    /// Emits a `tracing::warn` on resolution failure (summary/primary provider used as fallback).
828    pub fn build_probe_provider(&self) -> Option<AnyProvider> {
829        let name = &self.config.memory.compression.probe.probe_provider;
830        if name.is_empty() {
831            return None;
832        }
833        match create_named_provider(name, &self.config) {
834            Ok(p) => {
835                tracing::info!(provider = %name, "compaction probe provider configured");
836                Some(p)
837            }
838            Err(e) => {
839                tracing::warn!(
840                    provider = %name,
841                    error = %e,
842                    "probe provider resolution failed — summary/primary provider will be used"
843                );
844                None
845            }
846        }
847    }
848
849    /// Build a dedicated provider for `compress_context` LLM calls (#2356).
850    ///
851    /// Returns `None` when `compress_provider` is empty (falls back to primary provider at call site).
852    /// Emits a `tracing::warn` on resolution failure (primary provider used as fallback).
853    pub fn build_compress_provider(&self) -> Option<AnyProvider> {
854        let name = &self.config.memory.compression.compress_provider;
855        if name.is_empty() {
856            return None;
857        }
858        match create_named_provider(name, &self.config) {
859            Ok(p) => {
860                tracing::info!(provider = %name, "compress_context provider configured");
861                Some(p)
862            }
863            Err(e) => {
864                tracing::warn!(
865                    provider = %name,
866                    error = %e,
867                    "compress_context provider resolution failed — primary provider will be used"
868                );
869                None
870            }
871        }
872    }
873
874    /// Build a dedicated provider for ACON compression guidelines LLM calls.
875    ///
876    /// Returns `None` when `guidelines_provider` is empty (falls back to primary provider at call site).
877    ///
878    /// # Errors (logged, not propagated)
879    ///
880    /// Emits a `tracing::warn` on resolution failure; primary provider is used as fallback.
881    pub fn build_guidelines_provider(&self) -> Option<AnyProvider> {
882        let name = &self
883            .config
884            .memory
885            .compression_guidelines
886            .guidelines_provider;
887        if name.is_empty() {
888            return None;
889        }
890        match create_named_provider(name, &self.config) {
891            Ok(p) => {
892                tracing::info!(provider = %name, "compression guidelines provider configured");
893                Some(p)
894            }
895            Err(e) => {
896                tracing::warn!(
897                    provider = %name,
898                    error = %e,
899                    "guidelines provider resolution failed — primary provider will be used"
900                );
901                None
902            }
903        }
904    }
905
906    /// Build a dedicated provider for All-Mem consolidation LLM calls.
907    ///
908    /// Returns `None` when `consolidation_provider` is empty (falls back to primary provider at
909    /// call site) or when provider resolution fails (logs a warning, fails open).
910    pub fn build_consolidation_provider(&self) -> Option<AnyProvider> {
911        let name = &self.config.memory.consolidation.consolidation_provider;
912        if name.is_empty() {
913            return None;
914        }
915        match create_named_provider(name, &self.config) {
916            Ok(p) => {
917                tracing::info!(provider = %name, "consolidation provider configured");
918                Some(p)
919            }
920            Err(e) => {
921                tracing::warn!(
922                    provider = %name,
923                    error = %e,
924                    "consolidation provider resolution failed — primary provider will be used"
925                );
926                None
927            }
928        }
929    }
930
931    /// Build a dedicated provider for orchestration planner LLM calls.
932    ///
933    /// Returns `None` when `planner_provider` is empty (falls back to primary provider at call site).
934    ///
935    /// # Errors (logged, not propagated)
936    ///
937    /// Emits a `tracing::warn` on resolution failure; primary provider is used as fallback.
938    pub fn build_planner_provider(&self) -> Option<AnyProvider> {
939        let name = &self.config.orchestration.planner_provider;
940        if name.is_empty() {
941            return None;
942        }
943        match create_named_provider(name, &self.config) {
944            Ok(p) => {
945                tracing::info!(provider = %name, "planner provider configured");
946                Some(p)
947            }
948            Err(e) => {
949                tracing::warn!(
950                    provider = %name,
951                    error = %e,
952                    "planner provider resolution failed — primary provider will be used"
953                );
954                None
955            }
956        }
957    }
958
959    /// Build the `PlanVerifier` provider from `[orchestration] verify_provider`.
960    ///
961    /// Returns `None` when `verify_provider` is empty (falls back to the primary provider at
962    /// runtime) or when provider resolution fails (logs a warning, fails open).
963    pub fn build_verify_provider(&self) -> Option<AnyProvider> {
964        let name = &self.config.orchestration.verify_provider;
965        if name.is_empty() {
966            return None;
967        }
968        match create_named_provider(name, &self.config) {
969            Ok(p) => {
970                tracing::info!(provider = %name, "verify provider configured");
971                Some(p)
972            }
973            Err(e) => {
974                tracing::warn!(
975                    provider = %name,
976                    error = %e,
977                    "verify provider resolution failed — primary provider will be used"
978                );
979                None
980            }
981        }
982    }
983    pub fn build_eval_provider(&self) -> Option<AnyProvider> {
984        let model_spec = self.config.experiments.eval_model.as_deref()?;
985        match create_summary_provider(model_spec, &self.config) {
986            Ok(p) => {
987                tracing::info!(eval_model = %model_spec, "experiment eval provider configured");
988                Some(p)
989            }
990            Err(e) => {
991                tracing::warn!(
992                    eval_model = %model_spec,
993                    error = %e,
994                    "failed to create eval provider — primary provider will be used as judge"
995                );
996                None
997            }
998        }
999    }
1000
1001    /// Build a dedicated provider for `MemScene` label/profile LLM generation.
1002    ///
1003    /// Returns `None` when `tiers.scene_provider` is empty (caller falls back to primary provider).
1004    /// Emits a `tracing::warn` on resolution failure; primary provider is used as fallback.
1005    pub fn build_scene_provider(&self) -> Option<AnyProvider> {
1006        let name = &self.config.memory.tiers.scene_provider;
1007        if name.is_empty() {
1008            return None;
1009        }
1010        match create_named_provider(name, &self.config) {
1011            Ok(p) => {
1012                tracing::info!(provider = %name, "scene consolidation provider configured");
1013                Some(p)
1014            }
1015            Err(e) => {
1016                tracing::warn!(
1017                    provider = %name,
1018                    error = %e,
1019                    "scene provider resolution failed — primary provider will be used"
1020                );
1021                None
1022            }
1023        }
1024    }
1025}
1026
1027#[cfg(test)]
1028mod tests;