zeph_core/agent/state/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Sub-struct definitions for the `Agent` struct.
5//!
6//! Each struct groups a related cluster of `Agent` fields.
//! Most types are `pub(crate)` — visible only within the `zeph-core` crate; `AdversarialPolicyInfo` and `ProviderConfigSnapshot` are declared `pub`.
8//!
9//! `MemoryState` is decomposed into four concern-separated sub-structs, each in its own file:
10//!
11//! - [`MemoryPersistenceState`] — `SQLite` handles, conversation IDs, recall budgets, autosave
12//! - [`MemoryCompactionState`] — summarization thresholds, shutdown summary, digest, strategy
13//! - [`MemoryExtractionState`] — graph config, RPE router, document config, semantic labels
14//! - [`MemorySubsystemState`] — `TiMem`, `autoDream`, `MagicDocs`, microcompact
15
16pub(crate) mod compaction;
17pub(crate) mod extraction;
18pub(crate) mod persistence;
19pub(crate) mod subsystems;
20
21pub(crate) use self::compaction::MemoryCompactionState;
22pub(crate) use self::extraction::MemoryExtractionState;
23pub(crate) use self::persistence::MemoryPersistenceState;
24pub(crate) use self::subsystems::MemorySubsystemState;
25
26use std::collections::{HashMap, HashSet, VecDeque};
27use std::path::PathBuf;
28use std::sync::Arc;
29
30use parking_lot::RwLock;
31use std::time::Instant;
32
33use tokio::sync::{Notify, mpsc, watch};
34use tokio::task::JoinHandle;
35use tokio_util::sync::CancellationToken;
36use zeph_llm::any::AnyProvider;
37use zeph_llm::provider::Message;
38use zeph_llm::stt::SpeechToText;
39
40use crate::config::{ProviderEntry, SecurityConfig, SkillPromptMode, TimeoutConfig};
41use crate::config_watcher::ConfigEvent;
42use crate::context::EnvironmentContext;
43use crate::cost::CostTracker;
44use crate::file_watcher::FileChangedEvent;
45use crate::instructions::{InstructionBlock, InstructionEvent, InstructionReloadState};
46use crate::metrics::MetricsSnapshot;
47use crate::vault::Secret;
48use zeph_config;
49use zeph_memory::TokenCounter;
50use zeph_sanitizer::ContentSanitizer;
51use zeph_sanitizer::quarantine::QuarantinedSummarizer;
52use zeph_skills::matcher::SkillMatcherBackend;
53use zeph_skills::registry::SkillRegistry;
54use zeph_skills::watcher::SkillEvent;
55
56use super::message_queue::QueuedMessage;
57
/// Coordinator struct holding four concern-separated sub-structs for memory management.
///
/// Each sub-struct groups fields by a single concern:
/// - [`persistence`](MemoryPersistenceState) — `SQLite` handles, conversation IDs, recall budgets
/// - [`compaction`](MemoryCompactionState) — summarization thresholds, shutdown summary, digest
/// - [`extraction`](MemoryExtractionState) — graph config, RPE router, semantic labels
/// - [`subsystems`](MemorySubsystemState) — `TiMem`, `autoDream`, `MagicDocs`, microcompact
///
/// `Default` is derived, so a default `MemoryState` is assembled from the `Default` value of
/// each of the four sub-structs.
#[derive(Default)]
pub(crate) struct MemoryState {
    /// `SQLite` handles, conversation IDs, recall budgets, and autosave policy.
    pub(crate) persistence: MemoryPersistenceState,
    /// Summarization thresholds, shutdown summary, digest config, and context strategy.
    pub(crate) compaction: MemoryCompactionState,
    /// Graph extraction config, RPE router, document config, and semantic label configs.
    pub(crate) extraction: MemoryExtractionState,
    /// `TiMem`, `autoDream`, `MagicDocs`, and microcompact subsystem state.
    pub(crate) subsystems: MemorySubsystemState,
}
76
77pub(crate) struct SkillState {
78    pub(crate) registry: Arc<RwLock<SkillRegistry>>,
79    pub(crate) skill_paths: Vec<PathBuf>,
80    pub(crate) managed_dir: Option<PathBuf>,
81    pub(crate) trust_config: crate::config::TrustConfig,
82    pub(crate) matcher: Option<SkillMatcherBackend>,
83    pub(crate) max_active_skills: usize,
84    pub(crate) disambiguation_threshold: f32,
85    pub(crate) min_injection_score: f32,
86    pub(crate) embedding_model: String,
87    pub(crate) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
88    pub(crate) active_skill_names: Vec<String>,
89    pub(crate) last_skills_prompt: String,
90    pub(crate) prompt_mode: SkillPromptMode,
91    /// Custom secrets available at runtime: key=hyphenated name, value=secret.
92    pub(crate) available_custom_secrets: HashMap<String, Secret>,
93    pub(crate) cosine_weight: f32,
94    pub(crate) hybrid_search: bool,
95    pub(crate) bm25_index: Option<zeph_skills::bm25::Bm25Index>,
96    pub(crate) two_stage_matching: bool,
97    /// Threshold for confusability warnings (0.0 = disabled).
98    pub(crate) confusability_threshold: f32,
99    /// `SkillOrchestra` RL routing head. `Some` when `rl_routing_enabled = true` and
100    /// weights are loaded or initialized. `None` when RL routing is disabled.
101    pub(crate) rl_head: Option<zeph_skills::rl_head::RoutingHead>,
102    /// Blend weight for RL routing: `final = (1-rl_weight)*cosine + rl_weight*rl_score`.
103    pub(crate) rl_weight: f32,
104    /// Skip RL blending for the first N updates (cold-start warmup).
105    pub(crate) rl_warmup_updates: u32,
106    /// Directory where `/skill create` writes generated skills.
107    /// Defaults to `managed_dir` if `None`.
108    pub(crate) generation_output_dir: Option<std::path::PathBuf>,
109    /// Provider name for `/skill create` generation. Empty = primary.
110    pub(crate) generation_provider_name: String,
111}
112
113pub(crate) struct McpState {
114    pub(crate) tools: Vec<zeph_mcp::McpTool>,
115    pub(crate) registry: Option<zeph_mcp::McpToolRegistry>,
116    pub(crate) manager: Option<std::sync::Arc<zeph_mcp::McpManager>>,
117    pub(crate) allowed_commands: Vec<String>,
118    pub(crate) max_dynamic: usize,
119    /// Receives elicitation requests from MCP server handlers during tool execution.
120    /// When `Some`, the agent loop must process these concurrently with tool result awaiting
121    /// to avoid deadlock (tool result waits for elicitation, elicitation waits for agent loop).
122    pub(crate) elicitation_rx: Option<tokio::sync::mpsc::Receiver<zeph_mcp::ElicitationEvent>>,
123    /// Shared with `McpToolExecutor` so native `tool_use` sees the current tool list.
124    ///
125    /// Two methods write to this `RwLock` — ordering matters:
126    /// - `sync_executor_tools()`: writes the **full** `self.tools` set.
127    /// - `apply_pruned_tools()`: writes the **pruned** subset (used after pruning).
128    ///
129    /// Within a turn, `sync_executor_tools` must always run **before**
130    /// `apply_pruned_tools`.  The normal call order guarantees this: tool-list
131    /// change events call `sync_executor_tools` (inside `check_tool_refresh`,
132    /// `handle_mcp_add`, `handle_mcp_remove`), and pruning runs later inside
133    /// `rebuild_system_prompt`.  See also: `apply_pruned_tools`.
134    pub(crate) shared_tools: Option<Arc<RwLock<Vec<zeph_mcp::McpTool>>>>,
135    /// Receives full flattened tool list after any `tools/list_changed` notification.
136    pub(crate) tool_rx: Option<tokio::sync::watch::Receiver<Vec<zeph_mcp::McpTool>>>,
137    /// Per-server connection outcomes from the initial `connect_all()` call.
138    pub(crate) server_outcomes: Vec<zeph_mcp::ServerConnectOutcome>,
139    /// Per-message cache for MCP tool pruning results (#2298).
140    ///
141    /// Reset at the start of each user turn and whenever the MCP tool list
142    /// changes (via `tools/list_changed`, `/mcp add`, or `/mcp remove`).
143    pub(crate) pruning_cache: zeph_mcp::PruningCache,
144    /// Dedicated provider for MCP tool pruning LLM calls.
145    ///
146    /// `None` means fall back to the agent's primary provider.
147    /// Resolved from `[[llm.providers]]` at build time using `pruning_provider`
148    /// from `ToolPruningConfig`.
149    pub(crate) pruning_provider: Option<zeph_llm::any::AnyProvider>,
150    /// Whether MCP tool pruning is enabled.  Mirrors `ToolPruningConfig::enabled`.
151    pub(crate) pruning_enabled: bool,
152    /// Pruning parameters snapshot.  Derived from `ToolPruningConfig` at build time.
153    pub(crate) pruning_params: zeph_mcp::PruningParams,
154    /// Pre-computed semantic tool index for embedding-based discovery (#2321).
155    ///
156    /// Built at connect time via `rebuild_semantic_index()`, rebuilt on tool list change.
157    /// `None` when strategy is not `Embedding` or when build failed (fallback to all tools).
158    pub(crate) semantic_index: Option<zeph_mcp::SemanticToolIndex>,
159    /// Active discovery strategy and parameters.  Derived from `ToolDiscoveryConfig`.
160    pub(crate) discovery_strategy: zeph_mcp::ToolDiscoveryStrategy,
161    /// Discovery parameters snapshot.  Derived from `ToolDiscoveryConfig` at build time.
162    pub(crate) discovery_params: zeph_mcp::DiscoveryParams,
163    /// Dedicated embedding provider for tool discovery.  `None` = fall back to the
164    /// agent's primary embedding provider.
165    pub(crate) discovery_provider: Option<zeph_llm::any::AnyProvider>,
166    /// When `true`, show a security warning before prompting for fields whose names
167    /// match sensitive patterns (password, token, secret, key, credential, etc.).
168    pub(crate) elicitation_warn_sensitive_fields: bool,
169    /// When `true`, semantic index and registry need to be rebuilt at the next opportunity.
170    ///
171    /// Set after `/mcp add` or `/mcp remove` when called via `AgentAccess::handle_mcp`,
172    /// which cannot call `rebuild_semantic_index` and `sync_mcp_registry` directly because
173    /// those are `async fn(&mut self)` and their futures are `!Send` (they hold `&mut Agent<C>`
174    /// across `.await`). The rebuild is deferred to `check_tool_refresh`, which runs at the
175    /// start of each turn without the `Box<dyn Future + Send>` constraint.
176    pub(crate) pending_semantic_rebuild: bool,
177}
178
179pub(crate) struct IndexState {
180    pub(crate) retriever: Option<std::sync::Arc<zeph_index::retriever::CodeRetriever>>,
181    pub(crate) repo_map_tokens: usize,
182    pub(crate) cached_repo_map: Option<(String, std::time::Instant)>,
183    pub(crate) repo_map_ttl: std::time::Duration,
184}
185
/// Snapshot of adversarial policy gate configuration for status display.
///
/// A plain value type: it derives `Clone` for cheap hand-off to the status view and
/// `PartialEq`/`Eq` so two snapshots can be compared directly (e.g. in assertions or
/// change detection).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdversarialPolicyInfo {
    /// Provider shown for the policy gate (presumably the provider's configured name — confirm).
    pub provider: String,
    /// Number of policies in the snapshot.
    pub policy_count: usize,
    /// The gate's fail-open setting as captured in this snapshot.
    pub fail_open: bool,
}
193
194pub(crate) struct RuntimeConfig {
195    pub(crate) security: SecurityConfig,
196    pub(crate) timeouts: TimeoutConfig,
197    pub(crate) model_name: String,
198    /// Configured name from `[[llm.providers]]` (the `name` field), set at startup and on
199    /// `/provider` switch. Falls back to the provider type string when empty.
200    pub(crate) active_provider_name: String,
201    pub(crate) permission_policy: zeph_tools::PermissionPolicy,
202    pub(crate) redact_credentials: bool,
203    pub(crate) rate_limiter: super::rate_limiter::ToolRateLimiter,
204    pub(crate) semantic_cache_enabled: bool,
205    pub(crate) semantic_cache_threshold: f32,
206    pub(crate) semantic_cache_max_candidates: u32,
207    /// Dependency config snapshot stored for per-turn boost parameters.
208    pub(crate) dependency_config: zeph_tools::DependencyConfig,
209    /// Adversarial policy gate runtime info for /status display.
210    pub(crate) adversarial_policy_info: Option<AdversarialPolicyInfo>,
211    /// Current spawn depth of this agent instance (0 = top-level, 1 = first sub-agent, etc.).
212    /// Used by `build_spawn_context()` to propagate depth to children.
213    pub(crate) spawn_depth: u32,
214    /// Inject `<budget>` XML into the volatile system prompt section (#2267).
215    pub(crate) budget_hint_enabled: bool,
216    /// Per-channel skill allowlist. Skills not matching the allowlist are excluded from the
217    /// prompt. An empty `allowed` list means all skills are permitted (default).
218    pub(crate) channel_skills: zeph_config::ChannelSkillsConfig,
219    /// Runtime middleware layers for LLM calls and tool dispatch (#2286).
220    ///
221    /// Default: empty vec (zero-cost — loops never iterate).
222    pub(crate) layers: Vec<std::sync::Arc<dyn crate::runtime_layer::RuntimeLayer>>,
223    /// Background supervisor config snapshot for turn-boundary abort logic.
224    pub(crate) supervisor_config: crate::config::TaskSupervisorConfig,
225}
226
/// Groups feedback detection subsystems: correction detector, judge detector, and LLM classifier.
pub(crate) struct FeedbackState {
    /// Correction detector; always present.
    pub(crate) detector: super::feedback_detector::FeedbackDetector,
    /// Optional judge detector. `spawn_judge_correction_check` uses `llm_classifier`
    /// instead of this when that field is `Some`.
    pub(crate) judge: Option<super::feedback_detector::JudgeDetector>,
    /// LLM-backed zero-shot classifier for `DetectorMode::Model`.
    /// When `Some`, `spawn_judge_correction_check` uses this instead of `JudgeDetector`.
    pub(crate) llm_classifier: Option<zeph_llm::classifier::llm::LlmClassifier>,
}
235
236/// Groups security-related subsystems (sanitizer, quarantine, exfiltration guard).
237pub(crate) struct SecurityState {
238    pub(crate) sanitizer: ContentSanitizer,
239    pub(crate) quarantine_summarizer: Option<QuarantinedSummarizer>,
240    /// Whether this agent session is serving an ACP client.
241    /// When `true` and `mcp_to_acp_boundary` is enabled, MCP tool results
242    /// receive unconditional quarantine and cross-boundary audit logging.
243    pub(crate) is_acp_session: bool,
244    pub(crate) exfiltration_guard: zeph_sanitizer::exfiltration::ExfiltrationGuard,
245    pub(crate) flagged_urls: HashSet<String>,
246    /// URLs explicitly provided by the user across all turns in this session.
247    /// Populated from raw user message text; cleared on `/clear`.
248    /// Shared with `UrlGroundingVerifier` to check `fetch`/`web_scrape` calls at dispatch time.
249    pub(crate) user_provided_urls: Arc<RwLock<HashSet<String>>>,
250    pub(crate) pii_filter: zeph_sanitizer::pii::PiiFilter,
251    /// NER classifier for PII detection (`classifiers.ner_model`). When `Some`, the PII path
252    /// runs both regex (`pii_filter`) and NER, then merges spans before redaction.
253    /// `None` when `classifiers` feature is disabled or `classifiers.enabled = false`.
254    #[cfg(feature = "classifiers")]
255    pub(crate) pii_ner_backend: Option<std::sync::Arc<dyn zeph_llm::classifier::ClassifierBackend>>,
256    /// Per-call timeout for the NER PII classifier in milliseconds.
257    #[cfg(feature = "classifiers")]
258    pub(crate) pii_ner_timeout_ms: u64,
259    /// Maximum number of bytes passed to the NER PII classifier per call.
260    ///
261    /// Large tool outputs (e.g. `search_code`) can produce 150+ `DeBERTa` chunks and exceed
262    /// the per-call timeout. Input is truncated at a valid UTF-8 boundary before classification.
263    #[cfg(feature = "classifiers")]
264    pub(crate) pii_ner_max_chars: usize,
265    /// Circuit-breaker threshold: number of consecutive timeouts before NER is disabled.
266    /// `0` means the circuit breaker is disabled (NER is always attempted).
267    #[cfg(feature = "classifiers")]
268    pub(crate) pii_ner_circuit_breaker_threshold: u32,
269    /// Number of consecutive NER timeouts observed since the last successful call.
270    #[cfg(feature = "classifiers")]
271    pub(crate) pii_ner_consecutive_timeouts: u32,
272    /// Set to `true` when the circuit breaker trips. NER is skipped for the rest of the session.
273    #[cfg(feature = "classifiers")]
274    pub(crate) pii_ner_tripped: bool,
275    pub(crate) memory_validator: zeph_sanitizer::memory_validation::MemoryWriteValidator,
276    /// LLM-based prompt injection pre-screener (opt-in).
277    pub(crate) guardrail: Option<zeph_sanitizer::guardrail::GuardrailFilter>,
278    /// Post-LLM response verification layer.
279    pub(crate) response_verifier: zeph_sanitizer::response_verifier::ResponseVerifier,
280    /// Temporal causal IPI analyzer (opt-in, disabled when `None`).
281    pub(crate) causal_analyzer: Option<zeph_sanitizer::causal_ipi::TurnCausalAnalyzer>,
282}
283
/// Groups debug/diagnostics subsystems (dumper, trace collector, anomaly detector, logging config).
pub(crate) struct DebugState {
    /// Optional debug dumper; `None` when no dumper is attached.
    pub(crate) debug_dumper: Option<crate::debug_dump::DebugDumper>,
    /// Currently selected dump format.
    pub(crate) dump_format: crate::debug_dump::DumpFormat,
    /// Optional tracing collector; see `dump_dir`, `trace_service_name`, and `trace_redact`
    /// for the values used when one is created via a runtime format switch.
    pub(crate) trace_collector: Option<crate::debug_dump::trace::TracingCollector>,
    /// Monotonically increasing counter for `process_user_message` calls.
    /// Used to key spans in `trace_collector.active_iterations`.
    pub(crate) iteration_counter: usize,
    /// Optional tool anomaly detector.
    pub(crate) anomaly_detector: Option<zeph_tools::AnomalyDetector>,
    /// Whether to emit `reasoning_amplification` warnings for quality failures from reasoning
    /// models. Mirrors `AnomalyConfig::reasoning_model_warning`. Default: `true`.
    pub(crate) reasoning_model_warning: bool,
    /// Logging configuration held alongside the other diagnostics state.
    pub(crate) logging_config: crate::config::LoggingConfig,
    /// Base dump directory — stored so `/dump-format trace` can create a `TracingCollector` (CR-04).
    pub(crate) dump_dir: Option<PathBuf>,
    /// Service name for `TracingCollector` created via runtime format switch (CR-04).
    pub(crate) trace_service_name: String,
    /// Whether to redact in `TracingCollector` created via runtime format switch (CR-04).
    pub(crate) trace_redact: bool,
    /// Span ID of the currently executing iteration — used by LLM/tool span wiring (CR-01).
    /// Set to `Some` at the start of `process_user_message`, cleared at end.
    pub(crate) current_iteration_span_id: Option<[u8; 8]>,
}
307
/// Groups agent lifecycle state: shutdown signaling, timing, and I/O notification channels.
pub(crate) struct LifecycleState {
    /// Watch-channel receiver carrying the shutdown flag.
    pub(crate) shutdown: watch::Receiver<bool>,
    /// NOTE(review): presumably captured when the agent was constructed — confirm.
    pub(crate) start_time: Instant,
    /// Shared notifier used for cancellation signaling.
    pub(crate) cancel_signal: Arc<Notify>,
    /// Cancellation token paired with `cancel_signal`; see `cancel_bridge_handle`.
    pub(crate) cancel_token: CancellationToken,
    /// Handle to the cancel bridge task spawned each turn. Aborted before a new one is created
    /// to prevent unbounded task accumulation across turns.
    pub(crate) cancel_bridge_handle: Option<JoinHandle<()>>,
    /// Path of the config file, when known.
    pub(crate) config_path: Option<PathBuf>,
    /// Optional receiver of [`ConfigEvent`] notifications.
    pub(crate) config_reload_rx: Option<mpsc::Receiver<ConfigEvent>>,
    /// Optional watch receiver carrying a warmup-ready flag.
    pub(crate) warmup_ready: Option<watch::Receiver<bool>>,
    /// Optional receiver of update notification strings.
    pub(crate) update_notify_rx: Option<mpsc::Receiver<String>>,
    /// Optional receiver of custom task strings.
    pub(crate) custom_task_rx: Option<mpsc::Receiver<String>>,
    /// Last known process cwd. Compared after each tool call to detect changes.
    pub(crate) last_known_cwd: PathBuf,
    /// Receiver for file-change events from `FileChangeWatcher`. `None` when no paths configured.
    pub(crate) file_changed_rx: Option<mpsc::Receiver<FileChangedEvent>>,
    /// Keeps the `FileChangeWatcher` alive for the agent's lifetime. Dropping it aborts the watcher task.
    pub(crate) file_watcher: Option<crate::file_watcher::FileChangeWatcher>,
    /// Supervised background task manager. Owned by the agent; call `reap()` between turns
    /// and `abort_all()` on shutdown.
    pub(crate) supervisor: super::supervisor::BackgroundSupervisor,
}
332
/// Minimal config snapshot needed to rebuild a provider at runtime via `/provider <name>`.
///
/// Secrets are kept as plain strings because [`Secret`] intentionally does not implement
/// `Clone`. They are re-wrapped in `Secret` when passed to `build_provider_for_switch`.
///
/// NOTE(review): this type derives nothing, so it has no `Debug` output that could print
/// the plaintext keys. Keep it that way, or redact, if a `Debug` impl is ever added.
pub struct ProviderConfigSnapshot {
    /// Plaintext Claude API key, if one was resolved.
    pub claude_api_key: Option<String>,
    /// Plaintext OpenAI API key, if one was resolved.
    pub openai_api_key: Option<String>,
    /// Plaintext Gemini API key, if one was resolved.
    pub gemini_api_key: Option<String>,
    /// Plaintext API keys for compatible providers
    /// (presumably keyed by provider name — confirm against the builder).
    pub compatible_api_keys: HashMap<String, String>,
    /// LLM request timeout, in seconds.
    pub llm_request_timeout_secs: u64,
    /// Embedding model name.
    pub embedding_model: String,
}
345
/// Groups provider-related state: alternate providers, runtime switching, and compaction flags.
pub(crate) struct ProviderState {
    /// Optional provider for summarization; also the first fallback for `probe_provider`.
    pub(crate) summary_provider: Option<AnyProvider>,
    /// Shared slot for runtime model switching; set by external caller (e.g. ACP).
    pub(crate) provider_override: Option<Arc<RwLock<Option<AnyProvider>>>>,
    /// Optional provider for judge calls.
    pub(crate) judge_provider: Option<AnyProvider>,
    /// Dedicated provider for compaction probe LLM calls. Falls back to `summary_provider`
    /// (or primary) when `None`.
    pub(crate) probe_provider: Option<AnyProvider>,
    /// Dedicated provider for `compress_context` LLM calls (#2356).
    /// Falls back to the primary provider when `None`.
    pub(crate) compress_provider: Option<AnyProvider>,
    /// NOTE(review): presumably the cached prompt token count for the system prompt — confirm.
    pub(crate) cached_prompt_tokens: u64,
    /// Whether the active provider has server-side compaction enabled (Claude compact-2026-01-12).
    /// When true, client-side compaction is skipped.
    pub(crate) server_compaction_active: bool,
    /// Optional speech-to-text backend.
    pub(crate) stt: Option<Box<dyn SpeechToText>>,
    /// Snapshot of `[[llm.providers]]` entries for runtime `/provider` switching.
    pub(crate) provider_pool: Vec<ProviderEntry>,
    /// Resolved secrets and timeout settings needed to reconstruct providers at runtime.
    pub(crate) provider_config_snapshot: Option<ProviderConfigSnapshot>,
}
368
369/// Groups metrics and cost tracking state.
370pub(crate) struct MetricsState {
371    pub(crate) metrics_tx: Option<watch::Sender<MetricsSnapshot>>,
372    pub(crate) cost_tracker: Option<CostTracker>,
373    pub(crate) token_counter: Arc<TokenCounter>,
374    /// Set to `true` when Claude extended context (`enable_extended_context = true`) is active.
375    /// Read from config at build time, not derived from provider internals.
376    pub(crate) extended_context: bool,
377    /// Shared classifier latency ring buffer. Populated by `ContentSanitizer` (injection, PII)
378    /// and `LlmClassifier` (feedback). `None` when classifiers are not configured.
379    pub(crate) classifier_metrics: Option<Arc<zeph_llm::ClassifierMetrics>>,
380    /// Rolling window of per-turn latency samples (last 10 turns).
381    pub(crate) timing_window: std::collections::VecDeque<crate::metrics::TurnTimings>,
382    /// Accumulator for the current turn's timings. Flushed at turn end via `flush_turn_timings`.
383    pub(crate) pending_timings: crate::metrics::TurnTimings,
384    /// Optional histogram recorder for per-event Prometheus observations.
385    /// `None` when the `prometheus` feature is disabled or metrics are not enabled.
386    pub(crate) histogram_recorder: Option<std::sync::Arc<dyn crate::metrics::HistogramRecorder>>,
387}
388
/// Groups task orchestration and subagent state.
///
/// `Default` is derived: every `Option` field starts as `None`, and the two config fields
/// start from their own `Default` values.
#[derive(Default)]
pub(crate) struct OrchestrationState {
    /// On `OrchestrationState` (not `ProviderState`) because this provider is used exclusively
    /// by `LlmPlanner` during orchestration, not shared across subsystems.
    pub(crate) planner_provider: Option<AnyProvider>,
    /// Provider for `PlanVerifier` LLM calls. `None` falls back to the primary provider.
    /// On `OrchestrationState` for the same reason as `planner_provider`.
    pub(crate) verify_provider: Option<AnyProvider>,
    /// Graph waiting for `/plan confirm` before execution starts.
    pub(crate) pending_graph: Option<zeph_orchestration::TaskGraph>,
    /// Cancellation token for the currently executing plan. `None` when no plan is running.
    /// Created fresh in `handle_plan_confirm()`, cancelled in `handle_plan_cancel()`.
    ///
    /// # Known limitation
    ///
    /// Token plumbing is ready; the delivery path requires the agent message loop to be
    /// restructured so `/plan cancel` can be received while `run_scheduler_loop` holds
    /// `&mut self`. See follow-up issue #1603 (SEC-M34-002).
    pub(crate) plan_cancel_token: Option<CancellationToken>,
    /// Manages spawned sub-agents.
    pub(crate) subagent_manager: Option<zeph_subagent::SubAgentManager>,
    /// Sub-agent configuration.
    pub(crate) subagent_config: crate::config::SubAgentConfig,
    /// Orchestration configuration.
    pub(crate) orchestration_config: crate::config::OrchestrationConfig,
    /// Lazily initialized plan template cache. `None` until first use or when
    /// memory (`SQLite`) is unavailable.
    // NOTE(review): the reason for suppressing `dead_code` is not visible in this file —
    // presumably the field is only read under some build configurations; confirm and record why.
    #[allow(dead_code)]
    pub(crate) plan_cache: Option<zeph_orchestration::PlanCache>,
    /// Goal embedding from the most recent `plan_with_cache()` call. Consumed by
    /// `finalize_plan_execution()` to cache the completed plan template.
    pub(crate) pending_goal_embedding: Option<Vec<f32>>,
}
421
/// Groups instruction hot-reload state.
///
/// `Default` is derived: an empty block list and no reload receiver or reload state.
#[derive(Default)]
pub(crate) struct InstructionState {
    /// Instruction blocks currently held by the agent.
    pub(crate) blocks: Vec<InstructionBlock>,
    /// Optional receiver of [`InstructionEvent`] notifications.
    pub(crate) reload_rx: Option<mpsc::Receiver<InstructionEvent>>,
    /// Optional reload bookkeeping state.
    pub(crate) reload_state: Option<InstructionReloadState>,
}
429
430/// Groups experiment feature state (gated behind `experiments` feature flag).
431pub(crate) struct ExperimentState {
432    pub(crate) config: crate::config::ExperimentConfig,
433    /// Cancellation token for a running experiment session. `Some` means an experiment is active.
434    pub(crate) cancel: Option<tokio_util::sync::CancellationToken>,
435    /// Pre-built config snapshot used as the experiment baseline (agent path).
436    pub(crate) baseline: zeph_experiments::ConfigSnapshot,
437    /// Dedicated judge provider for evaluation. When `Some`, the evaluator uses this provider
438    /// instead of the agent's primary provider, eliminating self-judge bias.
439    pub(crate) eval_provider: Option<AnyProvider>,
440    /// Receives completion/error messages from the background experiment engine task.
441    /// Always present so the select! branch compiles unconditionally.
442    pub(crate) notify_rx: Option<tokio::sync::mpsc::Receiver<String>>,
443    /// Sender end paired with `experiment_notify_rx`. Cloned into the background task.
444    pub(crate) notify_tx: tokio::sync::mpsc::Sender<String>,
445}
446
/// Output of a background subgoal extraction LLM call.
///
/// Carried as the payload of the pending subgoal `JoinHandle` in `CompressionState`.
pub(crate) struct SubgoalExtractionResult {
    /// Current subgoal the agent is working toward.
    pub(crate) current: String,
    /// Just-completed subgoal, if the LLM detected a transition (`COMPLETED:` non-NONE).
    /// `None` when no transition was reported.
    pub(crate) completed: Option<String>,
}
454
455/// Groups context-compression feature state (gated behind `context-compression` feature flag).
456#[derive(Default)]
457pub(crate) struct CompressionState {
458    /// Cached task goal for TaskAware/MIG pruning. Set by `maybe_compact()`,
459    /// invalidated when the last user message hash changes.
460    pub(crate) current_task_goal: Option<String>,
461    /// Hash of the last user message when `current_task_goal` was populated.
462    pub(crate) task_goal_user_msg_hash: Option<u64>,
463    /// Pending background task for goal extraction. Spawned fire-and-forget when the user message
464    /// hash changes; result applied at the start of the next Soft compaction (#1909).
465    pub(crate) pending_task_goal: Option<tokio::task::JoinHandle<Option<String>>>,
466    /// Pending `SideQuest` eviction result from the background LLM call spawned last turn.
467    /// Applied at the START of the next turn before compaction (PERF-1 fix).
468    pub(crate) pending_sidequest_result: Option<tokio::task::JoinHandle<Option<Vec<usize>>>>,
469    /// In-memory subgoal registry for `Subgoal`/`SubgoalMig` pruning strategies (#2022).
470    pub(crate) subgoal_registry: crate::agent::compaction_strategy::SubgoalRegistry,
471    /// Pending background subgoal extraction task.
472    pub(crate) pending_subgoal: Option<tokio::task::JoinHandle<Option<SubgoalExtractionResult>>>,
473    /// Hash of the last user message when subgoal extraction was scheduled.
474    pub(crate) subgoal_user_msg_hash: Option<u64>,
475}
476
/// Groups runtime tool filtering, dependency tracking, and iteration bookkeeping.
///
/// `Default` is derived: no filter, no cache, no dependency graph, empty ID sets, and an
/// iteration index of `0`.
#[derive(Default)]
pub(crate) struct ToolState {
    /// Dynamic tool schema filter: pre-computed tool embeddings for per-turn filtering (#2020).
    pub(crate) tool_schema_filter: Option<zeph_tools::ToolSchemaFilter>,
    /// Cached filtered tool IDs for the current user turn.
    pub(crate) cached_filtered_tool_ids: Option<HashSet<String>>,
    /// Tool dependency graph for sequential tool availability (#2024).
    pub(crate) dependency_graph: Option<zeph_tools::ToolDependencyGraph>,
    /// Always-on tool IDs, mirrored from the tool schema filter for dependency gate bypass.
    pub(crate) dependency_always_on: HashSet<String>,
    /// Tool IDs that completed successfully in the current session.
    pub(crate) completed_tool_ids: HashSet<String>,
    /// Current tool loop iteration index within the active user turn.
    pub(crate) current_tool_iteration: usize,
}
493
494/// Groups per-session I/O and policy state.
495pub(crate) struct SessionState {
496    pub(crate) env_context: EnvironmentContext,
497    /// Timestamp of the last assistant message appended to context.
498    /// Used by time-based microcompact to compute session idle gap (#2699).
499    /// `None` before the first assistant response.
500    pub(crate) last_assistant_at: Option<Instant>,
501    pub(crate) response_cache: Option<std::sync::Arc<zeph_memory::ResponseCache>>,
502    /// Parent tool call ID when this agent runs as a subagent inside another agent session.
503    /// Propagated into every `LoopbackEvent::ToolStart` / `ToolOutput` so the IDE can build
504    /// a subagent hierarchy.
505    pub(crate) parent_tool_use_id: Option<String>,
506    /// Optional status channel for sending spinner/status messages to TUI or stderr.
507    pub(crate) status_tx: Option<tokio::sync::mpsc::UnboundedSender<String>>,
508    /// LSP context injection hooks. Fires after native tool execution, injects
509    /// diagnostics/hover notes as `Role::System` messages before the next LLM call.
510    pub(crate) lsp_hooks: Option<crate::lsp_hooks::LspHookRunner>,
511    /// Snapshot of the policy config for `/policy` command inspection.
512    pub(crate) policy_config: Option<zeph_tools::PolicyConfig>,
513    /// `CwdChanged` hook definitions extracted from `[hooks]` config.
514    pub(crate) hooks_config: HooksConfigSnapshot,
515}
516
/// Extracted hook lists from `[hooks]` config, stored in `SessionState`.
///
/// `Default` is derived, so a default snapshot has both hook lists empty.
#[derive(Default)]
pub(crate) struct HooksConfigSnapshot {
    /// Hooks fired when working directory changes.
    pub(crate) cwd_changed: Vec<zeph_config::HookDef>,
    /// Hooks fired when a watched file changes.
    pub(crate) file_changed_hooks: Vec<zeph_config::HookDef>,
}
525
/// Groups message buffering and image staging state.
pub(crate) struct MessageState {
    /// In-memory conversation messages.
    pub(crate) messages: Vec<Message>,
    // QueuedMessage is pub(super) in message_queue — same visibility as this struct; lint suppressed.
    #[allow(private_interfaces)]
    pub(crate) message_queue: VecDeque<QueuedMessage>,
    /// Image parts staged by `/image` commands, attached to the next user message.
    pub(crate) pending_image_parts: Vec<zeph_llm::provider::MessagePart>,
    /// DB row ID of the most recently persisted message. Set by `persist_message`;
    /// consumed by `push_message` call sites to populate `metadata.db_id` on in-memory messages.
    pub(crate) last_persisted_message_id: Option<i64>,
    /// DB message IDs pending hide after deferred tool pair summarization.
    pub(crate) deferred_db_hide_ids: Vec<i64>,
    /// Summary texts pending insertion after deferred tool pair summarization.
    pub(crate) deferred_db_summaries: Vec<String>,
}
542
543impl McpState {
544    /// Write the **full** `self.tools` set to the shared executor `RwLock`.
545    ///
546    /// This is the first of two writers to `shared_tools`. Within a turn this method must run
547    /// **before** `apply_pruned_tools`, which writes the pruned subset. The normal call order
548    /// guarantees this: tool-list change events call this method, and pruning runs later inside
549    /// `rebuild_system_prompt`. See also: `apply_pruned_tools`.
550    pub(crate) fn sync_executor_tools(&self) {
551        if let Some(ref shared) = self.shared_tools {
552            shared.write().clone_from(&self.tools);
553        }
554    }
555
556    /// Write the **pruned** tool subset to the shared executor `RwLock`.
557    ///
558    /// Must only be called **after** `sync_executor_tools` has established the full tool set for
559    /// the current turn. `self.tools` (the full set) is intentionally **not** modified.
560    ///
561    /// This method must **NOT** call `sync_executor_tools` internally — doing so would overwrite
562    /// the pruned subset with the full set. See also: `sync_executor_tools`.
563    pub(crate) fn apply_pruned_tools(&self, pruned: Vec<zeph_mcp::McpTool>) {
564        debug_assert!(
565            pruned.iter().all(|p| self
566                .tools
567                .iter()
568                .any(|t| t.server_id == p.server_id && t.name == p.name)),
569            "pruned set must be a subset of self.tools"
570        );
571        if let Some(ref shared) = self.shared_tools {
572            *shared.write() = pruned;
573        }
574    }
575
576    #[cfg(test)]
577    pub(crate) fn tool_count(&self) -> usize {
578        self.tools.len()
579    }
580}
581
582impl IndexState {
583    pub(crate) async fn fetch_code_rag(
584        &self,
585        query: &str,
586        token_budget: usize,
587    ) -> Result<Option<String>, crate::agent::error::AgentError> {
588        let Some(retriever) = &self.retriever else {
589            return Ok(None);
590        };
591        if token_budget == 0 {
592            return Ok(None);
593        }
594
595        let result = retriever
596            .retrieve(query, token_budget)
597            .await
598            .map_err(|e| crate::agent::error::AgentError::Other(format!("{e:#}")))?;
599        let context_text = zeph_index::retriever::format_as_context(&result);
600
601        if context_text.is_empty() {
602            Ok(None)
603        } else {
604            tracing::debug!(
605                strategy = ?result.strategy,
606                chunks = result.chunks.len(),
607                tokens = result.total_tokens,
608                "code context fetched"
609            );
610            Ok(Some(context_text))
611        }
612    }
613
614    /// Return `Some(self)` when code indexing is enabled, `None` otherwise.
615    ///
616    /// Used by `prepare_context` to pass an optional `IndexAccess` reference to
617    /// `zeph_context::assembler::ContextAssembler::gather` without wrapping the whole state.
618    pub(crate) fn as_index_access(&self) -> Option<&dyn zeph_context::input::IndexAccess> {
619        if self.retriever.is_some() {
620            Some(self)
621        } else {
622            None
623        }
624    }
625}
626
627impl DebugState {
628    pub(crate) fn start_iteration_span(&mut self, iteration_index: usize, text: &str) {
629        if let Some(ref mut tc) = self.trace_collector {
630            tc.begin_iteration(iteration_index, text);
631            self.current_iteration_span_id = tc.current_iteration_span_id(iteration_index);
632        }
633    }
634
635    pub(crate) fn end_iteration_span(
636        &mut self,
637        iteration_index: usize,
638        status: crate::debug_dump::trace::SpanStatus,
639    ) {
640        if let Some(ref mut tc) = self.trace_collector {
641            tc.end_iteration(iteration_index, status);
642        }
643        self.current_iteration_span_id = None;
644    }
645
646    pub(crate) fn switch_format(&mut self, new_format: crate::debug_dump::DumpFormat) {
647        let was_trace = self.dump_format == crate::debug_dump::DumpFormat::Trace;
648        let now_trace = new_format == crate::debug_dump::DumpFormat::Trace;
649
650        if now_trace
651            && !was_trace
652            && let Some(ref dump_dir) = self.dump_dir.clone()
653        {
654            let service_name = self.trace_service_name.clone();
655            let redact = self.trace_redact;
656            match crate::debug_dump::trace::TracingCollector::new(
657                dump_dir.as_path(),
658                &service_name,
659                redact,
660                None,
661            ) {
662                Ok(collector) => {
663                    self.trace_collector = Some(collector);
664                }
665                Err(e) => {
666                    tracing::warn!(error = %e, "failed to create TracingCollector on format switch");
667                }
668            }
669        }
670        if was_trace
671            && !now_trace
672            && let Some(mut tc) = self.trace_collector.take()
673        {
674            tc.finish();
675        }
676
677        self.dump_format = new_format;
678    }
679
680    pub(crate) fn write_chat_debug_dump(
681        &self,
682        dump_id: Option<u32>,
683        result: &zeph_llm::provider::ChatResponse,
684        pii_filter: &zeph_sanitizer::pii::PiiFilter,
685    ) {
686        let Some((d, id)) = self.debug_dumper.as_ref().zip(dump_id) else {
687            return;
688        };
689        let raw = match result {
690            zeph_llm::provider::ChatResponse::Text(t) => t.clone(),
691            zeph_llm::provider::ChatResponse::ToolUse {
692                text, tool_calls, ..
693            } => {
694                let calls = serde_json::to_string_pretty(tool_calls).unwrap_or_default();
695                format!(
696                    "{}\n\n---TOOL_CALLS---\n{calls}",
697                    text.as_deref().unwrap_or("")
698                )
699            }
700        };
701        let text = if pii_filter.is_enabled() {
702            pii_filter.scrub(&raw).into_owned()
703        } else {
704            raw
705        };
706        d.dump_response(id, &text);
707    }
708}
709
710impl Default for McpState {
711    fn default() -> Self {
712        Self {
713            tools: Vec::new(),
714            registry: None,
715            manager: None,
716            allowed_commands: Vec::new(),
717            max_dynamic: 10,
718            elicitation_rx: None,
719            shared_tools: None,
720            tool_rx: None,
721            server_outcomes: Vec::new(),
722            pruning_cache: zeph_mcp::PruningCache::new(),
723            pruning_provider: None,
724            pruning_enabled: false,
725            pruning_params: zeph_mcp::PruningParams::default(),
726            semantic_index: None,
727            discovery_strategy: zeph_mcp::ToolDiscoveryStrategy::default(),
728            discovery_params: zeph_mcp::DiscoveryParams::default(),
729            discovery_provider: None,
730            elicitation_warn_sensitive_fields: true,
731            pending_semantic_rebuild: false,
732        }
733    }
734}
735
736impl Default for IndexState {
737    fn default() -> Self {
738        Self {
739            retriever: None,
740            repo_map_tokens: 0,
741            cached_repo_map: None,
742            repo_map_ttl: std::time::Duration::from_secs(300),
743        }
744    }
745}
746
747impl Default for DebugState {
748    fn default() -> Self {
749        Self {
750            debug_dumper: None,
751            dump_format: crate::debug_dump::DumpFormat::default(),
752            trace_collector: None,
753            iteration_counter: 0,
754            anomaly_detector: None,
755            reasoning_model_warning: true,
756            logging_config: crate::config::LoggingConfig::default(),
757            dump_dir: None,
758            trace_service_name: String::new(),
759            trace_redact: true,
760            current_iteration_span_id: None,
761        }
762    }
763}
764
765impl Default for FeedbackState {
766    fn default() -> Self {
767        Self {
768            detector: super::feedback_detector::FeedbackDetector::new(0.6),
769            judge: None,
770            llm_classifier: None,
771        }
772    }
773}
774
775impl Default for RuntimeConfig {
776    fn default() -> Self {
777        Self {
778            security: SecurityConfig::default(),
779            timeouts: TimeoutConfig::default(),
780            model_name: String::new(),
781            active_provider_name: String::new(),
782            permission_policy: zeph_tools::PermissionPolicy::default(),
783            redact_credentials: true,
784            rate_limiter: super::rate_limiter::ToolRateLimiter::new(
785                super::rate_limiter::RateLimitConfig::default(),
786            ),
787            semantic_cache_enabled: false,
788            semantic_cache_threshold: 0.95,
789            semantic_cache_max_candidates: 10,
790            dependency_config: zeph_tools::DependencyConfig::default(),
791            adversarial_policy_info: None,
792            spawn_depth: 0,
793            budget_hint_enabled: true,
794            channel_skills: zeph_config::ChannelSkillsConfig::default(),
795            layers: Vec::new(),
796            supervisor_config: crate::config::TaskSupervisorConfig::default(),
797        }
798    }
799}
800
801impl SessionState {
802    pub(crate) fn new() -> Self {
803        Self {
804            env_context: EnvironmentContext::gather(""),
805            last_assistant_at: None,
806            response_cache: None,
807            parent_tool_use_id: None,
808            status_tx: None,
809            lsp_hooks: None,
810            policy_config: None,
811            hooks_config: HooksConfigSnapshot::default(),
812        }
813    }
814}
815
816impl SkillState {
817    pub(crate) fn new(
818        registry: Arc<RwLock<SkillRegistry>>,
819        matcher: Option<SkillMatcherBackend>,
820        max_active_skills: usize,
821        last_skills_prompt: String,
822    ) -> Self {
823        Self {
824            registry,
825            skill_paths: Vec::new(),
826            managed_dir: None,
827            trust_config: crate::config::TrustConfig::default(),
828            matcher,
829            max_active_skills,
830            disambiguation_threshold: 0.20,
831            min_injection_score: 0.20,
832            embedding_model: String::new(),
833            skill_reload_rx: None,
834            active_skill_names: Vec::new(),
835            last_skills_prompt,
836            prompt_mode: crate::config::SkillPromptMode::Auto,
837            available_custom_secrets: HashMap::new(),
838            cosine_weight: 0.7,
839            hybrid_search: false,
840            bm25_index: None,
841            two_stage_matching: false,
842            confusability_threshold: 0.0,
843            rl_head: None,
844            rl_weight: 0.3,
845            rl_warmup_updates: 50,
846            generation_output_dir: None,
847            generation_provider_name: String::new(),
848        }
849    }
850}
851
852impl LifecycleState {
853    pub(crate) fn new() -> Self {
854        let (_tx, rx) = watch::channel(false);
855        Self {
856            shutdown: rx,
857            start_time: Instant::now(),
858            cancel_signal: Arc::new(tokio::sync::Notify::new()),
859            cancel_token: tokio_util::sync::CancellationToken::new(),
860            cancel_bridge_handle: None,
861            config_path: None,
862            config_reload_rx: None,
863            warmup_ready: None,
864            update_notify_rx: None,
865            custom_task_rx: None,
866            last_known_cwd: std::env::current_dir().unwrap_or_default(),
867            file_changed_rx: None,
868            file_watcher: None,
869            supervisor: super::supervisor::BackgroundSupervisor::new(
870                &crate::config::TaskSupervisorConfig::default(),
871                None,
872            ),
873        }
874    }
875}
876
877impl ProviderState {
878    pub(crate) fn new(initial_prompt_tokens: u64) -> Self {
879        Self {
880            summary_provider: None,
881            provider_override: None,
882            judge_provider: None,
883            probe_provider: None,
884            compress_provider: None,
885            cached_prompt_tokens: initial_prompt_tokens,
886            server_compaction_active: false,
887            stt: None,
888            provider_pool: Vec::new(),
889            provider_config_snapshot: None,
890        }
891    }
892}
893
894impl MetricsState {
895    pub(crate) fn new(token_counter: Arc<zeph_memory::TokenCounter>) -> Self {
896        Self {
897            metrics_tx: None,
898            cost_tracker: None,
899            token_counter,
900            extended_context: false,
901            classifier_metrics: None,
902            timing_window: std::collections::VecDeque::new(),
903            pending_timings: crate::metrics::TurnTimings::default(),
904            histogram_recorder: None,
905        }
906    }
907}
908
909impl ExperimentState {
910    pub(crate) fn new() -> Self {
911        let (notify_tx, notify_rx) = tokio::sync::mpsc::channel::<String>(4);
912        Self {
913            config: crate::config::ExperimentConfig::default(),
914            cancel: None,
915            baseline: zeph_experiments::ConfigSnapshot::default(),
916            eval_provider: None,
917            notify_rx: Some(notify_rx),
918            notify_tx,
919        }
920    }
921}
922
923pub(super) mod security;
924pub(super) mod skill;
925
926#[cfg(test)]
927mod tests;
zeph_core/agent/state/mod.rs

zeph_core/agent/state/
mod.rs