Skip to main content

zeph_core/agent/state/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Sub-struct definitions for the `Agent` struct.
5//!
6//! Each struct groups a related cluster of `Agent` fields.
7//! Most types are `pub(crate)` — visible only within the `zeph-core` crate; a few snapshot types (e.g. `AdversarialPolicyInfo`) are declared `pub`.
8//!
9//! `MemoryState` is decomposed into four concern-separated sub-structs, each in its own file:
10//!
11//! - [`MemoryPersistenceState`] — `SQLite` handles, conversation IDs, recall budgets, autosave
12//! - [`MemoryCompactionState`] — summarization thresholds, shutdown summary, digest, strategy
13//! - [`MemoryExtractionState`] — graph config, RPE router, document config, semantic labels
14//! - [`MemorySubsystemState`] — `TiMem`, `autoDream`, `MagicDocs`, microcompact
15
16pub(crate) mod compaction;
17pub(crate) mod extraction;
18pub(crate) mod persistence;
19pub(crate) mod runtime;
20pub(crate) mod services;
21pub(crate) mod subsystems;
22
23pub(crate) use self::compaction::MemoryCompactionState;
24pub(crate) use self::extraction::MemoryExtractionState;
25pub(crate) use self::persistence::MemoryPersistenceState;
26pub(crate) use self::runtime::AgentRuntime;
27pub(crate) use self::services::Services;
28pub(crate) use self::subsystems::MemorySubsystemState;
29
30use std::collections::{HashMap, HashSet, VecDeque};
31use std::path::PathBuf;
32use std::sync::Arc;
33
34use parking_lot::RwLock;
35use std::time::Instant;
36
37use tokio::sync::{Notify, mpsc, watch};
38use tokio::time::Interval;
39use tokio_util::sync::CancellationToken;
40use zeph_llm::any::AnyProvider;
41use zeph_llm::provider::Message;
42use zeph_llm::stt::SpeechToText;
43
44use crate::config::{ProviderEntry, SecurityConfig, SkillPromptMode, TimeoutConfig};
45use crate::config_watcher::ConfigEvent;
46use crate::context::EnvironmentContext;
47use crate::cost::CostTracker;
48use crate::file_watcher::FileChangedEvent;
49use crate::instructions::{InstructionBlock, InstructionEvent, InstructionReloadState};
50use crate::metrics::MetricsSnapshot;
51use crate::vault::Secret;
52use zeph_config;
53use zeph_memory::TokenCounter;
54use zeph_sanitizer::ContentSanitizer;
55use zeph_sanitizer::quarantine::QuarantinedSummarizer;
56use zeph_skills::matcher::SkillMatcherBackend;
57use zeph_skills::registry::SkillRegistry;
58use zeph_skills::watcher::SkillEvent;
59use zeroize::Zeroizing;
60
61use super::message_queue::QueuedMessage;
62
63/// Coordinator struct holding four concern-separated sub-structs for memory management.
64///
65/// Each sub-struct groups fields by a single concern:
66/// - [`persistence`](MemoryPersistenceState) — `SQLite` handles, conversation IDs, recall budgets
67/// - [`compaction`](MemoryCompactionState) — summarization thresholds, shutdown summary, digest
68/// - [`extraction`](MemoryExtractionState) — graph config, RPE router, semantic labels
69/// - [`subsystems`](MemorySubsystemState) — `TiMem`, `autoDream`, `MagicDocs`, microcompact
///
/// `Default` is derived, so a default `MemoryState` is assembled from the `Default` value of
/// each of the four sub-structs (all four must therefore implement `Default`).
70#[derive(Default)]
71pub(crate) struct MemoryState {
72    /// `SQLite` handles, conversation IDs, recall budgets, and autosave policy.
73    pub(crate) persistence: MemoryPersistenceState,
74    /// Summarization thresholds, shutdown summary, digest config, and context strategy.
75    pub(crate) compaction: MemoryCompactionState,
76    /// Graph extraction config, RPE router, document config, and semantic label configs.
77    pub(crate) extraction: MemoryExtractionState,
78    /// `TiMem`, `autoDream`, `MagicDocs`, and microcompact subsystem state.
79    pub(crate) subsystems: MemorySubsystemState,
80}
81
/// Groups skill-related agent state: the registry, matching/routing parameters, reload
/// plumbing, and `/skill create` generation settings.
82pub(crate) struct SkillState {
    /// Skill registry behind a shared read-write lock.
83    pub(crate) registry: Arc<RwLock<SkillRegistry>>,
84    /// Per-turn trust snapshot written by `prepare_context` after `build_skill_trust_map`.
85    /// Shared with `SkillInvokeExecutor` so it can resolve trust without hitting `SQLite`
86    /// on every tool call. Refreshed once per turn — stale by at most one turn.
87    pub(crate) trust_snapshot: Arc<RwLock<HashMap<String, zeph_common::SkillTrustLevel>>>,
    /// Filesystem paths for skills — presumably the directories scanned on load/reload
    /// (TODO confirm).
88    pub(crate) skill_paths: Vec<PathBuf>,
    /// Optional managed skills directory. Also the fallback for `generation_output_dir`
    /// when that field is `None`.
89    pub(crate) managed_dir: Option<PathBuf>,
    /// Trust configuration (`crate::config::TrustConfig`).
90    pub(crate) trust_config: crate::config::TrustConfig,
    /// Skill matcher backend; `None` presumably means no matcher is configured (TODO confirm).
91    pub(crate) matcher: Option<SkillMatcherBackend>,
    /// Presumably the cap on the number of skills active at once — TODO confirm.
92    pub(crate) max_active_skills: usize,
    /// Score threshold used for skill disambiguation — exact semantics are not visible in
    /// this file; TODO confirm.
93    pub(crate) disambiguation_threshold: f32,
    /// Presumably the minimum match score a skill needs before it is injected into the
    /// prompt — TODO confirm.
94    pub(crate) min_injection_score: f32,
    /// Embedding model name — presumably the model used for skill matching (TODO confirm).
95    pub(crate) embedding_model: String,
    /// Receiving end of the `SkillEvent` channel from the skill watcher, when present.
96    pub(crate) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
97    /// Resolves the current set of per-plugin skill dirs at reload time.
98    ///
99    /// Called inside `reload_skills()` so that plugins installed via `/plugins add` after
100    /// startup are discovered on the next watcher event without restarting the agent.
101    pub(crate) plugin_dirs_supplier: Option<Arc<dyn Fn() -> Vec<PathBuf> + Send + Sync>>,
    /// Names of the active skills — presumably those selected for the current turn
    /// (TODO confirm).
102    pub(crate) active_skill_names: Vec<String>,
    /// Most recently built skills prompt text — presumably cached for reuse or comparison
    /// (TODO confirm).
103    pub(crate) last_skills_prompt: String,
    /// How skills are rendered into the prompt (`SkillPromptMode`).
104    pub(crate) prompt_mode: SkillPromptMode,
105    /// Custom secrets available at runtime: key=hyphenated name, value=secret.
106    pub(crate) available_custom_secrets: HashMap<String, Secret>,
    /// Weight given to the cosine-similarity score — presumably within the hybrid
    /// (cosine + BM25) blend; TODO confirm.
107    pub(crate) cosine_weight: f32,
    /// Presumably enables hybrid search that combines embeddings with `bm25_index` —
    /// TODO confirm.
108    pub(crate) hybrid_search: bool,
    /// BM25 index over skills, when one is available.
109    pub(crate) bm25_index: Option<zeph_skills::bm25::Bm25Index>,
    /// Presumably toggles two-stage skill matching — TODO confirm.
110    pub(crate) two_stage_matching: bool,
111    /// Threshold for confusability warnings (0.0 = disabled).
112    pub(crate) confusability_threshold: f32,
113    /// `SkillOrchestra` RL routing head. `Some` when `rl_routing_enabled = true` and
114    /// weights are loaded or initialized. `None` when RL routing is disabled.
115    pub(crate) rl_head: Option<zeph_skills::rl_head::RoutingHead>,
116    /// Blend weight for RL routing: `final = (1-rl_weight)*cosine + rl_weight*rl_score`.
117    pub(crate) rl_weight: f32,
118    /// Skip RL blending for the first N updates (cold-start warmup).
119    pub(crate) rl_warmup_updates: u32,
120    /// Directory where `/skill create` writes generated skills.
121    /// Defaults to `managed_dir` if `None`.
122    pub(crate) generation_output_dir: Option<std::path::PathBuf>,
123    /// Provider name for `/skill create` generation. Empty = primary.
124    pub(crate) generation_provider_name: String,
125    /// Optional quality-gate evaluator for generated SKILL.md files (#3319).
126    ///
127    /// When `Some`, the evaluator is attached to every `SkillGenerator` instance so that
128    /// generated skills are scored before being written to disk.
129    pub(crate) skill_evaluator: Option<std::sync::Arc<zeph_skills::evaluator::SkillEvaluator>>,
130    /// Weights for the evaluator composite score — forwarded to `SkillGenerator::with_evaluator`.
131    pub(crate) eval_weights: zeph_skills::evaluator::EvaluationWeights,
132    /// Minimum composite score required to accept a generated skill (forwarded to the generator).
133    pub(crate) eval_threshold: f32,
134}
135
/// Groups MCP state: the connected tool set, pruning and discovery configuration, and
/// elicitation plumbing.
136pub(crate) struct McpState {
    /// Full MCP tool set. `sync_executor_tools()` writes this set into `shared_tools`.
137    pub(crate) tools: Vec<zeph_mcp::McpTool>,
    /// Optional `McpToolRegistry` — presumably the registry kept up to date by
    /// `sync_mcp_registry` (TODO confirm).
138    pub(crate) registry: Option<zeph_mcp::McpToolRegistry>,
    /// Shared handle to the `McpManager`; `None` presumably when MCP is not configured
    /// (TODO confirm).
139    pub(crate) manager: Option<std::sync::Arc<zeph_mcp::McpManager>>,
    /// Presumably the allowlist of commands permitted for MCP servers added at runtime —
    /// TODO confirm.
140    pub(crate) allowed_commands: Vec<String>,
    /// Presumably the maximum number of dynamically added MCP servers — TODO confirm.
141    pub(crate) max_dynamic: usize,
142    /// Receives elicitation requests from MCP server handlers during tool execution.
143    /// When `Some`, the agent loop must process these concurrently with tool result awaiting
144    /// to avoid deadlock (tool result waits for elicitation, elicitation waits for agent loop).
145    pub(crate) elicitation_rx: Option<tokio::sync::mpsc::Receiver<zeph_mcp::ElicitationEvent>>,
146    /// Shared with `McpToolExecutor` so native `tool_use` sees the current tool list.
147    ///
148    /// Two methods write to this `RwLock` — ordering matters:
149    /// - `sync_executor_tools()`: writes the **full** `self.tools` set.
150    /// - `apply_pruned_tools()`: writes the **pruned** subset (used after pruning).
151    ///
152    /// Within a turn, `sync_executor_tools` must always run **before**
153    /// `apply_pruned_tools`.  The normal call order guarantees this: tool-list
154    /// change events call `sync_executor_tools` (inside `check_tool_refresh`,
155    /// `handle_mcp_add`, `handle_mcp_remove`), and pruning runs later inside
156    /// `rebuild_system_prompt`.  See also: `apply_pruned_tools`.
157    pub(crate) shared_tools: Option<Arc<RwLock<Vec<zeph_mcp::McpTool>>>>,
158    /// Receives full flattened tool list after any `tools/list_changed` notification.
159    pub(crate) tool_rx: Option<tokio::sync::watch::Receiver<Vec<zeph_mcp::McpTool>>>,
160    /// Per-server connection outcomes from the initial `connect_all()` call.
161    pub(crate) server_outcomes: Vec<zeph_mcp::ServerConnectOutcome>,
162    /// Per-message cache for MCP tool pruning results (#2298).
163    ///
164    /// Reset at the start of each user turn and whenever the MCP tool list
165    /// changes (via `tools/list_changed`, `/mcp add`, or `/mcp remove`).
166    pub(crate) pruning_cache: zeph_mcp::PruningCache,
167    /// Dedicated provider for MCP tool pruning LLM calls.
168    ///
169    /// `None` means fall back to the agent's primary provider.
170    /// Resolved from `[[llm.providers]]` at build time using `pruning_provider`
171    /// from `ToolPruningConfig`.
172    pub(crate) pruning_provider: Option<zeph_llm::any::AnyProvider>,
173    /// Whether MCP tool pruning is enabled.  Mirrors `ToolPruningConfig::enabled`.
174    pub(crate) pruning_enabled: bool,
175    /// Pruning parameters snapshot.  Derived from `ToolPruningConfig` at build time.
176    pub(crate) pruning_params: zeph_mcp::PruningParams,
177    /// Pre-computed semantic tool index for embedding-based discovery (#2321).
178    ///
179    /// Built at connect time via `rebuild_semantic_index()`, rebuilt on tool list change.
180    /// `None` when strategy is not `Embedding` or when build failed (fallback to all tools).
181    pub(crate) semantic_index: Option<zeph_mcp::SemanticToolIndex>,
182    /// Active discovery strategy and parameters.  Derived from `ToolDiscoveryConfig`.
183    pub(crate) discovery_strategy: zeph_mcp::ToolDiscoveryStrategy,
184    /// Discovery parameters snapshot.  Derived from `ToolDiscoveryConfig` at build time.
185    pub(crate) discovery_params: zeph_mcp::DiscoveryParams,
186    /// Dedicated embedding provider for tool discovery.  `None` = fall back to the
187    /// agent's primary embedding provider.
188    pub(crate) discovery_provider: Option<zeph_llm::any::AnyProvider>,
189    /// When `true`, show a security warning before prompting for fields whose names
190    /// match sensitive patterns (password, token, secret, key, credential, etc.).
191    pub(crate) elicitation_warn_sensitive_fields: bool,
192    /// When `true`, semantic index and registry need to be rebuilt at the next opportunity.
193    ///
194    /// Set after `/mcp add` or `/mcp remove` when called via `AgentAccess::handle_mcp`,
195    /// which cannot call `rebuild_semantic_index` and `sync_mcp_registry` directly because
196    /// those are `async fn(&mut self)` and their futures are `!Send` (they hold `&mut Agent<C>`
197    /// across `.await`). The rebuild is deferred to `check_tool_refresh`, which runs at the
198    /// start of each turn without the `Box<dyn Future + Send>` constraint.
199    pub(crate) pending_semantic_rebuild: bool,
200}
201
/// Groups code-index state: the retriever handle and the repo-map cache settings.
202pub(crate) struct IndexState {
    /// Shared `CodeRetriever`; `None` presumably when code indexing is disabled
    /// (TODO confirm).
203    pub(crate) retriever: Option<std::sync::Arc<zeph_index::retriever::CodeRetriever>>,
    /// Presumably the token budget for the repo map — TODO confirm.
204    pub(crate) repo_map_tokens: usize,
    /// Cached repo map text paired with an `Instant` — presumably the time it was
    /// generated, checked against `repo_map_ttl` (TODO confirm).
205    pub(crate) cached_repo_map: Option<(String, std::time::Instant)>,
    /// Presumably how long `cached_repo_map` stays valid before regeneration — TODO confirm.
206    pub(crate) repo_map_ttl: std::time::Duration,
207}
208
209/// Snapshot of adversarial policy gate configuration for status display.
///
/// Stored as `RuntimeConfig::adversarial_policy_info` and shown by `/status`.
210#[derive(Debug, Clone)]
211pub struct AdversarialPolicyInfo {
    /// Provider name — presumably the LLM provider backing the gate (TODO confirm).
212    pub provider: String,
    /// Number of policies — presumably those loaded into the gate (TODO confirm).
213    pub policy_count: usize,
    /// Presumably whether the gate lets calls through when evaluation fails — TODO confirm.
214    pub fail_open: bool,
215}
216
/// Runtime configuration held by the agent: security and timeout settings, active
/// provider identity, caching knobs, and per-feature config snapshots.
217#[allow(clippy::struct_excessive_bools)] // independent boolean flags; bitflags or enum would obscure semantics without reducing complexity
218pub(crate) struct RuntimeConfig {
    /// Security configuration (`SecurityConfig`).
219    pub(crate) security: SecurityConfig,
    /// Timeout configuration (`TimeoutConfig`).
220    pub(crate) timeouts: TimeoutConfig,
    /// Model name — presumably of the currently active provider (TODO confirm).
221    pub(crate) model_name: String,
222    /// Configured name from `[[llm.providers]]` (the `name` field), set at startup and on
223    /// `/provider` switch. Falls back to the provider type string when empty.
224    pub(crate) active_provider_name: String,
    /// Tool permission policy (`zeph_tools::PermissionPolicy`).
225    pub(crate) permission_policy: zeph_tools::PermissionPolicy,
    /// Presumably toggles credential redaction in output — TODO confirm where it is applied.
226    pub(crate) redact_credentials: bool,
    /// Rate limiter for tool calls (`ToolRateLimiter`).
227    pub(crate) rate_limiter: super::rate_limiter::ToolRateLimiter,
    /// Presumably enables the semantic response cache — TODO confirm.
228    pub(crate) semantic_cache_enabled: bool,
    /// Presumably the similarity threshold for a semantic-cache hit — TODO confirm.
229    pub(crate) semantic_cache_threshold: f32,
    /// Presumably the maximum number of candidates examined per semantic-cache lookup —
    /// TODO confirm.
230    pub(crate) semantic_cache_max_candidates: u32,
231    /// Dependency config snapshot stored for per-turn boost parameters.
232    pub(crate) dependency_config: zeph_tools::DependencyConfig,
233    /// Adversarial policy gate runtime info for /status display.
234    pub(crate) adversarial_policy_info: Option<AdversarialPolicyInfo>,
235    /// Current spawn depth of this agent instance (0 = top-level, 1 = first sub-agent, etc.).
236    /// Used by `build_spawn_context()` to propagate depth to children.
237    pub(crate) spawn_depth: u32,
238    /// Inject `<budget>` XML into the volatile system prompt section (#2267).
239    pub(crate) budget_hint_enabled: bool,
240    /// Per-channel skill allowlist. Skills not matching the allowlist are excluded from the
241    /// prompt. An empty `allowed` list means all skills are permitted (default).
242    pub(crate) channel_skills: zeph_config::ChannelSkillsConfig,
243    /// Minimum allowed interval for `/loop` ticks (seconds). Sourced from `[cli.loop] min_interval_secs`.
244    pub(crate) loop_min_interval_secs: u64,
245    /// Runtime middleware layers for LLM calls and tool dispatch (#2286).
246    ///
247    /// Default: empty vec (zero-cost — loops never iterate).
248    pub(crate) layers: Vec<std::sync::Arc<dyn crate::runtime_layer::RuntimeLayer>>,
249    /// Background supervisor config snapshot for turn-boundary abort logic.
250    pub(crate) supervisor_config: crate::config::TaskSupervisorConfig,
251    /// Session recap config (#3064).
252    pub(crate) recap_config: zeph_config::RecapConfig,
253    /// ACP server configuration snapshot for `/acp` slash-command display.
254    pub(crate) acp_config: zeph_config::AcpConfig,
255    /// Set to `true` after the auto-recap is emitted at session resume (#3144).
256    ///
257    /// Used by `/recap` to skip a redundant LLM call when no new messages have
258    /// been added since the auto-recap was shown.
259    pub(crate) auto_recap_shown: bool,
260    /// Number of non-system messages present when the session was resumed (#3144).
261    ///
262    /// Combined with `auto_recap_shown` to detect whether the user has added new
263    /// messages after the auto-recap was shown.
264    pub(crate) msg_count_at_resume: usize,
265    /// Callback that spawns an external ACP sub-agent process by shell command (#3302).
266    ///
267    /// Injected by the binary crate when the `acp` feature is enabled.
268    /// `None` in bare / non-ACP mode; callers must degrade gracefully.
269    pub(crate) acp_subagent_spawn_fn: Option<zeph_subagent::AcpSubagentSpawnFn>,
270    /// Channel type string used as part of the `(channel_type, channel_id)` persistence key.
271    ///
272    /// Set at build time from the active I/O channel (e.g. `"cli"`, `"tui"`, `"telegram"`).
273    /// Empty when channel identity has not been configured (persistence is skipped).
274    pub(crate) channel_type: String,
275    /// Whether provider preference persistence is enabled for this session (#3308).
276    ///
277    /// Controlled by `[session] provider_persistence = true` (the default). When `false`,
278    /// the stored provider preference is never read or written.
279    pub(crate) provider_persistence_enabled: bool,
280    /// Goal lifecycle feature configuration.
281    pub(crate) goals: GoalRuntimeConfig,
282}
283
284/// Groups feedback detection subsystems: correction detector, judge detector, and LLM classifier.
285pub(crate) struct FeedbackState {
    /// Always-present `FeedbackDetector` — presumably the correction detector named in the
    /// struct summary (TODO confirm).
286    pub(crate) detector: zeph_agent_feedback::FeedbackDetector,
    /// Optional `JudgeDetector`. `spawn_judge_correction_check` uses `llm_classifier`
    /// instead of this detector whenever `llm_classifier` is `Some`.
287    pub(crate) judge: Option<zeph_agent_feedback::JudgeDetector>,
288    /// LLM-backed zero-shot classifier for `DetectorMode::Model`.
289    /// When `Some`, `spawn_judge_correction_check` uses this instead of `JudgeDetector`.
290    pub(crate) llm_classifier: Option<zeph_llm::classifier::llm::LlmClassifier>,
291}
292
293/// Groups security-related subsystems (sanitizer, quarantine, exfiltration guard).
///
/// All `pii_ner_*` fields exist only when the `classifiers` feature is compiled in.
294pub(crate) struct SecurityState {
    /// Content sanitizer (`zeph_sanitizer::ContentSanitizer`).
295    pub(crate) sanitizer: ContentSanitizer,
    /// Optional quarantined summarizer; `None` presumably when quarantine is disabled
    /// (TODO confirm).
296    pub(crate) quarantine_summarizer: Option<QuarantinedSummarizer>,
297    /// Whether this agent session is serving an ACP client.
298    /// When `true` and `mcp_to_acp_boundary` is enabled, MCP tool results
299    /// receive unconditional quarantine and cross-boundary audit logging.
300    pub(crate) is_acp_session: bool,
    /// Exfiltration guard (`zeph_sanitizer::exfiltration::ExfiltrationGuard`).
301    pub(crate) exfiltration_guard: zeph_sanitizer::exfiltration::ExfiltrationGuard,
    /// Set of flagged URLs — presumably URLs flagged by sanitization earlier in the
    /// session (TODO confirm who populates and clears it).
302    pub(crate) flagged_urls: HashSet<String>,
303    /// URLs explicitly provided by the user across all turns in this session.
304    /// Populated from raw user message text; cleared on `/clear`.
305    /// Shared with `UrlGroundingVerifier` to check `fetch`/`web_scrape` calls at dispatch time.
306    pub(crate) user_provided_urls: Arc<RwLock<HashSet<String>>>,
    /// Regex-based PII filter. When `pii_ner_backend` is `Some`, the PII path runs this
    /// filter and NER together.
307    pub(crate) pii_filter: zeph_sanitizer::pii::PiiFilter,
308    /// NER classifier for PII detection (`classifiers.ner_model`). When `Some`, the PII path
309    /// runs both regex (`pii_filter`) and NER, then merges spans before redaction.
310    /// `None` when `classifiers` feature is disabled or `classifiers.enabled = false`.
311    #[cfg(feature = "classifiers")]
312    pub(crate) pii_ner_backend: Option<std::sync::Arc<dyn zeph_llm::classifier::ClassifierBackend>>,
313    /// Per-call timeout for the NER PII classifier in milliseconds.
314    #[cfg(feature = "classifiers")]
315    pub(crate) pii_ner_timeout_ms: u64,
316    /// Maximum number of bytes passed to the NER PII classifier per call.
317    ///
318    /// Large tool outputs (e.g. `search_code`) can produce 150+ `DeBERTa` chunks and exceed
319    /// the per-call timeout. Input is truncated at a valid UTF-8 boundary before classification.
    ///
    /// NOTE(review): the field name says "chars" while this description says "bytes" —
    /// confirm which unit the truncation actually uses.
320    #[cfg(feature = "classifiers")]
321    pub(crate) pii_ner_max_chars: usize,
322    /// Circuit-breaker threshold: number of consecutive timeouts before NER is disabled.
323    /// `0` means the circuit breaker is disabled (NER is always attempted).
324    #[cfg(feature = "classifiers")]
325    pub(crate) pii_ner_circuit_breaker_threshold: u32,
326    /// Number of consecutive NER timeouts observed since the last successful call.
327    #[cfg(feature = "classifiers")]
328    pub(crate) pii_ner_consecutive_timeouts: u32,
329    /// Set to `true` when the circuit breaker trips. NER is skipped for the rest of the session.
330    #[cfg(feature = "classifiers")]
331    pub(crate) pii_ner_tripped: bool,
    /// Validator for memory writes (`MemoryWriteValidator`).
332    pub(crate) memory_validator: zeph_sanitizer::memory_validation::MemoryWriteValidator,
333    /// LLM-based prompt injection pre-screener (opt-in).
334    pub(crate) guardrail: Option<zeph_sanitizer::guardrail::GuardrailFilter>,
335    /// Post-LLM response verification layer.
336    pub(crate) response_verifier: zeph_sanitizer::response_verifier::ResponseVerifier,
337    /// Temporal causal IPI analyzer (opt-in, disabled when `None`).
338    pub(crate) causal_analyzer: Option<zeph_sanitizer::causal_ipi::TurnCausalAnalyzer>,
339    /// VIGIL pre-sanitizer gate. `None` for subagent sessions (subagents are exempt).
340    /// Set at agent build time for top-level agents; skipped for subagents (high FP rate).
341    pub(crate) vigil: Option<crate::agent::vigil::VigilGate>,
342    /// Cross-turn risk accumulator (spec 050 Phase 1).
343    ///
344    /// `advance_turn()` MUST be called once per turn, before `PolicyGateExecutor::check_policy`.
345    /// Never expose score, level, or alerts to any LLM-callable surface.
346    pub(crate) trajectory: crate::agent::trajectory::TrajectorySentinel,
347    /// Shared risk-level slot for `PolicyGateExecutor` (spec 050).
348    ///
349    /// Written by the agent loop after each turn's `sentinel.current_risk()` call.
350    /// `PolicyGateExecutor::check_policy` reads it to downgrade `Allow` at `Critical`.
351    /// `u8` encoding: 0=Calm, 1=Elevated, 2=High, 3=Critical.
352    pub(crate) trajectory_risk_slot: zeph_tools::TrajectoryRiskSlot,
353    /// Pending risk signals from executor layers (spec 050 §2).
354    ///
355    /// `PolicyGateExecutor` and `ScopedToolExecutor` push signal codes here.
356    /// `begin_turn()` drains this queue into `trajectory.record()`.
357    pub(crate) trajectory_signal_queue: zeph_tools::RiskSignalQueue,
358}
359
360/// Groups debug/diagnostics subsystems (dumper, trace collector, anomaly detector, logging config).
361pub(crate) struct DebugState {
    /// Optional debug dumper; `None` presumably when debug dumping is disabled
    /// (TODO confirm).
362    pub(crate) debug_dumper: Option<crate::debug_dump::DebugDumper>,
    /// Current dump format. It can be switched at runtime (see the `/dump-format`
    /// references on the fields below).
363    pub(crate) dump_format: crate::debug_dump::DumpFormat,
    /// Optional tracing collector. `/dump-format trace` can create one at runtime using
    /// `dump_dir`, `trace_service_name`, and `trace_redact`.
364    pub(crate) trace_collector: Option<crate::debug_dump::trace::TracingCollector>,
365    /// Monotonically increasing counter for `process_user_message` calls.
366    /// Used to key spans in `trace_collector.active_iterations`.
367    pub(crate) iteration_counter: usize,
    /// Optional tool anomaly detector (`zeph_tools::AnomalyDetector`).
368    pub(crate) anomaly_detector: Option<zeph_tools::AnomalyDetector>,
369    /// Whether to emit `reasoning_amplification` warnings for quality failures from reasoning
370    /// models. Mirrors `AnomalyConfig::reasoning_model_warning`. Default: `true`.
371    pub(crate) reasoning_model_warning: bool,
    /// Logging configuration (`crate::config::LoggingConfig`).
372    pub(crate) logging_config: crate::config::LoggingConfig,
373    /// Base dump directory — stored so `/dump-format trace` can create a `TracingCollector` (CR-04).
374    pub(crate) dump_dir: Option<PathBuf>,
375    /// Service name for `TracingCollector` created via runtime format switch (CR-04).
376    pub(crate) trace_service_name: String,
377    /// Whether to redact in `TracingCollector` created via runtime format switch (CR-04).
378    pub(crate) trace_redact: bool,
379    /// Span ID of the currently executing iteration — used by LLM/tool span wiring (CR-01).
380    /// Set to `Some` at the start of `process_user_message`, cleared at end.
381    pub(crate) current_iteration_span_id: Option<[u8; 8]>,
382}
383
384/// Snapshot of the shell-level overlay baked in at startup.
385///
386/// Used in `reload_config` to detect when a hot-reload would produce a different shell
387/// restriction set than the one baked into the live `ShellExecutor` (M4 warn-on-divergence).
///
/// Equality is the derived field-wise `PartialEq`/`Eq`, so two snapshots are equal only
/// when both vectors match element by element; keeping the lists sorted presumably makes
/// that comparison independent of plugin load order (TODO confirm).
388#[derive(Debug, Clone, Default, PartialEq, Eq)]
389pub struct ShellOverlaySnapshot {
390    /// Sorted `blocked_commands` contributed by plugins.
391    pub blocked: Vec<String>,
392    /// Sorted `allowed_commands` after plugin intersection (empty if base was empty).
393    pub allowed: Vec<String>,
394}
395
396/// Runtime state for an active `/loop` session.
397///
398/// At most one loop is active at a time; `LifecycleState::user_loop` holds `Some` while
399/// the loop is running and `None` otherwise.
400pub(crate) struct LoopState {
401    /// The prompt text injected on each tick.
402    pub(crate) prompt: String,
403    /// Number of ticks fired so far.
404    pub(crate) iteration: u64,
405    /// Tick interval. `MissedTickBehavior::Skip` prevents burst catch-up.
406    pub(crate) interval: Interval,
407    /// Cancel handle. Dropped (and token cancelled) when loop is stopped.
    ///
    /// NOTE(review): dropping a `CancellationToken` does not cancel it by itself, so the
    /// stop path must call `cancel()` explicitly (or hold a `DropGuard`) — confirm.
    /// Despite the `_tx` suffix this is a token, not a channel sender.
408    pub(crate) cancel_tx: CancellationToken,
409}
410
411/// Groups agent lifecycle state: shutdown signaling, timing, and I/O notification channels.
412pub(crate) struct LifecycleState {
    /// Watch receiver for the shutdown flag — presumably `true` means shutdown was
    /// requested (TODO confirm).
413    pub(crate) shutdown: watch::Receiver<bool>,
    /// `Instant` recorded for the agent — presumably at startup, for uptime reporting
    /// (TODO confirm).
414    pub(crate) start_time: Instant,
    /// Shared `Notify` used for cancellation signaling — presumably bridged to
    /// `cancel_token` by the cancel bridge task (TODO confirm).
415    pub(crate) cancel_signal: Arc<Notify>,
    /// Cancellation token — presumably scoped to the in-flight turn (TODO confirm).
416    pub(crate) cancel_token: CancellationToken,
417    /// Handle to the cancel bridge task spawned each turn. Aborted before a new one is created
418    /// to prevent unbounded task accumulation across turns.
419    pub(crate) cancel_bridge_handle: Option<zeph_common::task_supervisor::BlockingHandle<()>>,
    /// Path to the config file, when known — presumably the file re-read on hot-reload
    /// (TODO confirm).
420    pub(crate) config_path: Option<PathBuf>,
    /// Receiving end of the `ConfigEvent` channel from the config watcher, when present.
421    pub(crate) config_reload_rx: Option<mpsc::Receiver<ConfigEvent>>,
422    /// Path to the plugins directory; used to re-apply overlays on hot-reload.
423    pub(crate) plugins_dir: PathBuf,
424    /// Shell overlay snapshot baked in at startup. Used to detect divergence on hot-reload.
425    pub(crate) startup_shell_overlay: ShellOverlaySnapshot,
426    /// Handle for live-rebuilding the `ShellExecutor`'s `blocked_commands` policy on hot-reload.
427    /// `None` when no `ShellExecutor` is in the executor chain (test harnesses, daemon-only modes).
428    pub(crate) shell_policy_handle: Option<zeph_tools::ShellPolicyHandle>,
    /// Optional watch receiver — presumably flips to `true` once warmup has finished
    /// (TODO confirm).
429    pub(crate) warmup_ready: Option<watch::Receiver<bool>>,
    /// Optional channel delivering update-notification strings.
430    pub(crate) update_notify_rx: Option<mpsc::Receiver<String>>,
    /// Optional channel delivering custom task strings — presumably injected as prompts
    /// (TODO confirm).
431    pub(crate) custom_task_rx: Option<mpsc::Receiver<String>>,
432    /// Active `/loop` state. `None` when no loop is running.
433    pub(crate) user_loop: Option<LoopState>,
434    /// Last known process cwd. Compared after each tool call to detect changes.
435    pub(crate) last_known_cwd: PathBuf,
436    /// Receiver for file-change events from `FileChangeWatcher`. `None` when no paths configured.
437    pub(crate) file_changed_rx: Option<mpsc::Receiver<FileChangedEvent>>,
438    /// Keeps the `FileChangeWatcher` alive for the agent's lifetime. Dropping it aborts the watcher task.
439    pub(crate) file_watcher: Option<crate::file_watcher::FileChangeWatcher>,
440    /// Supervised background task manager. Owned by the agent; call `reap()` between turns
441    /// and `abort_all()` on shutdown.
442    pub(crate) supervisor: super::agent_supervisor::BackgroundSupervisor,
443    /// Per-turn completion notifier. `None` when `notifications.enabled = false`.
444    pub(crate) notifier: Option<crate::notifications::Notifier>,
445    /// Per-turn LLM request counter. Incremented by `process_response`; reset at turn start.
446    pub(crate) turn_llm_requests: u32,
447    /// Timestamp of the last turn that ended with `LlmError::NoProviders`.
448    ///
449    /// Used to gate `advance_context_lifecycle`: when all providers are down, context preparation
450    /// is skipped (degraded mode) until `no_providers_backoff_secs` has elapsed.
451    pub(crate) last_no_providers_at: Option<Instant>,
452    /// Completions from background shell runs waiting to be injected into the next turn.
453    ///
454    /// Drained at the top of `process_user_message_inner` after `supervisor.reap()`.
455    /// All pending completions and the real user message are merged into a **single**
456    /// user-role block to satisfy strict alternation requirements (Anthropic Messages API).
457    ///
458    /// Capacity is capped at `BACKGROUND_COMPLETION_BUFFER_CAP`. On overflow the oldest
459    /// entry is dropped and a placeholder is substituted so the LLM learns results were lost.
    ///
    /// NOTE(review): the element type is spelled
    /// `zeph_tools::shell::background::BackgroundCompletion` here but
    /// `zeph_tools::BackgroundCompletion` on `background_completion_rx` — presumably a
    /// re-export of the same type; confirm.
460    pub(crate) pending_background_completions:
461        VecDeque<zeph_tools::shell::background::BackgroundCompletion>,
462    /// Receiver end of the dedicated background-completion channel created alongside the
463    /// `ShellExecutor`. Polled at the top of each turn to drain completions into
464    /// `pending_background_completions`. `None` when no `ShellExecutor` is configured.
465    pub(crate) background_completion_rx:
466        Option<tokio::sync::mpsc::Receiver<zeph_tools::BackgroundCompletion>>,
467    /// Shared reference to the `ShellExecutor` used to query in-flight background run snapshots
468    /// for TUI metrics display. `None` when no `ShellExecutor` is wired (test harnesses, etc.).
469    pub(crate) shell_executor_handle: Option<std::sync::Arc<zeph_tools::ShellExecutor>>,
470    /// Session-level task supervisor, shared with bootstrap and TUI. Used to register
471    /// background agent tasks (cancel bridge, compaction, sidequest eviction) for
472    /// observability and graceful shutdown.
473    ///
474    /// Created with a fresh [`CancellationToken`] in `LifecycleState::new()` for test
475    /// harnesses; production code overwrites it via `Agent::with_task_supervisor`.
476    pub(crate) task_supervisor: Arc<zeph_common::TaskSupervisor>,
477}
478
479/// Minimal config snapshot needed to reconstruct a provider at runtime via `/provider <name>`.
480///
481/// Secrets are stored as plain strings because [`Secret`] intentionally does not implement
482/// `Clone`. They are re-wrapped in `Secret` when passed to `build_provider_for_switch`.
///
/// The exception is `gonka_private_key`, which is held in `Zeroizing<String>` rather than
/// a bare `String`. No `Debug` impl is derived for this struct.
///
/// NOTE(review): the plain-`String` API keys are not wiped on drop, unlike the
/// `Zeroizing` field — confirm this asymmetry is intended.
483pub struct ProviderConfigSnapshot {
    /// Claude API key, if configured.
484    pub claude_api_key: Option<String>,
    /// OpenAI API key, if configured.
485    pub openai_api_key: Option<String>,
    /// Gemini API key, if configured.
486    pub gemini_api_key: Option<String>,
    /// API keys for compatible providers — presumably keyed by provider name (TODO confirm).
487    pub compatible_api_keys: std::collections::HashMap<String, String>,
    /// LLM request timeout in seconds.
488    pub llm_request_timeout_secs: u64,
    /// Embedding model name.
489    pub embedding_model: String,
    /// Gonka private key, wrapped in `Zeroizing`.
490    pub gonka_private_key: Option<Zeroizing<String>>,
    /// Gonka address, if configured.
491    pub gonka_address: Option<String>,
492}
493
494/// Groups provider-related state: alternate providers, runtime switching, and compaction flags.
495pub(crate) struct ProviderState {
    /// Optional provider for summarization calls. Also the first fallback for
    /// `probe_provider` when that is `None`.
496    pub(crate) summary_provider: Option<AnyProvider>,
497    /// Shared slot for runtime model switching; set by external caller (e.g. ACP).
498    pub(crate) provider_override: Option<Arc<RwLock<Option<AnyProvider>>>>,
    /// Optional provider for judge calls — presumably the feedback judge (TODO confirm).
499    pub(crate) judge_provider: Option<AnyProvider>,
500    /// Dedicated provider for compaction probe LLM calls. Falls back to `summary_provider`
501    /// (or primary) when `None`.
502    pub(crate) probe_provider: Option<AnyProvider>,
503    /// Dedicated provider for `compress_context` LLM calls (#2356).
504    /// Falls back to the primary provider when `None`.
505    pub(crate) compress_provider: Option<AnyProvider>,
    /// Cached prompt token count — TODO confirm what it measures and when it is refreshed.
506    pub(crate) cached_prompt_tokens: u64,
507    /// Whether the active provider has server-side compaction enabled (Claude compact-2026-01-12).
508    /// When true, client-side compaction is skipped.
509    pub(crate) server_compaction_active: bool,
    /// Optional speech-to-text backend.
510    pub(crate) stt: Option<Box<dyn SpeechToText>>,
511    /// Snapshot of `[[llm.providers]]` entries for runtime `/provider` switching.
512    pub(crate) provider_pool: Vec<ProviderEntry>,
513    /// Resolved secrets and timeout settings needed to reconstruct providers at runtime.
514    pub(crate) provider_config_snapshot: Option<ProviderConfigSnapshot>,
515}
516
517/// Groups metrics and cost tracking state.
518pub(crate) struct MetricsState {
    /// Watch sender publishing `MetricsSnapshot` values; `None` presumably when no
    /// metrics consumer is attached (TODO confirm).
519    pub(crate) metrics_tx: Option<watch::Sender<MetricsSnapshot>>,
    /// Optional cost tracker; `None` presumably when cost tracking is disabled
    /// (TODO confirm).
520    pub(crate) cost_tracker: Option<CostTracker>,
    /// Shared token counter (`zeph_memory::TokenCounter`).
521    pub(crate) token_counter: Arc<TokenCounter>,
522    /// Set to `true` when Claude extended context (`enable_extended_context = true`) is active.
523    /// Read from config at build time, not derived from provider internals.
524    pub(crate) extended_context: bool,
525    /// Shared classifier latency ring buffer. Populated by `ContentSanitizer` (injection, PII)
526    /// and `LlmClassifier` (feedback). `None` when classifiers are not configured.
527    pub(crate) classifier_metrics: Option<Arc<zeph_llm::ClassifierMetrics>>,
528    /// Rolling window of per-turn latency samples (last 10 turns).
529    pub(crate) timing_window: std::collections::VecDeque<crate::metrics::TurnTimings>,
530    /// Accumulator for the current turn's timings. Flushed at turn end via `flush_turn_timings`.
531    pub(crate) pending_timings: crate::metrics::TurnTimings,
532    /// Optional histogram recorder for per-event Prometheus observations.
533    /// `None` when the `prometheus` feature is disabled or metrics are not enabled.
534    pub(crate) histogram_recorder: Option<std::sync::Arc<dyn crate::metrics::HistogramRecorder>>,
535}
536
537/// Groups task orchestration and subagent state.
538#[derive(Default)]
539pub(crate) struct OrchestrationState {
540    /// On `OrchestrationState` (not `ProviderState`) because this provider is used exclusively
541    /// by `LlmPlanner` during orchestration, not shared across subsystems.
542    pub(crate) planner_provider: Option<AnyProvider>,
543    /// Provider for `PlanVerifier` LLM calls. `None` falls back to `orchestrator_provider`
544    /// then the primary provider.
545    pub(crate) verify_provider: Option<AnyProvider>,
546    /// Provider for scheduling-tier LLM calls (aggregation, predicate evaluation, verification
547    /// fallback). `None` falls back to the primary provider.
548    /// Set from `config.orchestration.orchestrator_provider` at startup.
549    pub(crate) orchestrator_provider: Option<AnyProvider>,
550    /// Provider for predicate gate evaluation. `None` falls back to `orchestrator_provider`
551    /// then `verify_provider` then primary.
552    pub(crate) predicate_provider: Option<AnyProvider>,
553    /// Graph waiting for `/plan confirm` before execution starts.
554    pub(crate) pending_graph: Option<zeph_orchestration::TaskGraph>,
555    /// Cancellation token for the currently executing plan. `None` when no plan is running.
556    /// Created fresh in `handle_plan_confirm()`, cancelled in `handle_plan_cancel()`.
557    ///
558    /// # Known limitation
559    ///
560    /// Token plumbing is ready; the delivery path requires the agent message loop to be
561    /// restructured so `/plan cancel` can be received while `run_scheduler_loop` holds
562    /// `&mut self`. See follow-up issue #1603 (SEC-M34-002).
563    pub(crate) plan_cancel_token: Option<CancellationToken>,
564    /// Manages spawned sub-agents.
565    pub(crate) subagent_manager: Option<zeph_subagent::SubAgentManager>,
566    pub(crate) subagent_config: crate::config::SubAgentConfig,
567    pub(crate) orchestration_config: crate::config::OrchestrationConfig,
568    /// Lazily initialized plan template cache. `None` until first use or when
569    /// memory (`SQLite`) is unavailable.
570    #[allow(dead_code)]
571    pub(crate) plan_cache: Option<zeph_orchestration::PlanCache>,
572    /// Goal embedding from the most recent `plan_with_cache()` call. Consumed by
573    /// `finalize_plan_execution()` to cache the completed plan template.
574    pub(crate) pending_goal_embedding: Option<Vec<f32>>,
575    /// `AdaptOrch` topology advisor — `None` when `[orchestration.adaptorch]` is disabled.
576    pub(crate) topology_advisor: Option<std::sync::Arc<zeph_orchestration::TopologyAdvisor>>,
577    /// Last `AdaptOrch` verdict; carried from `handle_plan_goal_as_string` to scheduler loop
578    /// for `record_outcome`.
579    #[allow(dead_code)] // read via .take() in plan.rs; clippy false positive
580    pub(crate) last_advisor_verdict: Option<zeph_orchestration::AdvisorVerdict>,
581    /// Task graph persistence handle. `None` when no `SemanticMemory` was
582    /// attached via `with_memory`, or when
583    /// `OrchestrationConfig::persistence_enabled` is `false`. When `Some`, the
584    /// scheduler loop snapshots the graph once per tick and `/plan resume <id>`
585    /// rehydrates from disk.
586    pub(crate) graph_persistence: Option<
587        zeph_orchestration::GraphPersistence<zeph_memory::store::graph_store::TaskGraphStore>,
588    >,
589    /// Named execution environment for the current orchestration task.
590    ///
591    /// Set by the scheduler when dispatching a `TaskNode` that has
592    /// `execution_environment: Some(name)`. Cleared between tasks. When `Some`,
593    /// `prepare_tool_dispatch` injects an [`ExecutionContext`] named `name` into
594    /// every `ToolCall` so that `ShellExecutor::resolve_context` uses the right env.
595    pub(crate) task_execution_env: Option<String>,
596}
597
/// Groups instruction hot-reload state.
///
/// The derived `Default` yields an empty block list and `None` for both optional fields.
#[derive(Default)]
pub(crate) struct InstructionState {
    /// Instruction blocks currently held by the agent.
    pub(crate) blocks: Vec<InstructionBlock>,
    /// Optional receiver of `InstructionEvent`s.
    pub(crate) reload_rx: Option<mpsc::Receiver<InstructionEvent>>,
    /// Optional `InstructionReloadState`.
    pub(crate) reload_state: Option<InstructionReloadState>,
}
605
/// Groups experiment feature state (gated behind `experiments` feature flag).
pub(crate) struct ExperimentState {
    /// Experiment configuration.
    pub(crate) config: crate::config::ExperimentConfig,
    /// Cancellation token for a running experiment session. `Some` means an experiment is active.
    pub(crate) cancel: Option<tokio_util::sync::CancellationToken>,
    /// Pre-built config snapshot used as the experiment baseline (agent path).
    pub(crate) baseline: zeph_experiments::ConfigSnapshot,
    /// Dedicated judge provider for evaluation. When `Some`, the evaluator uses this provider
    /// instead of the agent's primary provider, eliminating self-judge bias.
    pub(crate) eval_provider: Option<AnyProvider>,
    /// Receives completion/error messages from the background experiment engine task.
    /// Always present so the select! branch compiles unconditionally.
    /// `ExperimentState::new` initializes this to `Some`.
    pub(crate) notify_rx: Option<tokio::sync::mpsc::Receiver<String>>,
    /// Sender end paired with `notify_rx`. Cloned into the background task.
    pub(crate) notify_tx: tokio::sync::mpsc::Sender<String>,
}
622
/// Groups context-compression feature state (gated behind `context-compression` feature flag).
///
/// Built via the derived `Default`: every `Option` field starts as `None` and
/// `subgoal_registry` starts as its own `Default` value.
#[derive(Default)]
pub(crate) struct CompressionState {
    /// Cached task goal for TaskAware/MIG pruning. Set by `maybe_compact()`,
    /// invalidated when the last user message hash changes.
    pub(crate) current_task_goal: Option<String>,
    /// Hash of the last user message when `current_task_goal` was populated.
    pub(crate) task_goal_user_msg_hash: Option<u64>,
    /// Pending background task for goal extraction. Spawned when the user message hash changes;
    /// result applied at the start of the next Soft compaction (#1909).
    /// The handle resolves to `Option<String>`.
    pub(crate) pending_task_goal:
        Option<zeph_common::task_supervisor::BlockingHandle<Option<String>>>,
    /// Pending `SideQuest` eviction result from the background LLM call spawned last turn.
    /// Applied at the START of the next turn before compaction (PERF-1 fix).
    /// The handle resolves to `Option<Vec<usize>>`.
    pub(crate) pending_sidequest_result:
        Option<zeph_common::task_supervisor::BlockingHandle<Option<Vec<usize>>>>,
    /// In-memory subgoal registry for `Subgoal`/`SubgoalMig` pruning strategies (#2022).
    pub(crate) subgoal_registry: zeph_agent_context::SubgoalRegistry,
    /// Pending background subgoal extraction task.
    /// The handle resolves to `Option<SubgoalExtractionResult>`.
    pub(crate) pending_subgoal: Option<
        zeph_common::task_supervisor::BlockingHandle<
            Option<zeph_agent_context::SubgoalExtractionResult>,
        >,
    >,
    /// Hash of the last user message when subgoal extraction was scheduled.
    pub(crate) subgoal_user_msg_hash: Option<u64>,
    /// Shared typed-page state (#3630). `None` when `typed_pages.enabled = false`.
    pub(crate) typed_pages_state: Option<Arc<zeph_context::typed_page::TypedPagesState>>,
}
652
/// Groups runtime tool filtering, dependency tracking, and iteration bookkeeping.
///
/// Built via the derived `Default`: optional fields start as `None`, sets and maps start
/// empty, and `current_tool_iteration` starts at 0.
#[derive(Default)]
pub(crate) struct ToolState {
    /// Dynamic tool schema filter: pre-computed tool embeddings for per-turn filtering (#2020).
    pub(crate) tool_schema_filter: Option<zeph_tools::ToolSchemaFilter>,
    /// Cached filtered tool IDs for the current user turn.
    pub(crate) cached_filtered_tool_ids: Option<HashSet<String>>,
    /// Tool dependency graph for sequential tool availability (#2024).
    pub(crate) dependency_graph: Option<zeph_tools::ToolDependencyGraph>,
    /// Always-on tool IDs, mirrored from the tool schema filter for dependency gate bypass.
    pub(crate) dependency_always_on: HashSet<String>,
    /// Tool IDs that completed successfully in the current session.
    pub(crate) completed_tool_ids: HashSet<String>,
    /// Current tool loop iteration index within the active user turn.
    pub(crate) current_tool_iteration: usize,
    /// PASTE pattern store for tool invocation history and prediction (#3642).
    ///
    /// `Some` only when `config.tools.speculative.mode` is `Pattern` or `Both`.
    pub(crate) pattern_store: Option<Arc<crate::agent::speculative::paste::PatternStore>>,
    /// Per-turn mapping from tool name to `(skill_name, skill_hash)`, populated at skill
    /// activation and used by `observe()` to attribute tool completions to their owning skill.
    pub(crate) tool_to_skill: HashMap<String, (String, String)>,
    /// Last tool executed per skill in the current turn, keyed by skill name.
    /// Used as `prev_tool` for PASTE pattern transition recording.
    pub(crate) last_tool_per_skill: HashMap<String, String>,
}
679
680/// Groups per-session I/O and policy state.
681pub(crate) struct SessionState {
682    pub(crate) env_context: EnvironmentContext,
683    /// Timestamp of the last assistant message appended to context.
684    /// Used by time-based microcompact to compute session idle gap (#2699).
685    /// `None` before the first assistant response.
686    pub(crate) last_assistant_at: Option<Instant>,
687    pub(crate) response_cache: Option<std::sync::Arc<zeph_memory::ResponseCache>>,
688    /// Parent tool call ID when this agent runs as a subagent inside another agent session.
689    /// Propagated into every `LoopbackEvent::ToolStart` / `ToolOutput` so the IDE can build
690    /// a subagent hierarchy.
691    pub(crate) parent_tool_use_id: Option<String>,
692    /// Current-turn intent snapshot for VIGIL. `None` between turns.
693    ///
694    /// Set at the top of `process_user_message` (before any tool call) to the first 1024 chars
695    /// of the user message. Cleared at `end_turn`, on `/clear`, and on any turn-abort path.
696    /// Never shared across turns or propagated into subagents.
697    pub(crate) current_turn_intent: Option<String>,
698    /// Optional status channel for sending spinner/status messages to TUI or stderr.
699    pub(crate) status_tx: Option<tokio::sync::mpsc::UnboundedSender<String>>,
700    /// LSP context injection hooks. Fires after native tool execution, injects
701    /// diagnostics/hover notes as `Role::System` messages before the next LLM call.
702    pub(crate) lsp_hooks: Option<crate::lsp_hooks::LspHookRunner>,
703    /// Snapshot of the policy config for `/policy` command inspection.
704    pub(crate) policy_config: Option<zeph_tools::PolicyConfig>,
705    /// `CwdChanged` hook definitions extracted from `[hooks]` config.
706    pub(crate) hooks_config: HooksConfigSnapshot,
707}
708
/// Extracted hook lists from `[hooks]` config, stored in `SessionState`.
///
/// The derived `Default` yields four empty lists.
#[derive(Default)]
pub(crate) struct HooksConfigSnapshot {
    /// Hooks fired when working directory changes.
    pub(crate) cwd_changed: Vec<zeph_config::HookDef>,
    /// Hooks fired when a watched file changes.
    pub(crate) file_changed_hooks: Vec<zeph_config::HookDef>,
    /// Hooks fired when a tool execution is blocked by a `RuntimeLayer::before_tool` check.
    pub(crate) permission_denied: Vec<zeph_config::HookDef>,
    /// Hooks fired after each agent turn completes (#3327).
    ///
    /// Populated from `HooksConfig::turn_complete` at session construction. Shares the
    /// `Notifier::should_fire` gate when a notifier is configured; fires on every completion
    /// when no notifier is present.
    pub(crate) turn_complete: Vec<zeph_config::HookDef>,
}
725
/// Groups message buffering and image staging state.
pub(crate) struct MessageState {
    /// In-memory list of `Message` values.
    pub(crate) messages: Vec<Message>,
    /// Double-ended queue of `QueuedMessage` entries.
    // QueuedMessage is pub(super) in message_queue — same visibility as this struct; lint suppressed.
    #[allow(private_interfaces)]
    pub(crate) message_queue: VecDeque<QueuedMessage>,
    /// Image parts staged by `/image` commands, attached to the next user message.
    pub(crate) pending_image_parts: Vec<zeph_llm::provider::MessagePart>,
    /// DB row ID of the most recently persisted message. Set by `persist_message`;
    /// consumed by `push_message` call sites to populate `metadata.db_id` on in-memory messages.
    pub(crate) last_persisted_message_id: Option<i64>,
    /// DB message IDs pending hide after deferred tool pair summarization.
    pub(crate) deferred_db_hide_ids: Vec<i64>,
    /// Summary texts pending insertion after deferred tool pair summarization.
    pub(crate) deferred_db_summaries: Vec<String>,
}
742
743impl McpState {
744    /// Write the **full** `self.tools` set to the shared executor `RwLock`.
745    ///
746    /// This is the first of two writers to `shared_tools`. Within a turn this method must run
747    /// **before** `apply_pruned_tools`, which writes the pruned subset. The normal call order
748    /// guarantees this: tool-list change events call this method, and pruning runs later inside
749    /// `rebuild_system_prompt`. See also: `apply_pruned_tools`.
750    pub(crate) fn sync_executor_tools(&self) {
751        if let Some(ref shared) = self.shared_tools {
752            shared.write().clone_from(&self.tools);
753        }
754    }
755
756    /// Write the **pruned** tool subset to the shared executor `RwLock`.
757    ///
758    /// Must only be called **after** `sync_executor_tools` has established the full tool set for
759    /// the current turn. `self.tools` (the full set) is intentionally **not** modified.
760    ///
761    /// This method must **NOT** call `sync_executor_tools` internally — doing so would overwrite
762    /// the pruned subset with the full set. See also: `sync_executor_tools`.
763    pub(crate) fn apply_pruned_tools(&self, pruned: Vec<zeph_mcp::McpTool>) {
764        debug_assert!(
765            pruned.iter().all(|p| self
766                .tools
767                .iter()
768                .any(|t| t.server_id == p.server_id && t.name == p.name)),
769            "pruned set must be a subset of self.tools"
770        );
771        if let Some(ref shared) = self.shared_tools {
772            *shared.write() = pruned;
773        }
774    }
775
776    #[cfg(test)]
777    pub(crate) fn tool_count(&self) -> usize {
778        self.tools.len()
779    }
780}
781
782impl IndexState {
783    #[tracing::instrument(name = "core.index.fetch_code_rag", skip(self), fields(%query, token_budget))]
784    pub(crate) async fn fetch_code_rag(
785        &self,
786        query: &str,
787        token_budget: usize,
788    ) -> Result<Option<String>, crate::agent::error::AgentError> {
789        let Some(retriever) = &self.retriever else {
790            return Ok(None);
791        };
792        if token_budget == 0 {
793            return Ok(None);
794        }
795
796        let result = retriever
797            .retrieve(query, token_budget)
798            .await
799            .map_err(|e| crate::agent::error::AgentError::ContextError(format!("{e:#}")))?;
800        let context_text = zeph_index::retriever::format_as_context(&result);
801
802        if context_text.is_empty() {
803            Ok(None)
804        } else {
805            tracing::debug!(
806                strategy = ?result.strategy,
807                chunks = result.chunks.len(),
808                tokens = result.total_tokens,
809                "code context fetched"
810            );
811            Ok(Some(context_text))
812        }
813    }
814}
815
816impl DebugState {
817    pub(crate) fn start_iteration_span(&mut self, iteration_index: usize, text: &str) {
818        if let Some(ref mut tc) = self.trace_collector {
819            tc.begin_iteration(iteration_index, text);
820            self.current_iteration_span_id = tc.current_iteration_span_id(iteration_index);
821        }
822    }
823
824    pub(crate) fn end_iteration_span(
825        &mut self,
826        iteration_index: usize,
827        status: crate::debug_dump::trace::SpanStatus,
828    ) {
829        if let Some(ref mut tc) = self.trace_collector {
830            tc.end_iteration(iteration_index, status);
831        }
832        self.current_iteration_span_id = None;
833    }
834
835    pub(crate) fn switch_format(&mut self, new_format: crate::debug_dump::DumpFormat) {
836        let was_trace = self.dump_format == crate::debug_dump::DumpFormat::Trace;
837        let now_trace = new_format == crate::debug_dump::DumpFormat::Trace;
838
839        if now_trace
840            && !was_trace
841            && let Some(ref dump_dir) = self.dump_dir.clone()
842        {
843            let service_name = self.trace_service_name.clone();
844            let redact = self.trace_redact;
845            match crate::debug_dump::trace::TracingCollector::new(
846                dump_dir.as_path(),
847                &service_name,
848                redact,
849                None,
850            ) {
851                Ok(collector) => {
852                    self.trace_collector = Some(collector);
853                }
854                Err(e) => {
855                    tracing::warn!(error = %e, "failed to create TracingCollector on format switch");
856                }
857            }
858        }
859        if was_trace
860            && !now_trace
861            && let Some(mut tc) = self.trace_collector.take()
862        {
863            tc.finish();
864        }
865
866        self.dump_format = new_format;
867    }
868
869    pub(crate) fn write_chat_debug_dump(
870        &self,
871        dump_id: Option<u32>,
872        result: &zeph_llm::provider::ChatResponse,
873        pii_filter: &zeph_sanitizer::pii::PiiFilter,
874    ) {
875        let Some((d, id)) = self.debug_dumper.as_ref().zip(dump_id) else {
876            return;
877        };
878        let raw = match result {
879            zeph_llm::provider::ChatResponse::Text(t) => t.clone(),
880            zeph_llm::provider::ChatResponse::ToolUse {
881                text, tool_calls, ..
882            } => {
883                let calls = serde_json::to_string_pretty(tool_calls).unwrap_or_default();
884                format!(
885                    "{}\n\n---TOOL_CALLS---\n{calls}",
886                    text.as_deref().unwrap_or("")
887                )
888            }
889        };
890        let text = if pii_filter.is_enabled() {
891            pii_filter.scrub(&raw).into_owned()
892        } else {
893            raw
894        };
895        d.dump_response(id, &text);
896    }
897}
898
899impl Default for McpState {
900    fn default() -> Self {
901        Self {
902            tools: Vec::new(),
903            registry: None,
904            manager: None,
905            allowed_commands: Vec::new(),
906            max_dynamic: 10,
907            elicitation_rx: None,
908            shared_tools: None,
909            tool_rx: None,
910            server_outcomes: Vec::new(),
911            pruning_cache: zeph_mcp::PruningCache::new(),
912            pruning_provider: None,
913            pruning_enabled: false,
914            pruning_params: zeph_mcp::PruningParams::default(),
915            semantic_index: None,
916            discovery_strategy: zeph_mcp::ToolDiscoveryStrategy::default(),
917            discovery_params: zeph_mcp::DiscoveryParams::default(),
918            discovery_provider: None,
919            elicitation_warn_sensitive_fields: true,
920            pending_semantic_rebuild: false,
921        }
922    }
923}
924
925impl Default for IndexState {
926    fn default() -> Self {
927        Self {
928            retriever: None,
929            repo_map_tokens: 0,
930            cached_repo_map: None,
931            repo_map_ttl: std::time::Duration::from_mins(5),
932        }
933    }
934}
935
936impl Default for DebugState {
937    fn default() -> Self {
938        Self {
939            debug_dumper: None,
940            dump_format: crate::debug_dump::DumpFormat::default(),
941            trace_collector: None,
942            iteration_counter: 0,
943            anomaly_detector: None,
944            reasoning_model_warning: true,
945            logging_config: crate::config::LoggingConfig::default(),
946            dump_dir: None,
947            trace_service_name: String::new(),
948            trace_redact: true,
949            current_iteration_span_id: None,
950        }
951    }
952}
953
954impl Default for FeedbackState {
955    fn default() -> Self {
956        Self {
957            detector: zeph_agent_feedback::FeedbackDetector::new(0.6),
958            judge: None,
959            llm_classifier: None,
960        }
961    }
962}
963
/// Runtime settings for the goal lifecycle feature, stored in `RuntimeConfig`.
#[derive(Debug, Clone)]
pub(crate) struct GoalRuntimeConfig {
    /// Turns goal tracking on when `true`.
    pub(crate) enabled: bool,
    /// Upper bound, counted in Unicode chars, on goal text length at creation.
    pub(crate) max_text_chars: usize,
    /// Token budget given to new goals by default (0 = unlimited).
    pub(crate) default_token_budget: u64,
    /// When `true`, the active goal block is injected into the volatile system prompt region.
    pub(crate) inject_into_system_prompt: bool,
}

impl Default for GoalRuntimeConfig {
    /// Goal tracking off, a 2000-char text limit, an unlimited token budget, and system
    /// prompt injection on.
    fn default() -> Self {
        const MAX_TEXT_CHARS: usize = 2000;
        // Zero means "no budget limit" for this field.
        const UNLIMITED_TOKEN_BUDGET: u64 = 0;

        Self {
            enabled: false,
            max_text_chars: MAX_TEXT_CHARS,
            default_token_budget: UNLIMITED_TOKEN_BUDGET,
            inject_into_system_prompt: true,
        }
    }
}
987
988impl Default for RuntimeConfig {
989    fn default() -> Self {
990        Self {
991            security: SecurityConfig::default(),
992            timeouts: TimeoutConfig::default(),
993            model_name: String::new(),
994            active_provider_name: String::new(),
995            permission_policy: zeph_tools::PermissionPolicy::default(),
996            redact_credentials: true,
997            rate_limiter: super::rate_limiter::ToolRateLimiter::new(
998                super::rate_limiter::RateLimitConfig::default(),
999            ),
1000            semantic_cache_enabled: false,
1001            semantic_cache_threshold: 0.95,
1002            semantic_cache_max_candidates: 10,
1003            dependency_config: zeph_tools::DependencyConfig::default(),
1004            adversarial_policy_info: None,
1005            spawn_depth: 0,
1006            budget_hint_enabled: true,
1007            channel_skills: zeph_config::ChannelSkillsConfig::default(),
1008            loop_min_interval_secs: 5,
1009            layers: Vec::new(),
1010            supervisor_config: crate::config::TaskSupervisorConfig::default(),
1011            recap_config: zeph_config::RecapConfig::default(),
1012            acp_config: zeph_config::AcpConfig::default(),
1013            auto_recap_shown: false,
1014            msg_count_at_resume: 0,
1015            acp_subagent_spawn_fn: None,
1016            channel_type: String::new(),
1017            provider_persistence_enabled: true,
1018            goals: GoalRuntimeConfig::default(),
1019        }
1020    }
1021}
1022
1023impl SessionState {
1024    pub(crate) fn new() -> Self {
1025        Self {
1026            env_context: EnvironmentContext::gather(""),
1027            last_assistant_at: None,
1028            response_cache: None,
1029            parent_tool_use_id: None,
1030            current_turn_intent: None,
1031            status_tx: None,
1032            lsp_hooks: None,
1033            policy_config: None,
1034            hooks_config: HooksConfigSnapshot::default(),
1035        }
1036    }
1037}
1038
1039impl SkillState {
1040    pub(crate) fn new(
1041        registry: Arc<RwLock<SkillRegistry>>,
1042        matcher: Option<SkillMatcherBackend>,
1043        max_active_skills: usize,
1044        last_skills_prompt: String,
1045    ) -> Self {
1046        Self {
1047            registry,
1048            trust_snapshot: Arc::new(RwLock::new(HashMap::new())),
1049            skill_paths: Vec::new(),
1050            managed_dir: None,
1051            trust_config: crate::config::TrustConfig::default(),
1052            matcher,
1053            max_active_skills,
1054            disambiguation_threshold: 0.20,
1055            min_injection_score: 0.20,
1056            embedding_model: String::new(),
1057            skill_reload_rx: None,
1058            plugin_dirs_supplier: None,
1059            active_skill_names: Vec::new(),
1060            last_skills_prompt,
1061            prompt_mode: crate::config::SkillPromptMode::Auto,
1062            available_custom_secrets: HashMap::new(),
1063            cosine_weight: 0.7,
1064            hybrid_search: true,
1065            bm25_index: None,
1066            two_stage_matching: false,
1067            confusability_threshold: 0.0,
1068            rl_head: None,
1069            rl_weight: 0.3,
1070            rl_warmup_updates: 50,
1071            generation_output_dir: None,
1072            generation_provider_name: String::new(),
1073            skill_evaluator: None,
1074            eval_weights: zeph_skills::evaluator::EvaluationWeights::default(),
1075            eval_threshold: 0.60,
1076        }
1077    }
1078}
1079
1080impl LifecycleState {
1081    pub(crate) fn new() -> Self {
1082        let (_tx, rx) = watch::channel(false);
1083        Self {
1084            shutdown: rx,
1085            start_time: Instant::now(),
1086            cancel_signal: Arc::new(tokio::sync::Notify::new()),
1087            cancel_token: tokio_util::sync::CancellationToken::new(),
1088            cancel_bridge_handle: None,
1089            config_path: None,
1090            config_reload_rx: None,
1091            plugins_dir: PathBuf::new(),
1092            startup_shell_overlay: ShellOverlaySnapshot::default(),
1093            shell_policy_handle: None,
1094            warmup_ready: None,
1095            update_notify_rx: None,
1096            custom_task_rx: None,
1097            user_loop: None,
1098            last_known_cwd: std::env::current_dir().unwrap_or_default(),
1099            file_changed_rx: None,
1100            file_watcher: None,
1101            supervisor: super::agent_supervisor::BackgroundSupervisor::new(
1102                &crate::config::TaskSupervisorConfig::default(),
1103                None,
1104            ),
1105            notifier: None,
1106            turn_llm_requests: 0,
1107            last_no_providers_at: None,
1108            pending_background_completions: VecDeque::new(),
1109            background_completion_rx: None,
1110            shell_executor_handle: None,
1111            task_supervisor: Arc::new(zeph_common::TaskSupervisor::new(
1112                tokio_util::sync::CancellationToken::new(),
1113            )),
1114        }
1115    }
1116}
1117
1118impl ProviderState {
1119    pub(crate) fn new(initial_prompt_tokens: u64) -> Self {
1120        Self {
1121            summary_provider: None,
1122            provider_override: None,
1123            judge_provider: None,
1124            probe_provider: None,
1125            compress_provider: None,
1126            cached_prompt_tokens: initial_prompt_tokens,
1127            server_compaction_active: false,
1128            stt: None,
1129            provider_pool: Vec::new(),
1130            provider_config_snapshot: None,
1131        }
1132    }
1133}
1134
1135impl MetricsState {
1136    pub(crate) fn new(token_counter: Arc<zeph_memory::TokenCounter>) -> Self {
1137        Self {
1138            metrics_tx: None,
1139            cost_tracker: None,
1140            token_counter,
1141            extended_context: false,
1142            classifier_metrics: None,
1143            timing_window: std::collections::VecDeque::new(),
1144            pending_timings: crate::metrics::TurnTimings::default(),
1145            histogram_recorder: None,
1146        }
1147    }
1148}
1149
1150impl ExperimentState {
1151    pub(crate) fn new() -> Self {
1152        let (notify_tx, notify_rx) = tokio::sync::mpsc::channel::<String>(4);
1153        Self {
1154            config: crate::config::ExperimentConfig::default(),
1155            cancel: None,
1156            baseline: zeph_experiments::ConfigSnapshot::default(),
1157            eval_provider: None,
1158            notify_rx: Some(notify_rx),
1159            notify_tx,
1160        }
1161    }
1162}
1163
1164pub(super) mod security;
1165pub(super) mod skill;
1166
1167#[cfg(test)]
1168mod tests;