zeph_core/agent/state/mod.rs
1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
4//! Sub-struct definitions for the `Agent` struct.
5//!
6//! Each struct groups a related cluster of `Agent` fields.
7//! All types are `pub(crate)` — visible only within the `zeph-core` crate.
8//!
9//! `MemoryState` is decomposed into four concern-separated sub-structs, each in its own file:
10//!
11//! - [`MemoryPersistenceState`] — `SQLite` handles, conversation IDs, recall budgets, autosave
12//! - [`MemoryCompactionState`] — summarization thresholds, shutdown summary, digest, strategy
13//! - [`MemoryExtractionState`] — graph config, RPE router, document config, semantic labels
14//! - [`MemorySubsystemState`] — `TiMem`, `autoDream`, `MagicDocs`, microcompact
15
16pub(crate) mod compaction;
17pub(crate) mod extraction;
18pub(crate) mod persistence;
19pub(crate) mod runtime;
20pub(crate) mod services;
21pub(crate) mod subsystems;
22
23pub(crate) use self::compaction::MemoryCompactionState;
24pub(crate) use self::extraction::MemoryExtractionState;
25pub(crate) use self::persistence::MemoryPersistenceState;
26pub(crate) use self::runtime::AgentRuntime;
27pub(crate) use self::services::Services;
28pub(crate) use self::subsystems::MemorySubsystemState;
29
30use std::collections::{HashMap, HashSet, VecDeque};
31use std::path::PathBuf;
32use std::sync::Arc;
33
34use parking_lot::RwLock;
35use std::time::Instant;
36
37use tokio::sync::{Notify, mpsc, watch};
38use tokio::time::Interval;
39use tokio_util::sync::CancellationToken;
40use zeph_llm::any::AnyProvider;
41use zeph_llm::provider::Message;
42use zeph_llm::stt::SpeechToText;
43
44use crate::config::{ProviderEntry, SecurityConfig, SkillPromptMode, TimeoutConfig};
45use crate::config_watcher::ConfigEvent;
46use crate::context::EnvironmentContext;
47use crate::cost::CostTracker;
48use crate::file_watcher::FileChangedEvent;
49use crate::instructions::{InstructionBlock, InstructionEvent, InstructionReloadState};
50use crate::metrics::MetricsSnapshot;
51use crate::vault::Secret;
52use zeph_config;
53use zeph_memory::TokenCounter;
54use zeph_sanitizer::ContentSanitizer;
55use zeph_sanitizer::quarantine::QuarantinedSummarizer;
56use zeph_skills::matcher::SkillMatcherBackend;
57use zeph_skills::registry::SkillRegistry;
58use zeph_skills::watcher::SkillEvent;
59use zeroize::Zeroizing;
60
61use super::message_queue::QueuedMessage;
62
63/// Coordinator struct holding four concern-separated sub-structs for memory management.
64///
65/// Each sub-struct groups fields by a single concern:
66/// - [`persistence`](MemoryPersistenceState) — `SQLite` handles, conversation IDs, recall budgets
67/// - [`compaction`](MemoryCompactionState) — summarization thresholds, shutdown summary, digest
68/// - [`extraction`](MemoryExtractionState) — graph config, RPE router, semantic labels
69/// - [`subsystems`](MemorySubsystemState) — `TiMem`, `autoDream`, `MagicDocs`, microcompact
70#[derive(Default)]
71pub(crate) struct MemoryState {
72 /// `SQLite` handles, conversation IDs, recall budgets, and autosave policy.
73 pub(crate) persistence: MemoryPersistenceState,
74 /// Summarization thresholds, shutdown summary, digest config, and context strategy.
75 pub(crate) compaction: MemoryCompactionState,
76 /// Graph extraction config, RPE router, document config, and semantic label configs.
77 pub(crate) extraction: MemoryExtractionState,
78 /// `TiMem`, `autoDream`, `MagicDocs`, and microcompact subsystem state.
79 pub(crate) subsystems: MemorySubsystemState,
80}
81
82#[allow(clippy::struct_excessive_bools)]
83pub(crate) struct SkillState {
84 pub(crate) registry: Arc<RwLock<SkillRegistry>>,
85 /// Per-turn trust snapshot written by `prepare_context` after `build_skill_trust_map`.
86 /// Shared with `SkillInvokeExecutor` so it can resolve trust without hitting `SQLite`
87 /// on every tool call. Refreshed once per turn — stale by at most one turn.
88 /// Carries full `SkillTrustSnapshot` (level + `requires_trust_check` + `blake3_hash`) so
89 /// `SkillInvokeExecutor` can perform per-invocation re-hash when the flag is set.
90 pub(crate) trust_snapshot:
91 Arc<RwLock<HashMap<String, crate::skill_invoker::SkillTrustSnapshot>>>,
92 pub(crate) skill_paths: Vec<PathBuf>,
93 pub(crate) managed_dir: Option<PathBuf>,
94 pub(crate) trust_config: crate::config::TrustConfig,
95 pub(crate) matcher: Option<SkillMatcherBackend>,
96 pub(crate) max_active_skills: usize,
97 pub(crate) disambiguation_threshold: f32,
98 pub(crate) min_injection_score: f32,
99 pub(crate) embedding_model: String,
100 pub(crate) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
101 /// Resolves the current set of per-plugin skill dirs at reload time.
102 ///
103 /// Called inside `reload_skills()` so that plugins installed via `/plugins add` after
104 /// startup are discovered on the next watcher event without restarting the agent.
105 pub(crate) plugin_dirs_supplier: Option<Arc<dyn Fn() -> Vec<PathBuf> + Send + Sync>>,
106 pub(crate) active_skill_names: Vec<String>,
107 pub(crate) last_skills_prompt: String,
108 pub(crate) prompt_mode: SkillPromptMode,
109 /// Custom secrets available at runtime: key=hyphenated name, value=secret.
110 pub(crate) available_custom_secrets: HashMap<String, Secret>,
111 pub(crate) cosine_weight: f32,
112 pub(crate) hybrid_search: bool,
113 /// Linear blend weight for BM25 hybrid fusion: `fused = bm25_alpha * cosine + (1-bm25_alpha) * bm25_norm`.
114 /// Clamped to `[0.0, 1.0]` at config load. Default: `0.7`.
115 pub(crate) bm25_alpha: f32,
116 pub(crate) bm25_index: Option<zeph_skills::bm25::Bm25Index>,
117 pub(crate) two_stage_matching: bool,
118 /// Threshold for confusability warnings (0.0 = disabled).
119 pub(crate) confusability_threshold: f32,
120 /// `SkillOrchestra` RL routing head. `Some` when `rl_routing_enabled = true` and
121 /// weights are loaded or initialized. `None` when RL routing is disabled.
122 pub(crate) rl_head: Option<zeph_skills::rl_head::RoutingHead>,
123 /// Blend weight for RL routing: `final = (1-rl_weight)*cosine + rl_weight*rl_score`.
124 pub(crate) rl_weight: f32,
125 /// Skip RL blending for the first N updates (cold-start warmup).
126 pub(crate) rl_warmup_updates: u32,
127 /// Directory where `/skill create` writes generated skills.
128 /// Defaults to `managed_dir` if `None`.
129 pub(crate) generation_output_dir: Option<std::path::PathBuf>,
130 /// Provider name for query rewriting before skill matching. Empty = disabled.
131 pub(crate) query_rewrite_provider_name: String,
132 /// Provider name for `/skill create` generation. Empty = primary.
133 pub(crate) generation_provider_name: String,
134 /// Provider name for skill disambiguation LLM calls. Empty = primary.
135 pub(crate) disambiguate_provider_name: String,
136 /// Timeout in milliseconds for `/skill create` LLM generation. Default: 60 000.
137 pub(crate) generation_timeout_ms: u64,
138 /// Optional quality-gate evaluator for generated SKILL.md files (#3319).
139 ///
140 /// When `Some`, the evaluator is attached to every `SkillGenerator` instance so that
141 /// generated skills are scored before being written to disk.
142 pub(crate) skill_evaluator: Option<std::sync::Arc<zeph_skills::evaluator::SkillEvaluator>>,
143 /// Weights for the evaluator composite score — forwarded to `SkillGenerator::with_evaluator`.
144 pub(crate) eval_weights: zeph_skills::evaluator::EvaluationWeights,
145 /// Minimum composite score required to accept a generated skill (forwarded to the generator).
146 pub(crate) eval_threshold: f32,
147 /// Enable `GoSkills` group-structured skill injection.
148 pub(crate) group_structured: bool,
149 /// Inter-skill cosine similarity threshold for `GoSkills` grouping.
150 pub(crate) support_similarity_threshold: f32,
151 /// Whether Stage-2 LLM semantic compliance scan is enabled on `plugin add`.
152 pub(crate) semantic_scan: bool,
153 /// Provider name for the semantic scan LLM. Empty = use primary provider.
154 pub(crate) semantic_scan_provider: String,
155}
156
157pub(crate) struct McpState {
158 pub(crate) tools: Vec<zeph_mcp::McpTool>,
159 pub(crate) registry: Option<zeph_mcp::McpToolRegistry>,
160 pub(crate) manager: Option<std::sync::Arc<zeph_mcp::McpManager>>,
161 pub(crate) allowed_commands: Vec<String>,
162 pub(crate) max_dynamic: usize,
163 /// Receives elicitation requests from MCP server handlers during tool execution.
164 /// When `Some`, the agent loop must process these concurrently with tool result awaiting
165 /// to avoid deadlock (tool result waits for elicitation, elicitation waits for agent loop).
166 pub(crate) elicitation_rx: Option<tokio::sync::mpsc::Receiver<zeph_mcp::ElicitationEvent>>,
167 /// Shared with `McpToolExecutor` so native `tool_use` sees the current tool list.
168 ///
169 /// Two methods write to this `RwLock` — ordering matters:
170 /// - `sync_executor_tools()`: writes the **full** `self.tools` set.
171 /// - `apply_pruned_tools()`: writes the **pruned** subset (used after pruning).
172 ///
173 /// Within a turn, `sync_executor_tools` must always run **before**
174 /// `apply_pruned_tools`. The normal call order guarantees this: tool-list
175 /// change events call `sync_executor_tools` (inside `check_tool_refresh`,
176 /// `handle_mcp_add`, `handle_mcp_remove`), and pruning runs later inside
177 /// `rebuild_system_prompt`. See also: `apply_pruned_tools`.
178 pub(crate) shared_tools: Option<Arc<RwLock<Vec<zeph_mcp::McpTool>>>>,
179 /// Receives full flattened tool list after any `tools/list_changed` notification.
180 pub(crate) tool_rx: Option<tokio::sync::watch::Receiver<Vec<zeph_mcp::McpTool>>>,
181 /// Per-server connection outcomes from the initial `connect_all()` call.
182 pub(crate) server_outcomes: Vec<zeph_mcp::ServerConnectOutcome>,
183 /// Per-message cache for MCP tool pruning results (#2298).
184 ///
185 /// Reset at the start of each user turn and whenever the MCP tool list
186 /// changes (via `tools/list_changed`, `/mcp add`, or `/mcp remove`).
187 pub(crate) pruning_cache: zeph_mcp::PruningCache,
188 /// Dedicated provider for MCP tool pruning LLM calls.
189 ///
190 /// `None` means fall back to the agent's primary provider.
191 /// Resolved from `[[llm.providers]]` at build time using `pruning_provider`
192 /// from `ToolPruningConfig`.
193 pub(crate) pruning_provider: Option<zeph_llm::any::AnyProvider>,
194 /// Whether MCP tool pruning is enabled. Mirrors `ToolPruningConfig::enabled`.
195 pub(crate) pruning_enabled: bool,
196 /// Pruning parameters snapshot. Derived from `ToolPruningConfig` at build time.
197 pub(crate) pruning_params: zeph_mcp::PruningParams,
198 /// Pre-computed semantic tool index for embedding-based discovery (#2321).
199 ///
200 /// Built at connect time via `rebuild_semantic_index()`, rebuilt on tool list change.
201 /// `None` when strategy is not `Embedding` or when build failed (fallback to all tools).
202 pub(crate) semantic_index: Option<zeph_mcp::SemanticToolIndex>,
203 /// Active discovery strategy and parameters. Derived from `ToolDiscoveryConfig`.
204 pub(crate) discovery_strategy: zeph_mcp::ToolDiscoveryStrategy,
205 /// Discovery parameters snapshot. Derived from `ToolDiscoveryConfig` at build time.
206 pub(crate) discovery_params: zeph_mcp::DiscoveryParams,
207 /// Dedicated embedding provider for tool discovery. `None` = fall back to the
208 /// agent's primary embedding provider.
209 pub(crate) discovery_provider: Option<zeph_llm::any::AnyProvider>,
210 /// When `true`, show a security warning before prompting for fields whose names
211 /// match sensitive patterns (password, token, secret, key, credential, etc.).
212 pub(crate) elicitation_warn_sensitive_fields: bool,
213 /// When `true`, semantic index and registry need to be rebuilt at the next opportunity.
214 ///
215 /// Set after `/mcp add` or `/mcp remove` when called via `AgentAccess::handle_mcp`,
216 /// which cannot call `rebuild_semantic_index` and `sync_mcp_registry` directly because
217 /// those are `async fn(&mut self)` and their futures are `!Send` (they hold `&mut Agent<C>`
218 /// across `.await`). The rebuild is deferred to `check_tool_refresh`, which runs at the
219 /// start of each turn without the `Box<dyn Future + Send>` constraint.
220 pub(crate) pending_semantic_rebuild: bool,
221}
222
223pub(crate) struct IndexState {
224 pub(crate) retriever: Option<std::sync::Arc<zeph_index::retriever::CodeRetriever>>,
225 pub(crate) repo_map_tokens: usize,
226 pub(crate) cached_repo_map: Option<(String, std::time::Instant)>,
227 pub(crate) repo_map_ttl: std::time::Duration,
228}
229
/// Snapshot of adversarial policy gate configuration for status display.
///
/// Plain data only: it can be cloned, debug-printed, and compared by value (for example to
/// detect whether the displayed configuration changed between two snapshots).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AdversarialPolicyInfo {
    /// Provider associated with the policy gate.
    // NOTE(review): presumably the configured provider name — confirm where this is built.
    pub provider: String,
    /// Number of policies in the snapshot.
    pub policy_count: usize,
    /// The gate's `fail_open` setting.
    // NOTE(review): presumably `true` lets calls through when the gate itself fails — confirm.
    pub fail_open: bool,
}
237
238#[allow(clippy::struct_excessive_bools)] // independent boolean flags; bitflags or enum would obscure semantics without reducing complexity
239pub(crate) struct RuntimeConfig {
240 pub(crate) security: SecurityConfig,
241 pub(crate) timeouts: TimeoutConfig,
242 pub(crate) model_name: String,
243 /// Configured name from `[[llm.providers]]` (the `name` field), set at startup and on
244 /// `/provider` switch. Falls back to the provider type string when empty.
245 pub(crate) active_provider_name: String,
246 pub(crate) permission_policy: zeph_tools::PermissionPolicy,
247 pub(crate) redact_credentials: bool,
248 pub(crate) rate_limiter: super::rate_limiter::ToolRateLimiter,
249 pub(crate) semantic_cache_enabled: bool,
250 pub(crate) semantic_cache_threshold: f32,
251 pub(crate) semantic_cache_max_candidates: u32,
252 /// Dependency config snapshot stored for per-turn boost parameters.
253 pub(crate) dependency_config: zeph_tools::DependencyConfig,
254 /// Adversarial policy gate runtime info for /status display.
255 pub(crate) adversarial_policy_info: Option<AdversarialPolicyInfo>,
256 /// Current spawn depth of this agent instance (0 = top-level, 1 = first sub-agent, etc.).
257 /// Used by `build_spawn_context()` to propagate depth to children.
258 pub(crate) spawn_depth: u32,
259 /// Inject `<budget>` XML into the volatile system prompt section (#2267).
260 pub(crate) budget_hint_enabled: bool,
261 /// Per-channel skill allowlist. Skills not matching the allowlist are excluded from the
262 /// prompt. An empty `allowed` list means all skills are permitted (default).
263 pub(crate) channel_skills: zeph_config::ChannelSkillsConfig,
264 /// Per-channel tool allowlist. `None` = no restriction. `Some` = only listed tools permitted.
265 /// Populated from the active channel's `allowed_tools` config at agent build time.
266 pub(crate) channel_tool_allowlist: Option<Vec<String>>,
267 /// Minimum allowed interval for `/loop` ticks (seconds). Sourced from `[cli.loop] min_interval_secs`.
268 pub(crate) loop_min_interval_secs: u64,
269 /// Runtime middleware layers for LLM calls and tool dispatch (#2286).
270 ///
271 /// Default: empty vec (zero-cost — loops never iterate).
272 pub(crate) layers: Vec<std::sync::Arc<dyn crate::runtime_layer::RuntimeLayer>>,
273 /// Background supervisor config snapshot for turn-boundary abort logic.
274 pub(crate) supervisor_config: crate::config::TaskSupervisorConfig,
275 /// Session recap config (#3064).
276 pub(crate) recap_config: zeph_config::RecapConfig,
277 /// ACP server configuration snapshot for `/acp` slash-command display.
278 pub(crate) acp_config: zeph_config::AcpConfig,
279 /// Set to `true` after the auto-recap is emitted at session resume (#3144).
280 ///
281 /// Used by `/recap` to skip a redundant LLM call when no new messages have
282 /// been added since the auto-recap was shown.
283 pub(crate) auto_recap_shown: bool,
284 /// Number of non-system messages present when the session was resumed (#3144).
285 ///
286 /// Combined with `auto_recap_shown` to detect whether the user has added new
287 /// messages after the auto-recap was shown.
288 pub(crate) msg_count_at_resume: usize,
289 /// Callback that spawns an external ACP sub-agent process by shell command (#3302).
290 ///
291 /// Injected by the binary crate when the `acp` feature is enabled.
292 /// `None` in bare / non-ACP mode; callers must degrade gracefully.
293 pub(crate) acp_subagent_spawn_fn: Option<zeph_subagent::AcpSubagentSpawnFn>,
294 /// Channel type string used as part of the `(channel_type, channel_id)` persistence key.
295 ///
296 /// Set at build time from the active I/O channel (e.g. `"cli"`, `"tui"`, `"telegram"`).
297 /// Empty when channel identity has not been configured (persistence is skipped).
298 pub(crate) channel_type: String,
299 /// Whether provider preference persistence is enabled for this session (#3308).
300 ///
301 /// Controlled by `[session] provider_persistence = true` (the default). When `false`,
302 /// the stored provider preference is never read or written.
303 pub(crate) provider_persistence_enabled: bool,
304 /// Whether per-session provider override params (e.g. `reasoning_effort`) should be
305 /// persisted alongside the provider name (#4654).
306 ///
307 /// Only meaningful when `provider_persistence_enabled` is also `true`.
308 pub(crate) persist_provider_overrides_enabled: bool,
309 /// Guards against re-persisting during `restore_channel_provider` (#4654, F1).
310 ///
311 /// Set to `true` immediately before calling `provider_switch_as_string` inside the restore
312 /// path, cleared on every branch after the call. While `true`, `persist_channel_provider`
313 /// returns early without writing anything.
314 pub(crate) restoring_provider: bool,
315 /// Goal lifecycle feature configuration.
316 pub(crate) goals: GoalRuntimeConfig,
317}
318
319/// Groups feedback detection subsystems: correction detector, judge detector, and LLM classifier.
320pub(crate) struct FeedbackState {
321 pub(crate) detector: zeph_agent_feedback::FeedbackDetector,
322 pub(crate) judge: Option<zeph_agent_feedback::JudgeDetector>,
323 /// LLM-backed zero-shot classifier for `DetectorMode::Model`.
324 /// When `Some`, `spawn_judge_correction_check` uses this instead of `JudgeDetector`.
325 pub(crate) llm_classifier: Option<zeph_llm::classifier::llm::LlmClassifier>,
326}
327
328/// Groups security-related subsystems (sanitizer, quarantine, exfiltration guard).
329pub(crate) struct SecurityState {
330 pub(crate) sanitizer: ContentSanitizer,
331 pub(crate) quarantine_summarizer: Option<QuarantinedSummarizer>,
332 /// Whether this agent session is serving an ACP client.
333 /// When `true` and `mcp_to_acp_boundary` is enabled, MCP tool results
334 /// receive unconditional quarantine and cross-boundary audit logging.
335 pub(crate) is_acp_session: bool,
336 pub(crate) exfiltration_guard: zeph_sanitizer::exfiltration::ExfiltrationGuard,
337 pub(crate) flagged_urls: HashSet<String>,
338 /// URLs explicitly provided by the user across all turns in this session.
339 /// Populated from raw user message text; cleared on `/clear`.
340 /// Shared with `UrlGroundingVerifier` to check `fetch`/`web_scrape` calls at dispatch time.
341 pub(crate) user_provided_urls: Arc<RwLock<HashSet<String>>>,
342 pub(crate) pii_filter: zeph_sanitizer::pii::PiiFilter,
343 /// NER classifier for PII detection (`classifiers.ner_model`). When `Some`, the PII path
344 /// runs both regex (`pii_filter`) and NER, then merges spans before redaction.
345 /// `None` when `classifiers` feature is disabled or `classifiers.enabled = false`.
346 #[cfg(feature = "classifiers")]
347 pub(crate) pii_ner_backend: Option<std::sync::Arc<dyn zeph_llm::classifier::ClassifierBackend>>,
348 /// Per-call timeout for the NER PII classifier in milliseconds.
349 #[cfg(feature = "classifiers")]
350 pub(crate) pii_ner_timeout_ms: u64,
351 /// Maximum number of bytes passed to the NER PII classifier per call.
352 ///
353 /// Large tool outputs (e.g. `search_code`) can produce 150+ `DeBERTa` chunks and exceed
354 /// the per-call timeout. Input is truncated at a valid UTF-8 boundary before classification.
355 #[cfg(feature = "classifiers")]
356 pub(crate) pii_ner_max_chars: usize,
357 /// Circuit-breaker threshold: number of consecutive timeouts before NER is disabled.
358 /// `0` means the circuit breaker is disabled (NER is always attempted).
359 #[cfg(feature = "classifiers")]
360 pub(crate) pii_ner_circuit_breaker_threshold: u32,
361 /// Number of consecutive NER timeouts observed since the last successful call.
362 #[cfg(feature = "classifiers")]
363 pub(crate) pii_ner_consecutive_timeouts: u32,
364 /// Set to `true` when the circuit breaker trips. NER is skipped for the rest of the session.
365 #[cfg(feature = "classifiers")]
366 pub(crate) pii_ner_tripped: bool,
367 pub(crate) memory_validator: zeph_sanitizer::memory_validation::MemoryWriteValidator,
368 /// LLM-based prompt injection pre-screener (opt-in).
369 pub(crate) guardrail: Option<zeph_sanitizer::guardrail::GuardrailFilter>,
370 /// Post-LLM response verification layer.
371 pub(crate) response_verifier: zeph_sanitizer::response_verifier::ResponseVerifier,
372 /// Temporal causal IPI analyzer (opt-in, disabled when `None`).
373 pub(crate) causal_analyzer: Option<zeph_sanitizer::causal_ipi::TurnCausalAnalyzer>,
374 /// VIGIL pre-sanitizer gate. `None` for subagent sessions (subagents are exempt).
375 /// Set at agent build time for top-level agents; skipped for subagents (high FP rate).
376 pub(crate) vigil: Option<crate::agent::vigil::VigilGate>,
377 /// Cross-turn risk accumulator (spec 050 Phase 1).
378 ///
379 /// `advance_turn()` MUST be called once per turn, before `PolicyGateExecutor::check_policy`.
380 /// Never expose score, level, or alerts to any LLM-callable surface.
381 pub(crate) trajectory: crate::agent::trajectory::TrajectorySentinel,
382 /// Shared risk-level slot for `PolicyGateExecutor` (spec 050).
383 ///
384 /// Written by the agent loop after each turn's `sentinel.current_risk()` call.
385 /// `PolicyGateExecutor::check_policy` reads it to downgrade `Allow` at `Critical`.
386 /// `u8` encoding: 0=Calm, 1=Elevated, 2=High, 3=Critical.
387 pub(crate) trajectory_risk_slot: zeph_tools::TrajectoryRiskSlot,
388 /// Pending risk signals from executor layers (spec 050 §2).
389 ///
390 /// `PolicyGateExecutor` and `ScopedToolExecutor` push signal codes here.
391 /// `begin_turn()` drains this queue into `trajectory.record()`.
392 pub(crate) trajectory_signal_queue: zeph_tools::RiskSignalQueue,
393 /// Persistent safety stream + LLM pre-execution probe (spec 050 Phase 2).
394 ///
395 /// `None` when `security.shadow_sentinel.enabled = false` (default).
396 /// When `Some`, `begin_turn()` calls `advance_turn()` to reset the per-turn probe counter.
397 pub(crate) shadow_sentinel:
398 Option<std::sync::Arc<crate::agent::shadow_sentinel::ShadowSentinel>>,
399 /// Per-turn multi-step attack chain accumulator.
400 ///
401 /// `None` by default. When `Some`, `begin_turn()` calls `reset()` to clear per-turn state.
402 /// The same `Arc` must be passed to `ShellExecutor::with_risk_chain` at build time.
403 pub(crate) risk_chain_accumulator: Option<std::sync::Arc<zeph_tools::RiskChainAccumulator>>,
404 /// MAGE trajectory risk accumulator (spec 004-16).
405 ///
406 /// Per-session in-memory accumulator that ingests sanitizer audit signals with exponential
407 /// temporal decay and gates tool execution when cumulative risk exceeds `risk_threshold`.
408 /// Initialized as noop when `memory.shadow_memory.enabled = false` (default).
409 /// `begin_turn()` calls `advance_turn()` then ingests pending signal codes.
410 pub(crate) mage_accumulator: zeph_memory::shadow::TrajectoryRiskAccumulator,
411 /// Per-session append-only shadow memory for cross-turn goal-drift detection (spec 010-7).
412 ///
413 /// `None` when `security.causal_ipi.shadow_memory.enabled = false` (default).
414 /// When `Some`, `process_tool_result_batch` records a `ShadowEvent` after each tool batch,
415 /// then calls `goal_drift_score()` and emits a `GoalDrift` security event when alerted.
416 pub(crate) shadow_memory: Option<zeph_sanitizer::ShadowMemory>,
417}
418
419/// Groups debug/diagnostics subsystems (dumper, trace collector, anomaly detector, logging config).
420pub(crate) struct DebugState {
421 pub(crate) debug_dumper: Option<crate::debug_dump::DebugDumper>,
422 pub(crate) dump_format: crate::debug_dump::DumpFormat,
423 pub(crate) trace_collector: Option<crate::debug_dump::trace::TracingCollector>,
424 /// Monotonically increasing counter for `process_user_message` calls.
425 /// Used to key spans in `trace_collector.active_iterations`.
426 pub(crate) iteration_counter: usize,
427 pub(crate) anomaly_detector: Option<zeph_tools::AnomalyDetector>,
428 /// Whether to emit `reasoning_amplification` warnings for quality failures from reasoning
429 /// models. Mirrors `AnomalyConfig::reasoning_model_warning`. Default: `true`.
430 pub(crate) reasoning_model_warning: bool,
431 pub(crate) logging_config: crate::config::LoggingConfig,
432 /// Base dump directory — stored so `/dump-format trace` can create a `TracingCollector` (CR-04).
433 pub(crate) dump_dir: Option<PathBuf>,
434 /// Service name for `TracingCollector` created via runtime format switch (CR-04).
435 pub(crate) trace_service_name: String,
436 /// Whether to redact in `TracingCollector` created via runtime format switch (CR-04).
437 pub(crate) trace_redact: bool,
438 /// User-defined resource attributes forwarded to `TracingCollector` (from `telemetry.trace_metadata`).
439 pub(crate) trace_metadata: std::collections::HashMap<String, String>,
440 /// Span ID of the currently executing iteration — used by LLM/tool span wiring (CR-01).
441 /// Set to `Some` at the start of `process_user_message`, cleared at end.
442 pub(crate) current_iteration_span_id: Option<[u8; 8]>,
443}
444
/// The shell-level overlay as it was baked in at startup.
///
/// `reload_config` compares against it to notice when a hot-reload would yield a shell
/// restriction set different from the one baked into the live `ShellExecutor`
/// (M4 warn-on-divergence).
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ShellOverlaySnapshot {
    /// `blocked_commands` contributed by plugins, sorted.
    pub blocked: Vec<String>,
    /// `allowed_commands` after plugin intersection, sorted (empty if base was empty).
    pub allowed: Vec<String>,
}
456
457/// Runtime state for an active `/loop` session.
458///
459/// At most one loop is active at a time; `LifecycleState::user_loop` holds `Some` while
460/// the loop is running and `None` otherwise.
461pub(crate) struct LoopState {
462 /// The prompt text injected on each tick.
463 pub(crate) prompt: String,
464 /// Number of ticks fired so far.
465 pub(crate) iteration: u64,
466 /// Tick interval. `MissedTickBehavior::Skip` prevents burst catch-up.
467 pub(crate) interval: Interval,
468 /// Cancel handle. Dropped (and token cancelled) when loop is stopped.
469 pub(crate) cancel_tx: CancellationToken,
470}
471
472/// Groups agent lifecycle state: shutdown signaling, timing, and I/O notification channels.
473pub(crate) struct LifecycleState {
474 pub(crate) shutdown: watch::Receiver<bool>,
475 pub(crate) start_time: Instant,
476 pub(crate) cancel_signal: Arc<Notify>,
477 pub(crate) cancel_token: CancellationToken,
478 /// Handle to the cancel bridge task spawned each turn. Aborted before a new one is created
479 /// to prevent unbounded task accumulation across turns.
480 pub(crate) cancel_bridge_handle: Option<zeph_common::task_supervisor::BlockingHandle<()>>,
481 pub(crate) config_path: Option<PathBuf>,
482 pub(crate) config_reload_rx: Option<mpsc::Receiver<ConfigEvent>>,
483 /// Path to the plugins directory; used to re-apply overlays on hot-reload.
484 pub(crate) plugins_dir: PathBuf,
485 /// Shell overlay snapshot baked in at startup. Used to detect divergence on hot-reload.
486 pub(crate) startup_shell_overlay: ShellOverlaySnapshot,
487 /// Handle for live-rebuilding the `ShellExecutor`'s `blocked_commands` policy on hot-reload.
488 /// `None` when no `ShellExecutor` is in the executor chain (test harnesses, daemon-only modes).
489 pub(crate) shell_policy_handle: Option<zeph_tools::ShellPolicyHandle>,
490 pub(crate) warmup_ready: Option<watch::Receiver<bool>>,
491 pub(crate) update_notify_rx: Option<mpsc::Receiver<String>>,
492 pub(crate) custom_task_rx: Option<mpsc::Receiver<String>>,
493 /// Active `/loop` state. `None` when no loop is running.
494 pub(crate) user_loop: Option<LoopState>,
495 /// Last known process cwd. Compared after each tool call to detect changes.
496 pub(crate) last_known_cwd: PathBuf,
497 /// Receiver for file-change events from `FileChangeWatcher`. `None` when no paths configured.
498 pub(crate) file_changed_rx: Option<mpsc::Receiver<FileChangedEvent>>,
499 /// Keeps the `FileChangeWatcher` alive for the agent's lifetime. Dropping it aborts the watcher task.
500 pub(crate) file_watcher: Option<crate::file_watcher::FileChangeWatcher>,
501 /// Supervised background task manager. Owned by the agent; call `reap()` between turns
502 /// and `abort_all()` on shutdown.
503 pub(crate) supervisor: super::agent_supervisor::BackgroundSupervisor,
504 /// Per-turn completion notifier. `None` when `notifications.enabled = false`.
505 pub(crate) notifier: Option<crate::notifications::Notifier>,
506 /// Per-turn LLM request counter. Incremented by `process_response`; reset at turn start.
507 pub(crate) turn_llm_requests: u32,
508 /// Timestamp of the last turn that ended with `LlmError::NoProviders`.
509 ///
510 /// Used to gate `advance_context_lifecycle`: when all providers are down, context preparation
511 /// is skipped (degraded mode) until `no_providers_backoff_secs` has elapsed.
512 pub(crate) last_no_providers_at: Option<Instant>,
513 /// Completions from background shell runs waiting to be injected into the next turn.
514 ///
515 /// Drained at the top of `process_user_message_inner` after `supervisor.reap()`.
516 /// All pending completions and the real user message are merged into a **single**
517 /// user-role block to satisfy strict alternation requirements (Anthropic Messages API).
518 ///
519 /// Capacity is capped at `BACKGROUND_COMPLETION_BUFFER_CAP`. On overflow the oldest
520 /// entry is dropped and a placeholder is substituted so the LLM learns results were lost.
521 pub(crate) pending_background_completions:
522 VecDeque<zeph_tools::shell::background::BackgroundCompletion>,
523 /// Receiver end of the dedicated background-completion channel created alongside the
524 /// `ShellExecutor`. Polled at the top of each turn to drain completions into
525 /// `pending_background_completions`. `None` when no `ShellExecutor` is configured.
526 pub(crate) background_completion_rx:
527 Option<tokio::sync::mpsc::Receiver<zeph_tools::BackgroundCompletion>>,
528 /// Shared reference to the `ShellExecutor` used to query in-flight background run snapshots
529 /// for TUI metrics display. `None` when no `ShellExecutor` is wired (test harnesses, etc.).
530 pub(crate) shell_executor_handle: Option<std::sync::Arc<zeph_tools::ShellExecutor>>,
531 /// Session-level task supervisor, shared with bootstrap and TUI. Used to register
532 /// background agent tasks (cancel bridge, compaction, sidequest eviction) for
533 /// observability and graceful shutdown.
534 ///
535 /// Created with a fresh [`CancellationToken`] in `LifecycleState::new()` for test
536 /// harnesses; production code overwrites it via `Agent::with_task_supervisor`.
537 pub(crate) task_supervisor: Arc<zeph_common::TaskSupervisor>,
538}
539
540/// Minimal config snapshot needed to reconstruct a provider at runtime via `/provider <name>`.
541///
542/// Secrets are stored as plain strings because [`Secret`] intentionally does not implement
543/// `Clone`. They are re-wrapped in `Secret` when passed to `build_provider_for_switch`.
544pub struct ProviderConfigSnapshot {
545 pub claude_api_key: Option<String>,
546 pub openai_api_key: Option<String>,
547 pub gemini_api_key: Option<String>,
548 pub compatible_api_keys: std::collections::HashMap<String, String>,
549 pub llm_request_timeout_secs: u64,
550 pub embedding_model: String,
551 pub gonka_private_key: Option<Zeroizing<String>>,
552 pub gonka_address: Option<String>,
553 pub cocoon_access_hash: Option<String>,
554}
555
556/// Groups provider-related state: alternate providers, runtime switching, and compaction flags.
557pub(crate) struct ProviderState {
558 pub(crate) summary_provider: Option<AnyProvider>,
559 /// Shared slot for runtime model switching; set by external caller (e.g. ACP).
560 pub(crate) provider_override: Option<Arc<RwLock<Option<AnyProvider>>>>,
561 pub(crate) judge_provider: Option<AnyProvider>,
562 /// Dedicated provider for compaction probe LLM calls. Falls back to `summary_provider`
563 /// (or primary) when `None`.
564 pub(crate) probe_provider: Option<AnyProvider>,
565 /// Dedicated provider for `compress_context` LLM calls (#2356).
566 /// Falls back to the primary provider when `None`.
567 pub(crate) compress_provider: Option<AnyProvider>,
568 pub(crate) cached_prompt_tokens: u64,
569 /// Whether the active provider has server-side compaction enabled (Claude compact-2026-01-12).
570 /// When true, client-side compaction is skipped.
571 pub(crate) server_compaction_active: bool,
572 pub(crate) stt: Option<Box<dyn SpeechToText>>,
573 /// Snapshot of `[[llm.providers]]` entries for runtime `/provider` switching.
574 pub(crate) provider_pool: Vec<ProviderEntry>,
575 /// Resolved secrets and timeout settings needed to reconstruct providers at runtime.
576 pub(crate) provider_config_snapshot: Option<ProviderConfigSnapshot>,
577}
578
579/// Groups metrics and cost tracking state.
580pub(crate) struct MetricsState {
581 pub(crate) metrics_tx: Option<watch::Sender<MetricsSnapshot>>,
582 pub(crate) cost_tracker: Option<CostTracker>,
583 pub(crate) token_counter: Arc<TokenCounter>,
584 /// Set to `true` when Claude extended context (`enable_extended_context = true`) is active.
585 /// Read from config at build time, not derived from provider internals.
586 pub(crate) extended_context: bool,
587 /// Shared classifier latency ring buffer. Populated by `ContentSanitizer` (injection, PII)
588 /// and `LlmClassifier` (feedback). `None` when classifiers are not configured.
589 pub(crate) classifier_metrics: Option<Arc<zeph_llm::ClassifierMetrics>>,
590 /// Rolling window of per-turn latency samples (last 10 turns).
591 pub(crate) timing_window: std::collections::VecDeque<crate::metrics::TurnTimings>,
592 /// Accumulator for the current turn's timings. Flushed at turn end via `flush_turn_timings`.
593 pub(crate) pending_timings: crate::metrics::TurnTimings,
594 /// Optional histogram recorder for per-event Prometheus observations.
595 /// `None` when the `prometheus` feature is disabled or metrics are not enabled.
596 pub(crate) histogram_recorder: Option<std::sync::Arc<dyn crate::metrics::HistogramRecorder>>,
597}
598
599/// Groups task orchestration and subagent state.
600#[derive(Default)]
601pub(crate) struct OrchestrationState {
602 /// Lookahead tool hints snapshot taken after the most recent scheduler tick.
603 ///
604 /// Populated by `run_scheduler_loop` after each `scheduler.tick()` call via
605 /// `zeph_orchestration::lookahead_tools`. Cleared when the scheduler loop exits.
606 /// Read by `prepare_context` in `assembly.rs` to pass PAACE hints to `FidelityScorer`.
607 pub(crate) cached_lookahead: Vec<zeph_common::PlannedToolHint>,
608 /// On `OrchestrationState` (not `ProviderState`) because this provider is used exclusively
609 /// by `LlmPlanner` during orchestration, not shared across subsystems.
610 pub(crate) planner_provider: Option<AnyProvider>,
611 /// Provider for `PlanVerifier` LLM calls. `None` falls back to `orchestrator_provider`
612 /// then the primary provider.
613 pub(crate) verify_provider: Option<AnyProvider>,
614 /// Provider for scheduling-tier LLM calls (aggregation, predicate evaluation, verification
615 /// fallback). `None` falls back to the primary provider.
616 /// Set from `config.orchestration.orchestrator_provider` at startup.
617 pub(crate) orchestrator_provider: Option<AnyProvider>,
618 /// Provider for predicate gate evaluation. `None` falls back to `orchestrator_provider`
619 /// then `verify_provider` then primary.
620 pub(crate) predicate_provider: Option<AnyProvider>,
621 /// Graph waiting for `/plan confirm` before execution starts.
622 pub(crate) pending_graph: Option<zeph_orchestration::TaskGraph>,
623 /// Cancellation token for the currently executing plan. `None` when no plan is running.
624 /// Created fresh in `handle_plan_confirm()`, cancelled in `handle_plan_cancel()`.
625 ///
626 /// # Known limitation
627 ///
628 /// Token plumbing is ready; the delivery path requires the agent message loop to be
629 /// restructured so `/plan cancel` can be received while `run_scheduler_loop` holds
630 /// `&mut self`. See follow-up issue #1603 (SEC-M34-002).
631 pub(crate) plan_cancel_token: Option<CancellationToken>,
632 /// Manages spawned sub-agents.
633 pub(crate) subagent_manager: Option<zeph_subagent::SubAgentManager>,
634 pub(crate) subagent_config: crate::config::SubAgentConfig,
635 pub(crate) orchestration_config: crate::config::OrchestrationConfig,
636 /// Lazily initialized plan template cache. `None` until first use or when
637 /// memory (`SQLite`) is unavailable.
638 #[allow(dead_code)]
639 pub(crate) plan_cache: Option<zeph_orchestration::PlanCache>,
640 /// Goal embedding from the most recent `plan_with_cache()` call. Consumed by
641 /// `finalize_plan_execution()` to cache the completed plan template.
642 pub(crate) pending_goal_embedding: Option<Vec<f32>>,
643 /// `AdaptOrch` topology advisor — `None` when `[orchestration.adaptorch]` is disabled.
644 pub(crate) topology_advisor: Option<std::sync::Arc<zeph_orchestration::TopologyAdvisor>>,
645 /// Last `AdaptOrch` verdict; carried from `handle_plan_goal_as_string` to scheduler loop
646 /// for `record_outcome`.
647 #[allow(dead_code)] // read via .take() in plan.rs; clippy false positive
648 pub(crate) last_advisor_verdict: Option<zeph_orchestration::AdvisorVerdict>,
649 /// Task graph persistence handle. `None` when no `SemanticMemory` was
650 /// attached via `with_memory`, or when
651 /// `OrchestrationConfig::persistence_enabled` is `false`. When `Some`, the
652 /// scheduler loop snapshots the graph once per tick and `/plan resume <id>`
653 /// rehydrates from disk.
654 pub(crate) graph_persistence: Option<
655 zeph_orchestration::GraphPersistence<zeph_memory::store::graph_store::TaskGraphStore>,
656 >,
657 /// Named execution environment for the current orchestration task.
658 ///
659 /// Set by the scheduler when dispatching a `TaskNode` that has
660 /// `execution_environment: Some(name)`. Cleared between tasks. When `Some`,
661 /// `prepare_tool_dispatch` injects an [`ExecutionContext`] named `name` into
662 /// every `ToolCall` so that `ShellExecutor::resolve_context` uses the right env.
663 pub(crate) task_execution_env: Option<String>,
664}
665
666/// Groups instruction hot-reload state.
667#[derive(Default)]
668pub(crate) struct InstructionState {
669 pub(crate) blocks: Vec<InstructionBlock>,
670 pub(crate) reload_rx: Option<mpsc::Receiver<InstructionEvent>>,
671 pub(crate) reload_state: Option<InstructionReloadState>,
672}
673
674/// Groups experiment feature state (gated behind `experiments` feature flag).
675pub(crate) struct ExperimentState {
676 pub(crate) config: crate::config::ExperimentConfig,
677 /// Cancellation token for a running experiment session. `Some` means an experiment is active.
678 pub(crate) cancel: Option<tokio_util::sync::CancellationToken>,
679 /// `JoinHandle` for the background experiment task. Stored so shutdown can abort it if the
680 /// `CancellationToken` signal is not observed in time (e.g. the task is blocked on I/O).
681 pub(crate) handle: Option<tokio::task::JoinHandle<()>>,
682 /// Pre-built config snapshot used as the experiment baseline (agent path).
683 pub(crate) baseline: zeph_experiments::ConfigSnapshot,
684 /// Dedicated judge provider for evaluation. When `Some`, the evaluator uses this provider
685 /// instead of the agent's primary provider, eliminating self-judge bias.
686 pub(crate) eval_provider: Option<AnyProvider>,
687 /// Receives completion/error messages from the background experiment engine task.
688 /// Always present so the select! branch compiles unconditionally.
689 pub(crate) notify_rx: Option<tokio::sync::mpsc::Receiver<String>>,
690 /// Sender end paired with `experiment_notify_rx`. Cloned into the background task.
691 pub(crate) notify_tx: tokio::sync::mpsc::Sender<String>,
692}
693
694/// Groups context-compression feature state (gated behind `context-compression` feature flag).
695#[derive(Default)]
696pub(crate) struct CompressionState {
697 /// Cached task goal for TaskAware/MIG pruning. Set by `maybe_compact()`,
698 /// invalidated when the last user message hash changes.
699 pub(crate) current_task_goal: Option<String>,
700 /// Hash of the last user message when `current_task_goal` was populated.
701 pub(crate) task_goal_user_msg_hash: Option<u64>,
702 /// Pending background task for goal extraction. Spawned when the user message hash changes;
703 /// result applied at the start of the next Soft compaction (#1909).
704 pub(crate) pending_task_goal:
705 Option<zeph_common::task_supervisor::BlockingHandle<Option<String>>>,
706 /// Pending `SideQuest` eviction result from the background LLM call spawned last turn.
707 /// Applied at the START of the next turn before compaction (PERF-1 fix).
708 pub(crate) pending_sidequest_result:
709 Option<zeph_common::task_supervisor::BlockingHandle<Option<Vec<usize>>>>,
710 /// In-memory subgoal registry for `Subgoal`/`SubgoalMig` pruning strategies (#2022).
711 pub(crate) subgoal_registry: zeph_agent_context::SubgoalRegistry,
712 /// Pending background subgoal extraction task.
713 pub(crate) pending_subgoal: Option<
714 zeph_common::task_supervisor::BlockingHandle<
715 Option<zeph_agent_context::SubgoalExtractionResult>,
716 >,
717 >,
718 /// Hash of the last user message when subgoal extraction was scheduled.
719 pub(crate) subgoal_user_msg_hash: Option<u64>,
720 /// Shared typed-page state (#3630). `None` when `typed_pages.enabled = false`.
721 pub(crate) typed_pages_state: Option<Arc<zeph_context::typed_page::TypedPagesState>>,
722}
723
724/// Groups runtime tool filtering, dependency tracking, and iteration bookkeeping.
725#[derive(Default)]
726pub(crate) struct ToolState {
727 /// Dynamic tool schema filter: pre-computed tool embeddings for per-turn filtering (#2020).
728 pub(crate) tool_schema_filter: Option<zeph_tools::ToolSchemaFilter>,
729 /// Cached filtered tool IDs for the current user turn.
730 pub(crate) cached_filtered_tool_ids: Option<HashSet<String>>,
731 /// Tool dependency graph for sequential tool availability (#2024).
732 pub(crate) dependency_graph: Option<zeph_tools::ToolDependencyGraph>,
733 /// Always-on tool IDs, mirrored from the tool schema filter for dependency gate bypass.
734 pub(crate) dependency_always_on: HashSet<String>,
735 /// Tool IDs that completed successfully in the current session.
736 pub(crate) completed_tool_ids: HashSet<String>,
737 /// Current tool loop iteration index within the active user turn.
738 pub(crate) current_tool_iteration: usize,
739 /// PASTE pattern store for tool invocation history and prediction (#3642).
740 ///
741 /// `Some` only when `config.tools.speculative.mode` is `Pattern` or `Both`.
742 pub(crate) pattern_store: Option<Arc<crate::agent::speculative::paste::PatternStore>>,
743 /// Per-turn mapping from tool name to `(skill_name, skill_hash)`, populated at skill
744 /// activation and used by `observe()` to attribute tool completions to their owning skill.
745 pub(crate) tool_to_skill: HashMap<String, (String, String)>,
746 /// Last tool executed per skill in the current turn, keyed by skill name.
747 /// Used as `prev_tool` for PASTE pattern transition recording.
748 pub(crate) last_tool_per_skill: HashMap<String, String>,
749}
750
751/// Groups per-session I/O and policy state.
752pub(crate) struct SessionState {
753 pub(crate) env_context: EnvironmentContext,
754 /// Timestamp of the last assistant message appended to context.
755 /// Used by time-based microcompact to compute session idle gap (#2699).
756 /// `None` before the first assistant response.
757 pub(crate) last_assistant_at: Option<Instant>,
758 pub(crate) response_cache: Option<std::sync::Arc<zeph_memory::ResponseCache>>,
759 /// Parent tool call ID when this agent runs as a subagent inside another agent session.
760 /// Propagated into every `LoopbackEvent::ToolStart` / `ToolOutput` so the IDE can build
761 /// a subagent hierarchy.
762 pub(crate) parent_tool_use_id: Option<String>,
763 /// Current-turn intent snapshot for VIGIL. `None` between turns.
764 ///
765 /// Set at the top of `process_user_message` (before any tool call) to the first 1024 chars
766 /// of the user message. Cleared at `end_turn`, on `/clear`, and on any turn-abort path.
767 /// Never shared across turns or propagated into subagents.
768 pub(crate) current_turn_intent: Option<String>,
769 /// Optional status channel for sending spinner/status messages to TUI or stderr.
770 pub(crate) status_tx: Option<tokio::sync::mpsc::UnboundedSender<String>>,
771 /// LSP context injection hooks. Fires after native tool execution, injects
772 /// diagnostics/hover notes as `Role::System` messages before the next LLM call.
773 pub(crate) lsp_hooks: Option<crate::lsp_hooks::LspHookRunner>,
774 /// Snapshot of the policy config for `/policy` command inspection.
775 pub(crate) policy_config: Option<zeph_tools::PolicyConfig>,
776 /// `CwdChanged` hook definitions extracted from `[hooks]` config.
777 pub(crate) hooks_config: HooksConfigSnapshot,
778 /// Whether the current turn originates from a Telegram guest query (`guest_message` update).
779 ///
780 /// When `true`, the agent prompt includes a brief guest-context annotation, and the response
781 /// is delivered via `answerGuestQuery` instead of `sendMessage`.
782 pub(crate) is_guest_context: bool,
783}
784
785/// Extracted hook lists from `[hooks]` config, stored in `SessionState`.
786#[derive(Default)]
787pub(crate) struct HooksConfigSnapshot {
788 /// Hooks fired when working directory changes.
789 pub(crate) cwd_changed: Vec<zeph_config::HookDef>,
790 /// Hooks fired when a watched file changes.
791 pub(crate) file_changed_hooks: Vec<zeph_config::HookDef>,
792 /// Hooks fired when a tool execution is blocked by a `RuntimeLayer::before_tool` check.
793 pub(crate) permission_denied: Vec<zeph_config::HookDef>,
794 /// Hooks fired after each agent turn completes (#3327).
795 ///
796 /// Populated from `HooksConfig::turn_complete` at session construction. Shares the
797 /// `Notifier::should_fire` gate when a notifier is configured; fires on every completion
798 /// when no notifier is present.
799 pub(crate) turn_complete: Vec<zeph_config::HookDef>,
800 /// Hooks fired before each tool execution, matched by tool name pattern.
801 pub(crate) pre_tool_use: Vec<zeph_config::HookMatcher>,
802 /// Hooks fired after each tool execution completes, matched by tool name pattern.
803 pub(crate) post_tool_use: Vec<zeph_config::HookMatcher>,
804}
805
806// Groups message buffering and image staging state.
807pub(crate) struct MessageState {
808 pub(crate) messages: Vec<Message>,
809 // QueuedMessage is pub(super) in message_queue — same visibility as this struct; lint suppressed.
810 #[allow(private_interfaces)]
811 pub(crate) message_queue: VecDeque<QueuedMessage>,
812 /// Image parts staged by `/image` commands, attached to the next user message.
813 pub(crate) pending_image_parts: Vec<zeph_llm::provider::MessagePart>,
814 /// DB row ID of the most recently persisted message. Set by `persist_message`;
815 /// consumed by `push_message` call sites to populate `metadata.db_id` on in-memory messages.
816 pub(crate) last_persisted_message_id: Option<i64>,
817 /// DB message IDs pending hide after deferred tool pair summarization.
818 pub(crate) deferred_db_hide_ids: Vec<i64>,
819 /// Summary texts pending insertion after deferred tool pair summarization.
820 pub(crate) deferred_db_summaries: Vec<String>,
821}
822
823impl McpState {
824 /// Write the **full** `self.tools` set to the shared executor `RwLock`.
825 ///
826 /// This is the first of two writers to `shared_tools`. Within a turn this method must run
827 /// **before** `apply_pruned_tools`, which writes the pruned subset. The normal call order
828 /// guarantees this: tool-list change events call this method, and pruning runs later inside
829 /// `rebuild_system_prompt`. See also: `apply_pruned_tools`.
830 pub(crate) fn sync_executor_tools(&self) {
831 if let Some(ref shared) = self.shared_tools {
832 shared.write().clone_from(&self.tools);
833 }
834 }
835
836 /// Write the **pruned** tool subset to the shared executor `RwLock`.
837 ///
838 /// Must only be called **after** `sync_executor_tools` has established the full tool set for
839 /// the current turn. `self.tools` (the full set) is intentionally **not** modified.
840 ///
841 /// This method must **NOT** call `sync_executor_tools` internally — doing so would overwrite
842 /// the pruned subset with the full set. See also: `sync_executor_tools`.
843 pub(crate) fn apply_pruned_tools(&self, pruned: Vec<zeph_mcp::McpTool>) {
844 debug_assert!(
845 pruned.iter().all(|p| self
846 .tools
847 .iter()
848 .any(|t| t.server_id == p.server_id && t.name == p.name)),
849 "pruned set must be a subset of self.tools"
850 );
851 if let Some(ref shared) = self.shared_tools {
852 *shared.write() = pruned;
853 }
854 }
855
856 #[cfg(test)]
857 pub(crate) fn tool_count(&self) -> usize {
858 self.tools.len()
859 }
860}
861
862impl IndexState {
863 #[tracing::instrument(name = "core.index.fetch_code_rag", skip(self), fields(%query, token_budget))]
864 pub(crate) async fn fetch_code_rag(
865 &self,
866 query: &str,
867 token_budget: usize,
868 ) -> Result<Option<String>, crate::agent::error::AgentError> {
869 let Some(retriever) = &self.retriever else {
870 return Ok(None);
871 };
872 if token_budget == 0 {
873 return Ok(None);
874 }
875
876 let result = retriever
877 .retrieve(query, token_budget)
878 .await
879 .map_err(|e| crate::agent::error::AgentError::ContextError(format!("{e:#}")))?;
880 let context_text = zeph_index::retriever::format_as_context(&result);
881
882 if context_text.is_empty() {
883 Ok(None)
884 } else {
885 tracing::debug!(
886 strategy = ?result.strategy,
887 chunks = result.chunks.len(),
888 tokens = result.total_tokens,
889 "code context fetched"
890 );
891 Ok(Some(context_text))
892 }
893 }
894}
895
896impl DebugState {
897 pub(crate) fn start_iteration_span(&mut self, iteration_index: usize, text: &str) {
898 if let Some(ref mut tc) = self.trace_collector {
899 tc.begin_iteration(iteration_index, text);
900 self.current_iteration_span_id = tc.current_iteration_span_id(iteration_index);
901 }
902 }
903
904 pub(crate) fn end_iteration_span(
905 &mut self,
906 iteration_index: usize,
907 status: crate::debug_dump::trace::SpanStatus,
908 ) {
909 if let Some(ref mut tc) = self.trace_collector {
910 tc.end_iteration(iteration_index, status);
911 }
912 self.current_iteration_span_id = None;
913 }
914
915 pub(crate) fn switch_format(&mut self, new_format: crate::debug_dump::DumpFormat) {
916 let was_trace = self.dump_format == crate::debug_dump::DumpFormat::Trace;
917 let now_trace = new_format == crate::debug_dump::DumpFormat::Trace;
918
919 if now_trace
920 && !was_trace
921 && let Some(ref dump_dir) = self.dump_dir.clone()
922 {
923 let service_name = self.trace_service_name.clone();
924 let redact = self.trace_redact;
925 let trace_metadata = self.trace_metadata.clone();
926 match crate::debug_dump::trace::TracingCollector::new(
927 dump_dir.as_path(),
928 &service_name,
929 trace_metadata,
930 redact,
931 None,
932 ) {
933 Ok(collector) => {
934 self.trace_collector = Some(collector);
935 }
936 Err(e) => {
937 tracing::warn!(error = %e, "failed to create TracingCollector on format switch");
938 }
939 }
940 }
941 if was_trace
942 && !now_trace
943 && let Some(mut tc) = self.trace_collector.take()
944 {
945 tc.finish();
946 }
947
948 self.dump_format = new_format;
949 }
950
951 pub(crate) fn write_chat_debug_dump(
952 &self,
953 dump_id: Option<u32>,
954 result: &zeph_llm::provider::ChatResponse,
955 pii_filter: &zeph_sanitizer::pii::PiiFilter,
956 ) {
957 let Some((d, id)) = self.debug_dumper.as_ref().zip(dump_id) else {
958 return;
959 };
960 let raw = match result {
961 zeph_llm::provider::ChatResponse::Text(t) => t.clone(),
962 zeph_llm::provider::ChatResponse::ToolUse {
963 text, tool_calls, ..
964 } => {
965 let calls = serde_json::to_string_pretty(tool_calls).unwrap_or_default();
966 format!(
967 "{}\n\n---TOOL_CALLS---\n{calls}",
968 text.as_deref().unwrap_or("")
969 )
970 }
971 _ => String::new(),
972 };
973 let text = if pii_filter.is_enabled() {
974 pii_filter.scrub(&raw).into_owned()
975 } else {
976 raw
977 };
978 d.dump_response(id, &text);
979 }
980}
981
982impl Default for McpState {
983 fn default() -> Self {
984 Self {
985 tools: Vec::new(),
986 registry: None,
987 manager: None,
988 allowed_commands: Vec::new(),
989 max_dynamic: 10,
990 elicitation_rx: None,
991 shared_tools: None,
992 tool_rx: None,
993 server_outcomes: Vec::new(),
994 pruning_cache: zeph_mcp::PruningCache::new(),
995 pruning_provider: None,
996 pruning_enabled: false,
997 pruning_params: zeph_mcp::PruningParams::default(),
998 semantic_index: None,
999 discovery_strategy: zeph_mcp::ToolDiscoveryStrategy::default(),
1000 discovery_params: zeph_mcp::DiscoveryParams::default(),
1001 discovery_provider: None,
1002 elicitation_warn_sensitive_fields: true,
1003 pending_semantic_rebuild: false,
1004 }
1005 }
1006}
1007
1008impl Default for IndexState {
1009 fn default() -> Self {
1010 Self {
1011 retriever: None,
1012 repo_map_tokens: 0,
1013 cached_repo_map: None,
1014 repo_map_ttl: std::time::Duration::from_mins(5),
1015 }
1016 }
1017}
1018
1019impl Default for DebugState {
1020 fn default() -> Self {
1021 Self {
1022 debug_dumper: None,
1023 dump_format: crate::debug_dump::DumpFormat::default(),
1024 trace_collector: None,
1025 iteration_counter: 0,
1026 anomaly_detector: None,
1027 reasoning_model_warning: true,
1028 logging_config: crate::config::LoggingConfig::default(),
1029 dump_dir: None,
1030 trace_service_name: String::new(),
1031 trace_redact: true,
1032 trace_metadata: std::collections::HashMap::new(),
1033 current_iteration_span_id: None,
1034 }
1035 }
1036}
1037
1038impl Default for FeedbackState {
1039 fn default() -> Self {
1040 Self {
1041 detector: zeph_agent_feedback::FeedbackDetector::new(0.6),
1042 judge: None,
1043 llm_classifier: None,
1044 }
1045 }
1046}
1047
1048/// Goal lifecycle feature configuration stored in `RuntimeConfig`.
1049#[derive(Debug, Clone)]
1050pub(crate) struct GoalRuntimeConfig {
1051 /// Whether goal tracking is enabled.
1052 pub(crate) enabled: bool,
1053 /// Maximum allowed length (in Unicode chars) of goal text at creation.
1054 pub(crate) max_text_chars: usize,
1055 /// Default token budget for new goals (`None` = unlimited).
1056 pub(crate) default_token_budget: Option<u64>,
1057 /// Whether to inject the active goal block into the volatile system prompt region.
1058 pub(crate) inject_into_system_prompt: bool,
1059 /// Whether autonomous multi-turn execution is permitted.
1060 pub(crate) autonomous_enabled: bool,
1061 /// Maximum turns per autonomous session.
1062 pub(crate) autonomous_max_turns: u32,
1063 /// Provider name for the supervisor LLM call (`None` = use main provider).
1064 pub(crate) supervisor_provider: Option<zeph_config::ProviderName>,
1065 /// Turns between supervisor verification checks.
1066 pub(crate) verify_interval: u32,
1067 /// Timeout for a single supervisor call in seconds.
1068 pub(crate) supervisor_timeout_secs: u64,
1069 /// Consecutive stuck-detection threshold before aborting.
1070 pub(crate) max_stuck_count: u32,
1071 /// Wall-clock timeout in seconds for a single autonomous LLM turn.
1072 pub(crate) autonomous_turn_timeout_secs: u64,
1073 /// Maximum consecutive supervisor verification failures before pausing the session.
1074 pub(crate) max_supervisor_fail_count: u32,
1075}
1076
1077impl Default for GoalRuntimeConfig {
1078 fn default() -> Self {
1079 Self {
1080 enabled: false,
1081 max_text_chars: 2000,
1082 default_token_budget: None,
1083 inject_into_system_prompt: true,
1084 autonomous_enabled: false,
1085 autonomous_max_turns: 20,
1086 supervisor_provider: None,
1087 verify_interval: 5,
1088 supervisor_timeout_secs: 30,
1089 max_stuck_count: 3,
1090 autonomous_turn_timeout_secs: 300,
1091 max_supervisor_fail_count: 3,
1092 }
1093 }
1094}
1095
1096impl Default for RuntimeConfig {
1097 fn default() -> Self {
1098 Self {
1099 security: SecurityConfig::default(),
1100 timeouts: TimeoutConfig::default(),
1101 model_name: String::new(),
1102 active_provider_name: String::new(),
1103 permission_policy: zeph_tools::PermissionPolicy::default(),
1104 redact_credentials: true,
1105 rate_limiter: super::rate_limiter::ToolRateLimiter::new(
1106 super::rate_limiter::RateLimitConfig::default(),
1107 ),
1108 semantic_cache_enabled: false,
1109 semantic_cache_threshold: 0.95,
1110 semantic_cache_max_candidates: 10,
1111 dependency_config: zeph_tools::DependencyConfig::default(),
1112 adversarial_policy_info: None,
1113 spawn_depth: 0,
1114 budget_hint_enabled: true,
1115 channel_skills: zeph_config::ChannelSkillsConfig::default(),
1116 channel_tool_allowlist: None,
1117 loop_min_interval_secs: 5,
1118 layers: Vec::new(),
1119 supervisor_config: crate::config::TaskSupervisorConfig::default(),
1120 recap_config: zeph_config::RecapConfig::default(),
1121 acp_config: zeph_config::AcpConfig::default(),
1122 auto_recap_shown: false,
1123 msg_count_at_resume: 0,
1124 acp_subagent_spawn_fn: None,
1125 channel_type: String::new(),
1126 provider_persistence_enabled: true,
1127 persist_provider_overrides_enabled: true,
1128 restoring_provider: false,
1129 goals: GoalRuntimeConfig::default(),
1130 }
1131 }
1132}
1133
1134impl SessionState {
1135 pub(crate) fn new() -> Self {
1136 Self {
1137 env_context: EnvironmentContext::gather(""),
1138 last_assistant_at: None,
1139 response_cache: None,
1140 parent_tool_use_id: None,
1141 current_turn_intent: None,
1142 status_tx: None,
1143 lsp_hooks: None,
1144 policy_config: None,
1145 hooks_config: HooksConfigSnapshot::default(),
1146 is_guest_context: false,
1147 }
1148 }
1149}
1150
1151impl SkillState {
1152 pub(crate) fn new(
1153 registry: Arc<RwLock<SkillRegistry>>,
1154 matcher: Option<SkillMatcherBackend>,
1155 max_active_skills: usize,
1156 last_skills_prompt: String,
1157 ) -> Self {
1158 Self {
1159 registry,
1160 trust_snapshot: Arc::new(RwLock::new(HashMap::new())),
1161 skill_paths: Vec::new(),
1162 managed_dir: None,
1163 trust_config: crate::config::TrustConfig::default(),
1164 matcher,
1165 max_active_skills,
1166 disambiguation_threshold: 0.20,
1167 min_injection_score: 0.20,
1168 embedding_model: String::new(),
1169 skill_reload_rx: None,
1170 plugin_dirs_supplier: None,
1171 active_skill_names: Vec::new(),
1172 last_skills_prompt,
1173 prompt_mode: crate::config::SkillPromptMode::Auto,
1174 available_custom_secrets: HashMap::new(),
1175 cosine_weight: 0.7,
1176 hybrid_search: true,
1177 bm25_alpha: 0.7,
1178 bm25_index: None,
1179 two_stage_matching: false,
1180 confusability_threshold: 0.0,
1181 rl_head: None,
1182 rl_weight: 0.3,
1183 rl_warmup_updates: 50,
1184 generation_output_dir: None,
1185 query_rewrite_provider_name: String::new(),
1186 generation_provider_name: String::new(),
1187 disambiguate_provider_name: String::new(),
1188 generation_timeout_ms: 60_000,
1189 skill_evaluator: None,
1190 eval_weights: zeph_skills::evaluator::EvaluationWeights::default(),
1191 eval_threshold: 0.60,
1192 group_structured: false,
1193 support_similarity_threshold: 0.50,
1194 semantic_scan: false,
1195 semantic_scan_provider: String::new(),
1196 }
1197 }
1198}
1199
1200impl LifecycleState {
1201 pub(crate) fn new() -> Self {
1202 let (_tx, rx) = watch::channel(false);
1203 Self {
1204 shutdown: rx,
1205 start_time: Instant::now(),
1206 cancel_signal: Arc::new(tokio::sync::Notify::new()),
1207 cancel_token: tokio_util::sync::CancellationToken::new(),
1208 cancel_bridge_handle: None,
1209 config_path: None,
1210 config_reload_rx: None,
1211 plugins_dir: PathBuf::new(),
1212 startup_shell_overlay: ShellOverlaySnapshot::default(),
1213 shell_policy_handle: None,
1214 warmup_ready: None,
1215 update_notify_rx: None,
1216 custom_task_rx: None,
1217 user_loop: None,
1218 last_known_cwd: std::env::current_dir().unwrap_or_default(),
1219 file_changed_rx: None,
1220 file_watcher: None,
1221 supervisor: super::agent_supervisor::BackgroundSupervisor::new(
1222 &crate::config::TaskSupervisorConfig::default(),
1223 None,
1224 ),
1225 notifier: None,
1226 turn_llm_requests: 0,
1227 last_no_providers_at: None,
1228 pending_background_completions: VecDeque::new(),
1229 background_completion_rx: None,
1230 shell_executor_handle: None,
1231 task_supervisor: Arc::new(zeph_common::TaskSupervisor::new(
1232 tokio_util::sync::CancellationToken::new(),
1233 )),
1234 }
1235 }
1236}
1237
1238impl ProviderState {
1239 pub(crate) fn new(initial_prompt_tokens: u64) -> Self {
1240 Self {
1241 summary_provider: None,
1242 provider_override: None,
1243 judge_provider: None,
1244 probe_provider: None,
1245 compress_provider: None,
1246 cached_prompt_tokens: initial_prompt_tokens,
1247 server_compaction_active: false,
1248 stt: None,
1249 provider_pool: Vec::new(),
1250 provider_config_snapshot: None,
1251 }
1252 }
1253}
1254
1255impl MetricsState {
1256 pub(crate) fn new(token_counter: Arc<zeph_memory::TokenCounter>) -> Self {
1257 Self {
1258 metrics_tx: None,
1259 cost_tracker: None,
1260 token_counter,
1261 extended_context: false,
1262 classifier_metrics: None,
1263 timing_window: std::collections::VecDeque::new(),
1264 pending_timings: crate::metrics::TurnTimings::default(),
1265 histogram_recorder: None,
1266 }
1267 }
1268}
1269
1270impl ExperimentState {
1271 pub(crate) fn new() -> Self {
1272 let (notify_tx, notify_rx) = tokio::sync::mpsc::channel::<String>(4);
1273 Self {
1274 config: crate::config::ExperimentConfig::default(),
1275 cancel: None,
1276 handle: None,
1277 baseline: zeph_experiments::ConfigSnapshot::default(),
1278 eval_provider: None,
1279 notify_rx: Some(notify_rx),
1280 notify_tx,
1281 }
1282 }
1283}
1284
1285pub(super) mod security;
1286pub(super) mod skill;
1287
1288#[cfg(test)]
1289mod tests;