Skip to main content

zeph_core/agent/state/
mod.rs

1// SPDX-FileCopyrightText: 2026 Andrei G <bug-ops>
2// SPDX-License-Identifier: MIT OR Apache-2.0
3
//! Sub-struct definitions for the `Agent` struct.
//!
//! Each struct groups a related cluster of `Agent` fields.
//! Most types are `pub(crate)`; `AdversarialPolicyInfo` and `ProviderConfigSnapshot` are `pub`.
8
9use std::collections::{HashMap, HashSet, VecDeque};
10use std::path::PathBuf;
11use std::sync::Arc;
12
13use parking_lot::RwLock;
14use std::time::Instant;
15
16use tokio::sync::{Notify, mpsc, watch};
17use tokio::task::JoinHandle;
18use tokio_util::sync::CancellationToken;
19use zeph_llm::any::AnyProvider;
20use zeph_llm::provider::Message;
21use zeph_llm::stt::SpeechToText;
22
23use crate::config::{ProviderEntry, SecurityConfig, SkillPromptMode, TimeoutConfig};
24use crate::config_watcher::ConfigEvent;
25use crate::context::EnvironmentContext;
26use crate::cost::CostTracker;
27use crate::file_watcher::FileChangedEvent;
28use crate::instructions::{InstructionBlock, InstructionEvent, InstructionReloadState};
29use crate::metrics::MetricsSnapshot;
30use crate::vault::Secret;
31use zeph_config;
32use zeph_memory::TokenCounter;
33use zeph_memory::semantic::SemanticMemory;
34use zeph_sanitizer::ContentSanitizer;
35use zeph_sanitizer::quarantine::QuarantinedSummarizer;
36use zeph_skills::matcher::SkillMatcherBackend;
37use zeph_skills::registry::SkillRegistry;
38use zeph_skills::watcher::SkillEvent;
39
40use super::message_queue::QueuedMessage;
41
42pub(crate) struct MemoryState {
43    pub(crate) memory: Option<Arc<SemanticMemory>>,
44    pub(crate) conversation_id: Option<zeph_memory::ConversationId>,
45    pub(crate) history_limit: u32,
46    pub(crate) recall_limit: usize,
47    pub(crate) summarization_threshold: usize,
48    pub(crate) cross_session_score_threshold: f32,
49    pub(crate) autosave_assistant: bool,
50    pub(crate) autosave_min_length: usize,
51    pub(crate) tool_call_cutoff: usize,
52    pub(crate) unsummarized_count: usize,
53    pub(crate) document_config: crate::config::DocumentConfig,
54    pub(crate) graph_config: crate::config::GraphConfig,
55    pub(crate) compression_guidelines_config: zeph_memory::CompressionGuidelinesConfig,
56    pub(crate) shutdown_summary: bool,
57    pub(crate) shutdown_summary_min_messages: usize,
58    pub(crate) shutdown_summary_max_messages: usize,
59    pub(crate) shutdown_summary_timeout_secs: u64,
60    /// When `true`, hard compaction uses `AnchoredSummary` (structured JSON) instead of
61    /// free-form prose. Falls back to prose on any LLM or validation failure.
62    pub(crate) structured_summaries: bool,
63    /// Top-1 semantic recall score from the most recent `prepare_context` cycle.
64    /// Used by MAR (Memory-Augmented Routing) to bias the bandit toward cheap providers
65    /// when memory confidence is high. Reset to `None` at the start of each turn.
66    pub(crate) last_recall_confidence: Option<f32>,
67    /// Session digest configuration (#2289).
68    pub(crate) digest_config: crate::config::DigestConfig,
69    /// Cached session digest text and its token count, loaded at session start.
70    pub(crate) cached_session_digest: Option<(String, usize)>,
71    /// Context assembly strategy (#2288).
72    pub(crate) context_strategy: crate::config::ContextStrategy,
73    /// Turn threshold for `Adaptive` strategy crossover (#2288).
74    pub(crate) crossover_turn_threshold: u32,
75    /// D-MEM RPE router. `Some` when `graph_config.rpe.enabled = true`.
76    /// Protected by `std::sync::Mutex` for non-async access from `maybe_spawn_graph_extraction`.
77    pub(crate) rpe_router: Option<std::sync::Mutex<zeph_memory::RpeRouter>>,
78    /// Goal text for the current user turn, derived from raw user input (#2483).
79    /// Passed to A-MAC admission control to enable goal-conditioned write gating.
80    /// Reset at the start of each user turn. `None` only before the first user message.
81    pub(crate) goal_text: Option<String>,
82    /// Persona memory configuration (#2461).
83    pub(crate) persona_config: zeph_config::PersonaConfig,
84    /// Trajectory-informed memory configuration (#2498).
85    pub(crate) trajectory_config: zeph_config::TrajectoryConfig,
86    /// Category-aware memory configuration (#2428).
87    pub(crate) category_config: zeph_config::CategoryConfig,
88    /// `TiMem` temporal-hierarchical memory tree configuration (#2262).
89    pub(crate) tree_config: zeph_config::TreeConfig,
90    /// Time-based microcompact configuration (#2699).
91    pub(crate) microcompact_config: zeph_config::MicrocompactConfig,
92    /// autoDream configuration (#2697).
93    pub(crate) autodream_config: zeph_config::AutoDreamConfig,
94    /// `MagicDocs` configuration (#2702).
95    pub(crate) magic_docs_config: zeph_config::MagicDocsConfig,
96    /// Background tree consolidation loop handle — kept alive for the agent's lifetime (#2262).
97    /// `None` when tree consolidation is disabled or memory is not initialized.
98    pub(crate) tree_consolidation_handle: Option<tokio::task::JoinHandle<()>>,
99    /// autoDream session state (#2697). Tracks session count and last consolidation time.
100    pub(crate) autodream: super::autodream::AutoDreamState,
101    /// `MagicDocs` session state (#2702). Tracks registered doc paths and last update turn.
102    pub(crate) magic_docs: super::magic_docs::MagicDocsState,
103}
104
105pub(crate) struct SkillState {
106    pub(crate) registry: Arc<RwLock<SkillRegistry>>,
107    pub(crate) skill_paths: Vec<PathBuf>,
108    pub(crate) managed_dir: Option<PathBuf>,
109    pub(crate) trust_config: crate::config::TrustConfig,
110    pub(crate) matcher: Option<SkillMatcherBackend>,
111    pub(crate) max_active_skills: usize,
112    pub(crate) disambiguation_threshold: f32,
113    pub(crate) min_injection_score: f32,
114    pub(crate) embedding_model: String,
115    pub(crate) skill_reload_rx: Option<mpsc::Receiver<SkillEvent>>,
116    pub(crate) active_skill_names: Vec<String>,
117    pub(crate) last_skills_prompt: String,
118    pub(crate) prompt_mode: SkillPromptMode,
119    /// Custom secrets available at runtime: key=hyphenated name, value=secret.
120    pub(crate) available_custom_secrets: HashMap<String, Secret>,
121    pub(crate) cosine_weight: f32,
122    pub(crate) hybrid_search: bool,
123    pub(crate) bm25_index: Option<zeph_skills::bm25::Bm25Index>,
124    pub(crate) two_stage_matching: bool,
125    /// Threshold for confusability warnings (0.0 = disabled).
126    pub(crate) confusability_threshold: f32,
127    /// `SkillOrchestra` RL routing head. `Some` when `rl_routing_enabled = true` and
128    /// weights are loaded or initialized. `None` when RL routing is disabled.
129    pub(crate) rl_head: Option<zeph_skills::rl_head::RoutingHead>,
130    /// Blend weight for RL routing: `final = (1-rl_weight)*cosine + rl_weight*rl_score`.
131    pub(crate) rl_weight: f32,
132    /// Skip RL blending for the first N updates (cold-start warmup).
133    pub(crate) rl_warmup_updates: u32,
134    /// Directory where `/skill create` writes generated skills.
135    /// Defaults to `managed_dir` if `None`.
136    pub(crate) generation_output_dir: Option<std::path::PathBuf>,
137    /// Provider name for `/skill create` generation. Empty = primary.
138    pub(crate) generation_provider_name: String,
139}
140
141pub(crate) struct McpState {
142    pub(crate) tools: Vec<zeph_mcp::McpTool>,
143    pub(crate) registry: Option<zeph_mcp::McpToolRegistry>,
144    pub(crate) manager: Option<std::sync::Arc<zeph_mcp::McpManager>>,
145    pub(crate) allowed_commands: Vec<String>,
146    pub(crate) max_dynamic: usize,
147    /// Receives elicitation requests from MCP server handlers during tool execution.
148    /// When `Some`, the agent loop must process these concurrently with tool result awaiting
149    /// to avoid deadlock (tool result waits for elicitation, elicitation waits for agent loop).
150    pub(crate) elicitation_rx: Option<tokio::sync::mpsc::Receiver<zeph_mcp::ElicitationEvent>>,
151    /// Shared with `McpToolExecutor` so native `tool_use` sees the current tool list.
152    ///
153    /// Two methods write to this `RwLock` — ordering matters:
154    /// - `sync_executor_tools()`: writes the **full** `self.tools` set.
155    /// - `apply_pruned_tools()`: writes the **pruned** subset (used after pruning).
156    ///
157    /// Within a turn, `sync_executor_tools` must always run **before**
158    /// `apply_pruned_tools`.  The normal call order guarantees this: tool-list
159    /// change events call `sync_executor_tools` (inside `check_tool_refresh`,
160    /// `handle_mcp_add`, `handle_mcp_remove`), and pruning runs later inside
161    /// `rebuild_system_prompt`.  See also: `apply_pruned_tools`.
162    pub(crate) shared_tools: Option<Arc<RwLock<Vec<zeph_mcp::McpTool>>>>,
163    /// Receives full flattened tool list after any `tools/list_changed` notification.
164    pub(crate) tool_rx: Option<tokio::sync::watch::Receiver<Vec<zeph_mcp::McpTool>>>,
165    /// Per-server connection outcomes from the initial `connect_all()` call.
166    pub(crate) server_outcomes: Vec<zeph_mcp::ServerConnectOutcome>,
167    /// Per-message cache for MCP tool pruning results (#2298).
168    ///
169    /// Reset at the start of each user turn and whenever the MCP tool list
170    /// changes (via `tools/list_changed`, `/mcp add`, or `/mcp remove`).
171    pub(crate) pruning_cache: zeph_mcp::PruningCache,
172    /// Dedicated provider for MCP tool pruning LLM calls.
173    ///
174    /// `None` means fall back to the agent's primary provider.
175    /// Resolved from `[[llm.providers]]` at build time using `pruning_provider`
176    /// from `ToolPruningConfig`.
177    pub(crate) pruning_provider: Option<zeph_llm::any::AnyProvider>,
178    /// Whether MCP tool pruning is enabled.  Mirrors `ToolPruningConfig::enabled`.
179    pub(crate) pruning_enabled: bool,
180    /// Pruning parameters snapshot.  Derived from `ToolPruningConfig` at build time.
181    pub(crate) pruning_params: zeph_mcp::PruningParams,
182    /// Pre-computed semantic tool index for embedding-based discovery (#2321).
183    ///
184    /// Built at connect time via `rebuild_semantic_index()`, rebuilt on tool list change.
185    /// `None` when strategy is not `Embedding` or when build failed (fallback to all tools).
186    pub(crate) semantic_index: Option<zeph_mcp::SemanticToolIndex>,
187    /// Active discovery strategy and parameters.  Derived from `ToolDiscoveryConfig`.
188    pub(crate) discovery_strategy: zeph_mcp::ToolDiscoveryStrategy,
189    /// Discovery parameters snapshot.  Derived from `ToolDiscoveryConfig` at build time.
190    pub(crate) discovery_params: zeph_mcp::DiscoveryParams,
191    /// Dedicated embedding provider for tool discovery.  `None` = fall back to the
192    /// agent's primary embedding provider.
193    pub(crate) discovery_provider: Option<zeph_llm::any::AnyProvider>,
194    /// When `true`, show a security warning before prompting for fields whose names
195    /// match sensitive patterns (password, token, secret, key, credential, etc.).
196    pub(crate) elicitation_warn_sensitive_fields: bool,
197}
198
199pub(crate) struct IndexState {
200    pub(crate) retriever: Option<std::sync::Arc<zeph_index::retriever::CodeRetriever>>,
201    pub(crate) repo_map_tokens: usize,
202    pub(crate) cached_repo_map: Option<(String, std::time::Instant)>,
203    pub(crate) repo_map_ttl: std::time::Duration,
204}
205
/// Point-in-time view of the adversarial policy gate configuration, kept for status display.
#[derive(Debug, Clone)]
pub struct AdversarialPolicyInfo {
    pub provider: String,
    pub policy_count: usize,
    pub fail_open: bool,
}
213
214pub(crate) struct RuntimeConfig {
215    pub(crate) security: SecurityConfig,
216    pub(crate) timeouts: TimeoutConfig,
217    pub(crate) model_name: String,
218    /// Configured name from `[[llm.providers]]` (the `name` field), set at startup and on
219    /// `/provider` switch. Falls back to the provider type string when empty.
220    pub(crate) active_provider_name: String,
221    pub(crate) permission_policy: zeph_tools::PermissionPolicy,
222    pub(crate) redact_credentials: bool,
223    pub(crate) rate_limiter: super::rate_limiter::ToolRateLimiter,
224    pub(crate) semantic_cache_enabled: bool,
225    pub(crate) semantic_cache_threshold: f32,
226    pub(crate) semantic_cache_max_candidates: u32,
227    /// Dependency config snapshot stored for per-turn boost parameters.
228    pub(crate) dependency_config: zeph_tools::DependencyConfig,
229    /// Adversarial policy gate runtime info for /status display.
230    pub(crate) adversarial_policy_info: Option<AdversarialPolicyInfo>,
231    /// Current spawn depth of this agent instance (0 = top-level, 1 = first sub-agent, etc.).
232    /// Used by `build_spawn_context()` to propagate depth to children.
233    pub(crate) spawn_depth: u32,
234    /// Inject `<budget>` XML into the volatile system prompt section (#2267).
235    pub(crate) budget_hint_enabled: bool,
236    /// Per-channel skill allowlist. Skills not matching the allowlist are excluded from the
237    /// prompt. An empty `allowed` list means all skills are permitted (default).
238    pub(crate) channel_skills: zeph_config::ChannelSkillsConfig,
239    /// Runtime middleware layers for LLM calls and tool dispatch (#2286).
240    ///
241    /// Default: empty vec (zero-cost — loops never iterate).
242    pub(crate) layers: Vec<std::sync::Arc<dyn crate::runtime_layer::RuntimeLayer>>,
243}
244
245/// Groups feedback detection subsystems: correction detector, judge detector, and LLM classifier.
246pub(crate) struct FeedbackState {
247    pub(crate) detector: super::feedback_detector::FeedbackDetector,
248    pub(crate) judge: Option<super::feedback_detector::JudgeDetector>,
249    /// LLM-backed zero-shot classifier for `DetectorMode::Model`.
250    /// When `Some`, `spawn_judge_correction_check` uses this instead of `JudgeDetector`.
251    pub(crate) llm_classifier: Option<zeph_llm::classifier::llm::LlmClassifier>,
252}
253
254/// Groups security-related subsystems (sanitizer, quarantine, exfiltration guard).
255pub(crate) struct SecurityState {
256    pub(crate) sanitizer: ContentSanitizer,
257    pub(crate) quarantine_summarizer: Option<QuarantinedSummarizer>,
258    /// Whether this agent session is serving an ACP client.
259    /// When `true` and `mcp_to_acp_boundary` is enabled, MCP tool results
260    /// receive unconditional quarantine and cross-boundary audit logging.
261    pub(crate) is_acp_session: bool,
262    pub(crate) exfiltration_guard: zeph_sanitizer::exfiltration::ExfiltrationGuard,
263    pub(crate) flagged_urls: HashSet<String>,
264    /// URLs explicitly provided by the user across all turns in this session.
265    /// Populated from raw user message text; cleared on `/clear`.
266    /// Shared with `UrlGroundingVerifier` to check `fetch`/`web_scrape` calls at dispatch time.
267    pub(crate) user_provided_urls: Arc<RwLock<HashSet<String>>>,
268    pub(crate) pii_filter: zeph_sanitizer::pii::PiiFilter,
269    /// NER classifier for PII detection (`classifiers.ner_model`). When `Some`, the PII path
270    /// runs both regex (`pii_filter`) and NER, then merges spans before redaction.
271    /// `None` when `classifiers` feature is disabled or `classifiers.enabled = false`.
272    #[cfg(feature = "classifiers")]
273    pub(crate) pii_ner_backend: Option<std::sync::Arc<dyn zeph_llm::classifier::ClassifierBackend>>,
274    /// Per-call timeout for the NER PII classifier in milliseconds.
275    #[cfg(feature = "classifiers")]
276    pub(crate) pii_ner_timeout_ms: u64,
277    /// Maximum number of bytes passed to the NER PII classifier per call.
278    ///
279    /// Large tool outputs (e.g. `search_code`) can produce 150+ `DeBERTa` chunks and exceed
280    /// the per-call timeout. Input is truncated at a valid UTF-8 boundary before classification.
281    #[cfg(feature = "classifiers")]
282    pub(crate) pii_ner_max_chars: usize,
283    /// Circuit-breaker threshold: number of consecutive timeouts before NER is disabled.
284    /// `0` means the circuit breaker is disabled (NER is always attempted).
285    #[cfg(feature = "classifiers")]
286    pub(crate) pii_ner_circuit_breaker_threshold: u32,
287    /// Number of consecutive NER timeouts observed since the last successful call.
288    #[cfg(feature = "classifiers")]
289    pub(crate) pii_ner_consecutive_timeouts: u32,
290    /// Set to `true` when the circuit breaker trips. NER is skipped for the rest of the session.
291    #[cfg(feature = "classifiers")]
292    pub(crate) pii_ner_tripped: bool,
293    pub(crate) memory_validator: zeph_sanitizer::memory_validation::MemoryWriteValidator,
294    /// LLM-based prompt injection pre-screener (opt-in).
295    pub(crate) guardrail: Option<zeph_sanitizer::guardrail::GuardrailFilter>,
296    /// Post-LLM response verification layer.
297    pub(crate) response_verifier: zeph_sanitizer::response_verifier::ResponseVerifier,
298    /// Temporal causal IPI analyzer (opt-in, disabled when `None`).
299    pub(crate) causal_analyzer: Option<zeph_sanitizer::causal_ipi::TurnCausalAnalyzer>,
300}
301
302/// Groups debug/diagnostics subsystems (dumper, trace collector, anomaly detector, logging config).
303pub(crate) struct DebugState {
304    pub(crate) debug_dumper: Option<crate::debug_dump::DebugDumper>,
305    pub(crate) dump_format: crate::debug_dump::DumpFormat,
306    pub(crate) trace_collector: Option<crate::debug_dump::trace::TracingCollector>,
307    /// Monotonically increasing counter for `process_user_message` calls.
308    /// Used to key spans in `trace_collector.active_iterations`.
309    pub(crate) iteration_counter: usize,
310    pub(crate) anomaly_detector: Option<zeph_tools::AnomalyDetector>,
311    /// Whether to emit `reasoning_amplification` warnings for quality failures from reasoning
312    /// models. Mirrors `AnomalyConfig::reasoning_model_warning`. Default: `true`.
313    pub(crate) reasoning_model_warning: bool,
314    pub(crate) logging_config: crate::config::LoggingConfig,
315    /// Base dump directory — stored so `/dump-format trace` can create a `TracingCollector` (CR-04).
316    pub(crate) dump_dir: Option<PathBuf>,
317    /// Service name for `TracingCollector` created via runtime format switch (CR-04).
318    pub(crate) trace_service_name: String,
319    /// Whether to redact in `TracingCollector` created via runtime format switch (CR-04).
320    pub(crate) trace_redact: bool,
321    /// Span ID of the currently executing iteration — used by LLM/tool span wiring (CR-01).
322    /// Set to `Some` at the start of `process_user_message`, cleared at end.
323    pub(crate) current_iteration_span_id: Option<[u8; 8]>,
324}
325
326/// Groups agent lifecycle state: shutdown signaling, timing, and I/O notification channels.
327pub(crate) struct LifecycleState {
328    pub(crate) shutdown: watch::Receiver<bool>,
329    pub(crate) start_time: Instant,
330    pub(crate) cancel_signal: Arc<Notify>,
331    pub(crate) cancel_token: CancellationToken,
332    /// Handle to the cancel bridge task spawned each turn. Aborted before a new one is created
333    /// to prevent unbounded task accumulation across turns.
334    pub(crate) cancel_bridge_handle: Option<JoinHandle<()>>,
335    pub(crate) config_path: Option<PathBuf>,
336    pub(crate) config_reload_rx: Option<mpsc::Receiver<ConfigEvent>>,
337    pub(crate) warmup_ready: Option<watch::Receiver<bool>>,
338    pub(crate) update_notify_rx: Option<mpsc::Receiver<String>>,
339    pub(crate) custom_task_rx: Option<mpsc::Receiver<String>>,
340    /// Last known process cwd. Compared after each tool call to detect changes.
341    pub(crate) last_known_cwd: PathBuf,
342    /// Receiver for file-change events from `FileChangeWatcher`. `None` when no paths configured.
343    pub(crate) file_changed_rx: Option<mpsc::Receiver<FileChangedEvent>>,
344    /// Keeps the `FileChangeWatcher` alive for the agent's lifetime. Dropping it aborts the watcher task.
345    pub(crate) file_watcher: Option<crate::file_watcher::FileChangeWatcher>,
346}
347
/// The minimum configuration required to rebuild a provider at runtime via `/provider <name>`.
///
/// Secrets are held as plain strings because [`Secret`] intentionally does not implement
/// `Clone`. They are wrapped in `Secret` again when handed to `build_provider_for_switch`.
pub struct ProviderConfigSnapshot {
    pub claude_api_key: Option<String>,
    pub openai_api_key: Option<String>,
    pub gemini_api_key: Option<String>,
    pub compatible_api_keys: HashMap<String, String>,
    pub llm_request_timeout_secs: u64,
    pub embedding_model: String,
}
360
361/// Groups provider-related state: alternate providers, runtime switching, and compaction flags.
362pub(crate) struct ProviderState {
363    pub(crate) summary_provider: Option<AnyProvider>,
364    /// Shared slot for runtime model switching; set by external caller (e.g. ACP).
365    pub(crate) provider_override: Option<Arc<RwLock<Option<AnyProvider>>>>,
366    pub(crate) judge_provider: Option<AnyProvider>,
367    /// Dedicated provider for compaction probe LLM calls. Falls back to `summary_provider`
368    /// (or primary) when `None`.
369    pub(crate) probe_provider: Option<AnyProvider>,
370    /// Dedicated provider for `compress_context` LLM calls (#2356).
371    /// Falls back to the primary provider when `None`.
372    pub(crate) compress_provider: Option<AnyProvider>,
373    pub(crate) cached_prompt_tokens: u64,
374    /// Whether the active provider has server-side compaction enabled (Claude compact-2026-01-12).
375    /// When true, client-side compaction is skipped.
376    pub(crate) server_compaction_active: bool,
377    pub(crate) stt: Option<Box<dyn SpeechToText>>,
378    /// Snapshot of `[[llm.providers]]` entries for runtime `/provider` switching.
379    pub(crate) provider_pool: Vec<ProviderEntry>,
380    /// Resolved secrets and timeout settings needed to reconstruct providers at runtime.
381    pub(crate) provider_config_snapshot: Option<ProviderConfigSnapshot>,
382}
383
384/// Groups metrics and cost tracking state.
385pub(crate) struct MetricsState {
386    pub(crate) metrics_tx: Option<watch::Sender<MetricsSnapshot>>,
387    pub(crate) cost_tracker: Option<CostTracker>,
388    pub(crate) token_counter: Arc<TokenCounter>,
389    /// Set to `true` when Claude extended context (`enable_extended_context = true`) is active.
390    /// Read from config at build time, not derived from provider internals.
391    pub(crate) extended_context: bool,
392    /// Shared classifier latency ring buffer. Populated by `ContentSanitizer` (injection, PII)
393    /// and `LlmClassifier` (feedback). `None` when classifiers are not configured.
394    pub(crate) classifier_metrics: Option<Arc<zeph_llm::ClassifierMetrics>>,
395}
396
397/// Groups task orchestration and subagent state.
398#[derive(Default)]
399pub(crate) struct OrchestrationState {
400    /// On `OrchestrationState` (not `ProviderState`) because this provider is used exclusively
401    /// by `LlmPlanner` during orchestration, not shared across subsystems.
402    pub(crate) planner_provider: Option<AnyProvider>,
403    /// Provider for `PlanVerifier` LLM calls. `None` falls back to the primary provider.
404    /// On `OrchestrationState` for the same reason as `planner_provider`.
405    pub(crate) verify_provider: Option<AnyProvider>,
406    /// Graph waiting for `/plan confirm` before execution starts.
407    pub(crate) pending_graph: Option<zeph_orchestration::TaskGraph>,
408    /// Cancellation token for the currently executing plan. `None` when no plan is running.
409    /// Created fresh in `handle_plan_confirm()`, cancelled in `handle_plan_cancel()`.
410    ///
411    /// # Known limitation
412    ///
413    /// Token plumbing is ready; the delivery path requires the agent message loop to be
414    /// restructured so `/plan cancel` can be received while `run_scheduler_loop` holds
415    /// `&mut self`. See follow-up issue #1603 (SEC-M34-002).
416    pub(crate) plan_cancel_token: Option<CancellationToken>,
417    /// Manages spawned sub-agents.
418    pub(crate) subagent_manager: Option<zeph_subagent::SubAgentManager>,
419    pub(crate) subagent_config: crate::config::SubAgentConfig,
420    pub(crate) orchestration_config: crate::config::OrchestrationConfig,
421    /// Lazily initialized plan template cache. `None` until first use or when
422    /// memory (`SQLite`) is unavailable.
423    pub(crate) plan_cache: Option<zeph_orchestration::PlanCache>,
424    /// Goal embedding from the most recent `plan_with_cache()` call. Consumed by
425    /// `finalize_plan_execution()` to cache the completed plan template.
426    pub(crate) pending_goal_embedding: Option<Vec<f32>>,
427}
428
429/// Groups instruction hot-reload state.
430#[derive(Default)]
431pub(crate) struct InstructionState {
432    pub(crate) blocks: Vec<InstructionBlock>,
433    pub(crate) reload_rx: Option<mpsc::Receiver<InstructionEvent>>,
434    pub(crate) reload_state: Option<InstructionReloadState>,
435}
436
437/// Groups experiment feature state (gated behind `experiments` feature flag).
438pub(crate) struct ExperimentState {
439    pub(crate) config: crate::config::ExperimentConfig,
440    /// Cancellation token for a running experiment session. `Some` means an experiment is active.
441    pub(crate) cancel: Option<tokio_util::sync::CancellationToken>,
442    /// Pre-built config snapshot used as the experiment baseline (agent path).
443    pub(crate) baseline: zeph_experiments::ConfigSnapshot,
444    /// Dedicated judge provider for evaluation. When `Some`, the evaluator uses this provider
445    /// instead of the agent's primary provider, eliminating self-judge bias.
446    pub(crate) eval_provider: Option<AnyProvider>,
447    /// Receives completion/error messages from the background experiment engine task.
448    /// Always present so the select! branch compiles unconditionally.
449    pub(crate) notify_rx: Option<tokio::sync::mpsc::Receiver<String>>,
450    /// Sender end paired with `experiment_notify_rx`. Cloned into the background task.
451    pub(crate) notify_tx: tokio::sync::mpsc::Sender<String>,
452}
453
/// Result produced by a background subgoal extraction LLM call.
pub(crate) struct SubgoalExtractionResult {
    /// The subgoal the agent is currently working toward.
    pub(crate) current: String,
    /// The subgoal that was just completed, present when the LLM detected a transition
    /// (`COMPLETED:` non-NONE).
    pub(crate) completed: Option<String>,
}
461
462/// Groups context-compression feature state (gated behind `context-compression` feature flag).
463#[derive(Default)]
464pub(crate) struct CompressionState {
465    /// Cached task goal for TaskAware/MIG pruning. Set by `maybe_compact()`,
466    /// invalidated when the last user message hash changes.
467    pub(crate) current_task_goal: Option<String>,
468    /// Hash of the last user message when `current_task_goal` was populated.
469    pub(crate) task_goal_user_msg_hash: Option<u64>,
470    /// Pending background task for goal extraction. Spawned fire-and-forget when the user message
471    /// hash changes; result applied at the start of the next Soft compaction (#1909).
472    pub(crate) pending_task_goal: Option<tokio::task::JoinHandle<Option<String>>>,
473    /// Pending `SideQuest` eviction result from the background LLM call spawned last turn.
474    /// Applied at the START of the next turn before compaction (PERF-1 fix).
475    pub(crate) pending_sidequest_result: Option<tokio::task::JoinHandle<Option<Vec<usize>>>>,
476    /// In-memory subgoal registry for `Subgoal`/`SubgoalMig` pruning strategies (#2022).
477    pub(crate) subgoal_registry: crate::agent::compaction_strategy::SubgoalRegistry,
478    /// Pending background subgoal extraction task.
479    pub(crate) pending_subgoal: Option<tokio::task::JoinHandle<Option<SubgoalExtractionResult>>>,
480    /// Hash of the last user message when subgoal extraction was scheduled.
481    pub(crate) subgoal_user_msg_hash: Option<u64>,
482}
483
484/// Groups runtime tool filtering, dependency tracking, and iteration bookkeeping.
485#[derive(Default)]
486pub(crate) struct ToolState {
487    /// Dynamic tool schema filter: pre-computed tool embeddings for per-turn filtering (#2020).
488    pub(crate) tool_schema_filter: Option<zeph_tools::ToolSchemaFilter>,
489    /// Cached filtered tool IDs for the current user turn.
490    pub(crate) cached_filtered_tool_ids: Option<HashSet<String>>,
491    /// Tool dependency graph for sequential tool availability (#2024).
492    pub(crate) dependency_graph: Option<zeph_tools::ToolDependencyGraph>,
493    /// Always-on tool IDs, mirrored from the tool schema filter for dependency gate bypass.
494    pub(crate) dependency_always_on: HashSet<String>,
495    /// Tool IDs that completed successfully in the current session.
496    pub(crate) completed_tool_ids: HashSet<String>,
497    /// Current tool loop iteration index within the active user turn.
498    pub(crate) current_tool_iteration: usize,
499}
500
501/// Groups per-session I/O and policy state.
502pub(crate) struct SessionState {
503    pub(crate) env_context: EnvironmentContext,
504    /// Timestamp of the last assistant message appended to context.
505    /// Used by time-based microcompact to compute session idle gap (#2699).
506    /// `None` before the first assistant response.
507    pub(crate) last_assistant_at: Option<Instant>,
508    pub(crate) response_cache: Option<std::sync::Arc<zeph_memory::ResponseCache>>,
509    /// Parent tool call ID when this agent runs as a subagent inside another agent session.
510    /// Propagated into every `LoopbackEvent::ToolStart` / `ToolOutput` so the IDE can build
511    /// a subagent hierarchy.
512    pub(crate) parent_tool_use_id: Option<String>,
513    /// Optional status channel for sending spinner/status messages to TUI or stderr.
514    pub(crate) status_tx: Option<tokio::sync::mpsc::UnboundedSender<String>>,
515    /// LSP context injection hooks. Fires after native tool execution, injects
516    /// diagnostics/hover notes as `Role::System` messages before the next LLM call.
517    pub(crate) lsp_hooks: Option<crate::lsp_hooks::LspHookRunner>,
518    /// Snapshot of the policy config for `/policy` command inspection.
519    pub(crate) policy_config: Option<zeph_tools::PolicyConfig>,
520    /// `CwdChanged` hook definitions extracted from `[hooks]` config.
521    pub(crate) hooks_config: HooksConfigSnapshot,
522}
523
524/// Extracted hook lists from `[hooks]` config, stored in `SessionState`.
525#[derive(Default)]
526pub(crate) struct HooksConfigSnapshot {
527    /// Hooks fired when working directory changes.
528    pub(crate) cwd_changed: Vec<zeph_config::HookDef>,
529    /// Hooks fired when a watched file changes.
530    pub(crate) file_changed_hooks: Vec<zeph_config::HookDef>,
531}
532
/// Groups message buffering and image staging state.
///
/// Besides the in-memory message list and the pending queue, this also carries
/// bookkeeping for persisted message IDs and for work deferred by tool pair
/// summarization.
pub(crate) struct MessageState {
    /// In-memory message list.
    pub(crate) messages: Vec<Message>,
    // `QueuedMessage` is described here as `pub(super)` in `message_queue`, which is
    // narrower than this `pub(crate)` field — hence the `private_interfaces` suppression.
    // NOTE(review): confirm `QueuedMessage`'s visibility; the earlier wording said it
    // matched this struct's visibility, which does not hold for a `pub(crate)` struct.
    #[allow(private_interfaces)]
    pub(crate) message_queue: VecDeque<QueuedMessage>,
    /// Image parts staged by `/image` commands, attached to the next user message.
    pub(crate) pending_image_parts: Vec<zeph_llm::provider::MessagePart>,
    /// DB row ID of the most recently persisted message. Set by `persist_message`;
    /// consumed by `push_message` call sites to populate `metadata.db_id` on in-memory messages.
    pub(crate) last_persisted_message_id: Option<i64>,
    /// DB message IDs pending hide after deferred tool pair summarization.
    pub(crate) deferred_db_hide_ids: Vec<i64>,
    /// Summary texts pending insertion after deferred tool pair summarization.
    pub(crate) deferred_db_summaries: Vec<String>,
}
549
550impl McpState {
551    /// Write the **full** `self.tools` set to the shared executor `RwLock`.
552    ///
553    /// This is the first of two writers to `shared_tools`. Within a turn this method must run
554    /// **before** `apply_pruned_tools`, which writes the pruned subset. The normal call order
555    /// guarantees this: tool-list change events call this method, and pruning runs later inside
556    /// `rebuild_system_prompt`. See also: `apply_pruned_tools`.
557    pub(crate) fn sync_executor_tools(&self) {
558        if let Some(ref shared) = self.shared_tools {
559            shared.write().clone_from(&self.tools);
560        }
561    }
562
563    /// Write the **pruned** tool subset to the shared executor `RwLock`.
564    ///
565    /// Must only be called **after** `sync_executor_tools` has established the full tool set for
566    /// the current turn. `self.tools` (the full set) is intentionally **not** modified.
567    ///
568    /// This method must **NOT** call `sync_executor_tools` internally — doing so would overwrite
569    /// the pruned subset with the full set. See also: `sync_executor_tools`.
570    pub(crate) fn apply_pruned_tools(&self, pruned: Vec<zeph_mcp::McpTool>) {
571        debug_assert!(
572            pruned.iter().all(|p| self
573                .tools
574                .iter()
575                .any(|t| t.server_id == p.server_id && t.name == p.name)),
576            "pruned set must be a subset of self.tools"
577        );
578        if let Some(ref shared) = self.shared_tools {
579            *shared.write() = pruned;
580        }
581    }
582
583    #[cfg(test)]
584    pub(crate) fn tool_count(&self) -> usize {
585        self.tools.len()
586    }
587}
588
589impl IndexState {
590    pub(crate) async fn fetch_code_rag(
591        &self,
592        query: &str,
593        token_budget: usize,
594    ) -> Result<Option<String>, crate::agent::error::AgentError> {
595        let Some(retriever) = &self.retriever else {
596            return Ok(None);
597        };
598        if token_budget == 0 {
599            return Ok(None);
600        }
601
602        let result = retriever
603            .retrieve(query, token_budget)
604            .await
605            .map_err(|e| crate::agent::error::AgentError::Other(format!("{e:#}")))?;
606        let context_text = zeph_index::retriever::format_as_context(&result);
607
608        if context_text.is_empty() {
609            Ok(None)
610        } else {
611            tracing::debug!(
612                strategy = ?result.strategy,
613                chunks = result.chunks.len(),
614                tokens = result.total_tokens,
615                "code context fetched"
616            );
617            Ok(Some(context_text))
618        }
619    }
620}
621
622impl DebugState {
623    pub(crate) fn start_iteration_span(&mut self, iteration_index: usize, text: &str) {
624        if let Some(ref mut tc) = self.trace_collector {
625            tc.begin_iteration(iteration_index, text);
626            self.current_iteration_span_id = tc.current_iteration_span_id(iteration_index);
627        }
628    }
629
630    pub(crate) fn end_iteration_span(
631        &mut self,
632        iteration_index: usize,
633        status: crate::debug_dump::trace::SpanStatus,
634    ) {
635        if let Some(ref mut tc) = self.trace_collector {
636            tc.end_iteration(iteration_index, status);
637        }
638        self.current_iteration_span_id = None;
639    }
640
641    pub(crate) fn switch_format(&mut self, new_format: crate::debug_dump::DumpFormat) {
642        let was_trace = self.dump_format == crate::debug_dump::DumpFormat::Trace;
643        let now_trace = new_format == crate::debug_dump::DumpFormat::Trace;
644
645        if now_trace
646            && !was_trace
647            && let Some(ref dump_dir) = self.dump_dir.clone()
648        {
649            let service_name = self.trace_service_name.clone();
650            let redact = self.trace_redact;
651            match crate::debug_dump::trace::TracingCollector::new(
652                dump_dir.as_path(),
653                &service_name,
654                redact,
655                None,
656            ) {
657                Ok(collector) => {
658                    self.trace_collector = Some(collector);
659                }
660                Err(e) => {
661                    tracing::warn!(error = %e, "failed to create TracingCollector on format switch");
662                }
663            }
664        }
665        if was_trace
666            && !now_trace
667            && let Some(mut tc) = self.trace_collector.take()
668        {
669            tc.finish();
670        }
671
672        self.dump_format = new_format;
673    }
674
675    pub(crate) fn write_chat_debug_dump(
676        &self,
677        dump_id: Option<u32>,
678        result: &zeph_llm::provider::ChatResponse,
679        pii_filter: &zeph_sanitizer::pii::PiiFilter,
680    ) {
681        let Some((d, id)) = self.debug_dumper.as_ref().zip(dump_id) else {
682            return;
683        };
684        let raw = match result {
685            zeph_llm::provider::ChatResponse::Text(t) => t.clone(),
686            zeph_llm::provider::ChatResponse::ToolUse {
687                text, tool_calls, ..
688            } => {
689                let calls = serde_json::to_string_pretty(tool_calls).unwrap_or_default();
690                format!(
691                    "{}\n\n---TOOL_CALLS---\n{calls}",
692                    text.as_deref().unwrap_or("")
693                )
694            }
695        };
696        let text = if pii_filter.is_enabled() {
697            pii_filter.scrub(&raw).into_owned()
698        } else {
699            raw
700        };
701        d.dump_response(id, &text);
702    }
703}
704
705impl Default for MemoryState {
706    fn default() -> Self {
707        Self {
708            memory: None,
709            conversation_id: None,
710            history_limit: 50,
711            recall_limit: 5,
712            summarization_threshold: 50,
713            cross_session_score_threshold: 0.35,
714            autosave_assistant: false,
715            autosave_min_length: 20,
716            tool_call_cutoff: 6,
717            unsummarized_count: 0,
718            document_config: crate::config::DocumentConfig::default(),
719            graph_config: crate::config::GraphConfig::default(),
720            compression_guidelines_config: zeph_memory::CompressionGuidelinesConfig::default(),
721            shutdown_summary: true,
722            shutdown_summary_min_messages: 4,
723            shutdown_summary_max_messages: 20,
724            shutdown_summary_timeout_secs: 10,
725            structured_summaries: false,
726            last_recall_confidence: None,
727            digest_config: crate::config::DigestConfig::default(),
728            cached_session_digest: None,
729            context_strategy: crate::config::ContextStrategy::default(),
730            crossover_turn_threshold: 20,
731            rpe_router: None,
732            goal_text: None,
733            persona_config: crate::config::PersonaConfig::default(),
734            trajectory_config: crate::config::TrajectoryConfig::default(),
735            category_config: crate::config::CategoryConfig::default(),
736            tree_config: crate::config::TreeConfig::default(),
737            tree_consolidation_handle: None,
738            microcompact_config: crate::config::MicrocompactConfig::default(),
739            autodream_config: crate::config::AutoDreamConfig::default(),
740            magic_docs_config: crate::config::MagicDocsConfig::default(),
741            autodream: super::autodream::AutoDreamState::new(),
742            magic_docs: super::magic_docs::MagicDocsState::new(),
743        }
744    }
745}
746
747impl MemoryState {
748    pub(crate) fn apply_graph_config(&mut self, config: crate::config::GraphConfig) {
749        if config.enabled {
750            tracing::warn!(
751                "graph-memory is enabled: extracted entities are stored without PII redaction. \
752                 Do not use with sensitive personal data until redaction is implemented."
753            );
754        }
755        if config.rpe.enabled {
756            self.rpe_router = Some(std::sync::Mutex::new(zeph_memory::RpeRouter::new(
757                config.rpe.threshold,
758                config.rpe.max_skip_turns,
759            )));
760        } else {
761            self.rpe_router = None;
762        }
763        self.graph_config = config;
764    }
765}
766
767impl Default for McpState {
768    fn default() -> Self {
769        Self {
770            tools: Vec::new(),
771            registry: None,
772            manager: None,
773            allowed_commands: Vec::new(),
774            max_dynamic: 10,
775            elicitation_rx: None,
776            shared_tools: None,
777            tool_rx: None,
778            server_outcomes: Vec::new(),
779            pruning_cache: zeph_mcp::PruningCache::new(),
780            pruning_provider: None,
781            pruning_enabled: false,
782            pruning_params: zeph_mcp::PruningParams::default(),
783            semantic_index: None,
784            discovery_strategy: zeph_mcp::ToolDiscoveryStrategy::default(),
785            discovery_params: zeph_mcp::DiscoveryParams::default(),
786            discovery_provider: None,
787            elicitation_warn_sensitive_fields: true,
788        }
789    }
790}
791
792impl Default for IndexState {
793    fn default() -> Self {
794        Self {
795            retriever: None,
796            repo_map_tokens: 0,
797            cached_repo_map: None,
798            repo_map_ttl: std::time::Duration::from_secs(300),
799        }
800    }
801}
802
803impl Default for DebugState {
804    fn default() -> Self {
805        Self {
806            debug_dumper: None,
807            dump_format: crate::debug_dump::DumpFormat::default(),
808            trace_collector: None,
809            iteration_counter: 0,
810            anomaly_detector: None,
811            reasoning_model_warning: true,
812            logging_config: crate::config::LoggingConfig::default(),
813            dump_dir: None,
814            trace_service_name: String::new(),
815            trace_redact: true,
816            current_iteration_span_id: None,
817        }
818    }
819}
820
821impl Default for FeedbackState {
822    fn default() -> Self {
823        Self {
824            detector: super::feedback_detector::FeedbackDetector::new(0.6),
825            judge: None,
826            llm_classifier: None,
827        }
828    }
829}
830
831impl Default for RuntimeConfig {
832    fn default() -> Self {
833        Self {
834            security: SecurityConfig::default(),
835            timeouts: TimeoutConfig::default(),
836            model_name: String::new(),
837            active_provider_name: String::new(),
838            permission_policy: zeph_tools::PermissionPolicy::default(),
839            redact_credentials: true,
840            rate_limiter: super::rate_limiter::ToolRateLimiter::new(
841                super::rate_limiter::RateLimitConfig::default(),
842            ),
843            semantic_cache_enabled: false,
844            semantic_cache_threshold: 0.95,
845            semantic_cache_max_candidates: 10,
846            dependency_config: zeph_tools::DependencyConfig::default(),
847            adversarial_policy_info: None,
848            spawn_depth: 0,
849            budget_hint_enabled: true,
850            channel_skills: zeph_config::ChannelSkillsConfig::default(),
851            layers: Vec::new(),
852        }
853    }
854}
855
856impl SessionState {
857    pub(crate) fn new() -> Self {
858        Self {
859            env_context: EnvironmentContext::gather(""),
860            last_assistant_at: None,
861            response_cache: None,
862            parent_tool_use_id: None,
863            status_tx: None,
864            lsp_hooks: None,
865            policy_config: None,
866            hooks_config: HooksConfigSnapshot::default(),
867        }
868    }
869}
870
871impl SkillState {
872    pub(crate) fn new(
873        registry: Arc<RwLock<SkillRegistry>>,
874        matcher: Option<SkillMatcherBackend>,
875        max_active_skills: usize,
876        last_skills_prompt: String,
877    ) -> Self {
878        Self {
879            registry,
880            skill_paths: Vec::new(),
881            managed_dir: None,
882            trust_config: crate::config::TrustConfig::default(),
883            matcher,
884            max_active_skills,
885            disambiguation_threshold: 0.20,
886            min_injection_score: 0.20,
887            embedding_model: String::new(),
888            skill_reload_rx: None,
889            active_skill_names: Vec::new(),
890            last_skills_prompt,
891            prompt_mode: crate::config::SkillPromptMode::Auto,
892            available_custom_secrets: HashMap::new(),
893            cosine_weight: 0.7,
894            hybrid_search: false,
895            bm25_index: None,
896            two_stage_matching: false,
897            confusability_threshold: 0.0,
898            rl_head: None,
899            rl_weight: 0.3,
900            rl_warmup_updates: 50,
901            generation_output_dir: None,
902            generation_provider_name: String::new(),
903        }
904    }
905}
906
907impl LifecycleState {
908    pub(crate) fn new() -> Self {
909        let (_tx, rx) = watch::channel(false);
910        Self {
911            shutdown: rx,
912            start_time: Instant::now(),
913            cancel_signal: Arc::new(tokio::sync::Notify::new()),
914            cancel_token: tokio_util::sync::CancellationToken::new(),
915            cancel_bridge_handle: None,
916            config_path: None,
917            config_reload_rx: None,
918            warmup_ready: None,
919            update_notify_rx: None,
920            custom_task_rx: None,
921            last_known_cwd: std::env::current_dir().unwrap_or_default(),
922            file_changed_rx: None,
923            file_watcher: None,
924        }
925    }
926}
927
928impl ProviderState {
929    pub(crate) fn new(initial_prompt_tokens: u64) -> Self {
930        Self {
931            summary_provider: None,
932            provider_override: None,
933            judge_provider: None,
934            probe_provider: None,
935            compress_provider: None,
936            cached_prompt_tokens: initial_prompt_tokens,
937            server_compaction_active: false,
938            stt: None,
939            provider_pool: Vec::new(),
940            provider_config_snapshot: None,
941        }
942    }
943}
944
945impl MetricsState {
946    pub(crate) fn new(token_counter: Arc<zeph_memory::TokenCounter>) -> Self {
947        Self {
948            metrics_tx: None,
949            cost_tracker: None,
950            token_counter,
951            extended_context: false,
952            classifier_metrics: None,
953        }
954    }
955}
956
957impl ExperimentState {
958    pub(crate) fn new() -> Self {
959        let (notify_tx, notify_rx) = tokio::sync::mpsc::channel::<String>(4);
960        Self {
961            config: crate::config::ExperimentConfig::default(),
962            cancel: None,
963            baseline: zeph_experiments::ConfigSnapshot::default(),
964            eval_provider: None,
965            notify_rx: Some(notify_rx),
966            notify_tx,
967        }
968    }
969}
970
971pub(super) mod security;
972pub(super) mod skill;
973
974#[cfg(test)]
975mod tests;