defect_agent/session.rs
1//! Session — state container and lifecycle interface for a single conversation.
2//!
3//! ## Abstraction layers
4//!
5//! - [`AgentCore`]: process-level "agent instance", holds the built-in tool set and
6//! global configuration;
7//! it is the root object assembled by `defect-cli` and injected into
8//! `defect-acp::serve`
9//! - [`Session`]: lifecycle unit for a single conversation; holds history, per-session
10//! tool
11//! table (including MCP), cancel token, and event stream
12//! - [`History`]: wrapper around message history, with hooks reserved for compression,
13//! token counting, and resume
14//!
15//! All three are **exposed as traits**; concrete implementations live in the `session/`
16//! submodule within this crate
17//! and at the assembly point in `defect-cli`; `defect-acp` interacts with them only
18//! through the traits.
19
20use std::path::PathBuf;
21use std::sync::Arc;
22
23use agent_client_protocol_schema::{ContentBlock, McpServer, SessionId, StopReason, ToolCallId};
24use futures::future::BoxFuture;
25
26use crate::error::BoxError;
27use crate::event::{AgentEvent, PermissionResolution};
28use crate::fs::FsBackend;
29use crate::llm::{
30 Message, ModelCandidate, ModelInfo, ProviderError, ProviderInfo, ReasoningEffort,
31};
32use crate::shell::ShellBackend;
33use crate::tool::{Tool, ToolSchema};
34
35mod background;
36mod capabilities;
37mod context;
38mod default;
39mod events;
40mod goal;
41mod history;
42mod permissions;
43mod prompt;
44mod tool_registry;
45mod turn;
46
47pub use background::{
48 BackgroundOutcome, BackgroundProgressConfig, BackgroundResult, BackgroundTasks, BlockKind,
49 ProgressBlock, TaskHandle, TaskSnapshot, TaskStatus, format_background_outcome,
50};
51pub use capabilities::{
52 ResolvedSessionCapabilities, SessionCapabilitiesConfig, WebSearchCapabilityConfig,
53 WebSearchCapabilityMode,
54};
55pub use context::{Frontend, RunningContext};
56pub use default::{DefaultAgentCore, DefaultAgentCoreBuilder, DefaultSession, new_session_id};
57pub use events::EventEmitter;
58pub use goal::GoalState;
59pub use history::VecHistory;
60pub use permissions::PermissionGate;
61pub use prompt::resolve_system_prompt;
62pub use tool_registry::{CompositeRegistry, StaticToolRegistry, StaticToolRegistryBuilder};
63/// Re-exported for reuse within the crate: the `spawn_agent` sub-agent tool needs a
64/// `RequestAuditTracker` instance when constructing a nested [`TurnRunner`]. This type is
65/// not public (it exposes internal diagnostic state), but `crate::tool::spawn_agent` in
66/// the same crate must be able to call `new()`.
67pub(crate) use turn::RequestAuditTracker;
68pub use turn::{
69 BasePromptConfig, CompactionSlot, PromptConfig, TurnConfig, TurnRequestLimit, TurnRunner,
70};
71
72/// Process-level agent root object.
73///
74/// `defect-cli` constructs a concrete implementation at startup (holding the LLM provider
75/// registry, built-in tool set, and configuration) and injects an `Arc<dyn AgentCore>`
76/// into `defect-acp::serve`.
77///
78/// Rationale for extracting a trait:
79/// - Allows injecting a mock in tests without spinning up a real LLM.
80/// - If an "embedded agent" (library mode called by a host application) emerges in the
81/// future, a second concrete implementation can be added without touching the ACP
82/// bridge code.
83pub trait AgentCore: Send + Sync {
84 /// Creates a new session.
85 ///
86 /// `id` is generated and passed in by the caller (the `defect-acp` `session/new`
87 /// handler) — the filesystem backend already needs a `SessionId` when constructed
88 /// outside of [`AgentCore::create_session`] (see the ACP filesystem delegation
89 /// contract). Concrete implementations treat it as the authoritative external id and
90 /// return [`AgentError::DuplicateSessionId`] on duplicates.
91 ///
92 /// `mcp_servers` is the per-session MCP server list from the `session/new` request;
93 /// the concrete implementation spawns subprocesses or establishes SSE connections
94 /// during initialization, wrapping each MCP tool as a [`Tool`] and adding it to the
95 /// session's tool table.
96 ///
97 /// `fs` is the session-level filesystem backend — `defect-acp` selects
98 /// `LocalFsBackend` or `AcpFsBackend` at assembly time based on the client's
99 /// [`FileSystemCapabilities`]. The session holds an `Arc` to it, and all filesystem
100 /// tool calls go through it.
101 ///
102 /// `shell` is the session-level shell backend — `defect-acp` selects
103 /// `LocalShellBackend` or `AcpShellBackend` at assembly time based on the client's
104 /// [`ClientCapabilities::terminal`]. The session holds an `Arc` to it, and all `bash`
105 /// tool calls go through it.
106 ///
107 /// `frontend` indicates how the agent is being accessed ([`Frontend::Acp`] carries
108 /// the fs/shell delegation state negotiated during the ACP handshake) and is used to
109 /// inject the `# Environment` section of the system prompt.
110 ///
111 /// # Errors
112 ///
113 /// MCP startup failure, missing cwd, duplicate id, etc.
114 ///
115 /// [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
116 /// [`ClientCapabilities::terminal`]: agent_client_protocol_schema::ClientCapabilities
117 fn create_session(
118 &self,
119 id: SessionId,
120 cwd: PathBuf,
121 mcp_servers: Vec<McpServer>,
122 fs: Arc<dyn FsBackend>,
123 shell: Arc<dyn ShellBackend>,
124 frontend: Frontend,
125 ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;
126
127 /// Restore an existing session from persistent state.
128 ///
129 /// `frontend` works the same as in [`AgentCore::create_session`] — the restored
130 /// session also uses it to inject runtime environment information.
131 ///
132 /// # Errors
133 ///
134 /// The session does not exist, the persisted data is corrupted, the restored `cwd` is
135 /// unavailable, etc.
136 fn load_session(
137 &self,
138 id: SessionId,
139 fs: Arc<dyn FsBackend>,
140 shell: Arc<dyn ShellBackend>,
141 frontend: Frontend,
142 ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;
143
144 /// Look up an existing session by id.
145 fn session(&self, id: &SessionId) -> Option<Arc<dyn Session>>;
146}
147
148/// Abstraction for restoring a session from persistent storage.
149///
150/// Concrete implementations typically come from `defect-storage`.
151pub trait SessionLoader: Send + Sync {
152 /// Read back the state needed for recovery by session id.
153 ///
154 /// # Errors
155 ///
156 /// The session does not exist, the storage is corrupted, or replay fails.
157 fn load_session(&self, id: SessionId) -> BoxFuture<'_, Result<LoadedSession, BoxError>>;
158}
159
160/// Abstraction for building an additional tool registry for a single session.
161///
162/// A typical implementation comes from `defect-mcp`: it connects to the list of MCP
163/// servers provided by `session/new` or `session/load`, and wraps the remote tools into a
164/// [`ToolRegistry`].
165pub trait SessionToolFactory: Send + Sync {
166 /// Build a session-level tool registry for the current session.
167 ///
168 /// # Errors
169 ///
170 /// Returns an error if the external tool source fails to initialize, the remote
171 /// inventory cannot be fetched, or the configuration is unsupported.
172 fn build_registry(
173 &self,
174 cwd: PathBuf,
175 mcp_servers: Vec<McpServer>,
176 ) -> BoxFuture<'_, Result<Arc<dyn ToolRegistry>, BoxError>>;
177}
178
179/// Observer for when `AgentCore::create_session` succeeds.
180///
181/// Typical uses:
182/// - Start `defect-storage` event subscription persistence
183/// - Attach per-session sidecar consumers for tracing / metrics
184pub trait SessionObserver: Send + Sync {
185 /// Called after the session is successfully created.
186 ///
187 /// # Errors
188 ///
189 /// Returns an error if initializing the side‑channel consumer fails, preventing the
190 /// session from becoming externally visible.
191 fn on_session_created(
192 &self,
193 session: Arc<dyn Session>,
194 info: SessionCreateInfo,
195 ) -> Result<(), BoxError>;
196}
197
/// Public description of one selectable permission mode; `defect-acp` builds an ACP
/// `SessionMode` from it.
///
/// This is a "policy-free" projection of [`crate::policy::PolicyMode`]: only the
/// id/display fields are exposed, so the internal decision engine does not leak.
#[derive(Clone, Debug)]
pub struct ModeDescriptor {
    /// Identifier of the mode.
    pub id: String,
    /// Name of the mode (one of the display fields).
    pub name: String,
    /// Optional description of the mode (one of the display fields).
    pub description: Option<String>,
}
209
/// Model selection key: a `(provider vendor, model id)` pair.
///
/// The same model id can be declared by multiple providers (multiple gateways with the
/// same model), so a selection must carry both the provider vendor and the model id.
///
/// Because this type is used as a key, it derives `Hash` alongside `PartialEq`/`Eq`
/// so it can be stored in `HashMap`/`HashSet` directly.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ModelSelection {
    /// Provider vendor; refers to [`ProviderInfo::vendor`].
    pub provider: String,
    /// Model id as declared by that provider.
    pub model: String,
}
220
221/// A single session.
222///
223/// All methods are trait-object-friendly (`&self` + `BoxFuture`). The `Arc<dyn Session>`
224/// is shared between `defect-acp` and the main loop.
225pub trait Session: Send + Sync {
226 fn id(&self) -> &SessionId;
227
228 /// Provider metadata used by the current session.
229 fn provider_info(&self) -> ProviderInfo;
230
231 /// The model ID used by the current session.
232 fn current_model(&self) -> String;
233
234 /// List the model candidates available from the current provider for this session.
235 ///
236 /// # Errors
237 ///
238 /// Returns [`ProviderError`] if the provider fails to fetch the model list.
239 fn list_models(&self) -> BoxFuture<'_, Result<Vec<ModelInfo>, ProviderError>>;
240
241 /// List the (provider, model) candidate pairs visible to the session. Under a
242 /// multi-provider setup, the same session may switch models across providers, so ACP
243 /// rendering needs to annotate each candidate with its provider.
244 ///
245 /// # Errors
246 ///
247 /// Same as [`Self::list_models`]: returns [`ProviderError`] if fetching the provider
248 /// list fails.
249 fn list_candidates(&self) -> BoxFuture<'_, Result<Vec<ModelCandidate>, ProviderError>>;
250
251 /// Switches the model for the current session.
252 ///
253 /// The selection key is a `(provider vendor, model)` pair — the same model id may be
254 /// advertised by multiple providers (multiple gateways for the same model), so the
255 /// provider must be explicitly specified. The currently in-progress turn retains its
256 /// original selection; subsequent turns use the new selection.
257 ///
258 /// # Errors
259 ///
260 /// Returns [`ProviderError`] when the provider fails to fetch its model list, or when
261 /// the requested `(provider, model)` pair does not exist.
262 fn set_model(&self, selection: ModelSelection) -> BoxFuture<'_, Result<(), ProviderError>>;
263
264 /// The current active permission mode ID. Returns `None` if no mode catalog is
265 /// loaded.
266 ///
267 /// Maps to ACP `SessionModeState::current_mode_id`.
268 fn current_mode(&self) -> Option<String>;
269
270 /// The list of permission modes available to this session, in assembly order. Returns
271 /// an empty list when no mode directory is mounted. Maps to ACP
272 /// `SessionModeState::available_modes`.
273 fn available_modes(&self) -> Vec<ModeDescriptor>;
274
275 /// Switch the current permission mode. The change takes effect on subsequent turns;
276 /// the in-flight turn retains its original policy (same semantics as
277 /// [`Self::set_model`] — the policy is snapshotted when `run_turn` starts).
278 ///
279 /// # Errors
280 ///
281 /// Returns [`AgentError::ModeNotFound`] if `mode_id` does not match any available
282 /// mode, or if the session has no mode directory installed.
283 fn set_mode(&self, mode_id: String) -> Result<(), AgentError>;
284
285 /// The current `reasoning_effort` level (`None` = unset, falling back to the provider
286 /// default). Maps to the current value of the ACP thought-level configuration item.
287 fn current_reasoning_effort(&self) -> Option<ReasoningEffort>;
288
289 /// Sets the `reasoning_effort` level. `None` clears the override (falls back to the
290 /// provider default). Takes effect on subsequent turns. Providers that do not support
291 /// this concept ignore it when assembling requests.
292 fn set_reasoning_effort(&self, effort: Option<ReasoningEffort>);
293
294 /// Subscribe to the event stream. Three independent consumers (acp / storage /
295 /// tracing) each call this once without interfering with each other — internally uses
296 /// mpsc with fan-out so that slow consumers only experience backpressure without
297 /// dropping events.
298 fn subscribe(&self) -> EventStream;
299
300 /// A read-only snapshot of the current history, used to replay the transcript to the
301 /// client after a session load.
302 fn history_snapshot(&self) -> Vec<Message>;
303
304 /// Starts a turn.
305 ///
306 /// The returned future resolves when the turn ends:
307 /// - `Ok(StopReason)` – normal termination (including Cancelled); drives the ACP
308 /// `PromptResponse`
309 /// - `Err(TurnError)` – fatal error (auth expiry, model unavailable, etc.);
310 /// drives the ACP JSON-RPC `Error` response
311 ///
312 /// [`AgentEvent`]s produced during the turn are pushed via [`Session::subscribe`],
313 /// **not** through this future. The `TurnEnded` event is still emitted on the event
314 /// stream (for storage / tracing), but the ACP bridge uses this future's outcome.
315 ///
316 /// Only one turn may be in progress per session at a time; concurrent calls return
317 /// [`TurnError::TurnInProgress`].
318 fn run_turn(&self, prompt: Vec<ContentBlock>) -> BoxFuture<'_, Result<StopReason, TurnError>>;
319
320 /// Cancels the current turn. Idempotent: no-op if no turn is in progress.
321 fn cancel_turn(&self);
322
323 /// Writes back the client response to the ACP reverse request
324 /// `session/request_permission` to the main loop.
325 fn resolve_permission(&self, id: ToolCallId, outcome: PermissionResolution);
326
327 /// Current context usage. Read-only and cheap; backs the `/context` slash command.
328 fn context_status(&self) -> ContextStatus;
329
330 /// Synchronously compact the session history now (out-of-band `/compact` command),
331 /// reusing the same boundary selection + summarization as the turn loop's hard
332 /// watermark.
333 ///
334 /// Returns `Ok(Some(report))` when a compaction ran, `Ok(None)` when there was no safe
335 /// boundary to summarize (e.g. a single short turn — nothing to do).
336 ///
337 /// # Errors
338 ///
339 /// Returns [`TurnError::TurnInProgress`] if a turn is currently running: compaction
340 /// rewrites history and would race the in-flight turn, so the caller must `/cancel` or
341 /// wait first.
342 fn compact_now(&self) -> BoxFuture<'_, Result<Option<CompactionReport>, TurnError>>;
343}
344
345/// Event stream. Type-erased to support trait object return.
346pub type EventStream = futures::stream::BoxStream<'static, AgentEvent>;
347
348/// Stable information provided to [`SessionObserver`] after successful creation.
349#[derive(Debug, Clone)]
350pub struct SessionCreateInfo {
351 pub id: SessionId,
352 pub cwd: PathBuf,
353 pub mcp_servers: Vec<McpServer>,
354}
355
356/// Minimal session data restored from persistent storage.
357#[derive(Debug, Clone)]
358pub struct LoadedSession {
359 pub info: SessionCreateInfo,
360 pub history: Vec<Message>,
361}
362
363/// Abstraction over message history — pure storage + token accounting.
364///
365/// Compaction is **not** handled here: summarization requires calling the LLM, which the
366/// storage abstraction cannot reach.
367/// Compaction is orchestrated in the turn main loop (`session/turn/compact.rs`) — it
368/// reads [`History::snapshot`], calls the LLM for a summary, then writes back the
369/// computed new message list via [`History::replace`]. This trait is only responsible
370/// for: appending, snapshotting, wholesale replacement, and providing the main loop with
371/// an estimate of "how many tokens the current history is worth."
372///
373/// Token estimation strategy (see [`VecHistory`]): use the **actual input token** count
374/// reported by the last LLM call as a baseline, then add a **character-heuristic**
375/// increment for messages appended after that baseline; when no real baseline is
376/// available, fall back to a pure character-heuristic estimate for the entire history.
377/// The turn main loop compares this estimate against the compaction threshold.
378pub trait History: Send + Sync {
379 /// Appends a message.
380 fn append(&self, msg: Message);
381
382 /// A snapshot of the current history, to be fed into the next LLM call.
383 fn snapshot(&self) -> Vec<Message>;
384
385 /// Replace the entire message list after compression. The turn main loop calls this
386 /// to write back the new list consisting of a summary plus the retained tail. The
387 /// implementation should also reset the token estimation baseline, since the old
388 /// actual token counts no longer apply to the new list.
389 fn replace(&self, messages: Vec<Message>);
390
391 /// Prefix splice: replaces the first `drop_count` messages in the **current** list
392 /// with the single `summary` message, preserving everything after them. Returns the
393 /// actual number of messages dropped (`drop_count` is clamped to the current length).
394 ///
395 /// This is the primitive for **background compression** write-back: a background task
396 /// computes `drop_count` (= the prefix length to summarize) and `summary` from a
397 /// snapshot taken at some point, but while the summarization LLM call is in flight,
398 /// the foreground turn may still be `append`ing to the **tail**. Writing back with
399 /// `replace(entire list)` would discard any tail messages added during that time.
400 /// `splice_prefix` only touches the first `drop_count` messages of the **current**
401 /// list, preserving everything from `drop_count..` onward (including tail messages
402 /// added in the meantime), so the write-back is correct.
403 ///
404 /// **Concurrency invariant** (must be maintained): `drop_count` is computed from an
405 /// old snapshot and remains valid for the **current** list provided that during the
406 /// flight only tail appends (`append`) and in-place content replacements
407 /// (micro-compression `replace` with same-length rebuild) occur — no insertion or
408 /// deletion of middle messages. The only operation that removes middle messages is
409 /// compression itself, and compression runs **solo** (at most one in flight at a
410 /// time), so the invariant holds.
411 ///
412 /// Like [`Self::replace`], resets the token estimation baseline after write-back (the
413 /// true token count of the new prefix is unknown).
414 fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize;
415
416 /// Records the actual input token count from the last LLM call
417 /// (`input + cache_read + cache_creation`). Serves as the precise baseline for
418 /// [`Self::token_estimate`]; subsequent [`Self::append`] messages are accumulated
419 /// incrementally using a character heuristic.
420 fn record_input_tokens(&self, tokens: u64);
421
422 /// Estimates the token count for the current history. `None` indicates the history is
423 /// empty or no estimate is available.
424 fn token_estimate(&self) -> Option<u64>;
425}
426
/// Outcome of a compaction: the token counts before and after. The main loop wraps
/// them into [`AgentEvent::ContextCompressed`].
#[derive(Clone, Copy, Debug)]
pub struct CompactionReport {
    /// Token count before the compaction.
    pub tokens_before: u64,
    /// Token count after the compaction.
    pub tokens_after: u64,
}
434
/// Point-in-time view of a session's context usage, as returned by
/// [`Session::context_status`]. It powers the `/context` slash command (and any
/// client-side context gauge).
#[derive(Clone, Copy, Debug)]
pub struct ContextStatus {
    /// Estimated number of tokens currently held in history; `None` while no estimate
    /// is available yet (e.g. an empty session before the first request).
    pub used_tokens: Option<u64>,
    /// Context window of the model, in tokens, when the provider exposes it.
    pub context_window: Option<u64>,
    /// Share of the window in use (`used / window`); present only when both values
    /// are known.
    pub ratio: Option<f64>,
}
447
448/// Process-level agent error.
449#[non_exhaustive]
450#[derive(Debug, thiserror::Error)]
451pub enum AgentError {
452 #[error("invalid working directory: {0}")]
453 InvalidCwd(PathBuf),
454
455 /// MCP server failed to start (stdio process could not be launched / SSE connection
456 /// could not be established).
457 #[error("mcp startup failed for {server}: {source}")]
458 McpStartup {
459 server: String,
460 #[source]
461 source: BoxError,
462 },
463
464 /// The caller-provided [`SessionId`] already exists in the session table.
465 /// A monotonic + timestamp ID generator should theoretically never collide; this is a
466 /// safety net.
467 #[error("session id already in use: {0}")]
468 DuplicateSessionId(SessionId),
469
470 #[error("session observer failed: {0}")]
471 Observer(#[source] BoxError),
472
473 #[error("session not found in storage: {0}")]
474 SessionNotFound(SessionId),
475
476 /// The `mode_id` received by `set_mode` is not in the session's mode directory (or
477 /// the directory is not mounted).
478 #[error("permission mode not found: {0}")]
479 ModeNotFound(String),
480
481 #[error("session restore failed: {0}")]
482 Restore(#[source] BoxError),
483
484 /// Session capability adjudication failed during startup. See [`SessionInitError`].
485 #[error(transparent)]
486 Init(#[from] SessionInitError),
487
488 #[error(transparent)]
489 Other(#[from] BoxError),
490}
491
/// A one-time adjudication failure that occurs while a session starts up.
///
/// See the capabilities design. A session is refused when
/// `capabilities.<name>.mode = "delegate"` is configured but the current provider's
/// [`crate::llm::LlmProvider::hosted_capabilities`] does not support that capability.
#[derive(Debug)]
#[non_exhaustive]
pub enum SessionInitError {
    /// `Delegate` was chosen explicitly by the user, yet the provider does not support
    /// the corresponding hosted capability.
    CapabilityUnsatisfied {
        /// Name of the offending capability (e.g. `"web_search"`).
        capability: &'static str,
        /// Name of the provider bound to the current session.
        provider: String,
    },
}

impl std::fmt::Display for SessionInitError {
    /// Writes a multi-line message: a headline naming the capability and provider, a
    /// blank line, and two numbered configuration remedies. The last line carries no
    /// trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Bind the fields once so the message below reads as a flat sequence.
        let (capability, provider) = match self {
            Self::CapabilityUnsatisfied {
                capability,
                provider,
            } => (capability, provider),
        };

        // Headline, followed by an empty separator line.
        writeln!(
            f,
            "{capability} capability is unsatisfied: provider `{provider}` does not support hosted {capability}."
        )?;
        writeln!(f)?;
        writeln!(f, "To fix this, choose one of:")?;

        // Remedy 1: switch the capability off for this provider only.
        writeln!(f, " 1. Disable hosted {capability} for this provider:")?;
        writeln!(f, " [providers.{provider}.capabilities.{capability}]")?;
        writeln!(f, " mode = \"disabled\"")?;

        // Remedy 2: disable globally and delegate only for supporting providers.
        writeln!(
            f,
            " 2. Change global default to `disabled` and only delegate where supported:"
        )?;
        writeln!(f, " [capabilities.{capability}]")?;
        writeln!(f, " mode = \"disabled\"")?;
        writeln!(
            f,
            " [providers.<hosted-supported>.capabilities.{capability}]"
        )?;
        // Final line: `write!` rather than `writeln!`, so no trailing newline.
        write!(f, " mode = \"delegate\"")
    }
}

impl std::error::Error for SessionInitError {}
544
545/// Reasons why a turn fails.
546///
547/// Rule of thumb: **only include errors that make the turn unable to continue**. Internal
548/// tool failures within a turn, single LLM retry failures, etc. belong in [`AgentEvent`]
549/// and the historical state machine instead.
550#[non_exhaustive]
551#[derive(Debug, thiserror::Error)]
552pub enum TurnError {
553 /// A turn is already in progress for this session.
554 #[error("turn already in progress for this session")]
555 TurnInProgress,
556
557 /// Provider error that still fails after retries are exhausted.
558 #[error(transparent)]
559 Provider(#[from] ProviderError),
560
561 /// Internal invariant broken (should be a bug).
562 #[error("internal turn error: {0}")]
563 Internal(#[source] BoxError),
564}
565
566/// Abstraction for a tool registry.
567///
568/// Both the process-level registry (owned by [`AgentCore`], for built-in tools) and the
569/// session-level registry (owned by [`Session`], for MCP tools) share the same shape; the
570/// turn main loop looks up tools through the composite registry exposed by [`Session`].
571pub trait ToolRegistry: Send + Sync {
572 /// Return the schemas of all tools in the registry, used to populate the `tools`
573 /// field of an LLM request.
574 fn schemas(&self) -> Vec<ToolSchema>;
575
576 /// Looks up a tool by name.
577 fn get(&self, name: &str) -> Option<Arc<dyn Tool>>;
578}