Skip to main content

defect_agent/
session.rs

1//! Session — state container and lifecycle interface for a single conversation.
2//!
3//! ## Abstraction layers
4//!
5//! - [`AgentCore`]: process-level "agent instance", holds the built-in tool set and
6//!   global configuration;
7//!   it is the root object assembled by `defect-cli` and injected into
8//!   `defect-acp::serve`
9//! - [`Session`]: lifecycle unit for a single conversation; holds history, per-session
10//!   tool
11//!   table (including MCP), cancel token, and event stream
12//! - [`History`]: wrapper around message history, with hooks reserved for compression,
13//!   token counting, and resume
14//!
15//! All three are **exposed as traits**; concrete implementations live in the `session/`
16//! submodule within this crate
17//! and at the assembly point in `defect-cli`; `defect-acp` interacts with them only
18//! through the traits.
19
20use std::path::PathBuf;
21use std::sync::Arc;
22
23use agent_client_protocol_schema::{ContentBlock, McpServer, SessionId, StopReason, ToolCallId};
24use futures::future::BoxFuture;
25
26use crate::error::BoxError;
27use crate::event::{AgentEvent, PermissionResolution};
28use crate::fs::FsBackend;
29use crate::llm::{
30    Message, ModelCandidate, ModelInfo, ProviderError, ProviderInfo, ReasoningEffort,
31};
32use crate::shell::ShellBackend;
33use crate::tool::{Tool, ToolSchema};
34
35mod background;
36mod capabilities;
37mod context;
38mod default;
39mod events;
40mod goal;
41mod history;
42mod permissions;
43mod prompt;
44mod tool_registry;
45mod turn;
46
47pub use background::{
48    BackgroundOutcome, BackgroundProgressConfig, BackgroundResult, BackgroundTasks, BlockKind,
49    ProgressBlock, TaskHandle, TaskSnapshot, TaskStatus, format_background_outcome,
50};
51pub use capabilities::{
52    ResolvedSessionCapabilities, SessionCapabilitiesConfig, WebSearchCapabilityConfig,
53    WebSearchCapabilityMode,
54};
55pub use context::{Frontend, RunningContext};
56pub use default::{DefaultAgentCore, DefaultAgentCoreBuilder, DefaultSession, new_session_id};
57pub use events::EventEmitter;
58pub use goal::GoalState;
59pub use history::VecHistory;
60pub use permissions::PermissionGate;
61pub use prompt::resolve_system_prompt;
62pub use tool_registry::{CompositeRegistry, StaticToolRegistry, StaticToolRegistryBuilder};
63/// Re-exported for reuse within the crate: the `spawn_agent` sub-agent tool needs a
64/// `RequestAuditTracker` instance when constructing a nested [`TurnRunner`]. This type is
65/// not public (it exposes internal diagnostic state), but `crate::tool::spawn_agent` in
66/// the same crate must be able to call `new()`.
67pub(crate) use turn::RequestAuditTracker;
68pub use turn::{
69    BasePromptConfig, CompactionSlot, PromptConfig, TurnConfig, TurnRequestLimit, TurnRunner,
70};
71
/// Process-level agent root object.
///
/// `defect-cli` constructs a concrete implementation at startup (holding the LLM provider
/// registry, built-in tool set, and configuration) and injects an `Arc<dyn AgentCore>`
/// into `defect-acp::serve`.
///
/// Rationale for extracting a trait:
/// - Allows injecting a mock in tests without spinning up a real LLM.
/// - If an "embedded agent" (library mode called by a host application) emerges in the
///   future, a second concrete implementation can be added without touching the ACP
///   bridge code.
///
/// Every method takes `&self` and the asynchronous ones return a [`BoxFuture`], which is
/// what lets the trait be used behind `Arc<dyn AgentCore>`.
pub trait AgentCore: Send + Sync {
    /// Creates a new session.
    ///
    /// `id` is generated and passed in by the caller (the `defect-acp` `session/new`
    /// handler) — the filesystem backend already needs a `SessionId` when constructed
    /// outside of [`AgentCore::create_session`] (see the ACP filesystem delegation
    /// contract). Concrete implementations treat it as the authoritative external id and
    /// return [`AgentError::DuplicateSessionId`] on duplicates.
    ///
    /// `cwd` is the working directory of the session.
    /// NOTE(review): an unusable `cwd` is presumably reported as
    /// [`AgentError::InvalidCwd`] — confirm against the concrete implementation.
    ///
    /// `mcp_servers` is the per-session MCP server list from the `session/new` request;
    /// the concrete implementation spawns subprocesses or establishes SSE connections
    /// during initialization, wrapping each MCP tool as a [`Tool`] and adding it to the
    /// session's tool table.
    ///
    /// `fs` is the session-level filesystem backend — `defect-acp` selects
    /// `LocalFsBackend` or `AcpFsBackend` at assembly time based on the client's
    /// [`FileSystemCapabilities`]. The session holds an `Arc` to it, and all filesystem
    /// tool calls go through it.
    ///
    /// `shell` is the session-level shell backend — `defect-acp` selects
    /// `LocalShellBackend` or `AcpShellBackend` at assembly time based on the client's
    /// [`ClientCapabilities::terminal`]. The session holds an `Arc` to it, and all `bash`
    /// tool calls go through it.
    ///
    /// `frontend` indicates how the agent is being accessed ([`Frontend::Acp`] carries
    /// the fs/shell delegation state negotiated during the ACP handshake) and is used to
    /// inject the `# Environment` section of the system prompt.
    ///
    /// # Errors
    ///
    /// MCP startup failure, missing cwd, duplicate id, etc.
    ///
    /// [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
    /// [`ClientCapabilities::terminal`]: agent_client_protocol_schema::ClientCapabilities
    fn create_session(
        &self,
        id: SessionId,
        cwd: PathBuf,
        mcp_servers: Vec<McpServer>,
        fs: Arc<dyn FsBackend>,
        shell: Arc<dyn ShellBackend>,
        frontend: Frontend,
    ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;

    /// Restore an existing session from persistent state.
    ///
    /// `id` names the persisted session to restore. Unlike
    /// [`AgentCore::create_session`], no `cwd` or MCP server list is passed in; the
    /// `# Errors` section below mentions a *restored* `cwd`, so it comes from the
    /// persisted state.
    ///
    /// NOTE(review): `fs` and `shell` presumably play the same role as in
    /// [`AgentCore::create_session`] — confirm against the concrete implementation.
    ///
    /// `frontend` works the same as in [`AgentCore::create_session`] — the restored
    /// session also uses it to inject runtime environment information.
    ///
    /// # Errors
    ///
    /// The session does not exist, the persisted data is corrupted, the restored `cwd` is
    /// unavailable, etc.
    fn load_session(
        &self,
        id: SessionId,
        fs: Arc<dyn FsBackend>,
        shell: Arc<dyn ShellBackend>,
        frontend: Frontend,
    ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;

    /// Look up an existing session by id.
    ///
    /// Returns `None` when the lookup finds no session for `id`.
    fn session(&self, id: &SessionId) -> Option<Arc<dyn Session>>;
}
147
/// Abstraction for restoring a session from persistent storage.
///
/// Concrete implementations typically come from `defect-storage`.
pub trait SessionLoader: Send + Sync {
    /// Read back the state needed for recovery by session id.
    ///
    /// On success the future yields a [`LoadedSession`] (creation info plus message
    /// history).
    ///
    /// # Errors
    ///
    /// The session does not exist, the storage is corrupted, or replay fails.
    fn load_session(&self, id: SessionId) -> BoxFuture<'_, Result<LoadedSession, BoxError>>;
}
159
/// Abstraction for building an additional tool registry for a single session.
///
/// A typical implementation comes from `defect-mcp`: it connects to the list of MCP
/// servers provided by `session/new` or `session/load`, and wraps the remote tools into a
/// [`ToolRegistry`].
pub trait SessionToolFactory: Send + Sync {
    /// Build a session-level tool registry for the current session.
    ///
    /// `mcp_servers` is the MCP server list to connect to.
    /// NOTE(review): `cwd` is presumably the session's working directory — confirm how
    /// implementations use it.
    ///
    /// # Errors
    ///
    /// Returns an error if the external tool source fails to initialize, the remote
    /// inventory cannot be fetched, or the configuration is unsupported.
    fn build_registry(
        &self,
        cwd: PathBuf,
        mcp_servers: Vec<McpServer>,
    ) -> BoxFuture<'_, Result<Arc<dyn ToolRegistry>, BoxError>>;
}
178
/// Observer for when `AgentCore::create_session` succeeds.
///
/// Typical uses:
/// - Start `defect-storage` event subscription persistence
/// - Attach per-session sidecar consumers for tracing / metrics
pub trait SessionObserver: Send + Sync {
    /// Called after the session is successfully created.
    ///
    /// `session` is a shared handle to the newly created session; `info` carries the
    /// stable creation parameters (see [`SessionCreateInfo`]).
    ///
    /// # Errors
    ///
    /// Returns an error if initializing the side-channel consumer fails, preventing the
    /// session from becoming externally visible.
    fn on_session_created(
        &self,
        session: Arc<dyn Session>,
        info: SessionCreateInfo,
    ) -> Result<(), BoxError>;
}
197
/// A public description of an optional permission mode. Used by `defect-acp` to construct
/// an ACP `SessionMode`.
///
/// It is a "policy-free" projection of [`crate::policy::PolicyMode`] — exposing only the
/// id/display fields without leaking the internal decision engine.
///
/// Being plain data, it compares by value (`PartialEq` / `Eq`), so callers and tests can
/// check a mode list against an expected one directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ModeDescriptor {
    /// Identifier of the mode.
    pub id: String,
    /// Display name of the mode.
    pub name: String,
    /// Optional longer description; `None` when the mode does not provide one.
    pub description: Option<String>,
}
209
/// Model selection key: a `(provider vendor, model id)` pair.
///
/// The same model id can be declared by multiple providers (multiple gateways with the
/// same model), so selection must include both the provider vendor and the model id.
/// `provider` refers to [`ProviderInfo::vendor`].
///
/// Because this type is a key, it implements `Hash` in addition to `Eq` and can be used
/// directly in `HashMap` / `HashSet`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct ModelSelection {
    /// Provider vendor that offers the model (see [`ProviderInfo::vendor`]).
    pub provider: String,
    /// Model id as declared by that provider.
    pub model: String,
}
220
/// A single session.
///
/// All methods are trait-object-friendly (`&self` + `BoxFuture`). The `Arc<dyn Session>`
/// is shared between `defect-acp` and the main loop.
pub trait Session: Send + Sync {
    /// The id of this session.
    fn id(&self) -> &SessionId;

    /// Provider metadata used by the current session.
    fn provider_info(&self) -> ProviderInfo;

    /// The model ID used by the current session.
    fn current_model(&self) -> String;

    /// List the model candidates available from the current provider for this session.
    ///
    /// # Errors
    ///
    /// Returns [`ProviderError`] if the provider fails to fetch the model list.
    fn list_models(&self) -> BoxFuture<'_, Result<Vec<ModelInfo>, ProviderError>>;

    /// List the (provider, model) candidate pairs visible to the session. Under a
    /// multi-provider setup, the same session may switch models across providers, so ACP
    /// rendering needs to annotate each candidate with its provider.
    ///
    /// # Errors
    ///
    /// Same as [`Self::list_models`]: returns [`ProviderError`] if fetching the provider
    /// list fails.
    fn list_candidates(&self) -> BoxFuture<'_, Result<Vec<ModelCandidate>, ProviderError>>;

    /// Switches the model for the current session.
    ///
    /// The selection key is a `(provider vendor, model)` pair — the same model id may be
    /// advertised by multiple providers (multiple gateways for the same model), so the
    /// provider must be explicitly specified. The currently in-progress turn retains its
    /// original selection; subsequent turns use the new selection.
    ///
    /// # Errors
    ///
    /// Returns [`ProviderError`] when the provider fails to fetch its model list, or when
    /// the requested `(provider, model)` pair does not exist.
    fn set_model(&self, selection: ModelSelection) -> BoxFuture<'_, Result<(), ProviderError>>;

    /// The current active permission mode ID. Returns `None` if no mode catalog is
    /// loaded.
    ///
    /// Maps to ACP `SessionModeState::current_mode_id`.
    fn current_mode(&self) -> Option<String>;

    /// The list of permission modes available to this session, in assembly order. Returns
    /// an empty list when no mode catalog is loaded. Maps to ACP
    /// `SessionModeState::available_modes`.
    fn available_modes(&self) -> Vec<ModeDescriptor>;

    /// Switch the current permission mode. The change takes effect on subsequent turns;
    /// the in-flight turn retains its original policy (same semantics as
    /// [`Self::set_model`] — the policy is snapshotted when `run_turn` starts).
    ///
    /// # Errors
    ///
    /// Returns [`AgentError::ModeNotFound`] if `mode_id` does not match any available
    /// mode, or if the session has no mode catalog loaded.
    fn set_mode(&self, mode_id: String) -> Result<(), AgentError>;

    /// The current `reasoning_effort` level (`None` = unset, falling back to the provider
    /// default). Maps to the current value of the ACP thought-level configuration item.
    fn current_reasoning_effort(&self) -> Option<ReasoningEffort>;

    /// Sets the `reasoning_effort` level. `None` clears the override (falls back to the
    /// provider default). Takes effect on subsequent turns. Providers that do not support
    /// this concept ignore it when assembling requests.
    fn set_reasoning_effort(&self, effort: Option<ReasoningEffort>);

    /// Subscribe to the event stream. Three independent consumers (acp / storage /
    /// tracing) each call this once without interfering with each other — internally uses
    /// mpsc with fan-out so that slow consumers only experience backpressure without
    /// dropping events.
    fn subscribe(&self) -> EventStream;

    /// A read-only snapshot of the current history, used to replay the transcript to the
    /// client after a session load.
    fn history_snapshot(&self) -> Vec<Message>;

    /// Starts a turn.
    ///
    /// `prompt` is the list of content blocks that opens the turn.
    ///
    /// The returned future resolves when the turn ends:
    /// - `Ok(StopReason)` – normal termination (including Cancelled); drives the ACP
    ///   `PromptResponse`
    /// - `Err(TurnError)` – fatal error (auth expiry, model unavailable, etc.);
    ///   drives the ACP JSON-RPC `Error` response
    ///
    /// [`AgentEvent`]s produced during the turn are pushed via [`Session::subscribe`],
    /// **not** through this future. The `TurnEnded` event is still emitted on the event
    /// stream (for storage / tracing), but the ACP bridge uses this future's outcome.
    ///
    /// # Errors
    ///
    /// Only one turn may be in progress per session at a time; concurrent calls return
    /// [`TurnError::TurnInProgress`]. Other [`TurnError`] variants report failures that
    /// make the turn unable to continue.
    fn run_turn(&self, prompt: Vec<ContentBlock>) -> BoxFuture<'_, Result<StopReason, TurnError>>;

    /// Cancels the current turn. Idempotent: no-op if no turn is in progress.
    fn cancel_turn(&self);

    /// Writes back the client response to the ACP reverse request
    /// `session/request_permission` to the main loop.
    ///
    /// `id` identifies the tool call the permission request was raised for; `outcome` is
    /// the client's resolution.
    fn resolve_permission(&self, id: ToolCallId, outcome: PermissionResolution);
}
327
/// Event stream of [`AgentEvent`]s, as returned by [`Session::subscribe`].
///
/// Type-erased (boxed, `'static`) so that it can be returned from a trait-object method.
pub type EventStream = futures::stream::BoxStream<'static, AgentEvent>;
330
/// Stable information provided to [`SessionObserver`] after successful creation.
///
/// Also embedded in [`LoadedSession`] as the creation data of a restored session.
#[derive(Debug, Clone)]
pub struct SessionCreateInfo {
    /// Id of the session.
    pub id: SessionId,
    /// Working directory of the session.
    pub cwd: PathBuf,
    /// MCP servers associated with the session.
    /// NOTE(review): presumably the list passed to `AgentCore::create_session` — confirm.
    pub mcp_servers: Vec<McpServer>,
}
338
/// Minimal session data restored from persistent storage.
///
/// Produced by [`SessionLoader::load_session`].
#[derive(Debug, Clone)]
pub struct LoadedSession {
    /// Creation-time information of the persisted session (id, cwd, MCP servers).
    pub info: SessionCreateInfo,
    /// Message history read back from storage.
    pub history: Vec<Message>,
}
345
/// Abstraction over message history — pure storage + token accounting.
///
/// Compaction is **not** handled here: summarization requires calling the LLM, which the
/// storage abstraction cannot reach.
/// Compaction is orchestrated in the turn main loop (`session/turn/compact.rs`) — it
/// reads [`History::snapshot`], calls the LLM for a summary, then writes back the
/// computed new message list via [`History::replace`]. This trait is only responsible
/// for: appending, snapshotting, wholesale replacement, and providing the main loop with
/// an estimate of "how many tokens the current history is worth."
///
/// Token estimation strategy (see [`VecHistory`]): use the **actual input token** count
/// reported by the last LLM call as a baseline, then add a **character-heuristic**
/// increment for messages appended after that baseline; when no real baseline is
/// available, fall back to a pure character-heuristic estimate for the entire history.
/// The turn main loop compares this estimate against the compaction threshold.
///
/// All methods take `&self`, so implementations need interior mutability.
pub trait History: Send + Sync {
    /// Appends a message to the tail of the history.
    fn append(&self, msg: Message);

    /// A snapshot of the current history, to be fed into the next LLM call.
    fn snapshot(&self) -> Vec<Message>;

    /// Replace the entire message list after compaction. The turn main loop calls this
    /// to write back the new list consisting of a summary plus the retained tail. The
    /// implementation should also reset the token estimation baseline, since the old
    /// actual token counts no longer apply to the new list.
    fn replace(&self, messages: Vec<Message>);

    /// Prefix splice: replaces the first `drop_count` messages in the **current** list
    /// with the single `summary` message, preserving everything after them. Returns the
    /// actual number of messages dropped (`drop_count` is clamped to the current length).
    ///
    /// This is the primitive for **background compaction** write-back: a background task
    /// computes `drop_count` (= the prefix length to summarize) and `summary` from a
    /// snapshot taken at some point, but while the summarization LLM call is in flight,
    /// the foreground turn may still be `append`ing to the **tail**. Writing back with
    /// `replace(entire list)` would discard any tail messages added during that time.
    /// `splice_prefix` only touches the first `drop_count` messages of the **current**
    /// list, preserving everything from `drop_count..` onward (including tail messages
    /// added in the meantime), so the write-back is correct.
    ///
    /// **Concurrency invariant** (must be maintained): `drop_count` is computed from an
    /// old snapshot and remains valid for the **current** list provided that during the
    /// flight only tail appends (`append`) and in-place content replacements
    /// (micro-compaction `replace` with same-length rebuild) occur — no insertion or
    /// deletion of middle messages. The only operation that removes middle messages is
    /// compaction itself, and compaction runs **solo** (at most one in flight at a
    /// time), so the invariant holds.
    ///
    /// Like [`Self::replace`], resets the token estimation baseline after write-back (the
    /// true token count of the new prefix is unknown).
    fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize;

    /// Records the actual input token count from the last LLM call
    /// (`input + cache_read + cache_creation`). Serves as the precise baseline for
    /// [`Self::token_estimate`]; subsequent [`Self::append`] messages are accumulated
    /// incrementally using a character heuristic.
    fn record_input_tokens(&self, tokens: u64);

    /// Estimates the token count for the current history. `None` indicates the history is
    /// empty or no estimate is available.
    fn token_estimate(&self) -> Option<u64>;
}
409
/// Compaction report. The token counts before and after compaction are wrapped into
/// [`AgentEvent::ContextCompressed`] by the main loop.
///
/// Plain `Copy` data that compares by value (`PartialEq` / `Eq`), so reports can be
/// asserted on directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CompactionReport {
    /// Token count of the history before compaction.
    pub tokens_before: u64,
    /// Token count of the history after compaction.
    pub tokens_after: u64,
}
417
/// Process-level agent error.
///
/// Returned by the session lifecycle methods of [`AgentCore`] and by
/// [`Session::set_mode`]. The enum is `#[non_exhaustive]`: new variants may be added.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum AgentError {
    /// The working directory given for the session is invalid; carries the offending
    /// path.
    #[error("invalid working directory: {0}")]
    InvalidCwd(PathBuf),

    /// MCP server failed to start (stdio process could not be launched / SSE connection
    /// could not be established).
    #[error("mcp startup failed for {server}: {source}")]
    McpStartup {
        /// Name of the MCP server that failed to start.
        server: String,
        /// Underlying startup failure.
        #[source]
        source: BoxError,
    },

    /// The caller-provided [`SessionId`] already exists in the session table.
    /// A monotonic + timestamp ID generator should theoretically never collide; this is a
    /// safety net.
    #[error("session id already in use: {0}")]
    DuplicateSessionId(SessionId),

    /// A session observer reported a failure; the observer's error is kept as the source.
    /// NOTE(review): presumably raised when [`SessionObserver::on_session_created`]
    /// returns `Err` — confirm against the concrete implementation.
    #[error("session observer failed: {0}")]
    Observer(#[source] BoxError),

    /// No session with the given id was found in storage.
    #[error("session not found in storage: {0}")]
    SessionNotFound(SessionId),

    /// The `mode_id` received by `set_mode` is not in the session's mode catalog (or
    /// no catalog is loaded).
    #[error("permission mode not found: {0}")]
    ModeNotFound(String),

    /// Restoring a session failed; the underlying cause is kept as the source.
    #[error("session restore failed: {0}")]
    Restore(#[source] BoxError),

    /// Session capability adjudication failed during startup. See [`SessionInitError`].
    #[error(transparent)]
    Init(#[from] SessionInitError),

    /// Any other error, forwarded transparently. The `#[from]` conversion lets a
    /// [`BoxError`] be propagated with `?`.
    #[error(transparent)]
    Other(#[from] BoxError),
}
461
/// A one-shot adjudication failure raised while a session is starting up.
///
/// See the capabilities design.
/// Startup is refused when `capabilities.<name>.mode = "delegate"` is configured but the
/// current provider's [`crate::llm::LlmProvider::hosted_capabilities`] does not support
/// that capability.
#[non_exhaustive]
#[derive(Debug)]
pub enum SessionInitError {
    /// `Delegate` was explicitly requested by the user, yet the provider lacks the
    /// corresponding hosted capability.
    CapabilityUnsatisfied {
        /// Name of the capability that could not be satisfied (e.g. `"web_search"`).
        capability: &'static str,
        /// Name of the provider bound to the current session.
        provider: String,
    },
}

impl std::fmt::Display for SessionInitError {
    /// Renders a multi-line, user-facing message: the failure on the first line, followed
    /// by two configuration snippets that resolve it. The message has no trailing newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The whole message is one template; `concat!` keeps one source line per
            // output line. Inline `{name}` capture is unavailable for macro-built format
            // strings, hence the explicit named arguments.
            Self::CapabilityUnsatisfied {
                capability,
                provider,
            } => write!(
                f,
                concat!(
                    "{capability} capability is unsatisfied: provider `{provider}` does not support hosted {capability}.\n",
                    "\n",
                    "To fix this, choose one of:\n",
                    "  1. Disable hosted {capability} for this provider:\n",
                    "       [providers.{provider}.capabilities.{capability}]\n",
                    "       mode = \"disabled\"\n",
                    "  2. Change global default to `disabled` and only delegate where supported:\n",
                    "       [capabilities.{capability}]\n",
                    "       mode = \"disabled\"\n",
                    "       [providers.<hosted-supported>.capabilities.{capability}]\n",
                    "       mode = \"delegate\"",
                ),
                capability = capability,
                provider = provider,
            ),
        }
    }
}

// No underlying cause to expose, so the default `source()` is sufficient.
impl std::error::Error for SessionInitError {}
514
/// Reasons why a turn fails.
///
/// Rule of thumb: **only include errors that make the turn unable to continue**. Internal
/// tool failures within a turn, single LLM retry failures, etc. belong in [`AgentEvent`]
/// and the historical state machine instead.
///
/// This is the error type of [`Session::run_turn`]. The enum is `#[non_exhaustive]`: new
/// variants may be added.
#[non_exhaustive]
#[derive(Debug, thiserror::Error)]
pub enum TurnError {
    /// A turn is already in progress for this session.
    #[error("turn already in progress for this session")]
    TurnInProgress,

    /// Provider error that still fails after retries are exhausted. Displayed
    /// transparently; the `#[from]` conversion lets a [`ProviderError`] be propagated
    /// with `?`.
    #[error(transparent)]
    Provider(#[from] ProviderError),

    /// Internal invariant broken (should be a bug). The underlying cause is kept as the
    /// source.
    #[error("internal turn error: {0}")]
    Internal(#[source] BoxError),
}
535
/// Abstraction for a tool registry.
///
/// Both the process-level registry (owned by [`AgentCore`], for built-in tools) and the
/// session-level registry (owned by [`Session`], for MCP tools) share the same shape; the
/// turn main loop looks up tools through the composite registry exposed by [`Session`].
pub trait ToolRegistry: Send + Sync {
    /// Return the schemas of all tools in the registry, used to populate the `tools`
    /// field of an LLM request.
    fn schemas(&self) -> Vec<ToolSchema>;

    /// Looks up a tool by name.
    ///
    /// Returns `None` when the lookup finds no tool for `name`.
    fn get(&self, name: &str) -> Option<Arc<dyn Tool>>;
}