defect_agent/
session.rs

//! Session — state container and lifecycle interface for a single conversation.
//!
//! ## Abstraction layers
//!
//! - [`AgentCore`]: the process-level "agent instance". It holds the built-in tool set
//!   and global configuration, and is the root object that `defect-cli` assembles and
//!   injects into `defect-acp::serve`.
//! - [`Session`]: the lifecycle unit for a single conversation. It holds the history,
//!   the per-session tool table (including MCP tools), the cancel token, and the event
//!   stream.
//! - [`History`]: a wrapper around the message history, with hooks reserved for
//!   compression, token counting, and resume.
//!
//! All three are **exposed as traits**. Concrete implementations live in the `session/`
//! submodule of this crate and at the assembly point in `defect-cli`; `defect-acp`
//! interacts with them only through the traits.
19
20use std::path::PathBuf;
21use std::sync::Arc;
22
23use agent_client_protocol_schema::{ContentBlock, McpServer, SessionId, StopReason, ToolCallId};
24use futures::future::BoxFuture;
25
26use crate::error::BoxError;
27use crate::event::{AgentEvent, PermissionResolution};
28use crate::fs::FsBackend;
29use crate::llm::{
30    Message, ModelCandidate, ModelInfo, ProviderError, ProviderInfo, ReasoningEffort,
31};
32use crate::shell::ShellBackend;
33use crate::tool::{Tool, ToolSchema};
34
35mod background;
36mod capabilities;
37mod context;
38mod default;
39mod events;
40mod goal;
41mod history;
42mod permissions;
43mod prompt;
44mod tool_registry;
45mod turn;
46
47pub use background::{
48    BackgroundOutcome, BackgroundProgressConfig, BackgroundResult, BackgroundTasks, BlockKind,
49    ProgressBlock, TaskHandle, TaskSnapshot, TaskStatus, format_background_outcome,
50};
51pub use capabilities::{
52    ResolvedSessionCapabilities, SessionCapabilitiesConfig, WebSearchCapabilityConfig,
53    WebSearchCapabilityMode,
54};
55pub use context::{Frontend, RunningContext};
56pub use default::{DefaultAgentCore, DefaultAgentCoreBuilder, DefaultSession, new_session_id};
57pub use events::EventEmitter;
58pub use goal::GoalState;
59pub use history::VecHistory;
60pub use permissions::PermissionGate;
61pub use prompt::resolve_system_prompt;
62pub use tool_registry::{CompositeRegistry, StaticToolRegistry, StaticToolRegistryBuilder};
63/// Re-exported for reuse within the crate: the `spawn_agent` sub-agent tool needs a
64/// `RequestAuditTracker` instance when constructing a nested [`TurnRunner`]. This type is
65/// not public (it exposes internal diagnostic state), but `crate::tool::spawn_agent` in
66/// the same crate must be able to call `new()`.
67pub(crate) use turn::RequestAuditTracker;
68pub use turn::{
69    BasePromptConfig, CompactionSlot, PromptConfig, TurnConfig, TurnRequestLimit, TurnRunner,
70};
71
72/// Process-level agent root object.
73///
74/// `defect-cli` constructs a concrete implementation at startup (holding the LLM provider
75/// registry, built-in tool set, and configuration) and injects an `Arc<dyn AgentCore>`
76/// into `defect-acp::serve`.
77///
78/// Rationale for extracting a trait:
79/// - Allows injecting a mock in tests without spinning up a real LLM.
80/// - If an "embedded agent" (library mode called by a host application) emerges in the
81///   future, a second concrete implementation can be added without touching the ACP
82///   bridge code.
83pub trait AgentCore: Send + Sync {
84    /// Creates a new session.
85    ///
86    /// `id` is generated and passed in by the caller (the `defect-acp` `session/new`
87    /// handler) — the filesystem backend already needs a `SessionId` when constructed
88    /// outside of [`AgentCore::create_session`] (see the ACP filesystem delegation
89    /// contract). Concrete implementations treat it as the authoritative external id and
90    /// return [`AgentError::DuplicateSessionId`] on duplicates.
91    ///
92    /// `mcp_servers` is the per-session MCP server list from the `session/new` request;
93    /// the concrete implementation spawns subprocesses or establishes SSE connections
94    /// during initialization, wrapping each MCP tool as a [`Tool`] and adding it to the
95    /// session's tool table.
96    ///
97    /// `fs` is the session-level filesystem backend — `defect-acp` selects
98    /// `LocalFsBackend` or `AcpFsBackend` at assembly time based on the client's
99    /// [`FileSystemCapabilities`]. The session holds an `Arc` to it, and all filesystem
100    /// tool calls go through it.
101    ///
102    /// `shell` is the session-level shell backend — `defect-acp` selects
103    /// `LocalShellBackend` or `AcpShellBackend` at assembly time based on the client's
104    /// [`ClientCapabilities::terminal`]. The session holds an `Arc` to it, and all `bash`
105    /// tool calls go through it.
106    ///
107    /// `frontend` indicates how the agent is being accessed ([`Frontend::Acp`] carries
108    /// the fs/shell delegation state negotiated during the ACP handshake) and is used to
109    /// inject the `# Environment` section of the system prompt.
110    ///
111    /// # Errors
112    ///
113    /// MCP startup failure, missing cwd, duplicate id, etc.
114    ///
115    /// [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
116    /// [`ClientCapabilities::terminal`]: agent_client_protocol_schema::ClientCapabilities
117    fn create_session(
118        &self,
119        id: SessionId,
120        cwd: PathBuf,
121        mcp_servers: Vec<McpServer>,
122        fs: Arc<dyn FsBackend>,
123        shell: Arc<dyn ShellBackend>,
124        frontend: Frontend,
125    ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;
126
127    /// Restore an existing session from persistent state.
128    ///
129    /// `frontend` works the same as in [`AgentCore::create_session`] — the restored
130    /// session also uses it to inject runtime environment information.
131    ///
132    /// # Errors
133    ///
134    /// The session does not exist, the persisted data is corrupted, the restored `cwd` is
135    /// unavailable, etc.
136    fn load_session(
137        &self,
138        id: SessionId,
139        fs: Arc<dyn FsBackend>,
140        shell: Arc<dyn ShellBackend>,
141        frontend: Frontend,
142    ) -> BoxFuture<'_, Result<Arc<dyn Session>, AgentError>>;
143
144    /// Look up an existing session by id.
145    fn session(&self, id: &SessionId) -> Option<Arc<dyn Session>>;
146}
147
148/// Abstraction for restoring a session from persistent storage.
149///
150/// Concrete implementations typically come from `defect-storage`.
151pub trait SessionLoader: Send + Sync {
152    /// Read back the state needed for recovery by session id.
153    ///
154    /// # Errors
155    ///
156    /// The session does not exist, the storage is corrupted, or replay fails.
157    fn load_session(&self, id: SessionId) -> BoxFuture<'_, Result<LoadedSession, BoxError>>;
158}
159
160/// Abstraction for building an additional tool registry for a single session.
161///
162/// A typical implementation comes from `defect-mcp`: it connects to the list of MCP
163/// servers provided by `session/new` or `session/load`, and wraps the remote tools into a
164/// [`ToolRegistry`].
165pub trait SessionToolFactory: Send + Sync {
166    /// Build a session-level tool registry for the current session.
167    ///
168    /// # Errors
169    ///
170    /// Returns an error if the external tool source fails to initialize, the remote
171    /// inventory cannot be fetched, or the configuration is unsupported.
172    fn build_registry(
173        &self,
174        cwd: PathBuf,
175        mcp_servers: Vec<McpServer>,
176    ) -> BoxFuture<'_, Result<Arc<dyn ToolRegistry>, BoxError>>;
177}
178
179/// Observer for when `AgentCore::create_session` succeeds.
180///
181/// Typical uses:
182/// - Start `defect-storage` event subscription persistence
183/// - Attach per-session sidecar consumers for tracing / metrics
184pub trait SessionObserver: Send + Sync {
185    /// Called after the session is successfully created.
186    ///
187    /// # Errors
188    ///
189    /// Returns an error if initializing the side‑channel consumer fails, preventing the
190    /// session from becoming externally visible.
191    fn on_session_created(
192        &self,
193        session: Arc<dyn Session>,
194        info: SessionCreateInfo,
195    ) -> Result<(), BoxError>;
196}
197
/// Public description of one selectable permission mode; `defect-acp` builds an ACP
/// `SessionMode` from it.
///
/// This is a "policy-free" projection of [`crate::policy::PolicyMode`]: only the
/// id/display fields are exposed, so the internal decision engine does not leak.
#[derive(Clone, Debug)]
pub struct ModeDescriptor {
    /// Identifier of the mode.
    pub id: String,
    /// Display name of the mode.
    pub name: String,
    /// Optional longer description of the mode.
    pub description: Option<String>,
}
209
/// Key that selects a model: the pair `(provider vendor, model id)`.
///
/// Several providers may declare the same model id (multiple gateways serving the same
/// model), so a model id alone is ambiguous and the provider vendor has to be part of
/// the key.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ModelSelection {
    /// Provider vendor, i.e. [`ProviderInfo::vendor`].
    pub provider: String,
    /// Model id as declared by that provider.
    pub model: String,
}
220
221/// A single session.
222///
223/// All methods are trait-object-friendly (`&self` + `BoxFuture`). The `Arc<dyn Session>`
224/// is shared between `defect-acp` and the main loop.
225pub trait Session: Send + Sync {
226    fn id(&self) -> &SessionId;
227
228    /// Provider metadata used by the current session.
229    fn provider_info(&self) -> ProviderInfo;
230
231    /// The model ID used by the current session.
232    fn current_model(&self) -> String;
233
234    /// List the model candidates available from the current provider for this session.
235    ///
236    /// # Errors
237    ///
238    /// Returns [`ProviderError`] if the provider fails to fetch the model list.
239    fn list_models(&self) -> BoxFuture<'_, Result<Vec<ModelInfo>, ProviderError>>;
240
241    /// List the (provider, model) candidate pairs visible to the session. Under a
242    /// multi-provider setup, the same session may switch models across providers, so ACP
243    /// rendering needs to annotate each candidate with its provider.
244    ///
245    /// # Errors
246    ///
247    /// Same as [`Self::list_models`]: returns [`ProviderError`] if fetching the provider
248    /// list fails.
249    fn list_candidates(&self) -> BoxFuture<'_, Result<Vec<ModelCandidate>, ProviderError>>;
250
251    /// Switches the model for the current session.
252    ///
253    /// The selection key is a `(provider vendor, model)` pair — the same model id may be
254    /// advertised by multiple providers (multiple gateways for the same model), so the
255    /// provider must be explicitly specified. The currently in-progress turn retains its
256    /// original selection; subsequent turns use the new selection.
257    ///
258    /// # Errors
259    ///
260    /// Returns [`ProviderError`] when the provider fails to fetch its model list, or when
261    /// the requested `(provider, model)` pair does not exist.
262    fn set_model(&self, selection: ModelSelection) -> BoxFuture<'_, Result<(), ProviderError>>;
263
264    /// The current active permission mode ID. Returns `None` if no mode catalog is
265    /// loaded.
266    ///
267    /// Maps to ACP `SessionModeState::current_mode_id`.
268    fn current_mode(&self) -> Option<String>;
269
270    /// The list of permission modes available to this session, in assembly order. Returns
271    /// an empty list when no mode directory is mounted. Maps to ACP
272    /// `SessionModeState::available_modes`.
273    fn available_modes(&self) -> Vec<ModeDescriptor>;
274
275    /// Switch the current permission mode. The change takes effect on subsequent turns;
276    /// the in-flight turn retains its original policy (same semantics as
277    /// [`Self::set_model`] — the policy is snapshotted when `run_turn` starts).
278    ///
279    /// # Errors
280    ///
281    /// Returns [`AgentError::ModeNotFound`] if `mode_id` does not match any available
282    /// mode, or if the session has no mode directory installed.
283    fn set_mode(&self, mode_id: String) -> Result<(), AgentError>;
284
285    /// The current `reasoning_effort` level (`None` = unset, falling back to the provider
286    /// default). Maps to the current value of the ACP thought-level configuration item.
287    fn current_reasoning_effort(&self) -> Option<ReasoningEffort>;
288
289    /// Sets the `reasoning_effort` level. `None` clears the override (falls back to the
290    /// provider default). Takes effect on subsequent turns. Providers that do not support
291    /// this concept ignore it when assembling requests.
292    fn set_reasoning_effort(&self, effort: Option<ReasoningEffort>);
293
294    /// Subscribe to the event stream. Three independent consumers (acp / storage /
295    /// tracing) each call this once without interfering with each other — internally uses
296    /// mpsc with fan-out so that slow consumers only experience backpressure without
297    /// dropping events.
298    fn subscribe(&self) -> EventStream;
299
300    /// A read-only snapshot of the current history, used to replay the transcript to the
301    /// client after a session load.
302    fn history_snapshot(&self) -> Vec<Message>;
303
304    /// Starts a turn.
305    ///
306    /// The returned future resolves when the turn ends:
307    /// - `Ok(StopReason)` – normal termination (including Cancelled); drives the ACP
308    ///   `PromptResponse`
309    /// - `Err(TurnError)` – fatal error (auth expiry, model unavailable, etc.);
310    ///   drives the ACP JSON-RPC `Error` response
311    ///
312    /// [`AgentEvent`]s produced during the turn are pushed via [`Session::subscribe`],
313    /// **not** through this future. The `TurnEnded` event is still emitted on the event
314    /// stream (for storage / tracing), but the ACP bridge uses this future's outcome.
315    ///
316    /// Only one turn may be in progress per session at a time; concurrent calls return
317    /// [`TurnError::TurnInProgress`].
318    fn run_turn(&self, prompt: Vec<ContentBlock>) -> BoxFuture<'_, Result<StopReason, TurnError>>;
319
320    /// Cancels the current turn. Idempotent: no-op if no turn is in progress.
321    fn cancel_turn(&self);
322
323    /// Writes back the client response to the ACP reverse request
324    /// `session/request_permission` to the main loop.
325    fn resolve_permission(&self, id: ToolCallId, outcome: PermissionResolution);
326
327    /// Current context usage. Read-only and cheap; backs the `/context` slash command.
328    fn context_status(&self) -> ContextStatus;
329
330    /// Synchronously compact the session history now (out-of-band `/compact` command),
331    /// reusing the same boundary selection + summarization as the turn loop's hard
332    /// watermark.
333    ///
334    /// Returns `Ok(Some(report))` when a compaction ran, `Ok(None)` when there was no safe
335    /// boundary to summarize (e.g. a single short turn — nothing to do).
336    ///
337    /// # Errors
338    ///
339    /// Returns [`TurnError::TurnInProgress`] if a turn is currently running: compaction
340    /// rewrites history and would race the in-flight turn, so the caller must `/cancel` or
341    /// wait first.
342    fn compact_now(&self) -> BoxFuture<'_, Result<Option<CompactionReport>, TurnError>>;
343}
344
345/// Event stream. Type-erased to support trait object return.
346pub type EventStream = futures::stream::BoxStream<'static, AgentEvent>;
347
348/// Stable information provided to [`SessionObserver`] after successful creation.
349#[derive(Debug, Clone)]
350pub struct SessionCreateInfo {
351    pub id: SessionId,
352    pub cwd: PathBuf,
353    pub mcp_servers: Vec<McpServer>,
354}
355
356/// Minimal session data restored from persistent storage.
357#[derive(Debug, Clone)]
358pub struct LoadedSession {
359    pub info: SessionCreateInfo,
360    pub history: Vec<Message>,
361}
362
363/// Abstraction over message history — pure storage + token accounting.
364///
365/// Compaction is **not** handled here: summarization requires calling the LLM, which the
366/// storage abstraction cannot reach.
367/// Compaction is orchestrated in the turn main loop (`session/turn/compact.rs`) — it
368/// reads [`History::snapshot`], calls the LLM for a summary, then writes back the
369/// computed new message list via [`History::replace`]. This trait is only responsible
370/// for: appending, snapshotting, wholesale replacement, and providing the main loop with
371/// an estimate of "how many tokens the current history is worth."
372///
373/// Token estimation strategy (see [`VecHistory`]): use the **actual input token** count
374/// reported by the last LLM call as a baseline, then add a **character-heuristic**
375/// increment for messages appended after that baseline; when no real baseline is
376/// available, fall back to a pure character-heuristic estimate for the entire history.
377/// The turn main loop compares this estimate against the compaction threshold.
378pub trait History: Send + Sync {
379    /// Appends a message.
380    fn append(&self, msg: Message);
381
382    /// A snapshot of the current history, to be fed into the next LLM call.
383    fn snapshot(&self) -> Vec<Message>;
384
385    /// Replace the entire message list after compression. The turn main loop calls this
386    /// to write back the new list consisting of a summary plus the retained tail. The
387    /// implementation should also reset the token estimation baseline, since the old
388    /// actual token counts no longer apply to the new list.
389    fn replace(&self, messages: Vec<Message>);
390
391    /// Prefix splice: replaces the first `drop_count` messages in the **current** list
392    /// with the single `summary` message, preserving everything after them. Returns the
393    /// actual number of messages dropped (`drop_count` is clamped to the current length).
394    ///
395    /// This is the primitive for **background compression** write-back: a background task
396    /// computes `drop_count` (= the prefix length to summarize) and `summary` from a
397    /// snapshot taken at some point, but while the summarization LLM call is in flight,
398    /// the foreground turn may still be `append`ing to the **tail**. Writing back with
399    /// `replace(entire list)` would discard any tail messages added during that time.
400    /// `splice_prefix` only touches the first `drop_count` messages of the **current**
401    /// list, preserving everything from `drop_count..` onward (including tail messages
402    /// added in the meantime), so the write-back is correct.
403    ///
404    /// **Concurrency invariant** (must be maintained): `drop_count` is computed from an
405    /// old snapshot and remains valid for the **current** list provided that during the
406    /// flight only tail appends (`append`) and in-place content replacements
407    /// (micro-compression `replace` with same-length rebuild) occur — no insertion or
408    /// deletion of middle messages. The only operation that removes middle messages is
409    /// compression itself, and compression runs **solo** (at most one in flight at a
410    /// time), so the invariant holds.
411    ///
412    /// Like [`Self::replace`], resets the token estimation baseline after write-back (the
413    /// true token count of the new prefix is unknown).
414    fn splice_prefix(&self, drop_count: usize, summary: Message) -> usize;
415
416    /// Records the actual input token count from the last LLM call
417    /// (`input + cache_read + cache_creation`). Serves as the precise baseline for
418    /// [`Self::token_estimate`]; subsequent [`Self::append`] messages are accumulated
419    /// incrementally using a character heuristic.
420    fn record_input_tokens(&self, tokens: u64);
421
422    /// Estimates the token count for the current history. `None` indicates the history is
423    /// empty or no estimate is available.
424    fn token_estimate(&self) -> Option<u64>;
425}
426
/// Result of one compaction. The main loop wraps the before/after token counts into
/// [`AgentEvent::ContextCompressed`].
#[derive(Clone, Copy, Debug)]
pub struct CompactionReport {
    /// Token count before the compaction.
    pub tokens_before: u64,
    /// Token count after the compaction.
    pub tokens_after: u64,
}
434
/// Point-in-time view of how much of the context a session is using; this is what
/// [`Session::context_status`] returns. It backs the `/context` slash command (and any
/// client-side context gauge).
#[derive(Clone, Copy, Debug)]
pub struct ContextStatus {
    /// Estimated number of tokens currently held in history. `None` while no estimate
    /// exists yet (e.g. an empty session before the first request).
    pub used_tokens: Option<u64>,
    /// Size of the model's context window in tokens, when the provider exposes it.
    pub context_window: Option<u64>,
    /// Share of the window that is in use (`used / window`); present only when both
    /// values are known.
    pub ratio: Option<f64>,
}
447
448/// Process-level agent error.
449#[non_exhaustive]
450#[derive(Debug, thiserror::Error)]
451pub enum AgentError {
452    #[error("invalid working directory: {0}")]
453    InvalidCwd(PathBuf),
454
455    /// MCP server failed to start (stdio process could not be launched / SSE connection
456    /// could not be established).
457    #[error("mcp startup failed for {server}: {source}")]
458    McpStartup {
459        server: String,
460        #[source]
461        source: BoxError,
462    },
463
464    /// The caller-provided [`SessionId`] already exists in the session table.
465    /// A monotonic + timestamp ID generator should theoretically never collide; this is a
466    /// safety net.
467    #[error("session id already in use: {0}")]
468    DuplicateSessionId(SessionId),
469
470    #[error("session observer failed: {0}")]
471    Observer(#[source] BoxError),
472
473    #[error("session not found in storage: {0}")]
474    SessionNotFound(SessionId),
475
476    /// The `mode_id` received by `set_mode` is not in the session's mode directory (or
477    /// the directory is not mounted).
478    #[error("permission mode not found: {0}")]
479    ModeNotFound(String),
480
481    #[error("session restore failed: {0}")]
482    Restore(#[source] BoxError),
483
484    /// Session capability adjudication failed during startup. See [`SessionInitError`].
485    #[error(transparent)]
486    Init(#[from] SessionInitError),
487
488    #[error(transparent)]
489    Other(#[from] BoxError),
490}
491
/// Failure of the one-time adjudication that happens while a session starts up.
///
/// See capabilities design.
/// A session is refused when `capabilities.<name>.mode = "delegate"` is configured but
/// the current provider's [`crate::llm::LlmProvider::hosted_capabilities`] does not
/// support that capability.
#[derive(Debug)]
#[non_exhaustive]
pub enum SessionInitError {
    /// `Delegate` was chosen explicitly by the user, yet the provider lacks the
    /// corresponding hosted capability.
    CapabilityUnsatisfied {
        /// Name of the capability that cannot be satisfied (e.g. `"web_search"`).
        capability: &'static str,
        /// Name of the provider the current session is bound to.
        provider: String,
    },
}

impl std::fmt::Display for SessionInitError {
    /// Renders a multi-line, user-facing message: one line stating the problem, a blank
    /// line, and then two configuration snippets showing how to resolve it. The message
    /// does not end with a newline.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            // The whole message is a single template; `concat!` keeps one source line
            // per output line so the layout stays readable.
            Self::CapabilityUnsatisfied {
                capability,
                provider,
            } => write!(
                f,
                concat!(
                    "{cap} capability is unsatisfied: provider `{prov}` does not support hosted {cap}.\n",
                    "\n",
                    "To fix this, choose one of:\n",
                    "  1. Disable hosted {cap} for this provider:\n",
                    "       [providers.{prov}.capabilities.{cap}]\n",
                    "       mode = \"disabled\"\n",
                    "  2. Change global default to `disabled` and only delegate where supported:\n",
                    "       [capabilities.{cap}]\n",
                    "       mode = \"disabled\"\n",
                    "       [providers.<hosted-supported>.capabilities.{cap}]\n",
                    "       mode = \"delegate\"",
                ),
                cap = capability,
                prov = provider,
            ),
        }
    }
}

impl std::error::Error for SessionInitError {}
544
545/// Reasons why a turn fails.
546///
547/// Rule of thumb: **only include errors that make the turn unable to continue**. Internal
548/// tool failures within a turn, single LLM retry failures, etc. belong in [`AgentEvent`]
549/// and the historical state machine instead.
550#[non_exhaustive]
551#[derive(Debug, thiserror::Error)]
552pub enum TurnError {
553    /// A turn is already in progress for this session.
554    #[error("turn already in progress for this session")]
555    TurnInProgress,
556
557    /// Provider error that still fails after retries are exhausted.
558    #[error(transparent)]
559    Provider(#[from] ProviderError),
560
561    /// Internal invariant broken (should be a bug).
562    #[error("internal turn error: {0}")]
563    Internal(#[source] BoxError),
564}
565
566/// Abstraction for a tool registry.
567///
568/// Both the process-level registry (owned by [`AgentCore`], for built-in tools) and the
569/// session-level registry (owned by [`Session`], for MCP tools) share the same shape; the
570/// turn main loop looks up tools through the composite registry exposed by [`Session`].
571pub trait ToolRegistry: Send + Sync {
572    /// Return the schemas of all tools in the registry, used to populate the `tools`
573    /// field of an LLM request.
574    fn schemas(&self) -> Vec<ToolSchema>;
575
576    /// Looks up a tool by name.
577    fn get(&self, name: &str) -> Option<Arc<dyn Tool>>;
578}
defect_agent/session.rs

defect_agent/
session.rs