Skip to main content

defect_agent/
fs.rs

1//! Filesystem backend abstraction.
2//!
3//! [`FsBackend`] is the trait boundary between the fs tool family (`read_file` /
4//! `write_file` / `edit_file`) and the underlying I/O. Two implementations:
5//! - `defect_tools::fs::LocalFsBackend`: writes directly to disk
6//! - `defect_acp::fs::AcpFsBackend`: delegates to the client via ACP `fs/read_text_file`
7//!   / `fs/write_text_file` reverse requests
8//!
9//! Assembly is handled in the `defect-acp` `session/new` handler — the backend is
10//! selected based on the client's [`FileSystemCapabilities`] negotiation result and
11//! injected into [`crate::session::AgentCore::create_session`].
12//!
13//! [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
14
15use std::collections::hash_map::DefaultHasher;
16use std::hash::{Hash, Hasher};
17use std::path::{Path, PathBuf};
18
19use futures::future::BoxFuture;
20use thiserror::Error;
21
22use crate::error::BoxError;
23
/// Content fingerprint used to detect concurrent modification of a file.
///
/// Produced by [`FsBackend::fingerprint`] and [`Fingerprint::of`]: `edit_file` records a
/// fingerprint right after reading and takes another one right before writing; if the
/// two differ, the file was changed in between and the write is a conflict.
///
/// The value is a `(bytes, hash)` pair rather than a bare hash: requiring both the
/// length and the `u64` hash to match makes an accidental collision negligible.
/// `DefaultHasher` is only used for one-shot, in-process comparisons — the result is
/// never persisted or exchanged between processes — so the standard library's
/// "unspecified but stable" hashing semantics are acceptable here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Fingerprint {
    /// Length of the content in bytes.
    pub bytes: u64,
    /// `DefaultHasher` digest of the content.
    pub hash: u64,
}

impl Fingerprint {
    /// Builds the fingerprint of an in-memory text.
    ///
    /// `edit_file` calls this on the `old_content` it has just read, so it does not need
    /// to read the file a second time merely to obtain the "before" fingerprint.
    pub fn of(content: &str) -> Self {
        let mut hasher = DefaultHasher::new();
        Hash::hash(content, &mut hasher);
        let hash = hasher.finish();
        // `len()` counts bytes, not characters.
        let bytes = content.len() as u64;
        Self { bytes, hash }
    }
}
52
53/// A no-op fs backend for testing only. All methods return [`FsError::NotPermitted`],
54/// allowing test scenarios that require `Arc<dyn FsBackend>` (without actually running fs
55/// tools) to skip setup.
56///
57/// In production, use `defect_tools::fs::LocalFsBackend` or
58/// `defect_acp::fs::AcpFsBackend`.
59pub struct NoopFsBackend;
60
61impl FsBackend for NoopFsBackend {
62    fn read_text(
63        &self,
64        _path: PathBuf,
65        _line: Option<u32>,
66        _limit: Option<u32>,
67    ) -> BoxFuture<'_, Result<String, FsError>> {
68        Box::pin(async {
69            Err(FsError::NotPermitted(
70                "NoopFsBackend cannot read".to_string(),
71            ))
72        })
73    }
74
75    fn write_text(&self, _path: PathBuf, _content: String) -> BoxFuture<'_, Result<(), FsError>> {
76        Box::pin(async {
77            Err(FsError::NotPermitted(
78                "NoopFsBackend cannot write".to_string(),
79            ))
80        })
81    }
82}
83
84/// Fs backend trait.
85///
86/// Two verbs cover all low-level operations of the fs tool family:
87/// - `edit_file` is composed at the tool layer (first [`read_text`] then
88///   [`write_text`](FsBackend::write_text));
89///   the backend is unaware of patch semantics
90/// - Delete / move / mkdir are not part of the fs tool family (ACP has no
91///   corresponding inverse methods);
92///   the LLM uses `bash`
93///
94/// Parameters use owned `PathBuf` / `String` to confine the future's lifetime to `&'_
95/// self`,
96/// avoiding explicit lifetime parameters; same trade-off as `LlmProvider::complete`.
97///
98/// [`read_text`]: FsBackend::read_text
99pub trait FsBackend: Send + Sync {
100    /// Reads the entire file as UTF-8 text.
101    ///
102    /// `line` / `limit` have the same semantics as ACP `ReadTextFileRequest`:
103    /// - `line = Some(n)` starts reading from line n (1-based)
104    /// - `limit = Some(k)` reads at most k lines
105    /// - Both `None` reads the full file
106    fn read_text(
107        &self,
108        path: PathBuf,
109        line: Option<u32>,
110        limit: Option<u32>,
111    ) -> BoxFuture<'_, Result<String, FsError>>;
112
113    /// Reads the raw bytes of an entire file. The `read_file` tool takes this path when
114    /// it detects a binary type such as an image, passing the bytes to the caller for
115    /// base64 encoding into a multimodal `tool_result`.
116    ///
117    /// The default implementation returns [`FsError::NotPermitted`] — the delegated
118    /// backend (`AcpFsBackend`) uses the ACP `fs/read_text_file` reverse channel, which
119    /// is text-only and cannot obtain binary data. In ACP environments, reading images is
120    /// discouraged by the system prompt (the `# Environment` section notes that the
121    /// frontend is delegated). The local backend (`LocalFsBackend`) overrides this to
122    /// read directly from disk.
123    fn read_bytes(&self, path: PathBuf) -> BoxFuture<'_, Result<Vec<u8>, FsError>> {
124        Box::pin(async move {
125            let _ = path;
126            Err(FsError::NotPermitted(
127                "this backend cannot read raw bytes (e.g. images); delegated environments only support text reads".to_string(),
128            ))
129        })
130    }
131
132    /// Write a UTF-8 text file, overwriting any existing content.
133    ///
134    /// The backend is responsible for ensuring the parent directory exists (`mkdir -p`
135    /// semantics).
136    ///
137    /// Line-ending / atomicity responsibilities are split as:
138    /// - Local backend performs line-ending normalization and atomic write via `tmp +
139    ///   rename`
140    /// - Delegated backend leaves the decision to the client
141    fn write_text(&self, path: PathBuf, content: String) -> BoxFuture<'_, Result<(), FsError>>;
142
143    /// Returns a "content fingerprint" used by `edit_file` to detect concurrent write
144    /// conflicts in the read–modify–write window.
145    ///
146    /// The default implementation reads the full content via [`FsBackend::read_text`] and
147    /// computes [`Fingerprint::of`] — this allows delegating backends (e.g.
148    /// `AcpFsBackend`) to work without additional protocol methods. Local backends may
149    /// override this method to use cheaper checks like mtime + size.
150    fn fingerprint(&self, path: PathBuf) -> BoxFuture<'_, Result<Fingerprint, FsError>> {
151        Box::pin(async move {
152            let text = self.read_text(path, None, None).await?;
153            Ok(Fingerprint::of(&text))
154        })
155    }
156}
157
158/// Fs backend error.
159#[non_exhaustive]
160#[derive(Debug, Error)]
161pub enum FsError {
162    /// File not found.
163    #[error("file not found: {0}")]
164    NotFound(PathBuf),
165
166    /// Operation not permitted: path out of bounds, binary file, client deny,
167    /// insufficient permissions, etc.
168    /// Currently uses a string placeholder; may become an enum in a later iteration.
169    #[error("operation not permitted: {0}")]
170    NotPermitted(String),
171
172    /// File exceeds the size threshold.
173    #[error("file too large: {bytes} bytes > {limit}")]
174    TooLarge { bytes: u64, limit: u64 },
175
176    /// File was externally modified during a read-modify-write cycle.
177    /// `edit_file` compares fingerprints via [`FsBackend::fingerprint`] before writing:
178    /// a mismatch raises `Conflict`, prompting the LLM to re-read and re-edit instead of
179    /// overwriting.
180    #[error("file changed since last read: {0}")]
181    Conflict(PathBuf),
182
183    /// Underlying I/O or RPC failure.
184    #[error("backend failure: {0}")]
185    Backend(#[source] BoxError),
186}
187
188/// Resolves a request path to an absolute path within the workspace, verifying it does
189/// not escape.
190///
191/// Behavior:
192/// 1. Relative paths are joined with `workspace_root`; absolute paths are used as-is.
193/// 2. Walks up from the target to find the nearest **existing** ancestor and
194///    canonicalizes it
195///    (on writes, the target itself and even multiple parent directories may not yet
196///    exist).
197/// 3. Checks that the real path of the existing ancestor starts with the real path of
198///    `workspace_root` —
199///    prevents symlink escape (e.g. `workspace/dir/link → /etc`).
200/// 4. Appends the remaining non-existent path segments as-is, then appends the file name.
201///
202/// Both `LocalFsBackend` and `AcpFsBackend` implementations of [`crate::fs::FsBackend`]
203/// call
204/// this same function — in delegated mode the agent still enforces its own boundary, not
205/// relying on the client.
206///
207/// # Errors
208/// - [`FsError::NotPermitted`]: path escapes / no parent directory / no file name
209/// - [`FsError::Backend`]: canonicalization of ancestor failed (IO error)
210pub fn resolve_workspace_path(workspace_root: &Path, requested: &Path) -> Result<PathBuf, FsError> {
211    let target = if requested.is_absolute() {
212        requested.to_path_buf()
213    } else {
214        workspace_root.join(requested)
215    };
216
217    let parent = target.parent().ok_or_else(|| {
218        FsError::NotPermitted(format!("path has no parent: {}", target.display()))
219    })?;
220
221    // Walk up from `parent` to find the nearest existing ancestor directory.
222    // `canonicalize` requires the path to exist — in a write scenario the target
223    // and even multiple parent directories may not yet exist, so we walk up to
224    // the first real directory before calling `canonicalize`.
225    let (existing_ancestor, missing_suffix) = find_existing_ancestor(parent).ok_or_else(|| {
226        FsError::NotPermitted(format!(
227            "no existing ancestor found for: {}",
228            target.display()
229        ))
230    })?;
231
232    let existing_canon =
233        std::fs::canonicalize(existing_ancestor).map_err(|e| FsError::Backend(BoxError::new(e)))?;
234
235    let root_canon =
236        std::fs::canonicalize(workspace_root).unwrap_or_else(|_| workspace_root.to_path_buf());
237
238    if !existing_canon.starts_with(&root_canon) {
239        return Err(FsError::NotPermitted(format!(
240            "path {} escapes workspace root {}",
241            target.display(),
242            root_canon.display()
243        )));
244    }
245
246    let file_name = target.file_name().ok_or_else(|| {
247        FsError::NotPermitted(format!("path has no file component: {}", target.display()))
248    })?;
249
250    // Append the missing path segments back to the existing ancestor, then join the file
251    // name.
252    Ok(existing_canon.join(missing_suffix).join(file_name))
253}
254
/// Climbs from `path` towards the root and returns `(nearest existing ancestor,
/// remaining path segments)`.
///
/// The first element is `path` itself when it already exists. The second element holds
/// the segments that were found missing, in their original top-to-bottom order and
/// exactly as written (not canonicalized), so that joining it onto the ancestor
/// reproduces the original path.
///
/// Returns `None` when the climb hits a component without a file name (for example
/// `..` or an empty path) or runs out of parents before anything exists.
fn find_existing_ancestor(path: &Path) -> Option<(&Path, PathBuf)> {
    // Missing segments, innermost first.
    let mut pending = Vec::new();
    for candidate in path.ancestors() {
        if candidate.exists() {
            // Segments were gathered bottom-up; flip them before reassembling.
            let suffix: PathBuf = pending.into_iter().rev().collect();
            return Some((candidate, suffix));
        }
        pending.push(candidate.file_name()?);
    }
    None
}
275
276#[cfg(test)]
277mod tests;