defect_agent/fs.rs
1//! Filesystem backend abstraction.
2//!
3//! [`FsBackend`] is the trait boundary between the fs tool family (`read_file` /
4//! `write_file` / `edit_file`) and the underlying I/O. Two implementations:
5//! - `defect_tools::fs::LocalFsBackend`: writes directly to disk
6//! - `defect_acp::fs::AcpFsBackend`: delegates to the client via ACP `fs/read_text_file`
7//! / `fs/write_text_file` reverse requests
8//!
9//! Assembly is handled in the `defect-acp` `session/new` handler — the backend is
10//! selected based on the client's [`FileSystemCapabilities`] negotiation result and
11//! injected into [`crate::session::AgentCore::create_session`].
12//!
13//! [`FileSystemCapabilities`]: agent_client_protocol_schema::FileSystemCapabilities
14
15use std::collections::hash_map::DefaultHasher;
16use std::hash::{Hash, Hasher};
17use std::path::{Path, PathBuf};
18
19use futures::future::BoxFuture;
20use thiserror::Error;
21
22use crate::error::BoxError;
23
/// Content fingerprint used to detect concurrent modification of a file.
///
/// `edit_file` takes one fingerprint right after reading (see [`Fingerprint::of`]) and
/// obtains a second one just before writing (see [`FsBackend::fingerprint`]); if the two
/// differ, the file changed in between and the write is a conflict.
///
/// The value is a `(bytes, hash)` pair rather than a bare hash: requiring both the
/// length and the `u64` hash to match makes an accidental collision negligible.
/// `DefaultHasher` is adequate here because fingerprints are only compared within one
/// process and are never persisted or handed to another process, so the standard
/// library leaving its algorithm unspecified is acceptable.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Fingerprint {
    /// Length component; [`Fingerprint::of`] stores the content length in bytes here.
    pub bytes: u64,
    /// Hash component; [`Fingerprint::of`] stores the `DefaultHasher` digest here.
    pub hash: u64,
}

impl Fingerprint {
    /// Builds the fingerprint of an in-memory string.
    ///
    /// `edit_file` calls this on the `old_content` it has just read, which saves a
    /// second read of the file before the write.
    pub fn of(content: &str) -> Self {
        let mut hasher = DefaultHasher::new();
        // Hash through `str`'s `Hash` impl (not the raw bytes) so the digest matches
        // what any other `Hash`-based computation of the same string would produce.
        Hash::hash(content, &mut hasher);
        let hash = hasher.finish();
        let bytes = content.len() as u64;
        Self { bytes, hash }
    }
}
52
53/// A no-op fs backend for testing only. All methods return [`FsError::NotPermitted`],
54/// allowing test scenarios that require `Arc<dyn FsBackend>` (without actually running fs
55/// tools) to skip setup.
56///
57/// In production, use `defect_tools::fs::LocalFsBackend` or
58/// `defect_acp::fs::AcpFsBackend`.
59pub struct NoopFsBackend;
60
61impl FsBackend for NoopFsBackend {
62 fn read_text(
63 &self,
64 _path: PathBuf,
65 _line: Option<u32>,
66 _limit: Option<u32>,
67 ) -> BoxFuture<'_, Result<String, FsError>> {
68 Box::pin(async {
69 Err(FsError::NotPermitted(
70 "NoopFsBackend cannot read".to_string(),
71 ))
72 })
73 }
74
75 fn write_text(&self, _path: PathBuf, _content: String) -> BoxFuture<'_, Result<(), FsError>> {
76 Box::pin(async {
77 Err(FsError::NotPermitted(
78 "NoopFsBackend cannot write".to_string(),
79 ))
80 })
81 }
82}
83
84/// Fs backend trait.
85///
86/// Two verbs cover all low-level operations of the fs tool family:
87/// - `edit_file` is composed at the tool layer (first [`read_text`] then
88/// [`write_text`](FsBackend::write_text));
89/// the backend is unaware of patch semantics
90/// - Delete / move / mkdir are not part of the fs tool family (ACP has no
91/// corresponding inverse methods);
92/// the LLM uses `bash`
93///
94/// Parameters use owned `PathBuf` / `String` to confine the future's lifetime to `&'_
95/// self`,
96/// avoiding explicit lifetime parameters; same trade-off as `LlmProvider::complete`.
97///
98/// [`read_text`]: FsBackend::read_text
99pub trait FsBackend: Send + Sync {
100 /// Reads the entire file as UTF-8 text.
101 ///
102 /// `line` / `limit` have the same semantics as ACP `ReadTextFileRequest`:
103 /// - `line = Some(n)` starts reading from line n (1-based)
104 /// - `limit = Some(k)` reads at most k lines
105 /// - Both `None` reads the full file
106 fn read_text(
107 &self,
108 path: PathBuf,
109 line: Option<u32>,
110 limit: Option<u32>,
111 ) -> BoxFuture<'_, Result<String, FsError>>;
112
113 /// Reads the raw bytes of an entire file. The `read_file` tool takes this path when
114 /// it detects a binary type such as an image, passing the bytes to the caller for
115 /// base64 encoding into a multimodal `tool_result`.
116 ///
117 /// The default implementation returns [`FsError::NotPermitted`] — the delegated
118 /// backend (`AcpFsBackend`) uses the ACP `fs/read_text_file` reverse channel, which
119 /// is text-only and cannot obtain binary data. In ACP environments, reading images is
120 /// discouraged by the system prompt (the `# Environment` section notes that the
121 /// frontend is delegated). The local backend (`LocalFsBackend`) overrides this to
122 /// read directly from disk.
123 fn read_bytes(&self, path: PathBuf) -> BoxFuture<'_, Result<Vec<u8>, FsError>> {
124 Box::pin(async move {
125 let _ = path;
126 Err(FsError::NotPermitted(
127 "this backend cannot read raw bytes (e.g. images); delegated environments only support text reads".to_string(),
128 ))
129 })
130 }
131
132 /// Write a UTF-8 text file, overwriting any existing content.
133 ///
134 /// The backend is responsible for ensuring the parent directory exists (`mkdir -p`
135 /// semantics).
136 ///
137 /// Line-ending / atomicity responsibilities are split as:
138 /// - Local backend performs line-ending normalization and atomic write via `tmp +
139 /// rename`
140 /// - Delegated backend leaves the decision to the client
141 fn write_text(&self, path: PathBuf, content: String) -> BoxFuture<'_, Result<(), FsError>>;
142
143 /// Returns a "content fingerprint" used by `edit_file` to detect concurrent write
144 /// conflicts in the read–modify–write window.
145 ///
146 /// The default implementation reads the full content via [`FsBackend::read_text`] and
147 /// computes [`Fingerprint::of`] — this allows delegating backends (e.g.
148 /// `AcpFsBackend`) to work without additional protocol methods. Local backends may
149 /// override this method to use cheaper checks like mtime + size.
150 fn fingerprint(&self, path: PathBuf) -> BoxFuture<'_, Result<Fingerprint, FsError>> {
151 Box::pin(async move {
152 let text = self.read_text(path, None, None).await?;
153 Ok(Fingerprint::of(&text))
154 })
155 }
156}
157
/// Error type returned by [`FsBackend`] operations and by
/// [`resolve_workspace_path`].
///
/// Marked `#[non_exhaustive]`, so code matching on it from another crate needs a
/// wildcard arm; variants may be added later.
#[non_exhaustive]
#[derive(Debug, Error)]
pub enum FsError {
    /// The file does not exist. Carries the path that was looked up.
    #[error("file not found: {0}")]
    NotFound(PathBuf),

    /// The operation was refused: path outside the workspace, binary file, client
    /// denial, insufficient permissions, and similar.
    ///
    /// The payload is a free-form, human-readable reason. It is a string placeholder
    /// for now and may become a dedicated enum in a later iteration.
    #[error("operation not permitted: {0}")]
    NotPermitted(String),

    /// The file exceeds the size threshold. `bytes` is the size reported in the
    /// message and `limit` the threshold it was compared against.
    #[error("file too large: {bytes} bytes > {limit}")]
    TooLarge { bytes: u64, limit: u64 },

    /// The file was modified externally during a read-modify-write cycle.
    ///
    /// `edit_file` compares fingerprints obtained via [`FsBackend::fingerprint`]
    /// before writing; a mismatch raises `Conflict`, prompting the LLM to re-read and
    /// re-edit instead of overwriting someone else's change.
    #[error("file changed since last read: {0}")]
    Conflict(PathBuf),

    /// The underlying I/O or RPC layer failed. The original error is kept as the
    /// `source` of this one.
    #[error("backend failure: {0}")]
    Backend(#[source] BoxError),
}
187
188/// Resolves a request path to an absolute path within the workspace, verifying it does
189/// not escape.
190///
191/// Behavior:
192/// 1. Relative paths are joined with `workspace_root`; absolute paths are used as-is.
193/// 2. Walks up from the target to find the nearest **existing** ancestor and
194/// canonicalizes it
195/// (on writes, the target itself and even multiple parent directories may not yet
196/// exist).
197/// 3. Checks that the real path of the existing ancestor starts with the real path of
198/// `workspace_root` —
199/// prevents symlink escape (e.g. `workspace/dir/link → /etc`).
200/// 4. Appends the remaining non-existent path segments as-is, then appends the file name.
201///
202/// Both `LocalFsBackend` and `AcpFsBackend` implementations of [`crate::fs::FsBackend`]
203/// call
204/// this same function — in delegated mode the agent still enforces its own boundary, not
205/// relying on the client.
206///
207/// # Errors
208/// - [`FsError::NotPermitted`]: path escapes / no parent directory / no file name
209/// - [`FsError::Backend`]: canonicalization of ancestor failed (IO error)
210pub fn resolve_workspace_path(workspace_root: &Path, requested: &Path) -> Result<PathBuf, FsError> {
211 let target = if requested.is_absolute() {
212 requested.to_path_buf()
213 } else {
214 workspace_root.join(requested)
215 };
216
217 let parent = target.parent().ok_or_else(|| {
218 FsError::NotPermitted(format!("path has no parent: {}", target.display()))
219 })?;
220
221 // Walk up from `parent` to find the nearest existing ancestor directory.
222 // `canonicalize` requires the path to exist — in a write scenario the target
223 // and even multiple parent directories may not yet exist, so we walk up to
224 // the first real directory before calling `canonicalize`.
225 let (existing_ancestor, missing_suffix) = find_existing_ancestor(parent).ok_or_else(|| {
226 FsError::NotPermitted(format!(
227 "no existing ancestor found for: {}",
228 target.display()
229 ))
230 })?;
231
232 let existing_canon =
233 std::fs::canonicalize(existing_ancestor).map_err(|e| FsError::Backend(BoxError::new(e)))?;
234
235 let root_canon =
236 std::fs::canonicalize(workspace_root).unwrap_or_else(|_| workspace_root.to_path_buf());
237
238 if !existing_canon.starts_with(&root_canon) {
239 return Err(FsError::NotPermitted(format!(
240 "path {} escapes workspace root {}",
241 target.display(),
242 root_canon.display()
243 )));
244 }
245
246 let file_name = target.file_name().ok_or_else(|| {
247 FsError::NotPermitted(format!("path has no file component: {}", target.display()))
248 })?;
249
250 // Append the missing path segments back to the existing ancestor, then join the file
251 // name.
252 Ok(existing_canon.join(missing_suffix).join(file_name))
253}
254
/// Climbs from `path` towards the filesystem root and returns
/// `(nearest existing ancestor, skipped segments)`.
///
/// `path` itself counts as a candidate, so an existing `path` is returned unchanged
/// with an empty remainder. The skipped segments are the names of the non-existent
/// components in their original top-down order; they are not canonicalized, so that
/// re-joining them onto the ancestor reproduces the requested location.
///
/// Returns `None` when a non-existent component has no file name (for example it ends
/// in `..`) or when the ancestors are exhausted without finding an existing one.
fn find_existing_ancestor(path: &Path) -> Option<(&Path, PathBuf)> {
    let mut skipped = Vec::new();
    for candidate in path.ancestors() {
        if candidate.exists() {
            // Names were gathered leaf-first; flip them back into path order.
            let remainder: PathBuf = skipped.into_iter().rev().collect();
            return Some((candidate, remainder));
        }
        skipped.push(candidate.file_name()?.to_os_string());
    }
    None
}
275
276#[cfg(test)]
277mod tests;