defect_tools/fs/
local_backend.rs1use std::borrow::Cow;
14use std::io;
15use std::path::{Path, PathBuf};
16use std::sync::atomic::{AtomicU64, Ordering};
17use tokio::fs;
18
19use defect_agent::error::BoxError;
20use defect_agent::fs::{Fingerprint, FsBackend, FsError, resolve_workspace_path};
21use futures::future::BoxFuture;
22
/// Largest payload, in bytes, that whole-file reads and writes in this module accept (10 MiB).
///
/// Requests above this size are rejected with a "too large" error instead of being served.
pub const MAX_FS_BYTES: u64 = 10 * 1024 * 1024;

/// Process-wide counter mixed into temporary file names so that concurrent atomic writes in the
/// same process never pick the same temporary path.
static TMP_NONCE: AtomicU64 = AtomicU64::new(0);
29
/// Filesystem backend that serves requests from a workspace directory on the local disk.
#[derive(Debug, Clone)]
pub struct LocalFsBackend {
    /// Directory against which every requested path is resolved.
    workspace_root: PathBuf,
}

impl LocalFsBackend {
    /// Creates a backend whose paths are resolved against `workspace_root`.
    pub fn new(workspace_root: PathBuf) -> Self {
        Self { workspace_root }
    }

    /// Returns the workspace root this backend was created with.
    pub fn workspace_root(&self) -> &Path {
        &self.workspace_root
    }
}
47
48impl FsBackend for LocalFsBackend {
49 fn read_text(
50 &self,
51 path: PathBuf,
52 line: Option<u32>,
53 limit: Option<u32>,
54 ) -> BoxFuture<'_, Result<String, FsError>> {
55 Box::pin(async move {
56 let abs = resolve_workspace_path(&self.workspace_root, &path)?;
57
58 let metadata = fs::metadata(&abs).await.map_err(|e| match e.kind() {
59 io::ErrorKind::NotFound => FsError::NotFound(abs.clone()),
60 _ => FsError::Backend(BoxError::new(e)),
61 })?;
62
63 let windowed = line.is_some() || limit.is_some();
69 if !windowed && metadata.len() > MAX_FS_BYTES {
70 return Err(FsError::TooLarge {
71 bytes: metadata.len(),
72 limit: MAX_FS_BYTES,
73 });
74 }
75
76 if windowed {
77 return read_window_streaming(&abs, line, limit).await;
78 }
79
80 let bytes = fs::read(&abs).await.map_err(|e| match e.kind() {
81 io::ErrorKind::NotFound => FsError::NotFound(abs.clone()),
82 _ => FsError::Backend(BoxError::new(e)),
83 })?;
84
85 if looks_binary(&bytes) {
86 return Err(FsError::NotPermitted(format!(
87 "binary file: {}",
88 abs.display()
89 )));
90 }
91
92 let text = String::from_utf8(bytes)
93 .map_err(|e| FsError::NotPermitted(format!("file is not valid UTF-8: {e}")))?;
94
95 Ok(slice_lines(&text, line, limit))
96 })
97 }
98
99 fn read_bytes(&self, path: PathBuf) -> BoxFuture<'_, Result<Vec<u8>, FsError>> {
100 Box::pin(async move {
101 let abs = resolve_workspace_path(&self.workspace_root, &path)?;
102
103 let metadata = fs::metadata(&abs).await.map_err(|e| match e.kind() {
104 io::ErrorKind::NotFound => FsError::NotFound(abs.clone()),
105 _ => FsError::Backend(BoxError::new(e)),
106 })?;
107 if metadata.len() > MAX_FS_BYTES {
108 return Err(FsError::TooLarge {
109 bytes: metadata.len(),
110 limit: MAX_FS_BYTES,
111 });
112 }
113
114 fs::read(&abs).await.map_err(|e| match e.kind() {
115 io::ErrorKind::NotFound => FsError::NotFound(abs.clone()),
116 _ => FsError::Backend(BoxError::new(e)),
117 })
118 })
119 }
120
121 fn fingerprint(&self, path: PathBuf) -> BoxFuture<'_, Result<Fingerprint, FsError>> {
125 Box::pin(async move {
126 let abs = resolve_workspace_path(&self.workspace_root, &path)?;
127 let metadata = fs::metadata(&abs).await.map_err(|e| match e.kind() {
128 io::ErrorKind::NotFound => FsError::NotFound(abs.clone()),
129 _ => FsError::Backend(BoxError::new(e)),
130 })?;
131
132 let size = metadata.len();
133 let mtime_nanos = metadata
134 .modified()
135 .ok()
136 .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
137 .map(|d| d.as_nanos() as u64)
138 .unwrap_or(0);
139
140 Ok(Fingerprint {
144 bytes: size,
145 hash: mtime_nanos,
146 })
147 })
148 }
149
150 fn write_text(&self, path: PathBuf, content: String) -> BoxFuture<'_, Result<(), FsError>> {
151 Box::pin(async move {
152 let abs = resolve_workspace_path(&self.workspace_root, &path)?;
153
154 if content.len() as u64 > MAX_FS_BYTES {
155 return Err(FsError::TooLarge {
156 bytes: content.len() as u64,
157 limit: MAX_FS_BYTES,
158 });
159 }
160
161 let final_content: Cow<'_, str> = match tokio::fs::read(&abs).await {
165 Ok(prev_bytes) => {
166 let prev = String::from_utf8_lossy(&prev_bytes);
167 let target = detect_line_ending(&prev);
168 normalize(&content, target)
169 }
170 Err(e) if e.kind() == io::ErrorKind::NotFound => Cow::Borrowed(content.as_str()),
171 Err(e) => return Err(FsError::Backend(BoxError::new(e))),
172 };
173
174 atomic_write(&abs, final_content.as_bytes())
175 .await
176 .map_err(|e| FsError::Backend(BoxError::new(e)))?;
177
178 Ok(())
179 })
180 }
181}
182
183async fn read_window_streaming(
194 path: &Path,
195 line: Option<u32>,
196 limit: Option<u32>,
197) -> Result<String, FsError> {
198 use tokio::io::AsyncBufReadExt;
199
200 let file = tokio::fs::File::open(path)
201 .await
202 .map_err(|e| match e.kind() {
203 io::ErrorKind::NotFound => FsError::NotFound(path.to_path_buf()),
204 _ => FsError::Backend(BoxError::new(e)),
205 })?;
206 let mut reader = tokio::io::BufReader::new(file);
207
208 let start = line.unwrap_or(1).max(1) as usize - 1;
209 let take = limit.unwrap_or(u32::MAX) as usize;
210
211 let mut buf = Vec::new();
212 let mut out = String::new();
213 let mut idx: usize = 0;
214 let mut accepted: usize = 0;
215 let mut total_window_bytes: u64 = 0;
216
217 while accepted < take {
218 buf.clear();
219 let n = reader
220 .read_until(b'\n', &mut buf)
221 .await
222 .map_err(|e| FsError::Backend(BoxError::new(e)))?;
223 if n == 0 {
224 break; }
226 if buf.contains(&0u8) {
227 return Err(FsError::NotPermitted(format!(
228 "binary file: {}",
229 path.display()
230 )));
231 }
232
233 if idx >= start {
234 total_window_bytes = total_window_bytes.saturating_add(n as u64);
239 if total_window_bytes > MAX_FS_BYTES {
240 return Err(FsError::TooLarge {
241 bytes: total_window_bytes,
242 limit: MAX_FS_BYTES,
243 });
244 }
245 let chunk = std::str::from_utf8(&buf)
246 .map_err(|e| FsError::NotPermitted(format!("file is not valid UTF-8: {e}")))?;
247 out.push_str(chunk);
248 accepted += 1;
249 }
250 idx += 1;
251 }
252
253 Ok(out)
254}
255
/// Line-terminator convention of a text file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LineEnding {
    /// A bare `\n`.
    Lf,
    /// The two-byte sequence `\r\n`.
    Crlf,
}

/// Counts the line terminators in `text`, returning `(crlf, lone_lf)`.
///
/// Scanning bytes is safe for UTF-8 input because `\r` and `\n` are ASCII and never occur
/// inside a multi-byte sequence.
fn count_line_endings(text: &str) -> (usize, usize) {
    let mut crlf = 0usize;
    let mut lone_lf = 0usize;
    let mut prev = 0u8;
    for &b in text.as_bytes() {
        if b == b'\n' {
            if prev == b'\r' {
                crlf += 1;
            } else {
                lone_lf += 1;
            }
        }
        prev = b;
    }
    (crlf, lone_lf)
}

/// Reports the dominant line ending of `text`.
///
/// Returns `LineEnding::Crlf` only when `\r\n` terminators strictly outnumber bare `\n`
/// terminators; ties and text without any newline yield `LineEnding::Lf`.
fn detect_line_ending(text: &str) -> LineEnding {
    let (crlf, lone_lf) = count_line_endings(text);
    if crlf > lone_lf {
        LineEnding::Crlf
    } else {
        LineEnding::Lf
    }
}

/// Rewrites the line terminators of `content` to `target`.
///
/// For `Lf`, every `\r\n` becomes `\n`. For `Crlf`, every `\n` that is not already preceded by
/// `\r` becomes `\r\n`. A `\r` that is not followed by `\n` is left untouched in both modes.
///
/// The input is borrowed whenever no byte has to change, so no allocation happens for content
/// that already matches the target.
fn normalize(content: &str, target: LineEnding) -> Cow<'_, str> {
    match target {
        LineEnding::Lf => {
            if content.contains("\r\n") {
                Cow::Owned(content.replace("\r\n", "\n"))
            } else {
                Cow::Borrowed(content)
            }
        }
        LineEnding::Crlf => {
            let (_, lone_lf) = count_line_endings(content);
            if lone_lf == 0 {
                return Cow::Borrowed(content);
            }
            // Exactly one `\r` is inserted per bare `\n`, so the final size is known up front.
            let mut converted = String::with_capacity(content.len() + lone_lf);
            let mut prev_was_cr = false;
            for ch in content.chars() {
                if ch == '\n' && !prev_was_cr {
                    converted.push('\r');
                }
                converted.push(ch);
                prev_was_cr = ch == '\r';
            }
            Cow::Owned(converted)
        }
    }
}
290
/// Heuristically decides whether `bytes` is binary rather than text.
///
/// Only the first 8 KiB are inspected. Empty input is text. Any NUL byte in the inspected
/// prefix means binary. Otherwise the input is binary when more than 30% of the inspected
/// bytes (integer percentage, rounded down) are control characters other than tab, line feed,
/// vertical tab, form feed and carriage return.
fn looks_binary(bytes: &[u8]) -> bool {
    const SNIFF_LEN: usize = 8 * 1024;

    let sample = &bytes[..bytes.len().min(SNIFF_LEN)];
    if sample.is_empty() {
        return false;
    }

    let mut control_bytes = 0usize;
    for &byte in sample {
        match byte {
            0x00 => return true,
            0x01..=0x08 | 0x0e..=0x1f => control_bytes += 1,
            _ => {}
        }
    }

    control_bytes * 100 / sample.len() > 30
}
307
/// Returns a window of lines from `text`, keeping each line's terminator.
///
/// `line` is the 1-based number of the first line to keep (`None` and values below 1 mean the
/// first line). `limit` is the maximum number of lines to keep (`None` means no practical
/// cap). When both are `None`, the whole text is returned as is.
fn slice_lines(text: &str, line: Option<u32>, limit: Option<u32>) -> String {
    if line.is_none() && limit.is_none() {
        return text.to_owned();
    }
    let start = line.unwrap_or(1).max(1) as usize - 1;
    let take = limit.unwrap_or(u32::MAX) as usize;
    // `skip` + `take` never computes `start + take`, which could overflow `usize` on 32-bit
    // targets when both values are close to `u32::MAX`.
    text.split_inclusive('\n').skip(start).take(take).collect()
}
328
329async fn atomic_write(path: &Path, bytes: &[u8]) -> io::Result<()> {
333 let parent = path
334 .parent()
335 .ok_or_else(|| io::Error::other("path has no parent"))?;
336 tokio::fs::create_dir_all(parent).await?;
337 let file_name = path
338 .file_name()
339 .ok_or_else(|| io::Error::other("path has no file component"))?;
340 let nonce = TMP_NONCE.fetch_add(1, Ordering::Relaxed);
341 let pid = std::process::id();
342 let tmp_path = parent.join(format!(
343 ".{}.defect-{pid}-{nonce}.tmp",
344 file_name.to_string_lossy()
345 ));
346
347 let cleanup = TmpCleanup {
349 path: Some(tmp_path.clone()),
350 };
351 tokio::fs::write(&tmp_path, bytes).await?;
352 tokio::fs::rename(&tmp_path, path).await?;
353 cleanup.disarm();
354 Ok(())
355}
356
/// Drop guard that deletes a temporary file unless it has been disarmed.
struct TmpCleanup {
    /// File to delete on drop; `None` once the guard no longer owns a file.
    path: Option<PathBuf>,
}

impl TmpCleanup {
    /// Consumes the guard without deleting the file.
    fn disarm(mut self) {
        // Clearing the path turns the `Drop` that runs at the end of this function into a
        // no-op.
        drop(self.path.take());
    }
}

impl Drop for TmpCleanup {
    /// Deletes the guarded file, ignoring any failure (for example when it was never created).
    fn drop(&mut self) {
        let Some(tmp) = self.path.take() else {
            return;
        };
        let _ = std::fs::remove_file(&tmp);
    }
}
376
377#[cfg(test)]
378mod tests;