Skip to main content

defect_tools/fs/
read.rs

//! `read_file` tool: reads a UTF-8 text file through an optional offset/limit
//! line window, or returns an image file as image content.
4
5use std::path::PathBuf;
6use std::pin::Pin;
7use std::sync::Arc;
8
9use agent_client_protocol_schema::{
10    Content, ContentBlock, ImageContent, TextContent, ToolCallContent, ToolCallLocation,
11    ToolCallUpdateFields, ToolKind,
12};
13use base64::Engine;
14use defect_agent::error::BoxError;
15use defect_agent::fs::{FsBackend, FsError};
16use defect_agent::tool::{
17    SafetyClass, Tool, ToolCallDescription, ToolContext, ToolError, ToolEvent, ToolSchema,
18    ToolStream,
19};
20use defect_config::FsToolConfig;
21use futures::future::BoxFuture;
22use futures::stream;
23use serde::{Deserialize, Serialize};
24use serde_json::json;
25
/// Number of lines read when a call does not pass `limit` (built-in default).
const DEFAULT_LIMIT: u32 = 2_000;
/// Built-in upper bound for `limit`, used by [`ReadFileTool::new`].
const MAX_LIMIT: u32 = 5_000;
28
29pub struct ReadFileTool {
30    schema: ToolSchema,
31    default_limit: u32,
32    max_limit: u32,
33}
34
35impl ReadFileTool {
36    pub fn new() -> Self {
37        Self::from_config(&FsToolConfig {
38            read_default_limit: DEFAULT_LIMIT,
39            read_max_limit: MAX_LIMIT,
40        })
41    }
42
43    pub fn from_config(config: &FsToolConfig) -> Self {
44        let default_limit = config.read_default_limit.max(1);
45        let max_limit = config.read_max_limit.max(default_limit);
46        Self {
47            schema: ToolSchema {
48                name: "read_file".to_string(),
49                description: "Read a file from the workspace. \
50                              For UTF-8 text files: optionally read a window starting at `offset` (1-based line) for `limit` lines; \
51                              returns the content with 1-based line numbers prepended. \
52                              For image files (.png/.jpg/.jpeg/.gif/.webp): returns the image itself as visual content (offset/limit ignored). \
53                              Refuses other binary files and files larger than 10 MiB."
54                    .to_string(),
55                input_schema: json!({
56                    "type": "object",
57                    "properties": {
58                        "path": {
59                            "type": "string",
60                            "description": "Absolute path or path relative to the session cwd. \
61                                            Must resolve inside the workspace root."
62                        },
63                        "offset": {
64                            "type": "integer",
65                            "minimum": 1,
66                            "description": "Optional 1-based start line (inclusive). Defaults to 1."
67                        },
68                        "limit": {
69                            "type": "integer",
70                            "minimum": 1,
71                            "maximum": max_limit,
72                            "description": format!(
73                                "Optional max number of lines to read. Defaults to {default_limit}."
74                            )
75                        }
76                    },
77                    "required": ["path"]
78                }),
79            },
80            default_limit,
81            max_limit,
82        }
83    }
84}
85
86impl Default for ReadFileTool {
87    fn default() -> Self {
88        Self::new()
89    }
90}
91
92#[derive(Debug, Deserialize)]
93struct ReadArgs {
94    path: String,
95    #[serde(default)]
96    offset: Option<u32>,
97    #[serde(default)]
98    limit: Option<u32>,
99}
100
101#[derive(Debug, Serialize)]
102struct ReadFileOutput {
103    bytes: u64,
104    lines_returned: u32,
105    /// Start line number (offset) of this window. Used by the LLM to reassemble positions
106    /// during chunked reads.
107    start_line: u32,
108    /// `true` if the backend truncated by `limit`; exact detection requires a second
109    /// read, so this uses a heuristic (lines returned == limit implies possible
110    /// truncation).
111    truncated: bool,
112}
113
114impl Tool for ReadFileTool {
115    fn schema(&self) -> &ToolSchema {
116        &self.schema
117    }
118
119    fn safety_hint(&self, _args: &serde_json::Value) -> SafetyClass {
120        SafetyClass::ReadOnly
121    }
122
123    fn describe<'a>(
124        &'a self,
125        args: &'a serde_json::Value,
126        _ctx: ToolContext<'a>,
127    ) -> BoxFuture<'a, ToolCallDescription> {
128        Box::pin(async move {
129            let path = args.get("path").and_then(|v| v.as_str()).unwrap_or("");
130            let offset = args
131                .get("offset")
132                .and_then(|v| v.as_u64())
133                .map(|n| n as u32);
134
135            let title = if path.is_empty() {
136                "Read".to_string()
137            } else {
138                format!("Read {path}")
139            };
140            let mut fields = ToolCallUpdateFields::default();
141            fields.title = Some(title);
142            fields.kind = Some(ToolKind::Read);
143            if !path.is_empty() {
144                fields.locations = Some(vec![
145                    ToolCallLocation::new(PathBuf::from(path)).line(offset),
146                ]);
147            }
148            ToolCallDescription { fields }
149        })
150    }
151
152    fn execute(&self, args: serde_json::Value, ctx: ToolContext<'_>) -> ToolStream {
153        let cancel = ctx.cancel.clone();
154        let fs = ctx.fs.clone();
155        let default_limit = self.default_limit;
156        let max_limit = self.max_limit;
157        let fut = async move { run_read(args, cancel, fs, default_limit, max_limit).await };
158        let s: Pin<Box<dyn futures::Stream<Item = ToolEvent> + Send>> = Box::pin(stream::once(fut));
159        s
160    }
161}
162
163async fn run_read(
164    args: serde_json::Value,
165    cancel: tokio_util::sync::CancellationToken,
166    fs: Arc<dyn FsBackend>,
167    default_limit: u32,
168    max_limit: u32,
169) -> ToolEvent {
170    let parsed: ReadArgs = match serde_json::from_value(args) {
171        Ok(v) => v,
172        Err(err) => return ToolEvent::Failed(ToolError::InvalidArgs(BoxError::new(err))),
173    };
174
175    // For images, detect by extension and convert via `read_bytes` → base64 →
176    // `ContentBlock::Image`.
177    // `offset`/`limit` are meaningless for images and are ignored.
178    if let Some(mime) = image_mime(&parsed.path) {
179        return run_read_image(parsed.path, mime, cancel, fs).await;
180    }
181
182    let limit = parsed.limit.unwrap_or(default_limit).min(max_limit).max(1);
183    let offset = parsed.offset.unwrap_or(1).max(1);
184
185    let path = PathBuf::from(&parsed.path);
186    let read_fut = fs.read_text(path, Some(offset), Some(limit));
187    let text = tokio::select! {
188        biased;
189        () = cancel.cancelled() => return ToolEvent::Failed(ToolError::Canceled),
190        r = read_fut => match r {
191            Ok(t) => t,
192            Err(e) => return ToolEvent::Failed(map_fs_err(e)),
193        },
194    };
195
196    let lines_returned = text.split_inclusive('\n').count() as u32;
197    let truncated = lines_returned >= limit;
198    let bytes = text.len() as u64;
199
200    let formatted = format_with_line_numbers(&text, offset);
201
202    let raw_output = serde_json::to_value(ReadFileOutput {
203        bytes,
204        lines_returned,
205        start_line: offset,
206        truncated,
207    })
208    .unwrap_or(serde_json::Value::Null);
209
210    let mut fields = ToolCallUpdateFields::default();
211    fields.content = Some(vec![ToolCallContent::Content(Content::new(
212        ContentBlock::Text(TextContent::new(formatted)),
213    ))]);
214    fields.raw_output = Some(raw_output);
215    ToolEvent::Completed(fields)
216}
217
218#[derive(Debug, Serialize)]
219struct ReadImageOutput {
220    bytes: u64,
221    mime: String,
222}
223
224/// Reads an image: fetches raw bytes → base64 → returns as a [`ContentBlock::Image`].
225///
226/// Does not reject with `looks_binary` (that check is for text paths); size limits are
227/// handled by the backend's own threshold in [`FsBackend::read_bytes`]. The delegated
228/// backend (ACP) `read_bytes` returns `NotPermitted` by default — in that case, a
229/// [`ToolError::Execution`] is raised so the model learns from the error text that the
230/// delegated environment does not support reading images.
231async fn run_read_image(
232    path: String,
233    mime: &'static str,
234    cancel: tokio_util::sync::CancellationToken,
235    fs: Arc<dyn FsBackend>,
236) -> ToolEvent {
237    let read_fut = fs.read_bytes(PathBuf::from(&path));
238    let bytes = tokio::select! {
239        biased;
240        () = cancel.cancelled() => return ToolEvent::Failed(ToolError::Canceled),
241        r = read_fut => match r {
242            Ok(b) => b,
243            Err(e) => return ToolEvent::Failed(map_fs_err(e)),
244        },
245    };
246
247    let byte_len = bytes.len() as u64;
248    let encoded = base64::engine::general_purpose::STANDARD.encode(&bytes);
249
250    let raw_output = serde_json::to_value(ReadImageOutput {
251        bytes: byte_len,
252        mime: mime.to_string(),
253    })
254    .unwrap_or(serde_json::Value::Null);
255
256    let mut fields = ToolCallUpdateFields::default();
257    fields.content = Some(vec![ToolCallContent::Content(Content::new(
258        ContentBlock::Image(ImageContent::new(encoded, mime.to_string())),
259    ))]);
260    fields.raw_output = Some(raw_output);
261    ToolEvent::Completed(fields)
262}
263
/// Maps the extension of `path` to an image MIME type.
///
/// The extension is compared ASCII case-insensitively, so `photo.PNG` and
/// `photo.png` are treated alike. Returns `None` when the path has no extension,
/// when the extension is not valid UTF-8, or when it is not one of the supported
/// image types (`png`, `jpg`/`jpeg`, `gif`, `webp`).
fn image_mime(path: &str) -> Option<&'static str> {
    /// Supported extensions (lowercase) paired with their MIME types.
    const IMAGE_TYPES: [(&str, &str); 5] = [
        ("png", "image/png"),
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("gif", "image/gif"),
        ("webp", "image/webp"),
    ];

    let ext = std::path::Path::new(path).extension()?.to_str()?;
    // Case-insensitive comparison in place: no lowercase copy of the extension
    // needs to be allocated on each call.
    IMAGE_TYPES
        .iter()
        .find(|(known, _)| ext.eq_ignore_ascii_case(known))
        .map(|&(_, mime)| mime)
}
279
280fn map_fs_err(e: FsError) -> ToolError {
281    ToolError::Execution(BoxError::new(e))
282}
283
/// Prefixes every line of `text` with its line number, starting at `offset`.
///
/// Each output line has the form `{number:>4}| {content}\n`: the number is
/// right-aligned to a minimum width of 4 and the line's own trailing `\n` (if any)
/// is replaced by exactly one `\n`, so a final line without a newline still ends
/// with one. Empty input yields an empty string. Line numbers saturate at
/// `u32::MAX` instead of overflowing.
fn format_with_line_numbers(text: &str, offset: u32) -> String {
    use std::fmt::Write as _;

    // The output is at least as long as the input; reserving that much up front
    // avoids the earliest reallocations.
    let mut out = String::with_capacity(text.len());
    let mut line_no = offset;
    for line in text.split_inclusive('\n') {
        let display = line.strip_suffix('\n').unwrap_or(line);
        // Format straight into `out` instead of building a temporary `String`
        // per line. Writing to a `String` never returns an error.
        let _ = writeln!(out, "{line_no:>4}| {display}");
        line_no = line_no.saturating_add(1);
    }
    out
}
293}