1use anda_core::{BoxError, FunctionDefinition, Resource, StateFeatures, Tool, ToolOutput};
2use ic_auth_types::ByteBufB64;
3use serde::{Deserialize, Serialize};
4use serde_json::json;
5use std::path::PathBuf;
6
7use super::{
8 BASE64_ENCODING, MAX_FILE_SIZE_BYTES, UTF8_ENCODING, ensure_file_size_within_limit,
9 ensure_regular_file, format_workspaces, normalize_workspaces, resolve_read_path_in_workspaces,
10 tool_workspaces,
11};
12use crate::{
13 context::BaseCtx,
14 hook::{DynToolHook, ToolHook},
15};
16
/// Arguments accepted by the `read_file` tool.
///
/// `offset` and `limit` select a window of lines and only affect files whose
/// bytes decode as UTF-8; content returned as base64 is never windowed.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ReadFileArgs {
    /// Path to the file. Relative paths are resolved against the configured
    /// workspaces; absolute paths must be inside one configured workspace.
    pub path: String,
    /// Zero-based line offset for UTF-8 text output (default: 0).
    #[serde(default)]
    pub offset: usize,
    /// Maximum number of UTF-8 text lines to return
    /// (default: 0, meaning all remaining lines).
    #[serde(default)]
    pub limit: usize,
}
29
/// Result returned by the `read_file` tool.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ReadFileOutput {
    /// File content: the (optionally line-windowed) text when the file is
    /// valid UTF-8, otherwise the base64 encoding of the raw bytes.
    pub content: String,
    /// Encoding of `content`: `UTF8_ENCODING` for text, `BASE64_ENCODING`
    /// for bytes that are not valid UTF-8.
    pub encoding: String,
    /// Length of the whole file as reported by its filesystem metadata,
    /// regardless of any line window applied to `content`.
    pub size: u64,
    /// MIME type inferred from the file bytes, when one is recognized.
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mime_type: Option<String>,
    /// Total number of lines in the file; only set for UTF-8 content.
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_lines: Option<usize>,
}
46
/// Hook type for the `read_file` tool.
///
/// When a value of this type is present in the call context's state, the tool
/// passes its arguments through `before_tool_call` before reading and its
/// output through `after_tool_call` before returning.
pub type ReadFileHook = DynToolHook<ReadFileArgs, ReadFileOutput>;
48
/// Tool that reads a file from one of its configured workspace directories.
#[derive(Clone)]
pub struct ReadFileTool {
    /// Workspace directories, as returned by `normalize_workspaces` at
    /// construction time. They are combined with the call context's metadata
    /// on every call to decide where paths may be resolved.
    workspaces: Vec<PathBuf>,
    /// Human-readable description reported through `Tool::description`.
    description: String,
}
54
55impl ReadFileTool {
56 pub const NAME: &'static str = "read_file";
58
59 pub fn new(workspace: PathBuf) -> Self {
62 Self::with_workspaces([workspace])
63 }
64
65 pub fn with_workspaces<I>(workspaces: I) -> Self
68 where
69 I: IntoIterator<Item = PathBuf>,
70 {
71 let workspaces = normalize_workspaces(workspaces);
72 let description = format!(
73 "Read files from the filesystem in the workspace directories ({})",
74 format_workspaces(&workspaces)
75 );
76 Self {
77 workspaces,
78 description,
79 }
80 }
81
82 pub fn with_description(mut self, description: String) -> Self {
83 self.description = description;
84 self
85 }
86}
87
88impl Tool<BaseCtx> for ReadFileTool {
89 type Args = ReadFileArgs;
90 type Output = ReadFileOutput;
91
92 fn name(&self) -> String {
93 Self::NAME.to_string()
94 }
95
96 fn description(&self) -> String {
97 self.description.clone()
98 }
99
100 fn definition(&self) -> FunctionDefinition {
101 FunctionDefinition {
102 name: self.name(),
103 description: self.description(),
104 parameters: json!({
105 "type": "object",
106 "properties": {
107 "path": {
108 "type": "string",
109 "description": "Path to the file. Relative paths resolve from the configured workspaces in priority order; absolute paths must be inside one configured workspace."
110 },
111 "offset": {
112 "type": "integer",
113 "description": "Zero-based line offset for UTF-8 text output (default: 0)"
114 },
115 "limit": {
116 "type": "integer",
117 "description": "Maximum number of UTF-8 text lines to return (default: 0, all remaining lines)"
118 }
119 },
120 "required": ["path"]
121 }),
122 strict: Some(true),
123 }
124 }
125
126 async fn call(
127 &self,
128 ctx: BaseCtx,
129 args: Self::Args,
130 _resources: Vec<Resource>,
131 ) -> Result<ToolOutput<Self::Output>, BoxError> {
132 let hook = ctx.get_state::<ReadFileHook>();
133
134 let args = if let Some(hook) = &hook {
135 hook.before_tool_call(&ctx, args).await?
136 } else {
137 args
138 };
139
140 let workspaces = tool_workspaces(ctx.meta(), &self.workspaces);
141 let resolved = resolve_read_path_in_workspaces(&workspaces, &args.path).await?;
142 let workspace_display = resolved.workspace.display().to_string();
143 let resolved_path = resolved.path;
144
145 let meta = tokio::fs::metadata(&resolved_path)
146 .await
147 .map_err(|err| {
148 format!(
149 "Failed to read file metadata (workspace: {}, requested_path: {}, resolved_path: {}): {err}",
150 workspace_display,
151 args.path,
152 resolved_path.display()
153 )
154 })?;
155
156 ensure_regular_file(
157 &meta,
158 &resolved_path,
159 "Reading multiply-linked file is not allowed",
160 )?;
161 ensure_file_size_within_limit(&meta, &resolved_path, MAX_FILE_SIZE_BYTES)?;
162
163 let data = tokio::fs::read(&resolved_path).await.map_err(|err| {
164 format!(
165 "Failed to read file (workspace: {}, requested_path: {}, resolved_path: {}): {err}",
166 workspace_display,
167 args.path,
168 resolved_path.display()
169 )
170 })?;
171 let mut output = ReadFileOutput {
172 content: String::new(),
173 encoding: UTF8_ENCODING.to_string(),
174 size: meta.len(),
175 ..Default::default()
176 };
177 if let Some(kind) = infer2::get(&data) {
178 output.mime_type = Some(kind.mime_type().to_string());
179 }
180 match String::from_utf8(data) {
181 Ok(text) => {
182 let all_lines = text.lines();
183 output.total_lines = Some(all_lines.clone().count());
184 if args.offset == 0 && args.limit == 0 {
185 output.content = text;
186 } else if args.limit == 0 {
187 output.content = all_lines.skip(args.offset).collect::<Vec<_>>().join("\n");
188 } else {
189 output.content = all_lines
190 .skip(args.offset)
191 .take(args.limit)
192 .collect::<Vec<_>>()
193 .join("\n");
194 }
195 }
196 Err(v) => {
197 output.content = ByteBufB64(v.into_bytes()).to_base64();
198 output.encoding = BASE64_ENCODING.to_string();
199 }
200 }
201
202 if let Some(hook) = &hook {
203 return hook.after_tool_call(&ctx, ToolOutput::new(output)).await;
204 }
205
206 Ok(ToolOutput::new(output))
207 }
208}
209
210#[cfg(test)]
211mod tests {
212 use super::*;
213 use crate::engine::EngineBuilder;
214 use serde_json::json;
215 use std::path::{Path, PathBuf};
216
217 struct TestTempDir(PathBuf);
218
219 impl TestTempDir {
220 async fn new() -> Self {
221 let path = std::env::temp_dir()
222 .join(format!("anda-fs-read-test-{:016x}", rand::random::<u64>()));
223 tokio::fs::create_dir_all(&path).await.unwrap();
224 Self(path)
225 }
226
227 fn path(&self) -> &Path {
228 &self.0
229 }
230 }
231
232 impl Drop for TestTempDir {
233 fn drop(&mut self) {
234 let _ = std::fs::remove_dir_all(&self.0);
235 }
236 }
237
238 fn mock_ctx() -> BaseCtx {
239 EngineBuilder::new().mock_ctx().base
240 }
241
242 fn mock_ctx_with_workspace(workspace: &Path) -> BaseCtx {
243 let mut ctx = mock_ctx();
244 ctx.meta.extra.insert(
245 "workspace".to_string(),
246 json!(workspace.to_string_lossy().to_string()),
247 );
248 ctx
249 }
250
251 fn read_tool(workspace: &Path) -> ReadFileTool {
252 ReadFileTool::new(workspace.to_path_buf())
253 }
254
255 #[tokio::test]
256 async fn reads_from_default_workspace_when_meta_workspace_has_no_match() {
257 let temp_dir = TestTempDir::new().await;
258 let runtime_workspace = temp_dir.path().join("runtime");
259 let home_workspace = temp_dir.path().join("home");
260 tokio::fs::create_dir_all(&runtime_workspace).await.unwrap();
261 tokio::fs::create_dir_all(&home_workspace).await.unwrap();
262 tokio::fs::write(home_workspace.join("notes.txt"), "from home")
263 .await
264 .unwrap();
265
266 let result = read_tool(&home_workspace)
267 .call(
268 mock_ctx_with_workspace(&runtime_workspace),
269 ReadFileArgs {
270 path: "notes.txt".to_string(),
271 offset: 0,
272 limit: 0,
273 },
274 Vec::new(),
275 )
276 .await
277 .unwrap();
278
279 assert_eq!(result.output.content, "from home");
280 assert_eq!(result.output.encoding, "utf8");
281 }
282
283 #[tokio::test]
284 async fn applies_offset_when_limit_is_zero() {
285 let temp_dir = TestTempDir::new().await;
286 let workspace = temp_dir.path().join("workspace");
287 tokio::fs::create_dir_all(&workspace).await.unwrap();
288 tokio::fs::write(workspace.join("notes.txt"), "zero\none\ntwo\nthree\n")
289 .await
290 .unwrap();
291
292 let result = read_tool(&workspace)
293 .call(
294 mock_ctx(),
295 ReadFileArgs {
296 path: "notes.txt".to_string(),
297 offset: 1,
298 limit: 0,
299 },
300 Vec::new(),
301 )
302 .await
303 .unwrap();
304
305 assert_eq!(result.output.content, "one\ntwo\nthree");
306 assert_eq!(result.output.encoding, "utf8");
307 }
308
309 #[tokio::test]
310 async fn reads_requested_text_window() {
311 let temp_dir = TestTempDir::new().await;
312 let workspace = temp_dir.path().join("workspace");
313 tokio::fs::create_dir_all(&workspace).await.unwrap();
314 tokio::fs::write(workspace.join("notes.txt"), "zero\none\ntwo\nthree\n")
315 .await
316 .unwrap();
317
318 let result = read_tool(&workspace)
319 .call(
320 mock_ctx(),
321 ReadFileArgs {
322 path: "notes.txt".to_string(),
323 offset: 1,
324 limit: 2,
325 },
326 Vec::new(),
327 )
328 .await
329 .unwrap();
330
331 assert_eq!(result.output.content, "one\ntwo");
332 assert_eq!(result.output.size, 19);
333 }
334
335 #[tokio::test]
336 async fn returns_base64_for_non_utf8_content() {
337 let temp_dir = TestTempDir::new().await;
338 let workspace = temp_dir.path().join("workspace");
339 let binary = vec![0xff, 0x00, 0x81, 0x7f];
340 tokio::fs::create_dir_all(&workspace).await.unwrap();
341 tokio::fs::write(workspace.join("payload.bin"), &binary)
342 .await
343 .unwrap();
344
345 let result = read_tool(&workspace)
346 .call(
347 mock_ctx(),
348 ReadFileArgs {
349 path: "payload.bin".to_string(),
350 offset: 0,
351 limit: 0,
352 },
353 Vec::new(),
354 )
355 .await
356 .unwrap();
357
358 assert_eq!(result.output.content, ByteBufB64(binary).to_base64());
359 assert_eq!(result.output.encoding, "base64");
360 assert_eq!(result.output.size, 4);
361 }
362
363 #[cfg(unix)]
364 #[tokio::test]
365 async fn reads_files_from_a_symlinked_workspace_root() {
366 use std::os::unix::fs::symlink;
367
368 let temp_dir = TestTempDir::new().await;
369 let workspace = temp_dir.path().join("workspace");
370 let workspace_link = temp_dir.path().join("workspace-link");
371 tokio::fs::create_dir_all(&workspace).await.unwrap();
372 tokio::fs::write(workspace.join("notes.txt"), "hello\nworld\n")
373 .await
374 .unwrap();
375 symlink(&workspace, &workspace_link).unwrap();
376
377 let result = read_tool(&workspace_link)
378 .call(
379 mock_ctx(),
380 ReadFileArgs {
381 path: "notes.txt".to_string(),
382 offset: 0,
383 limit: 0,
384 },
385 Vec::new(),
386 )
387 .await
388 .unwrap();
389
390 assert_eq!(result.output.content, "hello\nworld\n");
391 assert_eq!(result.output.encoding, "utf8");
392 }
393
394 #[cfg(unix)]
395 #[tokio::test]
396 async fn reads_files_through_symbolic_link_target() {
397 use std::os::unix::fs::symlink;
398
399 let temp_dir = TestTempDir::new().await;
400 let workspace = temp_dir.path().join("workspace");
401 let external = temp_dir.path().join("secret.txt");
402 tokio::fs::create_dir_all(&workspace).await.unwrap();
403 tokio::fs::write(&external, "secret").await.unwrap();
404 symlink(&external, workspace.join("secret-link.txt")).unwrap();
405
406 let result = read_tool(&workspace)
407 .call(
408 mock_ctx(),
409 ReadFileArgs {
410 path: "secret-link.txt".to_string(),
411 offset: 0,
412 limit: 0,
413 },
414 Vec::new(),
415 )
416 .await
417 .unwrap();
418
419 assert_eq!(result.output.content, "secret");
420 assert_eq!(result.output.encoding, "utf8");
421 }
422
423 #[cfg(unix)]
424 #[tokio::test]
425 async fn reads_files_through_symbolic_linked_directory_target() {
426 use std::os::unix::fs::symlink;
427
428 let temp_dir = TestTempDir::new().await;
429 let workspace = temp_dir.path().join("workspace");
430 let external = temp_dir.path().join("external");
431 tokio::fs::create_dir_all(&workspace).await.unwrap();
432 tokio::fs::create_dir_all(&external).await.unwrap();
433 tokio::fs::write(external.join("secret.txt"), "secret")
434 .await
435 .unwrap();
436 symlink(&external, workspace.join("linked-dir")).unwrap();
437
438 let result = read_tool(&workspace)
439 .call(
440 mock_ctx(),
441 ReadFileArgs {
442 path: "linked-dir/secret.txt".to_string(),
443 offset: 0,
444 limit: 0,
445 },
446 Vec::new(),
447 )
448 .await
449 .unwrap();
450
451 assert_eq!(result.output.content, "secret");
452 assert_eq!(result.output.encoding, "utf8");
453 }
454
455 #[tokio::test]
456 async fn rejects_absolute_path_outside_workspace() {
457 let temp_dir = TestTempDir::new().await;
458 let workspace = temp_dir.path().join("workspace");
459 let external = temp_dir.path().join("secret.txt");
460 tokio::fs::create_dir_all(&workspace).await.unwrap();
461 tokio::fs::write(&external, "secret").await.unwrap();
462
463 let err = read_tool(&workspace)
464 .call(
465 mock_ctx(),
466 ReadFileArgs {
467 path: external.to_string_lossy().into_owned(),
468 offset: 0,
469 limit: 0,
470 },
471 Vec::new(),
472 )
473 .await
474 .unwrap_err();
475
476 assert!(
477 err.to_string()
478 .contains("Access to paths outside the workspace is not allowed")
479 );
480 }
481
482 #[tokio::test]
483 async fn rejects_parent_dir_escape_outside_workspace() {
484 let temp_dir = TestTempDir::new().await;
485 let workspace = temp_dir.path().join("workspace");
486 let external = temp_dir.path().join("secret.txt");
487 tokio::fs::create_dir_all(&workspace).await.unwrap();
488 tokio::fs::write(&external, "secret").await.unwrap();
489
490 let err = read_tool(&workspace)
491 .call(
492 mock_ctx(),
493 ReadFileArgs {
494 path: "../secret.txt".to_string(),
495 offset: 0,
496 limit: 0,
497 },
498 Vec::new(),
499 )
500 .await
501 .unwrap_err();
502
503 assert!(
504 err.to_string()
505 .contains("Access to paths outside the workspace is not allowed")
506 );
507 }
508}