Skip to main content

anda_engine/extension/fs/
read.rs

1use anda_core::{BoxError, FunctionDefinition, Resource, StateFeatures, Tool, ToolOutput};
2use ic_auth_types::ByteBufB64;
3use serde::{Deserialize, Serialize};
4use serde_json::json;
5use std::path::PathBuf;
6
7use super::{
8    BASE64_ENCODING, MAX_FILE_SIZE_BYTES, UTF8_ENCODING, ensure_file_size_within_limit,
9    ensure_regular_file, format_workspaces, normalize_workspaces, resolve_read_path_in_workspaces,
10    tool_workspaces,
11};
12use crate::{
13    context::BaseCtx,
14    hook::{DynToolHook, ToolHook},
15};
16
/// Arguments for filesystem read operations.
///
/// `offset` and `limit` select a window of lines and are only applied when the file
/// decodes as UTF-8; files that are not valid UTF-8 are returned whole as base64.
/// When both are `0` the text is returned verbatim; otherwise the selected lines are
/// re-joined with `"\n"`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ReadFileArgs {
    /// Relative or absolute path to a file inside the workspace.
    pub path: String,
    /// Zero-based line offset for UTF-8 text output.
    /// Defaults to `0` (start at the first line) when omitted.
    #[serde(default)]
    pub offset: usize,
    /// Maximum number of UTF-8 lines to return. `0` means all remaining lines.
    /// Defaults to `0` when omitted.
    #[serde(default)]
    pub limit: usize,
}
29
/// Normalized result returned by a filesystem read operation.
///
/// The optional fields are left out of the serialized form when they are `None`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct ReadFileOutput {
    /// File content as UTF-8 text or base64-encoded bytes for non-UTF-8 files.
    pub content: String,
    /// The encoding of the file content (`UTF8_ENCODING` or `BASE64_ENCODING`).
    pub encoding: String,
    /// The size of the file in bytes, as reported by the file metadata.
    /// This is the size of the whole file, not of the returned line window.
    pub size: u64,
    /// The MIME type of the file content, when it could be detected from the bytes.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mime_type: Option<String>,
    /// The number of lines in the file content, if the content is UTF-8 text.
    /// Counts every line of the file, regardless of `offset` and `limit`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_lines: Option<usize>,
}
46
/// Hook type for [`ReadFileTool`].
///
/// When a value of this type is present in the context state, the tool passes its
/// arguments through `before_tool_call` before reading and its result through
/// `after_tool_call` before returning.
pub type ReadFileHook = DynToolHook<ReadFileArgs, ReadFileOutput>;
48
/// Tool that reads a single file from one of its workspace directories.
///
/// `Debug` is derived so the tool can be logged or embedded in other `Debug` types.
#[derive(Debug, Clone)]
pub struct ReadFileTool {
    /// Default workspace directories used to resolve requested paths.
    workspaces: Vec<PathBuf>,
    /// Human-readable description exposed through the tool's function definition.
    description: String,
}
54
55impl ReadFileTool {
56    /// Tool name used for registration and function definition.
57    pub const NAME: &'static str = "read_file";
58
59    /// Create a new `ReadFileTool` with the default workspace directory.
60    /// You can add workspace directories for each call by including `workspace` or `workspaces` in the tool call's context meta extra.
61    pub fn new(workspace: PathBuf) -> Self {
62        Self::with_workspaces([workspace])
63    }
64
65    /// Create a new `ReadFileTool` with the default workspace directories.
66    /// Context meta workspaces take precedence over these defaults at call time.
67    pub fn with_workspaces<I>(workspaces: I) -> Self
68    where
69        I: IntoIterator<Item = PathBuf>,
70    {
71        let workspaces = normalize_workspaces(workspaces);
72        let description = format!(
73            "Read files from the filesystem in the workspace directories ({})",
74            format_workspaces(&workspaces)
75        );
76        Self {
77            workspaces,
78            description,
79        }
80    }
81
82    pub fn with_description(mut self, description: String) -> Self {
83        self.description = description;
84        self
85    }
86}
87
88impl Tool<BaseCtx> for ReadFileTool {
89    type Args = ReadFileArgs;
90    type Output = ReadFileOutput;
91
92    fn name(&self) -> String {
93        Self::NAME.to_string()
94    }
95
96    fn description(&self) -> String {
97        self.description.clone()
98    }
99
100    fn definition(&self) -> FunctionDefinition {
101        FunctionDefinition {
102            name: self.name(),
103            description: self.description(),
104            parameters: json!({
105                "type": "object",
106                "properties": {
107                    "path": {
108                        "type": "string",
109                        "description": "Path to the file. Relative paths resolve from the configured workspaces in priority order; absolute paths must be inside one configured workspace."
110                    },
111                    "offset": {
112                        "type": "integer",
113                        "description": "Zero-based line offset for UTF-8 text output (default: 0)"
114                    },
115                    "limit": {
116                        "type": "integer",
117                        "description": "Maximum number of UTF-8 text lines to return (default: 0, all remaining lines)"
118                    }
119                },
120                "required": ["path"]
121            }),
122            strict: Some(true),
123        }
124    }
125
126    async fn call(
127        &self,
128        ctx: BaseCtx,
129        args: Self::Args,
130        _resources: Vec<Resource>,
131    ) -> Result<ToolOutput<Self::Output>, BoxError> {
132        let hook = ctx.get_state::<ReadFileHook>();
133
134        let args = if let Some(hook) = &hook {
135            hook.before_tool_call(&ctx, args).await?
136        } else {
137            args
138        };
139
140        let workspaces = tool_workspaces(ctx.meta(), &self.workspaces);
141        let resolved = resolve_read_path_in_workspaces(&workspaces, &args.path).await?;
142        let workspace_display = resolved.workspace.display().to_string();
143        let resolved_path = resolved.path;
144
145        let meta = tokio::fs::metadata(&resolved_path)
146            .await
147            .map_err(|err| {
148                format!(
149                    "Failed to read file metadata (workspace: {}, requested_path: {}, resolved_path: {}): {err}",
150                    workspace_display,
151                    args.path,
152                    resolved_path.display()
153                )
154            })?;
155
156        ensure_regular_file(
157            &meta,
158            &resolved_path,
159            "Reading multiply-linked file is not allowed",
160        )?;
161        ensure_file_size_within_limit(&meta, &resolved_path, MAX_FILE_SIZE_BYTES)?;
162
163        let data = tokio::fs::read(&resolved_path).await.map_err(|err| {
164            format!(
165                "Failed to read file (workspace: {}, requested_path: {}, resolved_path: {}): {err}",
166                workspace_display,
167                args.path,
168                resolved_path.display()
169            )
170        })?;
171        let mut output = ReadFileOutput {
172            content: String::new(),
173            encoding: UTF8_ENCODING.to_string(),
174            size: meta.len(),
175            ..Default::default()
176        };
177        if let Some(kind) = infer2::get(&data) {
178            output.mime_type = Some(kind.mime_type().to_string());
179        }
180        match String::from_utf8(data) {
181            Ok(text) => {
182                let all_lines = text.lines();
183                output.total_lines = Some(all_lines.clone().count());
184                if args.offset == 0 && args.limit == 0 {
185                    output.content = text;
186                } else if args.limit == 0 {
187                    output.content = all_lines.skip(args.offset).collect::<Vec<_>>().join("\n");
188                } else {
189                    output.content = all_lines
190                        .skip(args.offset)
191                        .take(args.limit)
192                        .collect::<Vec<_>>()
193                        .join("\n");
194                }
195            }
196            Err(v) => {
197                output.content = ByteBufB64(v.into_bytes()).to_base64();
198                output.encoding = BASE64_ENCODING.to_string();
199            }
200        }
201
202        if let Some(hook) = &hook {
203            return hook.after_tool_call(&ctx, ToolOutput::new(output)).await;
204        }
205
206        Ok(ToolOutput::new(output))
207    }
208}
209
#[cfg(test)]
mod tests {
    use super::*;
    use crate::engine::EngineBuilder;
    use serde_json::json;
    use std::path::{Path, PathBuf};

    /// Scratch directory under the system temp dir; removed (best effort) on drop.
    struct TestTempDir(PathBuf);

    impl TestTempDir {
        /// Creates a fresh directory whose name carries a random 64-bit suffix.
        async fn new() -> Self {
            let name = format!("anda-fs-read-test-{:016x}", rand::random::<u64>());
            let root = std::env::temp_dir().join(name);
            tokio::fs::create_dir_all(&root).await.unwrap();
            Self(root)
        }

        fn path(&self) -> &Path {
            self.0.as_path()
        }

        /// Creates the directory `name` below the scratch root and returns its path.
        async fn dir(&self, name: &str) -> PathBuf {
            let dir = self.path().join(name);
            tokio::fs::create_dir_all(&dir).await.unwrap();
            dir
        }
    }

    impl Drop for TestTempDir {
        fn drop(&mut self) {
            // Cleanup failures must not fail a test, so the result is discarded.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    fn mock_ctx() -> BaseCtx {
        EngineBuilder::new().mock_ctx().base
    }

    /// Mock context whose meta extra carries `workspace` as the per-call workspace.
    fn mock_ctx_with_workspace(workspace: &Path) -> BaseCtx {
        let mut ctx = mock_ctx();
        let value = json!(workspace.to_string_lossy().to_string());
        ctx.meta.extra.insert("workspace".to_string(), value);
        ctx
    }

    fn read_tool(workspace: &Path) -> ReadFileTool {
        ReadFileTool::new(workspace.to_path_buf())
    }

    /// Writes `contents` to `path`, panicking on failure.
    async fn write_file(path: impl AsRef<Path>, contents: impl AsRef<[u8]>) {
        tokio::fs::write(path, contents).await.unwrap();
    }

    /// Runs a `ReadFileTool` rooted at `workspace` with the given context and arguments.
    async fn read(
        workspace: &Path,
        ctx: BaseCtx,
        path: &str,
        offset: usize,
        limit: usize,
    ) -> Result<ToolOutput<ReadFileOutput>, BoxError> {
        let args = ReadFileArgs {
            path: path.to_string(),
            offset,
            limit,
        };
        read_tool(workspace).call(ctx, args, Vec::new()).await
    }

    #[tokio::test]
    async fn reads_from_default_workspace_when_meta_workspace_has_no_match() {
        let temp_dir = TestTempDir::new().await;
        let runtime_workspace = temp_dir.dir("runtime").await;
        let home_workspace = temp_dir.dir("home").await;
        write_file(home_workspace.join("notes.txt"), "from home").await;

        let ctx = mock_ctx_with_workspace(&runtime_workspace);
        let result = read(&home_workspace, ctx, "notes.txt", 0, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, "from home");
        assert_eq!(result.output.encoding, "utf8");
    }

    #[tokio::test]
    async fn applies_offset_when_limit_is_zero() {
        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        write_file(workspace.join("notes.txt"), "zero\none\ntwo\nthree\n").await;

        let result = read(&workspace, mock_ctx(), "notes.txt", 1, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, "one\ntwo\nthree");
        assert_eq!(result.output.encoding, "utf8");
    }

    #[tokio::test]
    async fn reads_requested_text_window() {
        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        write_file(workspace.join("notes.txt"), "zero\none\ntwo\nthree\n").await;

        let result = read(&workspace, mock_ctx(), "notes.txt", 1, 2)
            .await
            .unwrap();

        assert_eq!(result.output.content, "one\ntwo");
        assert_eq!(result.output.size, 19);
    }

    #[tokio::test]
    async fn returns_base64_for_non_utf8_content() {
        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let binary = vec![0xff, 0x00, 0x81, 0x7f];
        write_file(workspace.join("payload.bin"), &binary).await;

        let result = read(&workspace, mock_ctx(), "payload.bin", 0, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, ByteBufB64(binary).to_base64());
        assert_eq!(result.output.encoding, "base64");
        assert_eq!(result.output.size, 4);
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn reads_files_from_a_symlinked_workspace_root() {
        use std::os::unix::fs::symlink;

        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let workspace_link = temp_dir.path().join("workspace-link");
        write_file(workspace.join("notes.txt"), "hello\nworld\n").await;
        symlink(&workspace, &workspace_link).unwrap();

        let result = read(&workspace_link, mock_ctx(), "notes.txt", 0, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, "hello\nworld\n");
        assert_eq!(result.output.encoding, "utf8");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn reads_files_through_symbolic_link_target() {
        use std::os::unix::fs::symlink;

        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let external = temp_dir.path().join("secret.txt");
        write_file(&external, "secret").await;
        symlink(&external, workspace.join("secret-link.txt")).unwrap();

        let result = read(&workspace, mock_ctx(), "secret-link.txt", 0, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, "secret");
        assert_eq!(result.output.encoding, "utf8");
    }

    #[cfg(unix)]
    #[tokio::test]
    async fn reads_files_through_symbolic_linked_directory_target() {
        use std::os::unix::fs::symlink;

        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let external = temp_dir.dir("external").await;
        write_file(external.join("secret.txt"), "secret").await;
        symlink(&external, workspace.join("linked-dir")).unwrap();

        let result = read(&workspace, mock_ctx(), "linked-dir/secret.txt", 0, 0)
            .await
            .unwrap();

        assert_eq!(result.output.content, "secret");
        assert_eq!(result.output.encoding, "utf8");
    }

    #[tokio::test]
    async fn rejects_absolute_path_outside_workspace() {
        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let external = temp_dir.path().join("secret.txt");
        write_file(&external, "secret").await;

        let requested = external.to_string_lossy().into_owned();
        let err = read(&workspace, mock_ctx(), &requested, 0, 0)
            .await
            .unwrap_err();

        assert!(
            err.to_string()
                .contains("Access to paths outside the workspace is not allowed")
        );
    }

    #[tokio::test]
    async fn rejects_parent_dir_escape_outside_workspace() {
        let temp_dir = TestTempDir::new().await;
        let workspace = temp_dir.dir("workspace").await;
        let external = temp_dir.path().join("secret.txt");
        write_file(&external, "secret").await;

        let err = read(&workspace, mock_ctx(), "../secret.txt", 0, 0)
            .await
            .unwrap_err();

        assert!(
            err.to_string()
                .contains("Access to paths outside the workspace is not allowed")
        );
    }
}