// Source: pawan/tools/file.rs

//! File read/write tools with safety validation
2
3use super::Tool;
4use async_trait::async_trait;
5use serde_json::{json, Value};
6use std::path::{Path, PathBuf};
7
8/// Validate a file path for write safety.
9/// Returns Err with reason if the write should be blocked.
10/// Inspired by claw-code's file_ops.rs safety checks.
11pub fn validate_file_write(path: &Path) -> Result<(), &'static str> {
12    let path_str = path.to_string_lossy();
13    let filename = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
14
15    // Block: writes inside .git directory (corrupts repository)
16    for component in path.components() {
17        if let std::path::Component::Normal(c) = component {
18            if c == ".git" {
19                return Err("refuses to write inside .git directory");
20            }
21        }
22    }
23
24    // Block: sensitive credential/secret files
25    let blocked_files = [
26        ".env", ".env.local", ".env.production",
27        "id_rsa", "id_ed25519", "id_ecdsa",
28        "credentials.json", "service-account.json",
29        ".npmrc", ".pypirc",
30    ];
31    if blocked_files.contains(&filename) {
32        return Err("refuses to overwrite credential/secret file");
33    }
34
35    // Block: critical system paths
36    if path_str.starts_with("/etc/") || path_str.starts_with("/usr/") || path_str.starts_with("/bin/")
37        || path_str.starts_with("/sbin/") || path_str.starts_with("/boot/")
38    {
39        return Err("refuses to write to system directory");
40    }
41
42    // Warn-level (allow but log): lock files
43    let warn_files = ["Cargo.lock", "package-lock.json", "yarn.lock", "pnpm-lock.yaml", "Gemfile.lock", "poetry.lock"];
44    if warn_files.contains(&filename) {
45        tracing::warn!(path = %path_str, "Writing to lock file — usually auto-generated");
46    }
47
48    Ok(())
49}
50
51/// Normalize a path relative to the workspace root.
52///
53/// Handles the double-prefix bug where the model passes an absolute path
54/// like "/home/user/ws/home/user/ws/foo.rs" — it joined the workspace
55/// root with an absolute path instead of a relative one. We detect the
56/// workspace root appearing twice and collapse to the second occurrence.
57///
58/// # Parameters
59/// - `workspace_root`: The root directory of the workspace
60/// - `path`: The path to normalize (can be relative or absolute)
61///
62/// # Returns
63/// The normalized path as a PathBuf
64pub fn normalize_path(workspace_root: &Path, path: &str) -> PathBuf {
65    let p = PathBuf::from(path);
66    if p.is_absolute() {
67        let ws = workspace_root.to_string_lossy();
68        let ps = p.to_string_lossy();
69        // If path starts with workspace_root, check if ws appears again in the remainder
70        if ps.starts_with(&*ws) {
71            let tail = &ps[ws.len()..];
72            if let Some(idx) = tail.find(&*ws) {
73                let corrected = &tail[idx..];
74                tracing::warn!(
75                    original = %ps, corrected = %corrected,
76                    "Path normalization: double workspace prefix detected"
77                );
78                return PathBuf::from(corrected.to_string());
79            }
80        }
81        p
82    } else {
83        workspace_root.join(p)
84    }
85}
86
87/// Tool for reading file contents
88pub struct ReadFileTool {
89    workspace_root: PathBuf,
90}
91
92impl ReadFileTool {
93    pub fn new(workspace_root: PathBuf) -> Self {
94        Self { workspace_root }
95    }
96
97    fn resolve_path(&self, path: &str) -> PathBuf {
98        normalize_path(&self.workspace_root, path)
99    }
100}
101
102#[async_trait]
103impl Tool for ReadFileTool {
104    fn name(&self) -> &str {
105        "read_file"
106    }
107
108    fn description(&self) -> &str {
109        "Read the contents of a file. Returns the file content with line numbers."
110    }
111
112    fn parameters_schema(&self) -> Value {
113        json!({
114            "type": "object",
115            "properties": {
116                "path": {
117                    "type": "string",
118                    "description": "Path to the file to read (relative to workspace root or absolute)"
119                },
120                "offset": {
121                    "type": "integer",
122                    "description": "Line number to start reading from (0-based, optional)"
123                },
124                "limit": {
125                    "type": "integer",
126                    "description": "Maximum number of lines to read (optional, defaults to 2000)"
127                }
128            },
129            "required": ["path"]
130        })
131    }
132
133    fn thulp_definition(&self) -> thulp_core::ToolDefinition {
134        use thulp_core::{Parameter, ParameterType};
135        thulp_core::ToolDefinition::builder("read_file")
136            .description(self.description())
137            .parameter(Parameter::builder("path").param_type(ParameterType::String).required(true)
138                .description("Path to the file to read (relative to workspace root or absolute)").build())
139            .parameter(Parameter::builder("offset").param_type(ParameterType::Integer).required(false)
140                .description("Line number to start reading from (0-based, optional)").build())
141            .parameter(Parameter::builder("limit").param_type(ParameterType::Integer).required(false)
142                .description("Maximum number of lines to read (optional, defaults to 2000)").build())
143            .build()
144    }
145
146    async fn execute(&self, args: Value) -> crate::Result<Value> {
147        let path = args["path"]
148            .as_str()
149            .ok_or_else(|| crate::PawanError::Tool("path is required".into()))?;
150
151        let offset = args["offset"].as_u64().unwrap_or(0) as usize;
152        let limit = args["limit"].as_u64().unwrap_or(200) as usize;
153
154        let full_path = self.resolve_path(path);
155
156        if !full_path.exists() {
157            return Err(crate::PawanError::NotFound(format!(
158                "File not found: {}",
159                full_path.display()
160            )));
161        }
162
163        let content = tokio::fs::read_to_string(&full_path)
164            .await
165            .map_err(crate::PawanError::Io)?;
166
167        let lines: Vec<&str> = content.lines().collect();
168        let total_lines = lines.len();
169
170        let selected_lines: Vec<String> = lines
171            .into_iter()
172            .skip(offset)
173            .take(limit)
174            .enumerate()
175            .map(|(i, line)| {
176                let line_num = offset + i + 1;
177                // Truncate very long lines
178                let display_line = if line.len() > 2000 {
179                    format!("{}...[truncated]", &line[..2000])
180                } else {
181                    line.to_string()
182                };
183                format!("{:>6}\t{}", line_num, display_line)
184            })
185            .collect();
186
187        let output = selected_lines.join("\n");
188
189        let warning = if total_lines > 300 && selected_lines.len() == total_lines {
190            Some(format!(
191                "Large file ({} lines). Consider using offset/limit to read specific sections, \
192                 or use anchor_text in edit_file_lines to avoid line-number math.",
193                total_lines
194            ))
195        } else {
196            None
197        };
198
199        Ok(json!({
200            "content": output,
201            "path": full_path.display().to_string(),
202            "total_lines": total_lines,
203            "lines_shown": selected_lines.len(),
204            "offset": offset,
205            "warning": warning
206        }))
207    }
208}
209
210/// Tool for writing file contents
211pub struct WriteFileTool {
212    workspace_root: PathBuf,
213}
214
215impl WriteFileTool {
216    pub fn new(workspace_root: PathBuf) -> Self {
217        Self { workspace_root }
218    }
219
220    fn resolve_path(&self, path: &str) -> PathBuf {
221        normalize_path(&self.workspace_root, path)
222    }
223}
224
225#[async_trait]
226impl Tool for WriteFileTool {
227    fn name(&self) -> &str {
228        "write_file"
229    }
230
231    fn description(&self) -> &str {
232        "Write content to a file. Creates parent directories automatically. \
233         PREFER edit_file or edit_file_lines for modifying existing files — \
234         write_file overwrites the entire file. Only use for creating new files \
235         or complete rewrites. Writes to .git/, .env, credential files, and \
236         system paths (/etc, /usr) are blocked for safety."
237    }
238
239    fn parameters_schema(&self) -> Value {
240        json!({
241            "type": "object",
242            "properties": {
243                "path": {
244                    "type": "string",
245                    "description": "Path to the file to write (relative to workspace root or absolute)"
246                },
247                "content": {
248                    "type": "string",
249                    "description": "Content to write to the file"
250                }
251            },
252            "required": ["path", "content"]
253        })
254    }
255
256    fn thulp_definition(&self) -> thulp_core::ToolDefinition {
257        use thulp_core::{Parameter, ParameterType};
258        thulp_core::ToolDefinition::builder("write_file")
259            .description(self.description())
260            .parameter(Parameter::builder("path").param_type(ParameterType::String).required(true)
261                .description("Path to the file to write (relative to workspace root or absolute)").build())
262            .parameter(Parameter::builder("content").param_type(ParameterType::String).required(true)
263                .description("Content to write to the file").build())
264            .build()
265    }
266
267    async fn execute(&self, args: Value) -> crate::Result<Value> {
268        let path = args["path"]
269            .as_str()
270            .ok_or_else(|| crate::PawanError::Tool("path is required".into()))?;
271
272        let content = args["content"]
273            .as_str()
274            .ok_or_else(|| crate::PawanError::Tool("content is required".into()))?;
275
276        let full_path = self.resolve_path(path);
277
278        // Validate write safety
279        if let Err(reason) = validate_file_write(&full_path) {
280            return Err(crate::PawanError::Tool(format!(
281                "Write blocked: {} — {}", full_path.display(), reason
282            )));
283        }
284
285        // Create parent directories if needed
286        if let Some(parent) = full_path.parent() {
287            tokio::fs::create_dir_all(parent)
288                .await
289                .map_err(crate::PawanError::Io)?;
290        }
291
292        // Write the file
293        tokio::fs::write(&full_path, content)
294            .await
295            .map_err(crate::PawanError::Io)?;
296
297        // Verify written size matches expected
298        let written_size = tokio::fs::metadata(&full_path)
299            .await
300            .map(|m| m.len() as usize)
301            .unwrap_or(0);
302        let line_count = content.lines().count();
303        let size_mismatch = written_size != content.len();
304
305        Ok(json!({
306            "success": true,
307            "path": full_path.display().to_string(),
308            "bytes_written": content.len(),
309            "bytes_on_disk": written_size,
310            "size_verified": !size_mismatch,
311            "lines": line_count
312        }))
313    }
314}
315
316/// Tool for listing directory contents
317pub struct ListDirectoryTool {
318    workspace_root: PathBuf,
319}
320
321impl ListDirectoryTool {
322    pub fn new(workspace_root: PathBuf) -> Self {
323        Self { workspace_root }
324    }
325
326    fn resolve_path(&self, path: &str) -> PathBuf {
327        normalize_path(&self.workspace_root, path)
328    }
329}
330
331#[async_trait]
332impl Tool for ListDirectoryTool {
333    fn name(&self) -> &str {
334        "list_directory"
335    }
336
337    fn description(&self) -> &str {
338        "List the contents of a directory."
339    }
340
341    fn parameters_schema(&self) -> Value {
342        json!({
343            "type": "object",
344            "properties": {
345                "path": {
346                    "type": "string",
347                    "description": "Path to the directory to list (relative to workspace root or absolute)"
348                },
349                "recursive": {
350                    "type": "boolean",
351                    "description": "Whether to list recursively (default: false)"
352                },
353                "max_depth": {
354                    "type": "integer",
355                    "description": "Maximum depth for recursive listing (default: 3)"
356                }
357            },
358            "required": ["path"]
359        })
360    }
361
362    fn thulp_definition(&self) -> thulp_core::ToolDefinition {
363        use thulp_core::{Parameter, ParameterType};
364        thulp_core::ToolDefinition::builder("list_directory")
365            .description(self.description())
366            .parameter(Parameter::builder("path").param_type(ParameterType::String).required(true)
367                .description("Path to the directory to list (relative to workspace root or absolute)").build())
368            .parameter(Parameter::builder("recursive").param_type(ParameterType::Boolean).required(false)
369                .description("Whether to list recursively (default: false)").build())
370            .parameter(Parameter::builder("max_depth").param_type(ParameterType::Integer).required(false)
371                .description("Maximum depth for recursive listing (default: 3)").build())
372            .build()
373    }
374
375    async fn execute(&self, args: Value) -> crate::Result<Value> {
376        let path = args["path"]
377            .as_str()
378            .ok_or_else(|| crate::PawanError::Tool("path is required".into()))?;
379
380        let recursive = args["recursive"].as_bool().unwrap_or(false);
381        let max_depth = args["max_depth"].as_u64().unwrap_or(3) as usize;
382
383        let full_path = self.resolve_path(path);
384
385        if !full_path.exists() {
386            return Err(crate::PawanError::NotFound(format!(
387                "Directory not found: {}",
388                full_path.display()
389            )));
390        }
391
392        if !full_path.is_dir() {
393            return Err(crate::PawanError::Tool(format!(
394                "Not a directory: {}",
395                full_path.display()
396            )));
397        }
398
399        let mut entries = Vec::new();
400
401        if recursive {
402            for entry in walkdir::WalkDir::new(&full_path)
403                .max_depth(max_depth)
404                .into_iter()
405                .filter_map(|e| e.ok())
406            {
407                let path = entry.path();
408                let relative = path.strip_prefix(&full_path).unwrap_or(path);
409                let is_dir = entry.file_type().is_dir();
410                let size = if is_dir {
411                    0
412                } else {
413                    entry.metadata().map(|m| m.len()).unwrap_or(0)
414                };
415
416                entries.push(json!({
417                    "path": relative.display().to_string(),
418                    "is_dir": is_dir,
419                    "size": size
420                }));
421            }
422        } else {
423            let mut read_dir = tokio::fs::read_dir(&full_path)
424                .await
425                .map_err(crate::PawanError::Io)?;
426
427            while let Some(entry) = read_dir.next_entry().await.map_err(crate::PawanError::Io)? {
428                let path = entry.path();
429                let name = entry.file_name().to_string_lossy().to_string();
430                let metadata = entry.metadata().await.ok();
431                let is_dir = metadata.as_ref().map(|m| m.is_dir()).unwrap_or(false);
432                let size = metadata.map(|m| m.len()).unwrap_or(0);
433
434                entries.push(json!({
435                    "name": name,
436                    "path": path.display().to_string(),
437                    "is_dir": is_dir,
438                    "size": size
439                }));
440            }
441        }
442
443        Ok(json!({
444            "path": full_path.display().to_string(),
445            "entries": entries,
446            "count": entries.len()
447        }))
448    }
449}
450
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Workspace root shared by the `normalize_path` tests.
    const WORKSPACE: &str = "/home/user/workspace";

    /// Runs `normalize_path` against the shared workspace root.
    fn normalized(input: &str) -> PathBuf {
        normalize_path(Path::new(WORKSPACE), input)
    }

    /// Reading a three-line file reports three lines and includes the text.
    #[tokio::test]
    async fn test_read_file() {
        let workspace = TempDir::new().unwrap();
        std::fs::write(workspace.path().join("test.txt"), "line 1\nline 2\nline 3").unwrap();

        let reader = ReadFileTool::new(workspace.path().to_path_buf());
        let output = reader.execute(json!({"path": "test.txt"})).await.unwrap();

        assert_eq!(output["total_lines"], 3);
        let shown = output["content"].as_str().unwrap();
        assert!(shown.contains("line 1"));
    }

    /// Writing a new file succeeds, counts its lines and stores the exact text.
    #[tokio::test]
    async fn test_write_file() {
        let workspace = TempDir::new().unwrap();
        let writer = WriteFileTool::new(workspace.path().to_path_buf());

        let request = json!({
            "path": "new_file.txt",
            "content": "hello\nworld"
        });
        let output = writer.execute(request).await.unwrap();

        assert!(output["success"].as_bool().unwrap());
        assert_eq!(output["lines"], 2);

        let on_disk = std::fs::read_to_string(workspace.path().join("new_file.txt")).unwrap();
        assert_eq!(on_disk, "hello\nworld");
    }

    /// A flat listing of two files and one sub-directory reports three entries.
    #[tokio::test]
    async fn test_list_directory() {
        let workspace = TempDir::new().unwrap();
        for file in ["file1.txt", "file2.txt"] {
            std::fs::write(workspace.path().join(file), "content").unwrap();
        }
        std::fs::create_dir(workspace.path().join("subdir")).unwrap();

        let lister = ListDirectoryTool::new(workspace.path().to_path_buf());
        let output = lister.execute(json!({"path": "."})).await.unwrap();

        assert_eq!(output["count"], 3);
    }

    /// An absolute path with the workspace root repeated is collapsed.
    #[test]
    fn test_normalize_path_double_prefix() {
        let doubled = "/home/user/workspace/home/user/workspace/leftist_heap/src/lib.rs";
        assert_eq!(
            normalized(doubled),
            PathBuf::from("/home/user/workspace/leftist_heap/src/lib.rs")
        );
    }

    /// An ordinary absolute path inside the workspace is left untouched.
    #[test]
    fn test_normalize_path_normal_absolute() {
        let inside = "/home/user/workspace/trie/src/lib.rs";
        assert_eq!(normalized(inside), PathBuf::from(inside));
    }

    /// A relative path is joined onto the workspace root.
    #[test]
    fn test_normalize_path_relative() {
        assert_eq!(
            normalized("trie/src/lib.rs"),
            PathBuf::from("/home/user/workspace/trie/src/lib.rs")
        );
    }

    /// An absolute path outside the workspace is left untouched.
    #[test]
    fn test_normalize_path_unrelated_absolute() {
        let outside = "/tmp/foo/bar.rs";
        assert_eq!(normalized(outside), PathBuf::from(outside));
    }
}