Skip to main content

runtimo_core/capabilities/
file_read.rs

1//! FileRead capability — reads file contents with safety validation.
2//!
3//! Rejects path traversal (`..`), empty paths, non-existent files, and
4//! directories. Returns the file content along with byte count.
5//!
6//! Security: opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
7//! uses bounded reader (take) regardless of metadata to prevent size bypass,
8//! detects binary content, and handles UTF-8 boundary splits correctly.
9//!
10//! # Example
11//!
12//! ```rust
13//! use runtimo_core::capabilities::FileRead;
14//! use runtimo_core::capability::Capability;
15//! use serde_json::json;
16//!
17//! let cap = FileRead;
18//! assert_eq!(cap.name(), "FileRead");
19//!
20//! // Schema requires a "path" string:
21//! let schema = cap.schema();
22//! assert!(schema["required"].as_array().unwrap().contains(&json!("path")));
23//! ```
24
25use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Hard upper bound on the number of bytes a single read may request (10 MB).
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Read limit applied when the caller does not supply `max_bytes` (1 MB).
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38/// Arguments for the [`FileRead`] capability.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct FileReadArgs {
41    /// Absolute or relative path to the file to read.
42    pub path: String,
43    /// Maximum bytes to read (default: 1 MB, max: 10 MB).
44    pub max_bytes: Option<u64>,
45}
46
/// Capability that reads the contents of a file.
///
/// The file is opened through `open_file_nofollow`, read through a bounded
/// reader so the byte limit holds no matter what the file's metadata says,
/// checked for binary content (NUL bytes), and converted to text with
/// awareness of UTF-8 character boundaries.
///
/// The type carries no state, so it is cheap to copy, compare and construct
/// via [`Default`].
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct FileRead;
53
54impl Capability for FileRead {
55    fn name(&self) -> &'static str {
56        "FileRead"
57    }
58
59    fn description(&self) -> &'static str {
60        "Read the contents of a file. Validates path existence, rejects directories and path traversal."
61    }
62
63    fn schema(&self) -> Value {
64        serde_json::json!({
65            "type": "object",
66            "properties": {
67                "path": { "type": "string" },
68                "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
69            },
70            "required": ["path"]
71        })
72    }
73
74    fn validate(&self, args: &Value) -> Result<()> {
75        let args: FileReadArgs = serde_json::from_value(args.clone())
76            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
77
78        let ctx = PathContext {
79            require_exists: true,
80            require_file: true,
81            ..Default::default()
82        };
83
84        validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
85
86        Ok(())
87    }
88
89    fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
90        let args: FileReadArgs = serde_json::from_value(args.clone())
91            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
92
93        let ctx = PathContext {
94            require_exists: true,
95            require_file: true,
96            ..Default::default()
97        };
98
99        let path = validate_path(&args.path, &ctx)
100            .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
101
102        let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
103        if max_bytes > MAX_FILE_SIZE {
104            return Err(Error::ExecutionFailed(format!(
105                "max_bytes {} exceeds maximum allowed {}",
106                max_bytes, MAX_FILE_SIZE
107            )));
108        }
109
110        // P0 FIX: Open with O_NOFOLLOW to prevent TOCTOU symlink escape.
111        // Open immediately after validation to minimize TOCTOU window.
112        let file = open_file_nofollow(&path)
113            .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
114
115        // P0 FIX: Always use bounded reader (take) regardless of metadata.
116        // Prevents TOCTOU size bypass where file grows between stat and read.
117        let mut limited = file.take(max_bytes);
118
119        // Read raw bytes to handle binary detection and UTF-8 boundaries correctly.
120        let mut raw_bytes = Vec::with_capacity(
121            std::cmp::min(max_bytes as usize, 64 * 1024),
122        );
123        let bytes_read = limited
124            .read_to_end(&mut raw_bytes)
125            .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
126
127        let bytes_read = bytes_read as u64;
128        let truncated = bytes_read >= max_bytes;
129
130        // P1 FIX: Detect binary content (null bytes in the data).
131        let is_binary = detect_binary(&raw_bytes);
132
133        let data = if is_binary {
134            serde_json::json!({
135                "content_type": "binary",
136                "path": path.display().to_string(),
137                "bytes_read": bytes_read,
138                "truncated": truncated,
139                "message": "Binary file detected — content not returned as text",
140            })
141        } else {
142            // P1 FIX: Convert raw bytes to String, trimming to valid UTF-8 boundary.
143            let content = bytes_to_utf8_string(&raw_bytes);
144
145            // P1 FIX: Parse JSON from slice (avoids double memory vs from_str).
146            if path.extension().is_some_and(|ext| ext == "json") {
147                match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
148                    Ok(parsed) => serde_json::json!({
149                        "content": parsed,
150                        "content_type": "json",
151                        "path": path.display().to_string(),
152                        "bytes_read": bytes_read,
153                        "truncated": truncated,
154                    }),
155                    Err(_) => serde_json::json!({
156                        "content": content,
157                        "content_type": "text",
158                        "path": path.display().to_string(),
159                        "bytes_read": bytes_read,
160                        "truncated": truncated,
161                    }),
162                }
163            } else {
164                serde_json::json!({
165                    "content": content,
166                    "content_type": "text",
167                    "path": path.display().to_string(),
168                    "bytes_read": bytes_read,
169                    "truncated": truncated,
170                })
171            }
172        };
173
174        Ok(Output {
175            success: true,
176            data,
177            message: Some(format!(
178                "Read {} bytes from {}{}",
179                bytes_read,
180                path.display(),
181                if truncated { " (truncated)" } else { "" }
182            )),
183        })
184    }
185}
186
187/// Open a file with O_NOFOLLOW to prevent TOCTOU symlink replacement attacks.
188#[cfg(unix)]
189fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
190    use std::os::unix::fs::OpenOptionsExt;
191    std::fs::OpenOptions::new()
192        .read(true)
193        .custom_flags(libc::O_NOFOLLOW)
194        .open(path)
195}
196
/// Opens `path` read-only on platforms where `O_NOFOLLOW` is not available.
///
/// Best-effort equivalent of the unix variant: the path is inspected without
/// following links and the open is refused when the final component is a
/// symbolic link. Unlike `O_NOFOLLOW` the check and the open are two separate
/// steps, so a small race window remains.
///
/// # Errors
///
/// Returns the error from inspecting or opening the path, or an
/// `InvalidInput` error when the path is a symbolic link.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    if std::fs::symlink_metadata(path)?.file_type().is_symlink() {
        return Err(std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "refusing to follow symbolic link",
        ));
    }
    std::fs::File::open(path)
}
201
/// Reports whether `data` looks binary, i.e. contains at least one NUL byte.
///
/// Empty input is not considered binary.
fn detect_binary(data: &[u8]) -> bool {
    data.iter().any(|&byte| byte == 0)
}
206
/// Converts raw bytes to a `String` without losing readable text.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multibyte sequence at the very end of `bytes` (the usual
///   result of cutting a read at a byte limit) is trimmed.
/// * An invalid sequence anywhere else is replaced with U+FFFD and decoding
///   continues after it, so one stray byte does not discard the rest of the
///   data.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut remaining = bytes;

    loop {
        match std::str::from_utf8(remaining) {
            Ok(tail) => {
                text.push_str(tail);
                return text;
            }
            Err(err) => {
                let (valid, rest) = remaining.split_at(err.valid_up_to());
                // `valid` is UTF-8 by definition of `valid_up_to`.
                if let Ok(prefix) = std::str::from_utf8(valid) {
                    text.push_str(prefix);
                }

                match err.error_len() {
                    // Input ended in the middle of a character: trim it.
                    None => return text,
                    // Genuinely invalid bytes: mark them and keep decoding.
                    Some(bad_len) => {
                        text.push(char::REPLACEMENT_CHARACTER);
                        remaining = rest.get(bad_len..).unwrap_or(&[]);
                    }
                }
            }
        }
    }
}
219
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fixture file in the system temp directory, removed again on drop so a
    /// failing assertion does not leave it behind.
    ///
    /// The file name includes the process id so concurrent test runs on the
    /// same machine do not overwrite each other's fixtures.
    struct TempFile(PathBuf);

    impl TempFile {
        /// Creates `runtimo_<pid>_<name>` with the given contents.
        fn create(name: &str, contents: &[u8]) -> Self {
            let path = temp_path(name);
            std::fs::write(&path, contents).unwrap();
            TempFile(path)
        }

        /// Path of the fixture as a string, as expected by the `path` argument.
        fn path_str(&self) -> &str {
            self.0.to_str().unwrap()
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            std::fs::remove_file(&self.0).ok();
        }
    }

    /// Per-process unique path inside the system temp directory.
    fn temp_path(name: &str) -> PathBuf {
        std::env::temp_dir().join(format!("runtimo_{}_{}", std::process::id(), name))
    }

    /// Execution context shared by all tests.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs the capability with `args` in the shared test context.
    fn run_read(args: Value) -> Result<Output> {
        FileRead.execute(&args, &test_context())
    }

    #[test]
    fn reads_existing_file() {
        let file = TempFile::create("read.txt", b"hello world\n");

        let result = run_read(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"]
            .as_str()
            .unwrap()
            .contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        let missing = temp_path("nonexistent.txt");
        std::fs::remove_file(&missing).ok();

        let err = FileRead
            .validate(&serde_json::json!({ "path": missing.to_str().unwrap() }))
            .unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let file = TempFile::create("max_bytes.txt", "hello world line\n".repeat(100).as_bytes());

        let result =
            run_read(serde_json::json!({ "path": file.path_str(), "max_bytes": 50 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        // The fixture exists and is readable, so the only possible reason for
        // an error is the oversized limit (not a missing system file).
        let file = TempFile::create("limit.txt", b"content");

        let result = run_read(serde_json::json!({
            "path": file.path_str(),
            "max_bytes": 9999999999u64
        }));

        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = TempFile::create("default_max.txt", b"small content");

        let result = run_read(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(false));
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = TempFile::create("agent.json", br#"{"key": "value", "nested": {"a": 1}}"#);

        let result = run_read(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
    }

    #[test]
    fn test_binary_file_detected() {
        let file = TempFile::create("binary.bin", b"hello\x00world");

        let result = run_read(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" = [99, 97, 102, 195, 169] — é is 2 bytes, so a 4-byte limit
        // cuts through the middle of it.
        let file = TempFile::create("utf8.txt", b"caf\xc3\xa9");

        let result =
            run_read(serde_json::json!({ "path": file.path_str(), "max_bytes": 4 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content"].as_str(), Some("caf"));
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // UTF-8: "café\n" is 6 bytes (é is 2 bytes) but only 5 characters.
        let file = TempFile::create("bytes_read.txt", "café\n".as_bytes());

        let result = run_read(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        // bytes_read counts raw file bytes, not characters.
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_rejected_by_nofollow() {
        // Link to a fixture we control so the test does not depend on which
        // system files happen to exist.
        let target = TempFile::create("nofollow_target.txt", b"secret");
        let link_path = temp_path("nofollow_link");
        std::fs::remove_file(&link_path).ok();

        if std::os::unix::fs::symlink(&target.0, &link_path).is_err() {
            // Symlinks cannot be created here; nothing to verify.
            return;
        }

        let result = run_read(serde_json::json!({ "path": link_path.to_str().unwrap() }));

        // Clean up before asserting so a failure does not leak the link.
        std::fs::remove_file(&link_path).ok();
        assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
    }
}