Skip to main content

runtimo_core/capabilities/
file_read.rs

1//! FileRead capability — reads file contents with safety validation.
2//!
3//! Rejects path traversal (`..`), empty paths, non-existent files, and
4//! directories. Returns the file content along with byte count.
5//!
6//! Security: opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
7//! uses bounded reader (take) regardless of metadata to prevent size bypass,
8//! detects binary content, and handles UTF-8 boundary splits correctly.
9//!
10//! # Example
11//!
12//! ```rust
13//! use runtimo_core::capabilities::FileRead;
14//! use runtimo_core::capability::Capability;
15//! use serde_json::json;
16//!
17//! let cap = FileRead;
18//! assert_eq!(cap.name(), "FileRead");
19//!
20//! // Schema requires a "path" string:
21//! let schema = cap.schema();
22//! assert!(schema["required"].as_array().unwrap().contains(&json!("path")));
23//! ```
24
25use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Hard ceiling for the `max_bytes` argument of a single read (10 MB).
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Read limit applied when the caller does not supply `max_bytes` (1 MB).
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38/// Arguments for the [`FileRead`] capability.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct FileReadArgs {
41    /// Absolute or relative path to the file to read.
42    pub path: String,
43    /// Maximum bytes to read (default: 1 MB, max: 10 MB).
44    pub max_bytes: Option<u64>,
45}
46
/// Capability that reads the contents of a file.
///
/// The file is opened with `O_NOFOLLOW` (on Unix) so a symlink swapped in
/// after validation is not followed, the read is always bounded by a byte
/// limit instead of trusting file metadata, content containing NUL bytes is
/// reported as binary, and text cut in the middle of a multibyte UTF-8
/// character is trimmed back to a character boundary.
///
/// The type carries no state, so it is cheap to copy and can be created with
/// `FileRead` or `FileRead::default()`.
#[allow(clippy::exhaustive_structs)] // unit struct used as trait-object marker
#[derive(Debug, Clone, Copy, Default)]
pub struct FileRead;
54
55impl Capability for FileRead {
56    fn name(&self) -> &'static str {
57        "FileRead"
58    }
59
60    fn description(&self) -> &'static str {
61        "read file. path validated. no dirs, no traversal."
62    }
63
64    fn schema(&self) -> Value {
65        serde_json::json!({
66            "type": "object",
67            "properties": {
68                "path": { "type": "string" },
69                "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
70            },
71            "required": ["path"]
72        })
73    }
74
75    fn validate(&self, args: &Value) -> Result<()> {
76        let args: FileReadArgs = serde_json::from_value(args.clone())
77            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
78
79        let ctx = PathContext {
80            require_exists: true,
81            require_file: true,
82            ..Default::default()
83        };
84
85        validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
86
87        Ok(())
88    }
89
90    fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
91        let args: FileReadArgs = serde_json::from_value(args.clone())
92            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
93
94        let ctx = PathContext {
95            require_exists: true,
96            require_file: true,
97            ..Default::default()
98        };
99
100        let path = validate_path(&args.path, &ctx)
101            .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
102
103        let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
104        if max_bytes > MAX_FILE_SIZE {
105            return Err(Error::ExecutionFailed(format!(
106                "max_bytes {} exceeds maximum allowed {}",
107                max_bytes, MAX_FILE_SIZE
108            )));
109        }
110
111        // P0 FIX: Open with O_NOFOLLOW to prevent TOCTOU symlink escape.
112        // Open immediately after validation to minimize TOCTOU window.
113        let file = open_file_nofollow(&path)
114            .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
115
116        // P0 FIX: Always use bounded reader (take) regardless of metadata.
117        // Prevents TOCTOU size bypass where file grows between stat and read.
118        let mut limited = file.take(max_bytes);
119
120        // Read raw bytes to handle binary detection and UTF-8 boundaries correctly.
121        let mut raw_bytes = Vec::with_capacity(
122            std::cmp::min(usize::try_from(max_bytes).unwrap_or(usize::MAX), 64 * 1024)
123        );
124        let bytes_read = limited
125            .read_to_end(&mut raw_bytes)
126            .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
127
128        let bytes_read = bytes_read as u64;
129        let truncated = bytes_read >= max_bytes;
130
131        // P1 FIX: Detect binary content (null bytes in the data).
132        let is_binary = detect_binary(&raw_bytes);
133
134        let data = if is_binary {
135            serde_json::json!({
136                "content_type": "binary",
137                "path": path.display().to_string(),
138                "bytes_read": bytes_read,
139                "truncated": truncated,
140                "message": "Binary file detected — content not returned as text",
141            })
142        } else {
143            // P1 FIX: Convert raw bytes to String, trimming to valid UTF-8 boundary.
144            let content = bytes_to_utf8_string(&raw_bytes);
145
146            // P1 FIX: Parse JSON from slice (avoids double memory vs from_str).
147            if path.extension().is_some_and(|ext| ext == "json") {
148                match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
149                    Ok(parsed) => serde_json::json!({
150                        "content": parsed,
151                        "content_type": "json",
152                        "path": path.display().to_string(),
153                        "bytes_read": bytes_read,
154                        "truncated": truncated,
155                    }),
156                    Err(_) => serde_json::json!({
157                        "content": content,
158                        "content_type": "text",
159                        "path": path.display().to_string(),
160                        "bytes_read": bytes_read,
161                        "truncated": truncated,
162                    }),
163                }
164            } else {
165                serde_json::json!({
166                    "content": content,
167                    "content_type": "text",
168                    "path": path.display().to_string(),
169                    "bytes_read": bytes_read,
170                    "truncated": truncated,
171                })
172            }
173        };
174
175        Ok(Output {
176            success: true,
177            data,
178            message: Some(format!(
179                "Read {} bytes from {}{}",
180                bytes_read,
181                path.display(),
182                if truncated { " (truncated)" } else { "" }
183            )),
184        })
185    }
186}
187
188/// Open a file with O_NOFOLLOW to prevent TOCTOU symlink replacement attacks.
189#[cfg(unix)]
190fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
191    use std::os::unix::fs::OpenOptionsExt;
192    std::fs::OpenOptions::new()
193        .read(true)
194        .custom_flags(libc::O_NOFOLLOW)
195        .open(path)
196}
197
/// Fallback for non-Unix targets: opens `path` read-only.
///
/// No no-follow flag is passed here, so this variant does not provide the
/// symlink protection of the Unix implementation.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true);
    options.open(path)
}
202
/// Reports whether `data` looks binary, using the presence of at least one
/// NUL byte as the only signal. Empty input is never binary.
fn detect_binary(data: &[u8]) -> bool {
    const NUL: u8 = 0;
    data.contains(&NUL)
}
207
/// Convert raw bytes to a UTF-8 `String`.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multibyte sequence at the very end of `bytes` (the result
///   of a byte limit cutting a character in half) is trimmed.
/// * Invalid bytes anywhere else are each replaced with U+FFFD so the text
///   that follows them is kept rather than silently dropped.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        match std::str::from_utf8(rest) {
            Ok(valid) => {
                text.push_str(valid);
                return text;
            }
            Err(err) => {
                // Everything before `valid_up_to` is known-good UTF-8.
                let (valid, invalid) = rest.split_at(err.valid_up_to());
                text.push_str(std::str::from_utf8(valid).unwrap_or_default());

                // `error_len() == None` means the input ended in the middle
                // of a character: that is the truncation case, so stop here.
                let Some(bad_len) = err.error_len() else {
                    return text;
                };

                // A genuinely invalid sequence: mark it and keep decoding
                // after it. `bad_len` is at least 1, so the loop advances.
                text.push(char::REPLACEMENT_CHARACTER);
                rest = invalid.get(bad_len..).unwrap_or_default();
            }
        }
    }
}
221
#[cfg(test)]
mod tests {
    // Test-only relaxations: panicking through `unwrap`/indexing is the
    // desired failure mode inside a test, and cleanup errors are ignored on
    // purpose.
    #![allow(
        clippy::unwrap_used,
        clippy::indexing_slicing,
        clippy::unused_result_ok
    )]

    use super::*;

    /// File in the system temp directory that is deleted when the guard is
    /// dropped, so fixtures are cleaned up even when an assertion panics.
    struct TempFile(std::path::PathBuf);

    impl TempFile {
        /// Creates (or overwrites) `name` in the temp directory with `contents`.
        fn create(name: &str, contents: &[u8]) -> Self {
            let path = std::env::temp_dir().join(name);
            std::fs::write(&path, contents).unwrap();
            Self(path)
        }

        /// The file's path as the string passed in the `path` argument.
        fn path_str(&self) -> &str {
            self.0.to_str().unwrap()
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            std::fs::remove_file(&self.0).ok();
        }
    }

    /// Execution context shared by every test.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs `FileRead::execute` with `args` and the shared test context.
    fn run(args: &Value) -> Result<Output> {
        FileRead.execute(args, &test_context())
    }

    #[test]
    fn reads_existing_file() {
        let file = TempFile::create("runtimo_test_read.txt", b"hello world\n");

        let result = run(&serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result
            .data
            .get("content")
            .and_then(|v| v.as_str())
            .unwrap()
            .contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        let missing = std::env::temp_dir().join("nonexistent_runtimo_test.txt");
        std::fs::remove_file(&missing).ok();

        let err = FileRead
            .validate(&serde_json::json!({ "path": missing.to_str().unwrap() }))
            .unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let contents = "hello world line\n".repeat(100);
        let file = TempFile::create("runtimo_test_max_bytes.txt", contents.as_bytes());

        let result =
            run(&serde_json::json!({ "path": file.path_str(), "max_bytes": 50 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        // Use a file this test owns so the only possible reason for the
        // rejection is the oversized `max_bytes`, not an unreadable path.
        let file = TempFile::create("runtimo_test_max_bytes_limit.txt", b"content");

        let result = run(&serde_json::json!({
            "path": file.path_str(),
            "max_bytes": 9999999999u64
        }));

        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = TempFile::create("runtimo_test_default_max.txt", b"small content");

        let result = run(&serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(false));
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = TempFile::create(
            "runtimo_test_agent.json",
            br#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = run(&serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
    }

    #[test]
    fn test_binary_file_detected() {
        let file = TempFile::create("runtimo_test_binary.bin", b"hello\x00world");

        let result = run(&serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" = [99, 97, 102, 195, 169] — é is 2 bytes, so a 4-byte limit
        // cuts the é in half and the partial character must be dropped.
        let file = TempFile::create("runtimo_test_utf8.txt", b"caf\xc3\xa9");

        let result =
            run(&serde_json::json!({ "path": file.path_str(), "max_bytes": 4 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content"].as_str().unwrap(), "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // UTF-8: "café\n" is 6 bytes (é is 2 bytes) but only 5 characters.
        let file = TempFile::create("runtimo_test_bytes_read.txt", "café\n".as_bytes());

        let result = run(&serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        // bytes_read counts raw file bytes (6), not characters (5).
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_rejected_by_nofollow() {
        use std::os::unix::fs::symlink;

        let link_path = std::env::temp_dir().join("runtimo_nofollow_test");
        std::fs::remove_file(&link_path).ok();

        // NOTE(review): the link targets /etc/hostname, which does not exist
        // on every Unix system; where it is missing the read may be rejected
        // for that reason rather than because of O_NOFOLLOW — confirm.
        if symlink("/etc/hostname", &link_path).is_ok() {
            let result = run(&serde_json::json!({ "path": link_path.to_str().unwrap() }));
            // Remove the link before asserting so a failure does not leak it.
            std::fs::remove_file(&link_path).ok();
            assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
        }
    }
}