Skip to main content

runtimo_core/capabilities/
file_read.rs

1//! FileRead capability — reads file contents with safety validation.
2//!
3//! Rejects path traversal (`..`), empty paths, non-existent files, and
4//! directories. Returns the file content along with byte count.
5//!
6//! Security: opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
7//! uses bounded reader (take) regardless of metadata to prevent size bypass,
8//! detects binary content, and handles UTF-8 boundary splits correctly.
9//!
10//! # Example
11//!
12//! ```rust
13//! use runtimo_core::capabilities::FileRead;
14//! use runtimo_core::capability::Capability;
15//! use serde_json::json;
16//!
17//! let cap = FileRead;
18//! assert_eq!(cap.name(), "FileRead");
19//!
20//! // Schema requires a "path" string:
21//! let schema = cap.schema();
22//! assert!(schema["required"].as_array().unwrap().contains(&json!("path")));
23//! ```
24
25use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// One mebibyte, expressed in bytes.
const MIB: u64 = 1 << 20;

/// Largest value accepted for `max_bytes` (10 MB); larger requests are rejected.
const MAX_FILE_SIZE: u64 = 10 * MIB;

/// Read limit used when the caller does not supply `max_bytes` (1 MB).
const DEFAULT_MAX_BYTES: u64 = MIB;
37
38/// Input parameters for [`FileRead::execute`].
39///
40/// Accepts a file path and an optional byte limit. The path is validated
41/// against the configured allowed-prefix list before any I/O occurs.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct FileReadArgs {
44    /// Absolute or relative path to the file to read.
45    pub path: String,
46    /// Maximum bytes to read (default: 1 MB, max: 10 MB).
47    pub max_bytes: Option<u64>,
48}
49
/// Capability that reads the contents of a file.
///
/// A stateless marker type: all behaviour lives in its [`Capability`]
/// implementation, which
///
/// * opens the file with `O_NOFOLLOW` on Unix to prevent TOCTOU symlink escape,
/// * always reads through a bounded reader, regardless of file metadata,
/// * reports content containing NUL bytes as binary instead of text, and
/// * handles a multibyte UTF-8 character split by the read limit.
///
/// Being a unit struct it is trivially `Copy`, `Default` and `Debug`.
#[allow(clippy::exhaustive_structs)] // unit struct used as trait-object marker
#[derive(Debug, Clone, Copy, Default)]
pub struct FileRead;
57
58impl Capability for FileRead {
59    fn name(&self) -> &'static str {
60        "FileRead"
61    }
62
63    fn description(&self) -> &'static str {
64        "read file. path validated. no dirs, no traversal."
65    }
66
67    fn schema(&self) -> Value {
68        serde_json::json!({
69            "type": "object",
70            "properties": {
71                "path": { "type": "string" },
72                "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
73            },
74            "required": ["path"]
75        })
76    }
77
78    fn validate(&self, args: &Value) -> Result<()> {
79        let args: FileReadArgs = serde_json::from_value(args.clone())
80            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
81
82        let ctx = PathContext {
83            require_exists: true,
84            require_file: true,
85            ..Default::default()
86        };
87
88        validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
89
90        Ok(())
91    }
92
93    fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
94        let args: FileReadArgs = serde_json::from_value(args.clone())
95            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
96
97        let ctx = PathContext {
98            require_exists: true,
99            require_file: true,
100            ..Default::default()
101        };
102
103        let path = validate_path(&args.path, &ctx)
104            .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
105
106        let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
107        if max_bytes > MAX_FILE_SIZE {
108            return Err(Error::ExecutionFailed(format!(
109                "max_bytes {} exceeds maximum allowed {}",
110                max_bytes, MAX_FILE_SIZE
111            )));
112        }
113
114        // P0 FIX: Open with O_NOFOLLOW to prevent TOCTOU symlink escape.
115        // Open immediately after validation to minimize TOCTOU window.
116        let file = open_file_nofollow(&path)
117            .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
118
119        // P0 FIX: Always use bounded reader (take) regardless of metadata.
120        // Prevents TOCTOU size bypass where file grows between stat and read.
121        let mut limited = file.take(max_bytes);
122
123        // Read raw bytes to handle binary detection and UTF-8 boundaries correctly.
124        let mut raw_bytes = Vec::with_capacity(std::cmp::min(
125            usize::try_from(max_bytes).unwrap_or(usize::MAX),
126            64 * 1024,
127        ));
128        let bytes_read = limited
129            .read_to_end(&mut raw_bytes)
130            .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
131
132        let bytes_read = bytes_read as u64;
133        let truncated = bytes_read >= max_bytes;
134
135        // P1 FIX: Detect binary content (null bytes in the data).
136        let is_binary = detect_binary(&raw_bytes);
137
138        let data = if is_binary {
139            serde_json::json!({
140                "content_type": "binary",
141                "path": path.display().to_string(),
142                "bytes_read": bytes_read,
143                "truncated": truncated,
144                "message": "Binary file detected — content not returned as text",
145            })
146        } else {
147            // P1 FIX: Convert raw bytes to String, trimming to valid UTF-8 boundary.
148            let content = bytes_to_utf8_string(&raw_bytes);
149
150            // P1 FIX: Parse JSON from slice (avoids double memory vs from_str).
151            if path.extension().is_some_and(|ext| ext == "json") {
152                match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
153                    Ok(parsed) => serde_json::json!({
154                        "content": parsed,
155                        "content_type": "json",
156                        "path": path.display().to_string(),
157                        "bytes_read": bytes_read,
158                        "truncated": truncated,
159                    }),
160                    Err(_) => serde_json::json!({
161                        "content": content,
162                        "content_type": "text",
163                        "path": path.display().to_string(),
164                        "bytes_read": bytes_read,
165                        "truncated": truncated,
166                    }),
167                }
168            } else {
169                serde_json::json!({
170                    "content": content,
171                    "content_type": "text",
172                    "path": path.display().to_string(),
173                    "bytes_read": bytes_read,
174                    "truncated": truncated,
175                })
176            }
177        };
178
179        Ok(Output {
180            success: true,
181            data,
182            message: Some(format!(
183                "Read {} bytes from {}{}",
184                bytes_read,
185                path.display(),
186                if truncated { " (truncated)" } else { "" }
187            )),
188        })
189    }
190}
191
192/// Open a file with O_NOFOLLOW to prevent TOCTOU symlink replacement attacks.
193#[cfg(unix)]
194fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
195    use std::os::unix::fs::OpenOptionsExt;
196    std::fs::OpenOptions::new()
197        .read(true)
198        .custom_flags(libc::O_NOFOLLOW)
199        .open(path)
200}
201
/// Non-Unix fallback: a plain read-only open, without `O_NOFOLLOW`.
///
/// NOTE(review): no symlink protection is applied on these targets — confirm
/// that this is acceptable for the platforms the crate supports.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
206
/// Reports whether `data` should be treated as binary, i.e. whether it
/// contains at least one NUL byte.
fn detect_binary(data: &[u8]) -> bool {
    data.iter().any(|&byte| byte == 0)
}
211
/// Convert raw bytes to a UTF-8 `String`.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multibyte sequence at the very end (a character split by
///   the read limit) is trimmed off.
/// * Invalid bytes elsewhere are each replaced with U+FFFD and decoding
///   continues, so the text that follows them is not silently dropped.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        match std::str::from_utf8(rest) {
            Ok(valid) => {
                text.push_str(valid);
                return text;
            }
            Err(err) => {
                let (valid, invalid) = rest.split_at(err.valid_up_to());
                // `valid` is UTF-8 by the contract of `valid_up_to`; the
                // fallback merely avoids a panic path.
                text.push_str(std::str::from_utf8(valid).unwrap_or_default());

                match err.error_len() {
                    // Input ended in the middle of a character: trim it.
                    None => return text,
                    // Genuinely invalid sequence: mark it and keep decoding.
                    Some(bad_len) => {
                        text.push(char::REPLACEMENT_CHARACTER);
                        rest = invalid.get(bad_len..).unwrap_or_default();
                    }
                }
            }
        }
    }
}
226
#[cfg(test)]
mod tests {
    // Test-only code: unwrapping, indexing into JSON values and ignoring the
    // result of best-effort cleanup are all acceptable here.
    #![allow(
        clippy::unwrap_used,
        clippy::indexing_slicing,
        clippy::unused_result_ok
    )]

    use super::*;
    use std::path::PathBuf;

    /// Location of a scratch file called `name` inside the system temp dir.
    fn scratch(name: &str) -> PathBuf {
        std::env::temp_dir().join(name)
    }

    /// Executes `FileRead` with `args` under the context shared by all tests.
    fn run(args: Value) -> Result<Output> {
        let ctx = Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        };
        FileRead.execute(&args, &ctx)
    }

    #[test]
    fn reads_existing_file() {
        let file = scratch("runtimo_test_read.txt");
        std::fs::write(&file, "hello world\n").unwrap();

        let output = run(serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        let text = output.data.get("content").and_then(Value::as_str).unwrap();
        assert!(text.contains("hello world"));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn rejects_missing_file() {
        let args = serde_json::json!({
            "path": "/tmp/nonexistent_runtimo_test.txt"
        });

        let failure = FileRead.validate(&args).unwrap_err();

        assert!(failure.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        let outcome = FileRead.validate(&serde_json::json!({ "path": "" }));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let file = scratch("runtimo_test_max_bytes.txt");
        std::fs::write(&file, "hello world line\n".repeat(100)).unwrap();

        let output = run(serde_json::json!({
            "path": file.to_str().unwrap(),
            "max_bytes": 50
        }))
        .unwrap();

        assert!(output.success);
        assert_eq!(output.data["truncated"].as_bool(), Some(true));
        assert!(output.data["bytes_read"].as_u64().unwrap() <= 50);
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        let outcome = run(serde_json::json!({
            "path": "/etc/hosts",
            "max_bytes": 9999999999u64
        }));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = scratch("runtimo_test_default_max.txt");
        std::fs::write(&file, "small content").unwrap();

        let output = run(serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        assert_eq!(output.data["truncated"].as_bool(), Some(false));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = scratch("runtimo_test_agent.json");
        std::fs::write(&file, r#"{"key": "value", "nested": {"a": 1}}"#).unwrap();

        let output = run(serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        let content = &output.data["content"];
        assert!(content.is_object());
        assert_eq!(content["key"].as_str(), Some("value"));
        assert_eq!(content["nested"]["a"].as_u64(), Some(1));
        assert_eq!(output.data["content_type"].as_str(), Some("json"));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_binary_file_detected() {
        let file = scratch("runtimo_test_binary.bin");
        std::fs::write(&file, b"hello\x00world").unwrap();

        let output = run(serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        assert_eq!(output.data["content_type"].as_str(), Some("binary"));
        assert_eq!(output.data["bytes_read"].as_u64(), Some(11));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" is the bytes [99, 97, 102, 195, 169]; the é takes two bytes,
        // so a 4-byte limit cuts that character in half.
        let file = scratch("runtimo_test_utf8.txt");
        std::fs::write(&file, b"caf\xc3\xa9").unwrap();

        let output = run(serde_json::json!({
            "path": file.to_str().unwrap(),
            "max_bytes": 4
        }))
        .unwrap();

        assert!(output.success);
        assert_eq!(output.data["content"].as_str().unwrap(), "caf");
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        let file = scratch("runtimo_test_bytes_read.txt");
        // "café\n" occupies 6 bytes in UTF-8 because é is two bytes wide.
        std::fs::write(&file, "café\n").unwrap();

        let output = run(serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        // The count must reflect raw file bytes (6), not the number of characters (5).
        assert_eq!(output.data["bytes_read"].as_u64(), Some(6));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_symlink_rejected_by_nofollow() {
        let link_path = scratch("runtimo_nofollow_test");
        // Clear any leftover from an earlier run; a missing file is fine.
        let _ = std::fs::remove_file(&link_path);
        #[cfg(unix)]
        {
            // Only meaningful when the symlink could actually be created.
            if std::os::unix::fs::symlink("/etc/hostname", &link_path).is_ok() {
                let outcome = run(serde_json::json!({ "path": link_path.to_str().unwrap() }));
                assert!(outcome.is_err(), "symlink should be rejected by O_NOFOLLOW");
                std::fs::remove_file(&link_path).ok();
            }
        }
    }
}