Skip to main content

runtimo_core/capabilities/
file_read.rs

1//! FileRead capability — reads file contents with safety validation.
2//!
3//! Rejects path traversal (`..`), empty paths, non-existent files, and
4//! directories. Returns the file content along with byte count.
5//!
6//! Security: opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
7//! uses bounded reader (take) regardless of metadata to prevent size bypass,
8//! detects binary content, and handles UTF-8 boundary splits correctly.
9//!
10//! # Example
11//!
12//! ```rust
13//! use runtimo_core::capabilities::FileRead;
14//! use runtimo_core::capability::Capability;
15//! use serde_json::json;
16//!
17//! let cap = FileRead;
18//! assert_eq!(cap.name(), "FileRead");
19//!
20//! // Schema requires a "path" string:
21//! let schema = cap.schema();
22//! assert!(schema["required"].as_array().unwrap().contains(&json!("path")));
23//! ```
24
25use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Hard ceiling on how many bytes a single read may request (10 MiB).
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Byte limit applied when the caller does not supply `max_bytes` (1 MiB).
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38/// Arguments for the [`FileRead`] capability.
39#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct FileReadArgs {
41    /// Absolute or relative path to the file to read.
42    pub path: String,
43    /// Maximum bytes to read (default: 1 MB, max: 10 MB).
44    pub max_bytes: Option<u64>,
45}
46
/// Capability that reads the contents of a file.
///
/// Opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
/// uses bounded reader regardless of metadata to prevent size bypass,
/// detects binary content, and handles UTF-8 boundary splits.
///
/// The type carries no state, so it derives the common value traits
/// (`Debug`, `Clone`, `Copy`, `Default`) to be usable in logs, registries,
/// and generic code without extra boilerplate.
#[allow(clippy::exhaustive_structs)] // unit struct used as trait-object marker
#[derive(Debug, Clone, Copy, Default)]
pub struct FileRead;
54
55impl Capability for FileRead {
56    fn name(&self) -> &'static str {
57        "FileRead"
58    }
59
60    fn description(&self) -> &'static str {
61        "read file. path validated. no dirs, no traversal."
62    }
63
64    fn schema(&self) -> Value {
65        serde_json::json!({
66            "type": "object",
67            "properties": {
68                "path": { "type": "string" },
69                "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
70            },
71            "required": ["path"]
72        })
73    }
74
75    fn validate(&self, args: &Value) -> Result<()> {
76        let args: FileReadArgs = serde_json::from_value(args.clone())
77            .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
78
79        let ctx = PathContext {
80            require_exists: true,
81            require_file: true,
82            ..Default::default()
83        };
84
85        validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
86
87        Ok(())
88    }
89
90    fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
91        let args: FileReadArgs = serde_json::from_value(args.clone())
92            .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
93
94        let ctx = PathContext {
95            require_exists: true,
96            require_file: true,
97            ..Default::default()
98        };
99
100        let path = validate_path(&args.path, &ctx)
101            .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
102
103        let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
104        if max_bytes > MAX_FILE_SIZE {
105            return Err(Error::ExecutionFailed(format!(
106                "max_bytes {} exceeds maximum allowed {}",
107                max_bytes, MAX_FILE_SIZE
108            )));
109        }
110
111        // P0 FIX: Open with O_NOFOLLOW to prevent TOCTOU symlink escape.
112        // Open immediately after validation to minimize TOCTOU window.
113        let file = open_file_nofollow(&path)
114            .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
115
116        // P0 FIX: Always use bounded reader (take) regardless of metadata.
117        // Prevents TOCTOU size bypass where file grows between stat and read.
118        let mut limited = file.take(max_bytes);
119
120        // Read raw bytes to handle binary detection and UTF-8 boundaries correctly.
121        let mut raw_bytes = Vec::with_capacity(std::cmp::min(
122            usize::try_from(max_bytes).unwrap_or(usize::MAX),
123            64 * 1024,
124        ));
125        let bytes_read = limited
126            .read_to_end(&mut raw_bytes)
127            .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
128
129        let bytes_read = bytes_read as u64;
130        let truncated = bytes_read >= max_bytes;
131
132        // P1 FIX: Detect binary content (null bytes in the data).
133        let is_binary = detect_binary(&raw_bytes);
134
135        let data = if is_binary {
136            serde_json::json!({
137                "content_type": "binary",
138                "path": path.display().to_string(),
139                "bytes_read": bytes_read,
140                "truncated": truncated,
141                "message": "Binary file detected — content not returned as text",
142            })
143        } else {
144            // P1 FIX: Convert raw bytes to String, trimming to valid UTF-8 boundary.
145            let content = bytes_to_utf8_string(&raw_bytes);
146
147            // P1 FIX: Parse JSON from slice (avoids double memory vs from_str).
148            if path.extension().is_some_and(|ext| ext == "json") {
149                match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
150                    Ok(parsed) => serde_json::json!({
151                        "content": parsed,
152                        "content_type": "json",
153                        "path": path.display().to_string(),
154                        "bytes_read": bytes_read,
155                        "truncated": truncated,
156                    }),
157                    Err(_) => serde_json::json!({
158                        "content": content,
159                        "content_type": "text",
160                        "path": path.display().to_string(),
161                        "bytes_read": bytes_read,
162                        "truncated": truncated,
163                    }),
164                }
165            } else {
166                serde_json::json!({
167                    "content": content,
168                    "content_type": "text",
169                    "path": path.display().to_string(),
170                    "bytes_read": bytes_read,
171                    "truncated": truncated,
172                })
173            }
174        };
175
176        Ok(Output {
177            success: true,
178            data,
179            message: Some(format!(
180                "Read {} bytes from {}{}",
181                bytes_read,
182                path.display(),
183                if truncated { " (truncated)" } else { "" }
184            )),
185        })
186    }
187}
188
189/// Open a file with O_NOFOLLOW to prevent TOCTOU symlink replacement attacks.
190#[cfg(unix)]
191fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
192    use std::os::unix::fs::OpenOptionsExt;
193    std::fs::OpenOptions::new()
194        .read(true)
195        .custom_flags(libc::O_NOFOLLOW)
196        .open(path)
197}
198
/// Fallback for non-Unix targets: a plain read-only open.
///
/// No `O_NOFOLLOW`-style flag is applied here, so this variant does not add
/// any symlink protection of its own.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
203
/// Reports whether `data` looks binary, i.e. contains at least one NUL byte.
fn detect_binary(data: &[u8]) -> bool {
    data.iter().any(|&byte| byte == 0)
}
208
/// Convert raw bytes to a UTF-8 `String`.
///
/// * A trailing, incomplete multibyte sequence (the typical result of cutting
///   a read at a byte limit) is trimmed.
/// * An invalid sequence anywhere else is replaced with U+FFFD and decoding
///   continues, so the remainder of the data is not silently dropped.
///
/// Valid UTF-8 input is returned unchanged.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        match std::str::from_utf8(rest) {
            Ok(valid) => {
                text.push_str(valid);
                return text;
            }
            Err(err) => {
                let (valid, invalid) = rest.split_at(err.valid_up_to());
                // `valid` is valid UTF-8 by definition of `valid_up_to`.
                text.push_str(std::str::from_utf8(valid).unwrap_or_default());

                match err.error_len() {
                    // `None` means the input ended in the middle of a
                    // character: drop the partial sequence and stop.
                    None => return text,
                    // A definite invalid sequence of `len` bytes: mark it and
                    // keep decoding whatever follows.
                    Some(len) => {
                        text.push(char::REPLACEMENT_CHARACTER);
                        rest = invalid.get(len..).unwrap_or_default();
                    }
                }
            }
        }
    }
}
223
#[cfg(test)]
mod tests {
    // Tests unwrap and index into JSON values on purpose: a panic is the
    // failure signal. Allowed once here instead of per test.
    #![allow(clippy::unwrap_used, clippy::indexing_slicing)]

    use super::*;

    /// A file in the system temp directory that is removed when dropped, so a
    /// failing assertion does not leave the file behind.
    struct TempFile(std::path::PathBuf);

    impl TempFile {
        /// Creates (or overwrites) `name` in the temp directory with `contents`.
        fn create(name: &str, contents: &[u8]) -> Self {
            let path = std::env::temp_dir().join(name);
            std::fs::write(&path, contents).unwrap();
            Self(path)
        }

        /// The file's path as UTF-8, as passed in the `path` argument.
        fn path_str(&self) -> &str {
            self.0.to_str().unwrap()
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best-effort cleanup; the file may already be gone.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Execution context shared by all tests.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs `FileRead::execute` with `args` under the shared test context.
    fn run(args: Value) -> Result<Output> {
        FileRead.execute(&args, &test_context())
    }

    #[test]
    fn reads_existing_file() {
        let tmp = TempFile::create("runtimo_test_read.txt", b"hello world\n");

        let result = run(serde_json::json!({ "path": tmp.path_str() })).unwrap();

        assert!(result.success);
        assert!(result
            .data
            .get("content")
            .and_then(|v| v.as_str())
            .unwrap()
            .contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        let err = FileRead
            .validate(&serde_json::json!({
                "path": "/tmp/nonexistent_runtimo_test.txt"
            }))
            .unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let contents = "hello world line\n".repeat(100);
        let tmp = TempFile::create("runtimo_test_max_bytes.txt", contents.as_bytes());

        let result = run(serde_json::json!({ "path": tmp.path_str(), "max_bytes": 50 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        let result = run(serde_json::json!({ "path": "/etc/hosts", "max_bytes": 9999999999u64 }));
        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let tmp = TempFile::create("runtimo_test_default_max.txt", b"small content");

        let result = run(serde_json::json!({ "path": tmp.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(false));
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let tmp = TempFile::create(
            "runtimo_test_agent.json",
            br#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = run(serde_json::json!({ "path": tmp.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
    }

    #[test]
    fn test_binary_file_detected() {
        let tmp = TempFile::create("runtimo_test_binary.bin", b"hello\x00world");

        let result = run(serde_json::json!({ "path": tmp.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" = [99, 97, 102, 195, 169] — é is 2 bytes.
        // Truncating at 4 bytes would split the é character.
        let tmp = TempFile::create("runtimo_test_utf8.txt", b"caf\xc3\xa9");

        let result = run(serde_json::json!({ "path": tmp.path_str(), "max_bytes": 4 })).unwrap();

        assert!(result.success);
        let content = result.data["content"].as_str().unwrap();
        assert_eq!(content, "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // UTF-8: "café\n" = 6 bytes (é is 2 bytes).
        let tmp = TempFile::create("runtimo_test_bytes_read.txt", "café\n".as_bytes());

        let result = run(serde_json::json!({ "path": tmp.path_str() })).unwrap();

        assert!(result.success);
        // bytes_read counts raw file bytes (6), not characters (5).
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
    }

    #[test]
    fn test_symlink_rejected_by_nofollow() {
        let link_path = std::env::temp_dir().join("runtimo_nofollow_test");
        // Clear any leftover link from an earlier, interrupted run.
        let _ = std::fs::remove_file(&link_path);
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            if symlink("/etc/hostname", &link_path).is_ok() {
                // Guard removes the link even if the assertion below fails.
                let link = TempFile(link_path);
                let result = run(serde_json::json!({ "path": link.path_str() }));
                assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
            }
        }
    }
}