Skip to main content

runtimo_core/capabilities/
file_read.rs

1//! FileRead capability — reads file contents with safety validation.
2//!
3//! Rejects path traversal (`..`), empty paths, non-existent files, and
4//! directories. Returns the file content along with byte count.
5//!
6//! Security: opens file with O_NOFOLLOW to prevent TOCTOU symlink escape,
7//! uses bounded reader (take) regardless of metadata to prevent size bypass,
8//! detects binary content (null bytes or >10% control chars), and handles
9//! UTF-8 boundary splits correctly.
10//!
11//! # Example
12//!
13//! ```rust
14//! use runtimo_core::capabilities::FileRead;
15//! use runtimo_core::capability::Capability;
16//! use serde_json::json;
17//!
18//! let cap = FileRead;
19//! assert_eq!(cap.name(), "FileRead");
20//!
21//! // Schema requires a "path" string:
22//! let schema = cap.schema();
23//! assert!(schema["required"].as_array().unwrap().contains(&json!("path")));
24//! ```
25
26use crate::capability::{CapabilityError, Context, Output, TypedCapability};
27use crate::validation::path::{validate_path, PathContext};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Hard upper bound on how many bytes a single read may request: 10 MiB.
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Byte limit applied when the caller leaves `max_bytes` unset: 1 MiB.
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38/// Input parameters for [`FileRead::execute`].
39///
40/// Accepts a file path and an optional byte limit. The path is validated
41/// against the configured allowed-prefix list before any I/O occurs.
42#[derive(Debug, Clone, Serialize, Deserialize)]
43#[allow(clippy::exhaustive_structs)] // args struct — fields are the contract
44pub struct FileReadArgs {
45    /// Absolute or relative path to the file to read.
46    pub path: String,
47    /// Maximum bytes to read (default: 1 MB, max: 10 MB, minimum: 1).
48    pub max_bytes: Option<u64>,
49}
50
/// File-reading capability.
///
/// The file is opened with `O_NOFOLLOW` (on Unix) so a symlink swapped in
/// after validation is not followed, the read is always capped by a bounded
/// reader rather than trusting file metadata, binary content is detected and
/// withheld, and text cut in the middle of a multibyte UTF-8 character is
/// trimmed back to the last complete character.
#[allow(clippy::exhaustive_structs)] // unit struct used as trait-object marker
pub struct FileRead;
58
59impl TypedCapability for FileRead {
60    type Args = FileReadArgs;
61
62    fn name(&self) -> &'static str {
63        "FileRead"
64    }
65
66    fn description(&self) -> &'static str {
67        "read file. path validated. no dirs, no traversal."
68    }
69
70    fn schema(&self) -> Value {
71        serde_json::json!({
72            "type": "object",
73            "properties": {
74                "path": { "type": "string" },
75                "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
76            },
77            "required": ["path"]
78        })
79    }
80
81    fn execute(
82        &self,
83        args: FileReadArgs,
84        _ctx: &Context,
85    ) -> std::result::Result<Output, CapabilityError> {
86        let ctx = PathContext {
87            require_exists: true,
88            require_file: true,
89            ..Default::default()
90        };
91
92        let path = validate_path(&args.path, &ctx)
93            .map_err(|e| CapabilityError::PermissionDenied(format!("path validation: {}", e)))?;
94
95        let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
96        if max_bytes == 0 {
97            return Err(CapabilityError::InvalidArgs(
98                "max_bytes must be >= 1".into(),
99            ));
100        }
101        if max_bytes > MAX_FILE_SIZE {
102            return Err(CapabilityError::InvalidArgs(format!(
103                "max_bytes {} exceeds maximum allowed {}",
104                max_bytes, MAX_FILE_SIZE
105            )));
106        }
107
108        // P0 FIX: Open with O_NOFOLLOW to prevent TOCTOU symlink escape.
109        // Open immediately after validation to minimize TOCTOU window.
110        let file = open_file_nofollow(&path).map_err(CapabilityError::Io)?;
111
112        // P0 FIX: Always use bounded reader (take) regardless of metadata.
113        // Prevents TOCTOU size bypass where file grows between stat and read.
114        let mut limited = file.take(max_bytes);
115
116        // Read raw bytes to handle binary detection and UTF-8 boundaries correctly.
117        let mut raw_bytes = Vec::with_capacity(std::cmp::min(
118            usize::try_from(max_bytes).unwrap_or(usize::MAX),
119            64 * 1024,
120        ));
121        let bytes_read = limited
122            .read_to_end(&mut raw_bytes)
123            .map_err(CapabilityError::Io)?;
124
125        let bytes_read = bytes_read as u64;
126        let truncated = bytes_read >= max_bytes;
127
128        // P1 FIX: Detect binary content (null bytes in the data).
129        let is_binary = detect_binary(&raw_bytes);
130
131        let data = if is_binary {
132            serde_json::json!({
133                "content_type": "binary",
134                "path": path.display().to_string(),
135                "bytes_read": bytes_read,
136                "truncated": truncated,
137                "message": "Binary file detected — content not returned as text",
138            })
139        } else {
140            // P1 FIX: Convert raw bytes to String, trimming to valid UTF-8 boundary.
141            let content = bytes_to_utf8_string(&raw_bytes);
142
143            // P1 FIX: Parse JSON from slice (avoids double memory vs from_str).
144            if path.extension().is_some_and(|ext| ext == "json") {
145                match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
146                    Ok(parsed) => serde_json::json!({
147                        "content": parsed,
148                        "content_type": "json",
149                        "path": path.display().to_string(),
150                        "bytes_read": bytes_read,
151                        "truncated": truncated,
152                    }),
153                    Err(_) => serde_json::json!({
154                        "content": content,
155                        "content_type": "text",
156                        "path": path.display().to_string(),
157                        "bytes_read": bytes_read,
158                        "truncated": truncated,
159                    }),
160                }
161            } else {
162                serde_json::json!({
163                    "content": content,
164                    "content_type": "text",
165                    "path": path.display().to_string(),
166                    "bytes_read": bytes_read,
167                    "truncated": truncated,
168                })
169            }
170        };
171
172        let mut out = Output::ok(format!(
173            "Read {} bytes from {}{}",
174            bytes_read,
175            path.display(),
176            if truncated { " (truncated)" } else { "" }
177        ));
178        out.data = Some(data);
179        Ok(out)
180    }
181}
182
183/// Open a file with O_NOFOLLOW to prevent TOCTOU symlink replacement attacks.
184#[cfg(unix)]
185fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
186    use std::os::unix::fs::OpenOptionsExt;
187    std::fs::OpenOptions::new()
188        .read(true)
189        .custom_flags(libc::O_NOFOLLOW)
190        .open(path)
191}
192
/// Non-Unix fallback: opens `path` read-only with a plain open, since the
/// `O_NOFOLLOW` flag is only applied on Unix targets.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
197
/// Heuristically decides whether `data` is binary rather than text.
///
/// Data is binary when it contains a null byte, or when the number of
/// control bytes (below 0x20, not counting `\n`, `\r` and `\t`) is greater
/// than one tenth of the total length. Empty input is never binary.
fn detect_binary(data: &[u8]) -> bool {
    // A control byte here is anything below space that is not ordinary
    // text whitespace.
    let is_control = |byte: u8| byte < b' ' && !matches!(byte, b'\n' | b'\r' | b'\t');

    // A single null byte settles it immediately.
    if data.iter().any(|&byte| byte == 0) {
        return true;
    }

    // Integer division keeps the comparison overflow-free; for an integer
    // count, `count > len / 10` is exactly "more than 10% of the bytes".
    let budget = data.len() / 10;
    let control_bytes = data.iter().copied().filter(|&byte| is_control(byte)).count();
    control_bytes > budget
}
218
/// Converts raw bytes to a UTF-8 `String`.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multibyte sequence at the very end of `bytes` (the
///   result of cutting a read in the middle of a character) is dropped.
/// * Invalid bytes elsewhere are each replaced with U+FFFD instead of
///   discarding everything that follows them, so one stray byte in the
///   middle of a file no longer silently loses the rest of its content.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut rest = bytes;

    loop {
        match std::str::from_utf8(rest) {
            Ok(valid) => {
                text.push_str(valid);
                break;
            }
            Err(err) => {
                let valid_len = err.valid_up_to();
                // The prefix up to `valid_len` is valid UTF-8 by definition;
                // the fallbacks only exist to avoid a panic path.
                let prefix = rest.get(..valid_len).unwrap_or_default();
                text.push_str(std::str::from_utf8(prefix).unwrap_or_default());

                // `None` means the input ended inside a multibyte sequence:
                // that is the boundary split to trim, so stop here.
                let Some(bad_len) = err.error_len() else {
                    break;
                };

                // A genuinely invalid sequence: mark it and keep decoding.
                text.push(char::REPLACEMENT_CHARACTER);
                rest = rest
                    .get(valid_len.saturating_add(bad_len)..)
                    .unwrap_or_default();
            }
        }
    }

    text
}
233
#[cfg(test)]
// Test code: panicking on a failed fixture or assertion is the intended
// behavior, and best-effort cleanup deliberately ignores its result.
#[allow(clippy::unwrap_used, clippy::unused_result_ok)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Builds the execution context shared by every test.
    fn test_ctx() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Fixture file in the temp directory that is removed when dropped, so
    /// a failing assertion does not leave the file behind.
    struct TempFile(PathBuf);

    impl TempFile {
        /// Creates `<temp_dir>/runtimo_<pid>_<name>` holding `contents`.
        /// The process id keeps concurrent test processes from colliding.
        fn create(name: &str, contents: &[u8]) -> Self {
            let path =
                std::env::temp_dir().join(format!("runtimo_{}_{}", std::process::id(), name));
            std::fs::write(&path, contents).unwrap();
            Self(path)
        }

        /// Returns the fixture path as the `String` that `FileReadArgs` expects.
        fn path_arg(&self) -> String {
            self.0.to_str().unwrap().to_string()
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            std::fs::remove_file(&self.0).ok();
        }
    }

    /// Runs `FileRead` on `path` with the given byte limit.
    fn read(path: String, max_bytes: Option<u64>) -> std::result::Result<Output, CapabilityError> {
        TypedCapability::execute(&FileRead, FileReadArgs { path, max_bytes }, &test_ctx())
    }

    /// Looks up `key` in the output's `data` object.
    fn field<'a>(out: &'a Output, key: &str) -> Option<&'a Value> {
        out.data.as_ref().and_then(|data| data.get(key))
    }

    #[test]
    fn reads_existing_file() {
        let file = TempFile::create("test_read.txt", b"hello world\n");

        let result = read(file.path_arg(), None).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        assert!(content.contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        let missing = std::env::temp_dir().join("nonexistent_runtimo_test.txt");

        let result = read(missing.to_str().unwrap().to_string(), None);

        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("does not exist") || err.contains("not found"),
            "Expected error about missing file, got: {}",
            err
        );
    }

    #[test]
    fn rejects_empty_path() {
        assert!(read(String::new(), None).is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let file = TempFile::create(
            "test_max_bytes.txt",
            "hello world line\n".repeat(100).as_bytes(),
        );

        let result = read(file.path_arg(), Some(50)).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "truncated").and_then(|v| v.as_bool()),
            Some(true)
        );
        assert!(
            field(&result, "bytes_read")
                .and_then(|v| v.as_u64())
                .unwrap_or(9999)
                <= 50
        );
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        // Use our own fixture so the call can only fail because of the
        // oversized limit, not because some system file is missing or
        // rejected by path validation.
        let file = TempFile::create("test_over_limit.txt", b"content");

        let result = read(file.path_arg(), Some(9_999_999_999u64));

        assert!(
            matches!(result, Err(CapabilityError::InvalidArgs(_))),
            "oversized max_bytes should be rejected as invalid arguments"
        );
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = TempFile::create("test_default_max.txt", b"small content");

        let result = read(file.path_arg(), None).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "truncated").and_then(|v| v.as_bool()),
            Some(false)
        );
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = TempFile::create(
            "test_agent.json",
            br#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = read(file.path_arg(), None).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content").unwrap();
        assert!(content.is_object());
        assert_eq!(content.get("key").and_then(|v| v.as_str()), Some("value"));
        assert_eq!(
            content
                .get("nested")
                .and_then(|nested| nested.get("a"))
                .and_then(|v| v.as_u64()),
            Some(1)
        );
        assert_eq!(
            field(&result, "content_type").and_then(|v| v.as_str()),
            Some("json")
        );
    }

    #[test]
    fn test_binary_file_detected() {
        let file = TempFile::create("test_binary.bin", b"hello\x00world");

        let result = read(file.path_arg(), None).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "content_type").and_then(|v| v.as_str()),
            Some("binary")
        );
        assert_eq!(
            field(&result, "bytes_read").and_then(|v| v.as_u64()),
            Some(11)
        );
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" = [99, 97, 102, 195, 169] — é is 2 bytes.
        // A 4-byte limit cuts the é in half; the partial char must be dropped.
        let file = TempFile::create("test_utf8.txt", b"caf\xc3\xa9");

        let result = read(file.path_arg(), Some(4)).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content")
            .and_then(|v| v.as_str())
            .unwrap_or("");
        assert_eq!(content, "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // UTF-8: "café\n" = 6 bytes (é is 2 bytes) but only 5 characters.
        let file = TempFile::create("test_bytes_read.txt", "café\n".as_bytes());

        let result = read(file.path_arg(), None).unwrap();

        assert!(result.status == "ok");
        // bytes_read counts raw file bytes (6), not characters (5).
        assert_eq!(
            field(&result, "bytes_read").and_then(|v| v.as_u64()),
            Some(6)
        );
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_rejected_by_nofollow() {
        use std::os::unix::fs::symlink;

        let link_path = std::env::temp_dir().join("runtimo_nofollow_test");
        let _ = std::fs::remove_file(&link_path);

        // Skipped silently when the symlink cannot be created.
        if symlink("/etc/hostname", &link_path).is_ok() {
            let result = read(link_path.to_str().unwrap().to_string(), None);
            // Clean up before asserting so a failure does not leak the link.
            std::fs::remove_file(&link_path).ok();
            assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
        }
    }
}