runtimo_core/capabilities/
file_read.rs1use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Largest `max_bytes` value a caller may request: 10 MiB (10 485 760 bytes).
const MAX_FILE_SIZE: u64 = 10 << 20;
34
/// Read limit applied when the caller does not supply `max_bytes`: 1 MiB.
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
40pub struct FileReadArgs {
41 pub path: String,
43 pub max_bytes: Option<u64>,
45}
46
/// Stateless capability that reads a file and reports its contents.
///
/// All behaviour lives in the `Capability` implementation for this type.
pub struct FileRead;
53
54impl Capability for FileRead {
55 fn name(&self) -> &'static str {
56 "FileRead"
57 }
58
59 fn description(&self) -> &'static str {
60 "Read the contents of a file. Validates path existence, rejects directories and path traversal."
61 }
62
63 fn schema(&self) -> Value {
64 serde_json::json!({
65 "type": "object",
66 "properties": {
67 "path": { "type": "string" },
68 "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
69 },
70 "required": ["path"]
71 })
72 }
73
74 fn validate(&self, args: &Value) -> Result<()> {
75 let args: FileReadArgs = serde_json::from_value(args.clone())
76 .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
77
78 let ctx = PathContext {
79 require_exists: true,
80 require_file: true,
81 ..Default::default()
82 };
83
84 validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
85
86 Ok(())
87 }
88
89 fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
90 let args: FileReadArgs = serde_json::from_value(args.clone())
91 .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
92
93 let ctx = PathContext {
94 require_exists: true,
95 require_file: true,
96 ..Default::default()
97 };
98
99 let path = validate_path(&args.path, &ctx)
100 .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
101
102 let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
103 if max_bytes > MAX_FILE_SIZE {
104 return Err(Error::ExecutionFailed(format!(
105 "max_bytes {} exceeds maximum allowed {}",
106 max_bytes, MAX_FILE_SIZE
107 )));
108 }
109
110 let file = open_file_nofollow(&path)
113 .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
114
115 let mut limited = file.take(max_bytes);
118
119 let mut raw_bytes = Vec::with_capacity(
121 std::cmp::min(max_bytes as usize, 64 * 1024),
122 );
123 let bytes_read = limited
124 .read_to_end(&mut raw_bytes)
125 .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
126
127 let bytes_read = bytes_read as u64;
128 let truncated = bytes_read >= max_bytes;
129
130 let is_binary = detect_binary(&raw_bytes);
132
133 let data = if is_binary {
134 serde_json::json!({
135 "content_type": "binary",
136 "path": path.display().to_string(),
137 "bytes_read": bytes_read,
138 "truncated": truncated,
139 "message": "Binary file detected — content not returned as text",
140 })
141 } else {
142 let content = bytes_to_utf8_string(&raw_bytes);
144
145 if path.extension().is_some_and(|ext| ext == "json") {
147 match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
148 Ok(parsed) => serde_json::json!({
149 "content": parsed,
150 "content_type": "json",
151 "path": path.display().to_string(),
152 "bytes_read": bytes_read,
153 "truncated": truncated,
154 }),
155 Err(_) => serde_json::json!({
156 "content": content,
157 "content_type": "text",
158 "path": path.display().to_string(),
159 "bytes_read": bytes_read,
160 "truncated": truncated,
161 }),
162 }
163 } else {
164 serde_json::json!({
165 "content": content,
166 "content_type": "text",
167 "path": path.display().to_string(),
168 "bytes_read": bytes_read,
169 "truncated": truncated,
170 })
171 }
172 };
173
174 Ok(Output {
175 success: true,
176 data,
177 message: Some(format!(
178 "Read {} bytes from {}{}",
179 bytes_read,
180 path.display(),
181 if truncated { " (truncated)" } else { "" }
182 )),
183 })
184 }
185}
186
187#[cfg(unix)]
189fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
190 use std::os::unix::fs::OpenOptionsExt;
191 std::fs::OpenOptions::new()
192 .read(true)
193 .custom_flags(libc::O_NOFOLLOW)
194 .open(path)
195}
196
/// Fallback for non-Unix targets: opens `path` read-only with no extra flags,
/// so no symlink protection is applied here.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true);
    options.open(path)
}
201
/// Treats `data` as binary when it contains at least one NUL (`0x00`) byte.
fn detect_binary(data: &[u8]) -> bool {
    data.iter().any(|&byte| byte == 0)
}
206
/// Converts `bytes` to a `String`, keeping only the longest valid UTF-8 prefix.
///
/// Fully valid input is returned unchanged. Otherwise everything from the
/// first invalid or incomplete sequence onward is dropped, which is what
/// trims a multi-byte character cut in half by a byte limit.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    // Validate in place and copy once at the end, instead of cloning the whole
    // buffer before validation and again for the prefix.
    let text = match std::str::from_utf8(bytes) {
        Ok(valid) => valid,
        Err(err) => {
            // `valid_up_to` marks the end of the valid prefix, so this second
            // check cannot fail; the empty-string default only avoids a panic path.
            std::str::from_utf8(&bytes[..err.valid_up_to()]).unwrap_or_default()
        }
    };
    text.to_owned()
}
219
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fixture path in the system temp directory that is deleted on drop, so a
    /// failing assertion cannot leave stale files behind.
    struct TempFile(PathBuf);

    impl TempFile {
        /// Reserves a path for `name` without creating anything. The process
        /// id is part of the file name so concurrent test runs do not collide.
        fn at(name: &str) -> Self {
            let unique = format!("runtimo_{}_{}", std::process::id(), name);
            TempFile(std::env::temp_dir().join(unique))
        }

        /// Creates the fixture file with the given contents.
        fn create(name: &str, contents: impl AsRef<[u8]>) -> Self {
            let file = Self::at(name);
            std::fs::write(&file.0, contents).unwrap();
            file
        }

        /// The fixture path as a string, as passed in the JSON arguments.
        fn path_str(&self) -> &str {
            self.0.to_str().unwrap()
        }
    }

    impl Drop for TempFile {
        fn drop(&mut self) {
            // Best effort: the path may never have been created.
            std::fs::remove_file(&self.0).ok();
        }
    }

    /// Context shared by every test.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs `FileRead::execute` with `args` under the shared test context.
    fn run(args: Value) -> Result<Output> {
        FileRead.execute(&args, &test_context())
    }

    #[test]
    fn reads_existing_file() {
        let file = TempFile::create("test_read.txt", "hello world\n");

        let result = run(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"]
            .as_str()
            .unwrap()
            .contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        // Reserved but never created.
        let missing = TempFile::at("test_nonexistent.txt");

        let err = FileRead
            .validate(&serde_json::json!({ "path": missing.path_str() }))
            .unwrap_err();

        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let file = TempFile::create("test_max_bytes.txt", "hello world line\n".repeat(100));

        let result =
            run(serde_json::json!({ "path": file.path_str(), "max_bytes": 50 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        // A readable fixture guarantees the error comes from the limit check
        // and not from a missing or rejected path.
        let file = TempFile::create("test_max_bytes_limit.txt", "content");

        let result =
            run(serde_json::json!({ "path": file.path_str(), "max_bytes": 9999999999u64 }));

        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = TempFile::create("test_default_max.txt", "small content");

        let result = run(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(false));
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = TempFile::create(
            "test_agent.json",
            r#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = run(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
    }

    #[test]
    fn test_binary_file_detected() {
        let file = TempFile::create("test_binary.bin", b"hello\x00world");

        let result = run(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "é" is two bytes (0xC3 0xA9); a limit of 4 cuts it in half.
        let file = TempFile::create("test_utf8.txt", b"caf\xc3\xa9");

        let result =
            run(serde_json::json!({ "path": file.path_str(), "max_bytes": 4 })).unwrap();

        assert!(result.success);
        let content = result.data["content"].as_str().unwrap();
        assert_eq!(content, "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // Five characters but six bytes: `bytes_read` counts bytes.
        let file = TempFile::create("test_bytes_read.txt", "café\n");

        let result = run(serde_json::json!({ "path": file.path_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_rejected_by_nofollow() {
        use std::os::unix::fs::symlink;

        let link = TempFile::at("nofollow_test");
        // Clear a link left behind by an aborted earlier run.
        let _ = std::fs::remove_file(&link.0);

        // NOTE(review): if `/etc/hostname` is absent the link dangles and the
        // read may fail on path validation rather than on O_NOFOLLOW — confirm
        // against `validate_path` before switching to a self-made target.
        if symlink("/etc/hostname", &link.0).is_err() {
            // The environment does not allow creating symlinks; nothing to check.
            return;
        }

        let result = run(serde_json::json!({ "path": link.path_str() }));

        assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
    }
}