runtimo_core/capabilities/
file_read.rs1use crate::capability::{CapabilityError, Context, Output, TypedCapability};
27use crate::validation::path::{validate_path, PathContext};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Largest `max_bytes` value a caller may request: 10 MiB.
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Read limit used when the caller does not supply `max_bytes`: 1 MiB.
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
43#[allow(clippy::exhaustive_structs)] pub struct FileReadArgs {
45 pub path: String,
47 pub max_bytes: Option<u64>,
49}
50
/// Capability that reads a single file and returns its contents.
///
/// The type carries no state; all behaviour lives in its `TypedCapability`
/// implementation.
// Exhaustive on purpose: callers construct the unit value `FileRead` directly.
#[allow(clippy::exhaustive_structs)]
#[derive(Debug, Clone, Copy, Default)]
pub struct FileRead;
58
59impl TypedCapability for FileRead {
60 type Args = FileReadArgs;
61
62 fn name(&self) -> &'static str {
63 "FileRead"
64 }
65
66 fn description(&self) -> &'static str {
67 "read file. path validated. no dirs, no traversal."
68 }
69
70 fn schema(&self) -> Value {
71 serde_json::json!({
72 "type": "object",
73 "properties": {
74 "path": { "type": "string" },
75 "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
76 },
77 "required": ["path"]
78 })
79 }
80
81 fn execute(
82 &self,
83 args: FileReadArgs,
84 _ctx: &Context,
85 ) -> std::result::Result<Output, CapabilityError> {
86 let ctx = PathContext {
87 require_exists: true,
88 require_file: true,
89 ..Default::default()
90 };
91
92 let path = validate_path(&args.path, &ctx)
93 .map_err(|e| CapabilityError::PermissionDenied(format!("path validation: {}", e)))?;
94
95 let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
96 if max_bytes == 0 {
97 return Err(CapabilityError::InvalidArgs(
98 "max_bytes must be >= 1".into(),
99 ));
100 }
101 if max_bytes > MAX_FILE_SIZE {
102 return Err(CapabilityError::InvalidArgs(format!(
103 "max_bytes {} exceeds maximum allowed {}",
104 max_bytes, MAX_FILE_SIZE
105 )));
106 }
107
108 let file = open_file_nofollow(&path).map_err(CapabilityError::Io)?;
111
112 let mut limited = file.take(max_bytes);
115
116 let mut raw_bytes = Vec::with_capacity(std::cmp::min(
118 usize::try_from(max_bytes).unwrap_or(usize::MAX),
119 64 * 1024,
120 ));
121 let bytes_read = limited
122 .read_to_end(&mut raw_bytes)
123 .map_err(CapabilityError::Io)?;
124
125 let bytes_read = bytes_read as u64;
126 let truncated = bytes_read >= max_bytes;
127
128 let is_binary = detect_binary(&raw_bytes);
130
131 let data = if is_binary {
132 serde_json::json!({
133 "content_type": "binary",
134 "path": path.display().to_string(),
135 "bytes_read": bytes_read,
136 "truncated": truncated,
137 "message": "Binary file detected — content not returned as text",
138 })
139 } else {
140 let content = bytes_to_utf8_string(&raw_bytes);
142
143 if path.extension().is_some_and(|ext| ext == "json") {
145 match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
146 Ok(parsed) => serde_json::json!({
147 "content": parsed,
148 "content_type": "json",
149 "path": path.display().to_string(),
150 "bytes_read": bytes_read,
151 "truncated": truncated,
152 }),
153 Err(_) => serde_json::json!({
154 "content": content,
155 "content_type": "text",
156 "path": path.display().to_string(),
157 "bytes_read": bytes_read,
158 "truncated": truncated,
159 }),
160 }
161 } else {
162 serde_json::json!({
163 "content": content,
164 "content_type": "text",
165 "path": path.display().to_string(),
166 "bytes_read": bytes_read,
167 "truncated": truncated,
168 })
169 }
170 };
171
172 let mut out = Output::ok(format!(
173 "Read {} bytes from {}{}",
174 bytes_read,
175 path.display(),
176 if truncated { " (truncated)" } else { "" }
177 ));
178 out.data = Some(data);
179 Ok(out)
180 }
181}
182
183#[cfg(unix)]
185fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
186 use std::os::unix::fs::OpenOptionsExt;
187 std::fs::OpenOptions::new()
188 .read(true)
189 .custom_flags(libc::O_NOFOLLOW)
190 .open(path)
191}
192
/// Opens `path` read-only on non-Unix targets.
///
/// No no-follow flag is applied here, so unlike the Unix variant this open
/// does not by itself refuse symlinks.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true);
    options.open(path)
}
197
/// Heuristically decides whether `data` looks like binary content.
///
/// Returns `true` when the data contains a NUL byte, or when more than
/// `data.len() / 10` (integer division) of its bytes are control characters
/// below `0x20` other than tab, line feed and carriage return. Empty input is
/// never binary.
fn detect_binary(data: &[u8]) -> bool {
    let is_unusual_control = |byte: u8| byte < 0x20 && !matches!(byte, b'\t' | b'\n' | b'\r');

    let has_nul = data.iter().any(|&byte| byte == 0);
    let control_threshold = data.len() / 10;

    has_nul
        || data
            .iter()
            .copied()
            .filter(|&byte| is_unusual_control(byte))
            .count()
            > control_threshold
}
218
/// Decodes `bytes` as UTF-8 text without panicking on malformed input.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multi-byte sequence at the very end (what a byte-limited
///   read leaves behind when it cuts a character in half) is dropped.
/// * An invalid sequence anywhere else is replaced by U+FFFD and decoding
///   continues, so one stray byte no longer discards the rest of the content.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut decoded = String::with_capacity(bytes.len());
    let mut remaining = bytes;

    loop {
        match std::str::from_utf8(remaining) {
            Ok(text) => {
                decoded.push_str(text);
                return decoded;
            }
            Err(err) => {
                let (valid, rest) = remaining.split_at(err.valid_up_to());
                // `valid` is exactly the prefix the decoder accepted, so this
                // conversion cannot fail; the default only avoids a panic path.
                decoded.push_str(std::str::from_utf8(valid).unwrap_or_default());

                match err.error_len() {
                    // Invalid bytes in the middle: mark them and keep going.
                    Some(skip) => {
                        decoded.push(char::REPLACEMENT_CHARACTER);
                        remaining = rest.get(skip..).unwrap_or_default();
                    }
                    // Input ended inside a character: drop the partial tail.
                    None => return decoded,
                }
            }
        }
    }
}
233
#[cfg(test)]
// Tests unwrap freely (a panic is the intended failure mode) and treat fixture
// cleanup as best-effort, hence the two lint exemptions for the whole module.
#[allow(clippy::unwrap_used, clippy::unused_result_ok)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Fixture file in the system temp directory.
    ///
    /// The file is removed when the guard is dropped, so a failing assertion
    /// does not leave it behind for the next run.
    struct TempFixture {
        path: PathBuf,
    }

    impl TempFixture {
        /// Creates (or overwrites) `file_name` in the temp directory with `contents`.
        fn create(file_name: &str, contents: &[u8]) -> Self {
            let path = std::env::temp_dir().join(file_name);
            std::fs::write(&path, contents).unwrap();
            Self { path }
        }

        /// Fixture path as an owned string, as `FileReadArgs::path` expects.
        fn path_string(&self) -> String {
            self.path.to_str().unwrap().to_string()
        }
    }

    impl Drop for TempFixture {
        fn drop(&mut self) {
            std::fs::remove_file(&self.path).ok();
        }
    }

    /// Execution context shared by every test.
    fn test_ctx() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs the capability against `path` with the given byte limit.
    fn read(path: &str, max_bytes: Option<u64>) -> std::result::Result<Output, CapabilityError> {
        TypedCapability::execute(
            &FileRead,
            FileReadArgs {
                path: path.to_string(),
                max_bytes,
            },
            &test_ctx(),
        )
    }

    /// Looks up `key` in the JSON payload attached to `output`, if any.
    fn field<'a>(output: &'a Output, key: &str) -> Option<&'a Value> {
        output.data.as_ref().and_then(|data| data.get(key))
    }

    #[test]
    fn reads_existing_file() {
        let fixture = TempFixture::create("runtimo_test_read.txt", b"hello world\n");

        let result = read(&fixture.path_string(), None).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content")
            .and_then(Value::as_str)
            .unwrap_or("");
        assert!(content.contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        let result = read("/tmp/nonexistent_runtimo_test.txt", None);
        let err = result.unwrap_err().to_string();
        assert!(
            err.contains("does not exist") || err.contains("not found"),
            "Expected error about missing file, got: {}",
            err
        );
    }

    #[test]
    fn rejects_empty_path() {
        assert!(read("", None).is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let fixture = TempFixture::create(
            "runtimo_test_max_bytes.txt",
            "hello world line\n".repeat(100).as_bytes(),
        );

        let result = read(&fixture.path_string(), Some(50)).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "truncated").and_then(Value::as_bool),
            Some(true)
        );
        assert!(
            field(&result, "bytes_read")
                .and_then(Value::as_u64)
                .unwrap_or(9999)
                <= 50
        );
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        let result = read("/etc/hosts", Some(9999999999u64));
        assert!(result.is_err());
    }

    #[test]
    fn test_max_bytes_rejects_zero() {
        // A real, readable fixture ensures the error comes from the limit
        // check rather than from path validation.
        let fixture = TempFixture::create("runtimo_test_zero_max.txt", b"content");
        assert!(read(&fixture.path_string(), Some(0)).is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let fixture = TempFixture::create("runtimo_test_default_max.txt", b"small content");

        let result = read(&fixture.path_string(), None).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "truncated").and_then(Value::as_bool),
            Some(false)
        );
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let fixture = TempFixture::create(
            "runtimo_test_agent.json",
            br#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = read(&fixture.path_string(), None).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content").unwrap();
        assert!(content.is_object());
        assert_eq!(content.get("key").and_then(Value::as_str), Some("value"));
        assert_eq!(
            content
                .get("nested")
                .unwrap()
                .get("a")
                .and_then(Value::as_u64),
            Some(1)
        );
        assert_eq!(
            field(&result, "content_type").and_then(Value::as_str),
            Some("json")
        );
    }

    #[test]
    fn test_binary_file_detected() {
        let fixture = TempFixture::create("runtimo_test_binary.bin", b"hello\x00world");

        let result = read(&fixture.path_string(), None).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "content_type").and_then(Value::as_str),
            Some("binary")
        );
        assert_eq!(
            field(&result, "bytes_read").and_then(Value::as_u64),
            Some(11)
        );
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" is five bytes; a four-byte limit cuts the two-byte "é" in half.
        let fixture = TempFixture::create("runtimo_test_utf8.txt", b"caf\xc3\xa9");

        let result = read(&fixture.path_string(), Some(4)).unwrap();

        assert!(result.status == "ok");
        let content = field(&result, "content")
            .and_then(Value::as_str)
            .unwrap_or("");
        assert_eq!(content, "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // Five characters but six bytes: "é" takes two bytes in UTF-8.
        let fixture = TempFixture::create("runtimo_test_bytes_read.txt", "café\n".as_bytes());

        let result = read(&fixture.path_string(), None).unwrap();

        assert!(result.status == "ok");
        assert_eq!(
            field(&result, "bytes_read").and_then(Value::as_u64),
            Some(6)
        );
    }

    #[test]
    fn test_symlink_rejected_by_nofollow() {
        let link_path = std::env::temp_dir().join("runtimo_nofollow_test");
        let _ = std::fs::remove_file(&link_path);
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            if symlink("/etc/hostname", &link_path).is_ok() {
                let result = read(link_path.to_str().unwrap(), None);
                // Remove the link before asserting so a failure cannot leak it.
                std::fs::remove_file(&link_path).ok();
                assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
            }
        }
    }

    #[test]
    fn test_detect_binary_heuristic() {
        assert!(!detect_binary(b""));
        assert!(!detect_binary(b"plain text\n"));
        assert!(detect_binary(b"hello\x00world"));
        assert!(detect_binary(b"abcdefgh\x01\x02"));
    }

    #[test]
    fn test_bytes_to_utf8_string_drops_partial_tail() {
        assert_eq!(bytes_to_utf8_string(b""), "");
        assert_eq!(bytes_to_utf8_string("café".as_bytes()), "café");
        assert_eq!(bytes_to_utf8_string(b"caf\xc3"), "caf");
    }
}