runtimo_core/capabilities/
file_read.rs1use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Largest `max_bytes` a caller may request: 10 MiB (10 * 2^20 bytes).
const MAX_FILE_SIZE: u64 = 10 << 20;

/// Read cap used when the caller does not supply `max_bytes`: 1 MiB (2^20 bytes).
const DEFAULT_MAX_BYTES: u64 = 1 << 20;
37
38#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct FileReadArgs {
44 pub path: String,
46 pub max_bytes: Option<u64>,
48}
49
/// Stateless unit type that implements the `FileRead` capability.
// NOTE(review): the reason `clippy::exhaustive_structs` is allowed here is not
// recorded in this file — confirm and document it.
#[allow(clippy::exhaustive_structs)]
pub struct FileRead;
57
58impl Capability for FileRead {
59 fn name(&self) -> &'static str {
60 "FileRead"
61 }
62
63 fn description(&self) -> &'static str {
64 "read file. path validated. no dirs, no traversal."
65 }
66
67 fn schema(&self) -> Value {
68 serde_json::json!({
69 "type": "object",
70 "properties": {
71 "path": { "type": "string" },
72 "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
73 },
74 "required": ["path"]
75 })
76 }
77
78 fn validate(&self, args: &Value) -> Result<()> {
79 let args: FileReadArgs = serde_json::from_value(args.clone())
80 .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
81
82 let ctx = PathContext {
83 require_exists: true,
84 require_file: true,
85 ..Default::default()
86 };
87
88 validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
89
90 Ok(())
91 }
92
93 fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
94 let args: FileReadArgs = serde_json::from_value(args.clone())
95 .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
96
97 let ctx = PathContext {
98 require_exists: true,
99 require_file: true,
100 ..Default::default()
101 };
102
103 let path = validate_path(&args.path, &ctx)
104 .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
105
106 let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
107 if max_bytes > MAX_FILE_SIZE {
108 return Err(Error::ExecutionFailed(format!(
109 "max_bytes {} exceeds maximum allowed {}",
110 max_bytes, MAX_FILE_SIZE
111 )));
112 }
113
114 let file = open_file_nofollow(&path)
117 .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
118
119 let mut limited = file.take(max_bytes);
122
123 let mut raw_bytes = Vec::with_capacity(std::cmp::min(
125 usize::try_from(max_bytes).unwrap_or(usize::MAX),
126 64 * 1024,
127 ));
128 let bytes_read = limited
129 .read_to_end(&mut raw_bytes)
130 .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
131
132 let bytes_read = bytes_read as u64;
133 let truncated = bytes_read >= max_bytes;
134
135 let is_binary = detect_binary(&raw_bytes);
137
138 let data = if is_binary {
139 serde_json::json!({
140 "content_type": "binary",
141 "path": path.display().to_string(),
142 "bytes_read": bytes_read,
143 "truncated": truncated,
144 "message": "Binary file detected — content not returned as text",
145 })
146 } else {
147 let content = bytes_to_utf8_string(&raw_bytes);
149
150 if path.extension().is_some_and(|ext| ext == "json") {
152 match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
153 Ok(parsed) => serde_json::json!({
154 "content": parsed,
155 "content_type": "json",
156 "path": path.display().to_string(),
157 "bytes_read": bytes_read,
158 "truncated": truncated,
159 }),
160 Err(_) => serde_json::json!({
161 "content": content,
162 "content_type": "text",
163 "path": path.display().to_string(),
164 "bytes_read": bytes_read,
165 "truncated": truncated,
166 }),
167 }
168 } else {
169 serde_json::json!({
170 "content": content,
171 "content_type": "text",
172 "path": path.display().to_string(),
173 "bytes_read": bytes_read,
174 "truncated": truncated,
175 })
176 }
177 };
178
179 Ok(Output {
180 success: true,
181 data,
182 message: Some(format!(
183 "Read {} bytes from {}{}",
184 bytes_read,
185 path.display(),
186 if truncated { " (truncated)" } else { "" }
187 )),
188 })
189 }
190}
191
192#[cfg(unix)]
194fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
195 use std::os::unix::fs::OpenOptionsExt;
196 std::fs::OpenOptions::new()
197 .read(true)
198 .custom_flags(libc::O_NOFOLLOW)
199 .open(path)
200}
201
/// Non-Unix fallback: opens `path` read-only. No no-follow flag is applied
/// here, so symbolic links are handled by the platform's default behavior.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true);
    options.open(path)
}
206
/// Returns `true` when `data` contains at least one NUL byte, which this
/// module treats as the marker of binary content. Empty input is not binary.
fn detect_binary(data: &[u8]) -> bool {
    const NUL: u8 = 0;
    data.contains(&NUL)
}
211
/// Converts `bytes` to a `String`, keeping only the longest valid UTF-8 prefix.
///
/// Fully valid input is returned unchanged. Otherwise everything from the
/// first invalid or incomplete sequence onward is dropped, so a multi-byte
/// character cut in half by a read limit never reaches the caller.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let text = match std::str::from_utf8(bytes) {
        Ok(whole) => whole,
        // `valid_up_to` marks the end of the prefix that is known to be valid.
        Err(err) => bytes
            .get(..err.valid_up_to())
            .and_then(|prefix| std::str::from_utf8(prefix).ok())
            .unwrap_or_default(),
    };
    text.to_owned()
}
226
#[cfg(test)]
mod tests {
    // Tests unwrap freely, index into `Value`, and ignore cleanup results on purpose.
    #![allow(
        clippy::unwrap_used,
        clippy::indexing_slicing,
        clippy::unused_result_ok
    )]

    use super::*;

    /// Builds a path for a scratch file inside the system temp directory.
    fn scratch(name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(name)
    }

    /// Execution context shared by every test.
    fn job_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs `FileRead::execute` with the shared test context.
    fn run(args: &Value) -> Result<Output> {
        FileRead.execute(args, &job_context())
    }

    #[test]
    fn reads_existing_file() {
        let file = scratch("runtimo_test_read.txt");
        std::fs::write(&file, "hello world\n").unwrap();

        let output = run(&serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        let text = output.data.get("content").and_then(Value::as_str).unwrap();
        assert!(text.contains("hello world"));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn rejects_missing_file() {
        let args = serde_json::json!({ "path": "/tmp/nonexistent_runtimo_test.txt" });
        let err = FileRead.validate(&args).unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        let outcome = FileRead.validate(&serde_json::json!({ "path": "" }));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let file = scratch("runtimo_test_max_bytes.txt");
        std::fs::write(&file, "hello world line\n".repeat(100)).unwrap();

        let output = run(&serde_json::json!({
            "path": file.to_str().unwrap(),
            "max_bytes": 50
        }))
        .unwrap();

        assert!(output.success);
        assert_eq!(output.data["truncated"].as_bool(), Some(true));
        assert!(output.data["bytes_read"].as_u64().unwrap() <= 50);
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        let outcome = run(&serde_json::json!({
            "path": "/etc/hosts",
            "max_bytes": 9999999999u64
        }));
        assert!(outcome.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = scratch("runtimo_test_default_max.txt");
        std::fs::write(&file, "small content").unwrap();

        let output = run(&serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        assert_eq!(output.data["truncated"].as_bool(), Some(false));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = scratch("runtimo_test_agent.json");
        std::fs::write(&file, r#"{"key": "value", "nested": {"a": 1}}"#).unwrap();

        let output = run(&serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        let content = &output.data["content"];
        assert!(content.is_object());
        assert_eq!(content["key"].as_str(), Some("value"));
        assert_eq!(content["nested"]["a"].as_u64(), Some(1));
        assert_eq!(output.data["content_type"].as_str(), Some("json"));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_binary_file_detected() {
        let file = scratch("runtimo_test_binary.bin");
        std::fs::write(&file, b"hello\x00world").unwrap();

        let output = run(&serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        assert_eq!(output.data["content_type"].as_str(), Some("binary"));
        assert_eq!(output.data["bytes_read"].as_u64(), Some(11));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" is 5 bytes; a 4-byte cap cuts the two-byte 'é' in half.
        let file = scratch("runtimo_test_utf8.txt");
        std::fs::write(&file, b"caf\xc3\xa9").unwrap();

        let output = run(&serde_json::json!({
            "path": file.to_str().unwrap(),
            "max_bytes": 4
        }))
        .unwrap();

        assert!(output.success);
        assert_eq!(output.data["content"].as_str().unwrap(), "caf");
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // 'é' takes two bytes, so "café\n" is 6 bytes on disk.
        let file = scratch("runtimo_test_bytes_read.txt");
        std::fs::write(&file, "café\n").unwrap();

        let output = run(&serde_json::json!({ "path": file.to_str().unwrap() })).unwrap();

        assert!(output.success);
        assert_eq!(output.data["bytes_read"].as_u64(), Some(6));
        std::fs::remove_file(&file).ok();
    }

    #[test]
    fn test_symlink_rejected_by_nofollow() {
        let link_path = scratch("runtimo_nofollow_test");
        // Clear any link left behind by an earlier run.
        let _ = std::fs::remove_file(&link_path);
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;

            // If the link cannot be created there is nothing to check.
            if symlink("/etc/hostname", &link_path).is_ok() {
                let outcome = run(&serde_json::json!({ "path": link_path.to_str().unwrap() }));
                assert!(outcome.is_err(), "symlink should be rejected by O_NOFOLLOW");
                std::fs::remove_file(&link_path).ok();
            }
        }
    }
}