runtimo_core/capabilities/
file_read.rs1use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// One mebibyte, expressed in bytes.
const MIB: u64 = 1 << 20;

/// Largest `max_bytes` value a caller may request (10 MiB); `execute` rejects
/// anything above it.
const MAX_FILE_SIZE: u64 = 10 * MIB;

/// Read limit applied when the caller does not supply `max_bytes` (1 MiB).
const DEFAULT_MAX_BYTES: u64 = MIB;
37
/// Arguments of the `FileRead` capability, deserialized from the JSON value
/// passed to `validate` / `execute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileReadArgs {
    /// Path of the file to read; it is checked with `validate_path` before use.
    pub path: String,
    /// Optional cap on the number of bytes to read. When absent, `execute`
    /// falls back to `DEFAULT_MAX_BYTES`.
    pub max_bytes: Option<u64>,
}
46
/// Capability that reads a single file and returns its content as text,
/// parsed JSON, or a binary-file notice.
// NOTE(review): the lint is presumably allowed because this unit struct is
// constructed directly by name (`FileRead`), which `#[non_exhaustive]` would
// prevent outside the crate — confirm.
#[allow(clippy::exhaustive_structs)]
pub struct FileRead;
54
55impl Capability for FileRead {
56 fn name(&self) -> &'static str {
57 "FileRead"
58 }
59
60 fn description(&self) -> &'static str {
61 "read file. path validated. no dirs, no traversal."
62 }
63
64 fn schema(&self) -> Value {
65 serde_json::json!({
66 "type": "object",
67 "properties": {
68 "path": { "type": "string" },
69 "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
70 },
71 "required": ["path"]
72 })
73 }
74
75 fn validate(&self, args: &Value) -> Result<()> {
76 let args: FileReadArgs = serde_json::from_value(args.clone())
77 .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
78
79 let ctx = PathContext {
80 require_exists: true,
81 require_file: true,
82 ..Default::default()
83 };
84
85 validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
86
87 Ok(())
88 }
89
90 fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
91 let args: FileReadArgs = serde_json::from_value(args.clone())
92 .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
93
94 let ctx = PathContext {
95 require_exists: true,
96 require_file: true,
97 ..Default::default()
98 };
99
100 let path = validate_path(&args.path, &ctx)
101 .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
102
103 let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
104 if max_bytes > MAX_FILE_SIZE {
105 return Err(Error::ExecutionFailed(format!(
106 "max_bytes {} exceeds maximum allowed {}",
107 max_bytes, MAX_FILE_SIZE
108 )));
109 }
110
111 let file = open_file_nofollow(&path)
114 .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
115
116 let mut limited = file.take(max_bytes);
119
120 let mut raw_bytes = Vec::with_capacity(std::cmp::min(
122 usize::try_from(max_bytes).unwrap_or(usize::MAX),
123 64 * 1024,
124 ));
125 let bytes_read = limited
126 .read_to_end(&mut raw_bytes)
127 .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
128
129 let bytes_read = bytes_read as u64;
130 let truncated = bytes_read >= max_bytes;
131
132 let is_binary = detect_binary(&raw_bytes);
134
135 let data = if is_binary {
136 serde_json::json!({
137 "content_type": "binary",
138 "path": path.display().to_string(),
139 "bytes_read": bytes_read,
140 "truncated": truncated,
141 "message": "Binary file detected — content not returned as text",
142 })
143 } else {
144 let content = bytes_to_utf8_string(&raw_bytes);
146
147 if path.extension().is_some_and(|ext| ext == "json") {
149 match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
150 Ok(parsed) => serde_json::json!({
151 "content": parsed,
152 "content_type": "json",
153 "path": path.display().to_string(),
154 "bytes_read": bytes_read,
155 "truncated": truncated,
156 }),
157 Err(_) => serde_json::json!({
158 "content": content,
159 "content_type": "text",
160 "path": path.display().to_string(),
161 "bytes_read": bytes_read,
162 "truncated": truncated,
163 }),
164 }
165 } else {
166 serde_json::json!({
167 "content": content,
168 "content_type": "text",
169 "path": path.display().to_string(),
170 "bytes_read": bytes_read,
171 "truncated": truncated,
172 })
173 }
174 };
175
176 Ok(Output {
177 success: true,
178 data,
179 message: Some(format!(
180 "Read {} bytes from {}{}",
181 bytes_read,
182 path.display(),
183 if truncated { " (truncated)" } else { "" }
184 )),
185 })
186 }
187}
188
189#[cfg(unix)]
191fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
192 use std::os::unix::fs::OpenOptionsExt;
193 std::fs::OpenOptions::new()
194 .read(true)
195 .custom_flags(libc::O_NOFOLLOW)
196 .open(path)
197}
198
/// Opens `path` read-only on non-Unix targets.
///
/// No symlink-specific flag is applied here; this is a plain read-only open.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    std::fs::OpenOptions::new().read(true).open(path)
}
203
/// Returns `true` when `data` contains at least one NUL byte, which this
/// module treats as the marker of a binary file. Empty input is not binary.
fn detect_binary(data: &[u8]) -> bool {
    const NUL: u8 = 0x00;
    data.contains(&NUL)
}
208
/// Decodes `bytes` as UTF-8 text without ever failing.
///
/// * Valid UTF-8 is returned unchanged.
/// * An incomplete multi-byte sequence at the very end of the input (what a
///   byte-limited read produces when it cuts a character in half) is dropped.
/// * Any other invalid sequence is replaced with U+FFFD and decoding continues,
///   so one stray byte in the middle no longer discards the rest of the text.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    let mut text = String::with_capacity(bytes.len());
    let mut remaining = bytes;

    loop {
        match std::str::from_utf8(remaining) {
            Ok(valid) => {
                text.push_str(valid);
                return text;
            }
            Err(err) => {
                // Everything before `valid_up_to` is guaranteed to be valid UTF-8.
                let (valid, rest) = remaining.split_at(err.valid_up_to());
                text.push_str(std::str::from_utf8(valid).unwrap_or_default());

                match err.error_len() {
                    // `None` means the input ended in the middle of a character.
                    None => return text,
                    Some(invalid_len) => {
                        text.push(char::REPLACEMENT_CHARACTER);
                        remaining = rest.get(invalid_len..).unwrap_or_default();
                    }
                }
            }
        }
    }
}
223
#[cfg(test)]
// Test code: panicking on a failed setup step or a missing JSON key is the
// desired failure mode, so these lints are allowed once for the whole module.
#[allow(clippy::unwrap_used, clippy::indexing_slicing)]
mod tests {
    use super::*;
    use std::path::{Path, PathBuf};

    /// A path inside the system temp directory that is removed when the guard
    /// is dropped, so fixtures are cleaned up even if an assertion panics.
    struct TempPath(PathBuf);

    impl TempPath {
        /// Reserves `name` in the temp directory, deleting any stale leftover.
        fn new(name: &str) -> Self {
            let path = std::env::temp_dir().join(name);
            let _ = std::fs::remove_file(&path);
            Self(path)
        }

        /// Creates the file `name` in the temp directory holding `contents`.
        fn with_contents(name: &str, contents: &[u8]) -> Self {
            let guard = Self::new(name);
            std::fs::write(guard.path(), contents).unwrap();
            guard
        }

        fn path(&self) -> &Path {
            &self.0
        }

        fn as_str(&self) -> &str {
            self.0.to_str().unwrap()
        }
    }

    impl Drop for TempPath {
        fn drop(&mut self) {
            // Best effort: the path may never have been created.
            let _ = std::fs::remove_file(&self.0);
        }
    }

    /// Execution context shared by every test.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Runs `FileRead::execute` with `args` under the shared test context.
    fn read(args: Value) -> Result<Output> {
        FileRead.execute(&args, &test_context())
    }

    #[test]
    fn reads_existing_file() {
        let file = TempPath::with_contents("runtimo_test_read.txt", b"hello world\n");

        let result = read(serde_json::json!({ "path": file.as_str() })).unwrap();

        assert!(result.success);
        assert!(result
            .data
            .get("content")
            .and_then(|v| v.as_str())
            .unwrap()
            .contains("hello world"));
    }

    #[test]
    fn rejects_missing_file() {
        // `TempPath::new` guarantees the path does not exist.
        let missing = TempPath::new("nonexistent_runtimo_test.txt");

        let err = FileRead
            .validate(&serde_json::json!({ "path": missing.as_str() }))
            .unwrap_err();

        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let contents = "hello world line\n".repeat(100);
        let file = TempPath::with_contents("runtimo_test_max_bytes.txt", contents.as_bytes());

        let result = read(serde_json::json!({ "path": file.as_str(), "max_bytes": 50 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        // Use a file this test owns, so the only possible reason for the error
        // is the oversized `max_bytes`, not a missing or disallowed path.
        let file = TempPath::with_contents("runtimo_test_max_bytes_limit.txt", b"small content");

        let result = read(serde_json::json!({
            "path": file.as_str(),
            "max_bytes": 9999999999u64
        }));

        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let file = TempPath::with_contents("runtimo_test_default_max.txt", b"small content");

        let result = read(serde_json::json!({ "path": file.as_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["truncated"].as_bool(), Some(false));
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let file = TempPath::with_contents(
            "runtimo_test_agent.json",
            br#"{"key": "value", "nested": {"a": 1}}"#,
        );

        let result = read(serde_json::json!({ "path": file.as_str() })).unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
    }

    #[test]
    fn test_binary_file_detected() {
        let file = TempPath::with_contents("runtimo_test_binary.bin", b"hello\x00world");

        let result = read(serde_json::json!({ "path": file.as_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" is 5 bytes; a 4-byte limit cuts the two-byte 'é' in half.
        let file = TempPath::with_contents("runtimo_test_utf8.txt", b"caf\xc3\xa9");

        let result = read(serde_json::json!({ "path": file.as_str(), "max_bytes": 4 })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["content"].as_str().unwrap(), "caf");
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // 'é' occupies two bytes, so the 5-character string is 6 bytes long.
        let file = TempPath::with_contents("runtimo_test_bytes_read.txt", "café\n".as_bytes());

        let result = read(serde_json::json!({ "path": file.as_str() })).unwrap();

        assert!(result.success);
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
    }

    #[cfg(unix)]
    #[test]
    fn test_symlink_rejected_by_nofollow() {
        use std::os::unix::fs::symlink;

        let link = TempPath::new("runtimo_nofollow_test");

        // NOTE(review): this passes for any error. If `/etc/hostname` is absent
        // or `validate_path` rejects the link first, the `O_NOFOLLOW` path is
        // not what is being exercised — confirm against `validate_path`.
        if symlink("/etc/hostname", link.path()).is_ok() {
            let result = read(serde_json::json!({ "path": link.as_str() }));
            assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
        }
    }
}