runtimo_core/capabilities/
file_read.rs1use crate::capability::{Capability, Context, Output};
26use crate::validation::path::{validate_path, PathContext};
27use crate::{Error, Result};
28use serde::{Deserialize, Serialize};
29use serde_json::Value;
30use std::io::Read;
31
/// Hard upper bound (10 MiB) on the number of bytes a single read may request.
///
/// `execute` rejects any `max_bytes` above this value; the JSON schema advertises
/// the same number (10485760) as the `maximum` for `max_bytes`.
const MAX_FILE_SIZE: u64 = 10 * 1024 * 1024;

/// Read limit (1 MiB) applied when the caller does not supply `max_bytes`.
const DEFAULT_MAX_BYTES: u64 = 1024 * 1024;
37
/// Arguments accepted by the `FileRead` capability, deserialized from the JSON
/// value passed to `validate` / `execute`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileReadArgs {
    /// Path of the file to read; it is checked with `validate_path` before use.
    pub path: String,
    /// Optional cap on the number of bytes to read. When absent, `execute` falls
    /// back to `DEFAULT_MAX_BYTES`; values above `MAX_FILE_SIZE` are rejected.
    pub max_bytes: Option<u64>,
}
46
/// Stateless capability that reads a (size-limited) file and returns its content
/// as text, parsed JSON, or a binary-file notice. See its `Capability` impl.
#[allow(clippy::exhaustive_structs)] pub struct FileRead;
54
55impl Capability for FileRead {
56 fn name(&self) -> &'static str {
57 "FileRead"
58 }
59
60 fn description(&self) -> &'static str {
61 "read file. path validated. no dirs, no traversal."
62 }
63
64 fn schema(&self) -> Value {
65 serde_json::json!({
66 "type": "object",
67 "properties": {
68 "path": { "type": "string" },
69 "max_bytes": { "type": "integer", "minimum": 1, "maximum": 10485760 }
70 },
71 "required": ["path"]
72 })
73 }
74
75 fn validate(&self, args: &Value) -> Result<()> {
76 let args: FileReadArgs = serde_json::from_value(args.clone())
77 .map_err(|e| Error::SchemaValidationFailed(e.to_string()))?;
78
79 let ctx = PathContext {
80 require_exists: true,
81 require_file: true,
82 ..Default::default()
83 };
84
85 validate_path(&args.path, &ctx).map_err(Error::SchemaValidationFailed)?;
86
87 Ok(())
88 }
89
90 fn execute(&self, args: &Value, _ctx: &Context) -> Result<Output> {
91 let args: FileReadArgs = serde_json::from_value(args.clone())
92 .map_err(|e| Error::ExecutionFailed(e.to_string()))?;
93
94 let ctx = PathContext {
95 require_exists: true,
96 require_file: true,
97 ..Default::default()
98 };
99
100 let path = validate_path(&args.path, &ctx)
101 .map_err(|e| Error::ExecutionFailed(format!("path validation: {}", e)))?;
102
103 let max_bytes = args.max_bytes.unwrap_or(DEFAULT_MAX_BYTES);
104 if max_bytes > MAX_FILE_SIZE {
105 return Err(Error::ExecutionFailed(format!(
106 "max_bytes {} exceeds maximum allowed {}",
107 max_bytes, MAX_FILE_SIZE
108 )));
109 }
110
111 let file = open_file_nofollow(&path)
114 .map_err(|e| Error::ExecutionFailed(format!("open {}: {}", path.display(), e)))?;
115
116 let mut limited = file.take(max_bytes);
119
120 let mut raw_bytes = Vec::with_capacity(
122 std::cmp::min(usize::try_from(max_bytes).unwrap_or(usize::MAX), 64 * 1024)
123 );
124 let bytes_read = limited
125 .read_to_end(&mut raw_bytes)
126 .map_err(|e| Error::ExecutionFailed(format!("read {}: {}", path.display(), e)))?;
127
128 let bytes_read = bytes_read as u64;
129 let truncated = bytes_read >= max_bytes;
130
131 let is_binary = detect_binary(&raw_bytes);
133
134 let data = if is_binary {
135 serde_json::json!({
136 "content_type": "binary",
137 "path": path.display().to_string(),
138 "bytes_read": bytes_read,
139 "truncated": truncated,
140 "message": "Binary file detected — content not returned as text",
141 })
142 } else {
143 let content = bytes_to_utf8_string(&raw_bytes);
145
146 if path.extension().is_some_and(|ext| ext == "json") {
148 match serde_json::from_slice::<Value>(raw_bytes.as_slice()) {
149 Ok(parsed) => serde_json::json!({
150 "content": parsed,
151 "content_type": "json",
152 "path": path.display().to_string(),
153 "bytes_read": bytes_read,
154 "truncated": truncated,
155 }),
156 Err(_) => serde_json::json!({
157 "content": content,
158 "content_type": "text",
159 "path": path.display().to_string(),
160 "bytes_read": bytes_read,
161 "truncated": truncated,
162 }),
163 }
164 } else {
165 serde_json::json!({
166 "content": content,
167 "content_type": "text",
168 "path": path.display().to_string(),
169 "bytes_read": bytes_read,
170 "truncated": truncated,
171 })
172 }
173 };
174
175 Ok(Output {
176 success: true,
177 data,
178 message: Some(format!(
179 "Read {} bytes from {}{}",
180 bytes_read,
181 path.display(),
182 if truncated { " (truncated)" } else { "" }
183 )),
184 })
185 }
186}
187
188#[cfg(unix)]
190fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
191 use std::os::unix::fs::OpenOptionsExt;
192 std::fs::OpenOptions::new()
193 .read(true)
194 .custom_flags(libc::O_NOFOLLOW)
195 .open(path)
196}
197
/// Fallback for non-Unix targets: opens `path` read-only with the standard
/// library defaults (no symlink-specific open flag is applied here).
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be opened.
#[cfg(not(unix))]
fn open_file_nofollow(path: &std::path::Path) -> std::io::Result<std::fs::File> {
    let mut options = std::fs::OpenOptions::new();
    options.read(true);
    options.open(path)
}
202
/// Heuristic binary check: returns `true` when `data` contains at least one NUL
/// byte, and `false` otherwise (including for empty input).
fn detect_binary(data: &[u8]) -> bool {
    const NUL: u8 = b'\0';
    data.contains(&NUL)
}
207
/// Converts `bytes` to a `String`, keeping only the longest valid UTF-8 prefix.
///
/// Fully valid input is returned unchanged. If an invalid or incomplete
/// sequence is encountered (for example a multi-byte character cut in half by
/// the read limit), everything from that point on is dropped.
///
/// Validation runs on the borrowed slice, so the data is copied exactly once
/// instead of being cloned before it is known to be valid.
fn bytes_to_utf8_string(bytes: &[u8]) -> String {
    match std::str::from_utf8(bytes) {
        Ok(text) => text.to_owned(),
        Err(err) => {
            // `valid_up_to` marks the end of the valid prefix, so the lossy
            // conversion below never has to insert replacement characters.
            let valid_prefix = bytes.get(..err.valid_up_to()).unwrap_or_default();
            String::from_utf8_lossy(valid_prefix).into_owned()
        }
    }
}
221
#[cfg(test)]
mod tests {
    // Tests unwrap and index freely because a panic is the desired failure mode,
    // and temp-file cleanup is best-effort (`remove_file(..).ok()`).
    #![allow(
        clippy::unwrap_used,
        clippy::indexing_slicing,
        clippy::unused_result_ok
    )]

    use super::*;
    use std::io::Write;

    /// Execution context shared by every test in this module.
    fn test_context() -> Context {
        Context {
            dry_run: false,
            job_id: "test".into(),
            working_dir: std::env::temp_dir(),
        }
    }

    /// Builds the path of a scratch file inside the system temp directory.
    fn temp_file(name: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(name)
    }

    #[test]
    fn reads_existing_file() {
        let tmp = temp_file("runtimo_test_read.txt");
        {
            let mut f = std::fs::File::create(&tmp).unwrap();
            writeln!(f, "hello world").unwrap();
        }

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap() }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert!(result
            .data
            .get("content")
            .and_then(|v| v.as_str())
            .unwrap()
            .contains("hello world"));
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn rejects_missing_file() {
        let err = FileRead
            .validate(&serde_json::json!({
                "path": "/tmp/nonexistent_runtimo_test.txt"
            }))
            .unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }

    #[test]
    fn rejects_empty_path() {
        assert!(FileRead
            .validate(&serde_json::json!({ "path": "" }))
            .is_err());
    }

    #[test]
    fn test_max_bytes_limits_output() {
        let tmp = temp_file("runtimo_test_max_bytes.txt");
        {
            let mut f = std::fs::File::create(&tmp).unwrap();
            for _ in 0..100 {
                writeln!(f, "hello world line").unwrap();
            }
        }

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap(), "max_bytes": 50 }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert!(result.data["truncated"].as_bool() == Some(true));
        assert!(result.data["bytes_read"].as_u64().unwrap() <= 50);
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_max_bytes_rejects_exceeding_limit() {
        let result = FileRead.execute(
            &serde_json::json!({ "path": "/etc/hosts", "max_bytes": 9999999999u64 }),
            &test_context(),
        );
        assert!(result.is_err());
    }

    #[test]
    fn test_file_read_default_max_bytes() {
        let tmp = temp_file("runtimo_test_default_max.txt");
        std::fs::write(&tmp, "small content").unwrap();

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap() }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert!(result.data["truncated"].as_bool() == Some(false));
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_file_read_json_parsed_for_agents() {
        let tmp = temp_file("runtimo_test_agent.json");
        std::fs::write(&tmp, r#"{"key": "value", "nested": {"a": 1}}"#).unwrap();

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap() }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert!(result.data["content"].is_object());
        assert_eq!(result.data["content"]["key"].as_str(), Some("value"));
        assert_eq!(result.data["content"]["nested"]["a"].as_u64(), Some(1));
        assert_eq!(result.data["content_type"].as_str(), Some("json"));
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_binary_file_detected() {
        let tmp = temp_file("runtimo_test_binary.bin");
        std::fs::write(&tmp, b"hello\x00world").unwrap();

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap() }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert_eq!(result.data["content_type"].as_str(), Some("binary"));
        assert_eq!(result.data["bytes_read"].as_u64(), Some(11));
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_utf8_boundary_truncation() {
        // "café" is 5 bytes; limiting to 4 cuts the 2-byte 'é' in half.
        let tmp = temp_file("runtimo_test_utf8.txt");
        std::fs::write(&tmp, b"caf\xc3\xa9").unwrap();

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap(), "max_bytes": 4 }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        let content = result.data["content"].as_str().unwrap();
        assert_eq!(content, "caf");
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_bytes_read_reports_raw_bytes() {
        // "café\n" is 5 characters but 6 bytes in UTF-8.
        let tmp = temp_file("runtimo_test_bytes_read.txt");
        std::fs::write(&tmp, "café\n").unwrap();

        let result = FileRead
            .execute(
                &serde_json::json!({ "path": tmp.to_str().unwrap() }),
                &test_context(),
            )
            .unwrap();

        assert!(result.success);
        assert_eq!(result.data["bytes_read"].as_u64(), Some(6));
        std::fs::remove_file(&tmp).ok();
    }

    #[test]
    fn test_symlink_rejected_by_nofollow() {
        let link_path = temp_file("runtimo_nofollow_test");
        // Clear any leftover link from an earlier run.
        let _ = std::fs::remove_file(&link_path);
        #[cfg(unix)]
        {
            use std::os::unix::fs::symlink;
            // Only assert when the link could actually be created.
            if symlink("/etc/hostname", &link_path).is_ok() {
                let result = FileRead.execute(
                    &serde_json::json!({ "path": link_path.to_str().unwrap() }),
                    &test_context(),
                );
                assert!(result.is_err(), "symlink should be rejected by O_NOFOLLOW");
                std::fs::remove_file(&link_path).ok();
            }
        }
    }
}