use std::path::Path;
use async_trait::async_trait;
use serde_json::json;
use super::{suggest_similar_files, truncate_head, Tool, ToolContext, ToolOutput};
use crate::error::Result;
/// Byte budget handed to `truncate_head` when trimming text output.
const MAX_BYTES: usize = 50_000;
/// Text files larger than this (5 MiB) are rejected unless a line range is requested.
const MAX_TEXT_BYTES: u64 = 5 * 1024 * 1024;
/// Images larger than this (10 MiB) are rejected by `read_image`.
const MAX_IMAGE_BYTES: u64 = 10 * 1024 * 1024;
/// Lower-case file extensions that are routed to `read_image` instead of the text path.
const IMAGE_EXTENSIONS: &[&str] = &["png", "jpg", "jpeg", "gif", "webp", "svg"];
/// File-reading tool: returns a text file's lines (optionally a sub-range, with
/// optional per-line anchors) or, for image extensions, the image as a base64
/// content block.
pub struct ReadTool;

#[async_trait]
impl Tool for ReadTool {
    /// Tool name: `"read"`.
    fn name(&self) -> &str {
        "read"
    }

    /// Human-readable label: `"Read File"`.
    fn label(&self) -> &str {
        "Read File"
    }

    /// One-line description of the tool's capabilities.
    fn description(&self) -> &str {
        "Read a file with stable line-oriented output. Supports start_line/end_line ranges, anchors, and images."
    }

    /// JSON Schema of the accepted parameters; only `path` is required.
    fn parameters(&self) -> serde_json::Value {
        json!({
            "type": "object",
            "properties": {
                "path": { "type": "string" },
                "start_line": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "1-indexed first line to read."
                },
                "end_line": {
                    "type": "integer",
                    "minimum": 1,
                    "description": "1-indexed inclusive last line to read."
                },
                "anchors": {
                    "type": "boolean",
                    "description": "When true, include opaque per-line anchors for stale-safe anchored edits. Anchors are session-local integrity markers, not security tokens."
                }
            },
            "required": ["path"]
        })
    }

    /// This tool reports itself as read-only.
    fn is_readonly(&self) -> bool {
        true
    }

    /// Reads the file named by `params["path"]` (resolved against `ctx.cwd`).
    ///
    /// The checks run in this order, and the first one that fails decides the
    /// response: missing path, invalid line range, file not found, path is a
    /// directory, image extension (delegated to `read_image`), file too large
    /// without a range, binary content.
    ///
    /// # Returns
    /// `Ok(ToolOutput)` carrying either the text (plus optional anchor list and
    /// truncation note) or an error output (`is_error` set by
    /// `ToolOutput::error`) for the user-facing failures listed above.
    ///
    /// # Errors
    /// Returns `Err` when `start_line`/`end_line` fail validation in
    /// `parse_line_range`, or when `tokio::fs::metadata` / `tokio::fs::read`
    /// fail.
    async fn execute(
        &self,
        _call_id: &str,
        params: serde_json::Value,
        ctx: ToolContext,
    ) -> Result<ToolOutput> {
        // Leading `@` characters on the path are stripped before resolution.
        let raw_path = params["path"]
            .as_str()
            .unwrap_or("")
            .trim_start_matches('@');
        if raw_path.is_empty() {
            return Ok(ToolOutput::error("Missing required parameter: path"));
        }
        let path = super::resolve_path(&ctx.cwd, raw_path);
        let range = parse_line_range(&params)?;

        if !path.exists() {
            let suggestions = suggest_similar_files(&ctx.cwd, raw_path);
            let mut msg = format!("File not found: {}", path.display());
            if !suggestions.is_empty() {
                msg.push_str("\n\nDid you mean:");
                for s in &suggestions {
                    msg.push_str(&format!("\n {s}"));
                }
            }
            return Ok(ToolOutput::error(msg));
        }
        if path.is_dir() {
            return Ok(ToolOutput::error(format!(
                "Path is a directory, not a file: {}",
                path.display()
            )));
        }

        // Image files bypass the text pipeline entirely (extension match is
        // case-insensitive).
        if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
            if IMAGE_EXTENSIONS.contains(&ext.to_lowercase().as_str()) {
                return read_image(&path).await;
            }
        }

        let metadata = tokio::fs::metadata(&path).await?;
        if metadata.len() > MAX_TEXT_BYTES && range.is_none() {
            return Ok(ToolOutput::error(format!(
                "File is too large to read without a line range: {} ({} bytes). Use start_line/end_line to read a smaller range.",
                path.display(),
                metadata.len()
            )));
        }

        let bytes = tokio::fs::read(&path).await?;
        // A NUL byte within the first 8192 bytes marks the file as binary.
        let check_len = bytes.len().min(8192);
        if bytes[..check_len].contains(&0) {
            return Ok(ToolOutput::error(format!(
                "Binary file detected: {}. Cannot display binary content.",
                path.display()
            )));
        }

        // Invalid UTF-8 sequences are replaced rather than rejected.
        let content = String::from_utf8_lossy(&bytes).into_owned();
        let include_anchors = params["anchors"].as_bool().unwrap_or(false);
        let sliced = apply_line_range(&content, range);
        let start_line = range.map(|r| r.start).unwrap_or(1);
        let requested_end_line = range.and_then(|r| r.end);
        let total_file_lines = content.lines().count();
        let line_ending = detect_line_ending(&content);

        let max_lines = ctx.read_max_lines;
        let result = if max_lines == 0 {
            // A limit of 0 bypasses `truncate_head`, so the selected range is
            // returned as-is.
            let line_count = sliced.lines().count();
            let byte_count = sliced.len();
            super::TruncationResult {
                content: sliced,
                truncated: false,
                output_lines: line_count,
                total_lines: line_count,
                output_bytes: byte_count,
                total_bytes: byte_count,
                temp_file: None,
            }
        } else {
            truncate_head(&sliced, max_lines, MAX_BYTES)
        };

        let mut output = result.content.clone();
        let mut anchors_json = serde_json::Value::Null;
        if include_anchors {
            // Anchors are recorded only for the lines that are actually shown,
            // keyed by a hash of the whole file content.
            let visible_lines = result.content.lines().collect::<Vec<_>>();
            let anchors = ctx.anchor_store.record_lines(
                &path,
                super::stable_hash(&content),
                start_line,
                &visible_lines,
            );
            anchors_json = json!(anchors
                .iter()
                .map(|anchor| json!({
                    "line": anchor.line,
                    "anchor": anchor.id,
                    "content_hash": format!("{:016x}", anchor.content_hash),
                }))
                .collect::<Vec<_>>());
            if !anchors.is_empty() {
                output.push_str("\n\nAnchors:");
                for anchor in &anchors {
                    output.push_str(&format!("\n{:>6} {}", anchor.line, anchor.id));
                }
            }
        }

        if result.truncated {
            let note = format!(
                "\n[…truncated: showing {}/{} lines, {}/{} bytes",
                result.output_lines, result.total_lines, result.output_bytes, result.total_bytes,
            );
            if let Some(ref tf) = result.temp_file {
                output.push_str(&format!("{note}, full output: {}]", tf.display()));
            } else {
                output.push_str(&format!("{note}]"));
            }
        }

        // Best effort: if the tracker lock cannot be acquired the read is
        // simply not recorded.
        if let Ok(mut tracker) = ctx.file_tracker.lock() {
            tracker.record_read(&path);
        }

        // Last line actually emitted; one before `start_line` when nothing was
        // emitted.
        let end_line = if result.output_lines == 0 {
            start_line.saturating_sub(1)
        } else {
            start_line + result.output_lines - 1
        };

        Ok(ToolOutput {
            content: vec![imp_llm::ContentBlock::Text { text: output }],
            details: json!({
                "action": "read",
                "path": path.display().to_string(),
                "start_line": start_line,
                "end_line": end_line,
                "requested_end_line": requested_end_line,
                "truncated": result.truncated,
                "lines": result.output_lines,
                "total_lines": total_file_lines,
                "range_total_lines": result.total_lines,
                "bytes": result.output_bytes,
                "total_bytes": metadata.len(),
                "range_total_bytes": result.total_bytes,
                "temp_file": result.temp_file.as_ref().map(|tf| tf.display().to_string()),
                "encoding": "utf-8-lossy",
                "line_ending": line_ending,
                "anchors": anchors_json,
                "anchor_count": anchors_json.as_array().map_or(0, |anchors| anchors.len()),
            }),
            is_error: false,
        })
    }
}
/// Line window requested via the `start_line` / `end_line` parameters.
#[derive(Clone, Copy)]
struct LineRange {
/// 1-indexed first line to include.
start: usize,
/// 1-indexed inclusive last line; `None` means "through the end of the file".
end: Option<usize>,
}
fn parse_line_range(params: &serde_json::Value) -> Result<Option<LineRange>> {
let start_line = parse_positive_usize(params.get("start_line"), "start_line")?;
let end_line = parse_positive_usize(params.get("end_line"), "end_line")?;
if let (Some(start), Some(end)) = (start_line, end_line) {
if start > end {
return Err(crate::error::Error::Tool(
"start_line must be <= end_line".to_string(),
));
}
}
Ok(match (start_line, end_line) {
(None, None) => None,
(Some(start), end) => Some(LineRange { start, end }),
(None, Some(end)) => Some(LineRange {
start: 1,
end: Some(end),
}),
})
}
fn parse_positive_usize(value: Option<&serde_json::Value>, field: &str) -> Result<Option<usize>> {
let Some(value) = value else {
return Ok(None);
};
if value.is_null() {
return Ok(None);
}
let Some(number) = value.as_u64() else {
return Err(crate::error::Error::Tool(format!(
"{field} must be a positive integer"
)));
};
if number == 0 {
return Err(crate::error::Error::Tool(format!("{field} must be >= 1")));
}
Ok(Some(number as usize))
}
/// Returns the lines of `content` selected by `range`, joined with `\n`.
///
/// * `range == None` selects every line.
/// * `range.start` is 1-indexed; `range.end` is 1-indexed and inclusive, and is
///   clamped to the number of lines available.
///
/// Line terminators are normalised to `\n` and no trailing newline is emitted,
/// because the text is split with `str::lines` and re-joined. A start past the
/// last line, or an inverted range (`end < start`), yields an empty string
/// rather than panicking.
fn apply_line_range(content: &str, range: Option<LineRange>) -> String {
    // Number of leading lines to drop (start is 1-indexed).
    let skip = range.map_or(0, |r| r.start.saturating_sub(1));
    // Number of lines to keep; saturates to zero for inverted ranges.
    let take = range
        .and_then(|r| r.end)
        .map_or(usize::MAX, |end| end.saturating_sub(skip));

    content
        .lines()
        .skip(skip)
        .take(take)
        .collect::<Vec<_>>()
        .join("\n")
}
/// Classifies the line terminator used in `content`.
///
/// Returns `"crlf"` if any `\r\n` pair is present, otherwise `"cr"` if a lone
/// `\r` is present, otherwise `"lf"` (which is also the answer for text with no
/// line breaks at all).
fn detect_line_ending(content: &str) -> &'static str {
    let has_crlf = content.contains("\r\n");
    let has_cr = content.contains('\r');
    match (has_crlf, has_cr) {
        (true, _) => "crlf",
        (false, true) => "cr",
        (false, false) => "lf",
    }
}
/// Loads the image at `path` and returns it as a base64-encoded image content block.
///
/// Files larger than `MAX_IMAGE_BYTES` produce an error output instead of being
/// read. The media type is derived from the lower-cased file extension (a
/// missing extension is treated as `png`; unrecognised ones map to
/// `application/octet-stream`).
///
/// # Errors
/// Propagates failures from `tokio::fs::metadata`, `tokio::fs::read` and the
/// base64 writer.
async fn read_image(path: &Path) -> Result<ToolOutput> {
    use std::io::Write;

    // Size gate first so oversized images are never loaded into memory.
    let size_on_disk = tokio::fs::metadata(path).await?.len();
    if size_on_disk > MAX_IMAGE_BYTES {
        return Ok(ToolOutput::error(format!(
            "Image is too large to read: {} ({} bytes, max {} bytes)",
            path.display(),
            size_on_disk,
            MAX_IMAGE_BYTES
        )));
    }
    let raw = tokio::fs::read(path).await?;

    let extension = match path.extension().and_then(|e| e.to_str()) {
        Some(ext) => ext.to_lowercase(),
        None => String::from("png"),
    };
    let media_type = match extension.as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "svg" => "image/svg+xml",
        _ => "application/octet-stream",
    };

    // `finish` consumes the encoder, which ends its borrow of `encoded`.
    let mut encoded = Vec::new();
    let mut encoder = base64_encoder(&mut encoded);
    encoder.write_all(&raw)?;
    encoder.finish()?;
    let data = String::from_utf8(encoded).unwrap_or_default();

    let details = json!({
        "action": "read",
        "path": path.display().to_string(),
        "media_type": media_type,
        "bytes": raw.len(),
        "total_bytes": size_on_disk,
    });
    Ok(ToolOutput {
        content: vec![imp_llm::ContentBlock::Image {
            media_type: media_type.to_string(),
            data,
        }],
        details,
        is_error: false,
    })
}
/// Creates a [`Base64Writer`] that appends base64 text to `output`.
///
/// Call `finish` on the returned writer once all input has been written so the
/// trailing partial group (and its `=` padding) is emitted.
fn base64_encoder(output: &mut Vec<u8>) -> Base64Writer<'_> {
    Base64Writer {
        output,
        buffer: [0u8; 3],
        buffer_len: 0,
    }
}

/// Standard base64 alphabet, indexed by 6-bit value.
const BASE64_CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/// Streaming base64 encoder that buffers up to three input bytes at a time.
struct Base64Writer<'a> {
    /// Destination for the encoded ASCII bytes.
    output: &'a mut Vec<u8>,
    /// Pending input bytes that do not yet form a full 3-byte group.
    buffer: [u8; 3],
    /// Number of valid bytes currently held in `buffer`.
    buffer_len: usize,
}

impl<'a> std::io::Write for Base64Writer<'a> {
    /// Accepts all of `buf`, emitting four output characters for every
    /// completed 3-byte group. Never fails.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        buf.iter().for_each(|&incoming| {
            let slot = self.buffer_len;
            self.buffer[slot] = incoming;
            self.buffer_len = slot + 1;
            if self.buffer_len == self.buffer.len() {
                self.encode_block();
            }
        });
        Ok(buf.len())
    }

    /// No-op: a partial group is only emitted by `finish`.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}

impl<'a> Base64Writer<'a> {
    /// Encodes the full 3-byte buffer as four characters and resets the buffer.
    fn encode_block(&mut self) {
        let [first, second, third] = self.buffer;
        let sextets = [
            first >> 2,
            (first & 0x03) << 4 | second >> 4,
            (second & 0x0f) << 2 | third >> 6,
            third & 0x3f,
        ];
        self.output
            .extend(sextets.iter().map(|&index| BASE64_CHARS[usize::from(index)]));
        self.buffer_len = 0;
    }

    /// Emits the final partial group (one or two leftover bytes) with `=`
    /// padding; emits nothing when the input length was a multiple of three.
    fn finish(self) -> std::io::Result<()> {
        let [first, second, _] = self.buffer;
        let symbol = |index: u8| BASE64_CHARS[usize::from(index)];
        if self.buffer_len == 1 {
            self.output.extend_from_slice(&[
                symbol(first >> 2),
                symbol((first & 0x03) << 4),
                b'=',
                b'=',
            ]);
        } else if self.buffer_len == 2 {
            self.output.extend_from_slice(&[
                symbol(first >> 2),
                symbol((first & 0x03) << 4 | second >> 4),
                symbol((second & 0x0f) << 2),
                b'=',
            ]);
        }
        Ok(())
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::tools::ToolContext;
    use std::sync::Arc;

    /// Builds a `ToolContext` rooted at `dir` with a 500-line read limit and
    /// default/no-op collaborators.
    fn test_ctx(dir: &Path) -> ToolContext {
        let (tx, _rx) = tokio::sync::mpsc::channel(16);
        let (cmd_tx, _cmd_rx) = tokio::sync::mpsc::channel(16);
        ToolContext {
            cwd: dir.to_path_buf(),
            cancelled: Arc::new(std::sync::atomic::AtomicBool::new(false)),
            update_tx: tx,
            command_tx: cmd_tx,
            ui: Arc::new(crate::ui::NullInterface),
            file_cache: Arc::new(crate::tools::FileCache::new()),
            checkpoint_state: Arc::new(crate::tools::CheckpointState::new()),
            file_tracker: Arc::new(std::sync::Mutex::new(crate::tools::FileTracker::new())),
            anchor_store: Arc::new(crate::tools::AnchorStore::new()),
            lua_tool_loader: None,
            mode: crate::config::AgentMode::Full,
            read_max_lines: 500,
            turn_mana_review: Arc::new(std::sync::Mutex::new(
                crate::mana_review::TurnManaReviewAccumulator::default(),
            )),
            config: Arc::new(crate::config::Config::default()),
            run_policy: Default::default(),
            supporting_provenance: Vec::new(),
        }
    }

    /// A small text file is returned in full.
    #[tokio::test]
    async fn read_known_file() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("hello.txt");
        std::fs::write(&file, "line1\nline2\nline3\n").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c1", json!({"path": "hello.txt"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("line1"));
        assert!(text.contains("line3"));
    }

    /// `start_line`/`end_line` select an inclusive 1-indexed window.
    #[tokio::test]
    async fn read_start_end_lines() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("data.txt");
        std::fs::write(&file, "a\nb\nc\nd\ne\n").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute(
                "c2",
                json!({"path": "data.txt", "start_line": 2, "end_line": 3}),
                test_ctx(dir.path()),
            )
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("b"));
        assert!(text.contains("c"));
        assert!(!text.contains("a"));
        assert!(!text.contains("d"));
        assert_eq!(result.details["start_line"], 2);
        assert_eq!(result.details["end_line"], 3);
    }

    /// A near-miss file name produces a "did you mean" suggestion.
    #[tokio::test]
    async fn read_file_not_found_suggestions() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("hello.txt"), "hi").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c3", json!({"path": "helo.txt"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("File not found"));
        assert!(text.contains("hello.txt"));
    }

    /// Content containing NUL bytes is rejected as binary.
    #[tokio::test]
    async fn read_binary_file_rejected() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("data.bin");
        std::fs::write(&file, b"\x00\x01\x02\x03").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c4", json!({"path": "data.bin"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(result.is_error);
        assert!(extract_text(&result).contains("Binary file"));
    }

    /// A leading `@` on the path is ignored.
    #[tokio::test]
    async fn read_strips_at_prefix() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("test.txt"), "content").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c5", json!({"path": "@test.txt"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(!result.is_error);
        assert!(extract_text(&result).contains("content"));
    }

    /// An empty file is a successful (non-error) read.
    #[tokio::test]
    async fn read_empty_file() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("empty.txt");
        std::fs::write(&file, "").unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c6", json!({"path": "empty.txt"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(!result.is_error);
    }

    /// Files longer than the configured limit are truncated from the head.
    #[tokio::test]
    async fn read_large_file_truncated() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("big.txt");
        let mut content = String::new();
        for i in 0..3000 {
            content.push_str(&format!("line {i}\n"));
        }
        std::fs::write(&file, &content).unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c7", json!({"path": "big.txt"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        assert!(text.contains("line 0"));
        assert_eq!(result.details["truncated"], true);
    }

    /// `read_max_lines` caps the number of lines returned and is reported in
    /// both the note and the details.
    #[tokio::test]
    async fn read_respects_configured_line_limit() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("limited.txt");
        let mut content = String::new();
        for i in 0..800 {
            content.push_str(&format!("line {i}\n"));
        }
        std::fs::write(&file, &content).unwrap();
        let tool = ReadTool;
        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 500;
        let result = tool
            .execute("c7b", json!({"path": "limited.txt"}), ctx)
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("truncated"));
        assert!(text.contains("showing 500/800 lines"));
        assert_eq!(result.details["lines"], 500);
        assert_eq!(result.details["total_lines"], 800);
    }

    /// A limit of zero returns every line without a truncation note.
    #[tokio::test]
    async fn read_zero_line_limit_disables_line_truncation() {
        let dir = tempfile::tempdir().unwrap();
        let file = dir.path().join("unlimited.txt");
        let mut content = String::new();
        for i in 0..800 {
            content.push_str(&format!("line {i}\n"));
        }
        std::fs::write(&file, &content).unwrap();
        let tool = ReadTool;
        let mut ctx = test_ctx(dir.path());
        ctx.read_max_lines = 0;
        let result = tool
            .execute("c7c", json!({"path": "unlimited.txt"}), ctx)
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(!text.contains("truncated"));
        assert!(text.contains("line 799"));
        assert_eq!(result.details["truncated"], false);
        assert_eq!(result.details["lines"], 800);
        assert_eq!(result.details["total_lines"], 800);
        assert!(result.details["path"]
            .as_str()
            .unwrap()
            .contains("unlimited.txt"));
    }

    /// Pointing the tool at a directory yields an error output with a
    /// directory-specific message. The result is unwrapped so the test cannot
    /// pass vacuously if `execute` returns `Err`.
    #[tokio::test]
    async fn read_directory_error() {
        let dir = tempfile::tempdir().unwrap();
        let subdir = dir.path().join("subdir");
        std::fs::create_dir(&subdir).unwrap();
        let tool = ReadTool;
        let result = tool
            .execute("c8", json!({"path": "subdir"}), test_ctx(dir.path()))
            .await
            .unwrap();
        assert!(result.is_error);
        assert!(extract_text(&result).contains("Path is a directory"));
    }

    /// With `anchors: true` each visible line gets an anchor that is both
    /// listed in the output and retrievable from the anchor store.
    #[tokio::test]
    async fn read_can_emit_line_anchors() {
        let dir = tempfile::tempdir().unwrap();
        std::fs::write(dir.path().join("anchored.txt"), "alpha\nbeta\ngamma\n").unwrap();
        let tool = ReadTool;
        let ctx = test_ctx(dir.path());
        let result = tool
            .execute(
                "c-anchors",
                json!({"path": "anchored.txt", "start_line": 2, "end_line": 2, "anchors": true}),
                ctx.clone(),
            )
            .await
            .unwrap();
        assert!(!result.is_error);
        let text = extract_text(&result);
        assert!(text.contains("Anchors:"));
        let anchors = result.details["anchors"].as_array().unwrap();
        assert_eq!(anchors.len(), 1);
        assert_eq!(anchors[0]["line"], 2);
        let anchor = anchors[0]["anchor"].as_str().unwrap();
        let path = dir.path().join("anchored.txt");
        assert!(ctx.anchor_store.get(&path, anchor).is_some());
    }

    /// Concatenates every text content block of `output`, separated by newlines.
    fn extract_text(output: &ToolOutput) -> String {
        output
            .content
            .iter()
            .filter_map(|b| match b {
                imp_llm::ContentBlock::Text { text } => Some(text.as_str()),
                _ => None,
            })
            .collect::<Vec<_>>()
            .join("\n")
    }
}