1use sha2::{Digest, Sha256};
8use std::collections::HashMap;
9use std::fmt::Write as FmtWrite;
10use std::path::{Path, PathBuf};
11use std::sync::{Arc, OnceLock};
12use tokio::sync::Mutex;
13use tracing::{debug, error, info, instrument, warn};
14
15use crate::errors::{ErrorRecovery, Result, WinxError};
16use crate::state::bash_state::BashState;
17use crate::types::ReadFiles;
18use crate::utils::file_cache::FileCache;
19use crate::utils::mmap::read_file_to_string;
20use crate::utils::path::{expand_user, validate_path_in_workspace};
21
// ---- Budgets and limits ------------------------------------------------------------------

/// Token budget `select_max_tokens` hands out for files recognised as source code.
const CODING_MAX_TOKENS: usize = 24_000;

/// Token budget `select_max_tokens` hands out for every other file.
const NONCODING_MAX_TOKENS: usize = 8_000;

/// Size cap passed to `read_file_to_string` (presumably bytes — confirm against that helper).
const MAX_FILE_SIZE: u64 = 50_000_000;

// ---- Tuple shapes shared by the reader and the tool-call handler ---------------------------

/// What `read_file` returns, in order: rendered content, truncated flag, token count of the
/// rendering, resolved path, `(start, end)` line range, content hash, total line count.
type FileReadResult = (String, bool, usize, String, (usize, usize), String, usize);

/// Per-file read bookkeeping: line ranges read, latest content hash, total line count.
type ReadCoverage = (Vec<(usize, usize)>, String, usize);
32
/// Renders an optional line range as the `:start-end` suffix shown after a file path.
///
/// Returns an empty string when neither bound is given; a missing bound is rendered as an
/// empty side of the dash (for example `:3-` or `:-7`).
fn range_format(start_line_num: Option<usize>, end_line_num: Option<usize>) -> String {
    // No bounds at all means no suffix.
    if start_line_num.is_none() && end_line_num.is_none() {
        return String::new();
    }

    let render = |bound: Option<usize>| bound.map(|n| n.to_string()).unwrap_or_default();
    format!(":{st}-{end}", st = render(start_line_num), end = render(end_line_num))
}
43
44#[instrument(level = "debug", skip(file_path))]
45async fn read_file(
46 file_path: &str,
47 max_tokens: Option<usize>,
48 cwd: &Path,
49 workspace_root: &Path,
50 show_line_numbers: bool,
51 start_line_num: Option<usize>,
52 end_line_num: Option<usize>,
53) -> Result<FileReadResult> {
54 let file_path_expanded = expand_user(file_path);
55 let path = if Path::new(&file_path_expanded).is_absolute() {
56 PathBuf::from(&file_path_expanded)
57 } else {
58 cwd.join(&file_path_expanded)
59 };
60
61 if !path.exists() {
62 return Err(WinxError::FileAccessError {
63 path: path.clone(),
64 message: "File does not exist".to_string(),
65 });
66 }
67
68 let path = match validate_path_in_workspace(&path, workspace_root) {
69 Ok(canonical) => canonical,
70 Err(security_err) => {
71 return Err(WinxError::PathSecurityError {
72 path: path.clone(),
73 message: security_err.to_string(),
74 });
75 }
76 };
77
78 if !path.is_file() {
79 return Err(WinxError::FileAccessError {
80 path: path.clone(),
81 message: "Path exists but is not a file".to_string(),
82 });
83 }
84
85 let content = read_file_to_string(&path, MAX_FILE_SIZE)?;
86 let file_hash = hash_content(&content);
87 let lines: Vec<&str> = content.lines().collect();
88 let total_lines = lines.len();
89
90 let start_idx = start_line_num.map_or(0, |n| n.saturating_sub(1).min(lines.len()));
91 let end_idx = end_line_num.map_or(lines.len(), |n| n.min(lines.len()));
92
93 if start_idx > lines.len() || start_idx > end_idx {
94 return Err(ErrorRecovery::param_error(
95 "line_range",
96 &format!("Invalid line range for file with {} lines", lines.len()),
97 ));
98 }
99
100 let effective_start = start_line_num.unwrap_or(1);
101 let effective_end = end_line_num.unwrap_or(total_lines);
102
103 let filtered_lines =
104 if lines.is_empty() { &[] } else { &lines[start_idx..end_idx.min(lines.len())] };
105 let mut result_content = String::new();
106
107 if show_line_numbers {
108 for (i, line) in filtered_lines.iter().enumerate() {
109 let line_num = start_idx + i + 1;
110 let _ = writeln!(result_content, "{line_num} {line}");
111 }
112 } else {
113 for line in filtered_lines {
114 result_content.push_str(line);
115 result_content.push('\n');
116 }
117 }
118
119 let mut truncated = false;
120 let tokens_count = count_tokens(&result_content);
121 let max_tokens = max_tokens.unwrap_or_else(|| select_max_tokens(file_path));
122
123 if tokens_count > max_tokens {
124 truncate_to_token_budget(&mut result_content, max_tokens);
125 let kept_lines = result_content.lines().count();
127 let last_shown = (start_idx + kept_lines).min(total_lines);
128 let resume_from = last_shown + 1;
129 let _ = write!(
130 result_content,
131 "\n(...truncated) Showing up to line {last_shown} of {total_lines} total lines \
132 ({tokens_count} tokens exceeded limit {max_tokens}). Continue reading from line \
133 {resume_from} using the syntax {file_path}:{resume_from}-{total_lines}"
134 );
135 truncated = true;
136 }
137
138 let canon_path = path.to_string_lossy().to_string();
139
140 Ok((
141 result_content,
142 truncated,
143 tokens_count,
144 canon_path,
145 (effective_start, effective_end.min(total_lines.max(1))),
146 file_hash,
147 total_lines,
148 ))
149}
150
151fn hash_content(content: &str) -> String {
152 let digest = Sha256::digest(content.as_bytes());
153 digest.iter().fold(String::with_capacity(digest.len() * 2), |mut hash, byte| {
154 let _ = write!(hash, "{byte:02x}");
155 hash
156 })
157}
158
159fn count_tokens(content: &str) -> usize {
160 crate::utils::encoder::count_tokens(content)
161}
162
163fn truncate_to_token_budget(content: &mut String, max_tokens: usize) {
164 let Some(ids) = crate::utils::encoder::encode_ids(content) else {
168 let byte_idx = byte_index_for_char_count(content, max_tokens);
170 content.truncate(byte_idx);
171 return;
172 };
173
174 if ids.len() <= max_tokens {
175 return;
176 }
177
178 if let Some(decoded) = crate::utils::encoder::decode_ids(&ids[..max_tokens]) {
179 *content = decoded;
180 } else {
181 let byte_idx = byte_index_for_char_count(content, max_tokens);
182 content.truncate(byte_idx);
183 }
184}
185
/// Returns the byte offset just past the first `char_count` characters of `content`.
///
/// When `content` has fewer than `char_count` characters the result is `content.len()`, so
/// the value is always a valid char boundary to truncate at.
fn byte_index_for_char_count(content: &str, char_count: usize) -> usize {
    // Adding up the UTF-8 width of the leading characters gives the offset where the next
    // character would start.
    content.chars().take(char_count).map(char::len_utf8).sum()
}
189
190fn select_max_tokens(file_path: &str) -> usize {
191 if is_source_code_file(file_path) {
192 CODING_MAX_TOKENS
193 } else {
194 NONCODING_MAX_TOKENS
195 }
196}
197
/// Reports whether `file_path` looks like a source-code (or code-adjacent) file.
///
/// A path qualifies when its final component is exactly `Makefile`, `Dockerfile` or
/// `Jenkinsfile`, or when its extension is one of the known source extensions. Extensions
/// are compared ASCII case-insensitively, so `INDEX.HTML` and `Main.RS` are treated the same
/// as their lowercase spellings. Paths without a file name or extension, and names that are
/// not valid UTF-8, are not considered source code.
fn is_source_code_file(file_path: &str) -> bool {
    /// Extension-less build/CI files that count as source code (matched exactly).
    const SOURCE_FILE_NAMES: &[&str] = &["Makefile", "Dockerfile", "Jenkinsfile"];
    /// Known source extensions, all lowercase.
    const SOURCE_EXTENSIONS: &[&str] = &[
        "py", "pyx", "pyi", "pyw", "js", "jsx", "ts", "tsx", "mjs", "cjs", "html", "css",
        "scss", "sass", "less", "c", "h", "cpp", "cxx", "cc", "hpp", "java", "kt", "go", "rs",
        "rb", "php", "sh", "bash", "zsh", "sql", "xml", "json", "yaml", "yml", "toml", "md",
        "ex", "exs",
    ];

    let path = Path::new(file_path);

    let file_name = path.file_name().and_then(|name| name.to_str()).unwrap_or_default();
    if SOURCE_FILE_NAMES.contains(&file_name) {
        return true;
    }

    path.extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| SOURCE_EXTENSIONS.iter().any(|known| ext.eq_ignore_ascii_case(known)))
}
246
247pub async fn handle_tool_call(
248 bash_state_arc: &Arc<Mutex<Option<BashState>>>,
249 read_files: ReadFiles,
250) -> Result<String> {
251 let (cwd, workspace_root) = {
252 let bash_state_guard = bash_state_arc.lock().await;
253 let bash_state = bash_state_guard.as_ref().ok_or(WinxError::BashStateNotInitialized)?;
254 (bash_state.cwd.clone(), bash_state.workspace_root.clone())
255 };
256
257 let mut message = String::new();
258 let cache = FileCache::global();
259 let mut file_ranges_dict: HashMap<String, ReadCoverage> = HashMap::new();
260
261 for (index, file_path) in read_files.file_paths.iter().enumerate() {
262 let clean_path = read_files.get_clean_path(index);
263 let start_line_num = read_files.start_line_nums.get(index).copied().flatten();
264 let end_line_num = read_files.end_line_nums.get(index).copied().flatten();
265
266 match read_file(
267 &clean_path,
268 Some(select_max_tokens(&clean_path)),
269 &cwd,
270 &workspace_root,
271 read_files.show_line_numbers(),
272 start_line_num,
273 end_line_num,
274 )
275 .await
276 {
277 Ok((content, truncated, _, canon_path, line_range, file_hash, total_lines)) => {
278 let entry = file_ranges_dict
279 .entry(canon_path.clone())
280 .or_insert_with(|| (Vec::new(), file_hash.clone(), total_lines));
281 entry.0.push(line_range);
282 entry.1 = file_hash;
283 entry.2 = total_lines;
284 let _ = write!(
285 message,
286 "\n{}{}\n```\n{content}\n```",
287 clean_path,
288 range_format(start_line_num, end_line_num)
289 );
290
291 let _ = cache.record_read_range(Path::new(&canon_path), line_range.0, line_range.1);
292 let _ = cache.record_file_hash(Path::new(&canon_path), &entry.1);
293 let _ = crate::utils::workspace_stats::record_read(
294 &workspace_root,
295 Path::new(&canon_path),
296 );
297
298 if truncated {
299 let remaining = read_files.file_paths.len().saturating_sub(index + 1);
300 if remaining > 0 {
301 let _ = write!(
302 message,
303 "\n\n(Not reading the remaining {remaining} file(s) due to the token \
304 limit. Call ReadFiles again for them.)"
305 );
306 }
307 break;
308 }
309 }
310 Err(e) => {
311 let _ = write!(message, "\nError reading {file_path}: {e}");
312 }
313 }
314 }
315
316 let mut bash_state_guard = bash_state_arc.lock().await;
317 if let Some(bash_state) = bash_state_guard.as_mut() {
318 for (path, (ranges, file_hash, total_lines)) in file_ranges_dict {
319 bash_state
320 .whitelist_for_overwrite
321 .entry(path)
322 .and_modify(|existing| {
323 existing.file_hash.clone_from(&file_hash);
324 existing.total_lines = total_lines;
325 existing.line_ranges_read.extend(ranges.iter().copied());
326 })
327 .or_insert_with(|| {
328 crate::state::bash_state::FileWhitelistData::new(file_hash, ranges, total_lines)
329 });
330 }
331 }
332
333 Ok(message)
334}