1use sha2::{Digest, Sha256};
8use std::collections::HashMap;
9use std::fmt::Write as FmtWrite;
10use std::fs;
11use std::path::{Path, PathBuf};
12use std::sync::{Arc, OnceLock};
13use tokio::sync::Mutex;
14use tracing::{debug, error, info, instrument, warn};
15
16use crate::errors::{ErrorRecovery, Result, WinxError};
17use crate::state::bash_state::BashState;
18use crate::types::ReadFiles;
19use crate::utils::file_cache::FileCache;
20use crate::utils::mmap::read_file_to_string;
21use crate::utils::path::{expand_user, validate_path_in_workspace};
22
/// Token budget used for files that `is_source_code_file` classifies as source code.
const CODING_MAX_TOKENS: usize = 24_000;
/// Token budget used for every other file.
const NONCODING_MAX_TOKENS: usize = 8_000;

/// Tuple returned by `read_file`:
/// `(content, truncated, tokens_count, canonical_path, (effective_start, effective_end))`.
type FileReadResult = (String, bool, usize, String, (usize, usize));

/// Size limit handed to `read_file_to_string` when loading a file.
// NOTE(review): presumably a byte count — confirm against `read_file_to_string`.
const MAX_FILE_SIZE: u64 = 50_000_000;
32
/// Builds the `:start-end` suffix shown after a file name in the report.
///
/// Returns an empty string when neither bound is given; otherwise a missing
/// bound is rendered as an empty slot (for example `:5-` or `:-10`).
fn range_format(start_line_num: Option<usize>, end_line_num: Option<usize>) -> String {
    // No bounds at all means no suffix.
    if start_line_num.is_none() && end_line_num.is_none() {
        return String::new();
    }

    let render = |bound: Option<usize>| bound.map(|n| n.to_string()).unwrap_or_default();
    let (st, end) = (render(start_line_num), render(end_line_num));

    format!(":{st}-{end}")
}
43
44#[instrument(level = "debug", skip(file_path))]
45async fn read_file(
46 file_path: &str,
47 max_tokens: Option<usize>,
48 cwd: &Path,
49 workspace_root: &Path,
50 show_line_numbers: bool,
51 start_line_num: Option<usize>,
52 end_line_num: Option<usize>,
53) -> Result<FileReadResult> {
54 let file_path_expanded = expand_user(file_path);
55 let path = if Path::new(&file_path_expanded).is_absolute() {
56 PathBuf::from(&file_path_expanded)
57 } else {
58 cwd.join(&file_path_expanded)
59 };
60
61 if !path.exists() {
62 return Err(WinxError::FileAccessError {
63 path: path.clone(),
64 message: "File does not exist".to_string(),
65 });
66 }
67
68 let path = match validate_path_in_workspace(&path, workspace_root) {
69 Ok(canonical) => canonical,
70 Err(security_err) => {
71 return Err(WinxError::PathSecurityError {
72 path: path.clone(),
73 message: security_err.to_string(),
74 });
75 }
76 };
77
78 if !path.is_file() {
79 return Err(WinxError::FileAccessError {
80 path: path.clone(),
81 message: "Path exists but is not a file".to_string(),
82 });
83 }
84
85 let content = read_file_to_string(&path, MAX_FILE_SIZE)?;
86 let lines: Vec<&str> = content.lines().collect();
87 let total_lines = lines.len() + usize::from(content.ends_with('\n'));
88
89 let start_idx = start_line_num.map_or(0, |n| n.saturating_sub(1).min(lines.len()));
90 let end_idx = end_line_num.map_or(lines.len(), |n| n.min(lines.len()));
91
92 if start_idx > lines.len() || (end_idx > 0 && start_idx > end_idx) {
93 return Err(ErrorRecovery::param_error(
94 "line_range",
95 &format!("Invalid line range for file with {} lines", lines.len()),
96 ));
97 }
98
99 let effective_start = start_line_num.unwrap_or(1);
100 let effective_end = end_line_num.unwrap_or(total_lines);
101
102 let filtered_lines =
103 if lines.is_empty() { &[] } else { &lines[start_idx..end_idx.min(lines.len())] };
104 let mut result_content = String::new();
105
106 if show_line_numbers {
107 for (i, line) in filtered_lines.iter().enumerate() {
108 let line_num = start_idx + i + 1;
109 let _ = writeln!(result_content, "{line_num} {line}");
110 }
111 } else {
112 for line in filtered_lines {
113 result_content.push_str(line);
114 result_content.push('\n');
115 }
116 }
117
118 let mut truncated = false;
119 let tokens_count = count_tokens(&result_content);
120 let max_tokens = max_tokens.unwrap_or_else(|| select_max_tokens(file_path));
121
122 if tokens_count > max_tokens {
123 truncate_to_token_budget(&mut result_content, max_tokens);
124 let _ = write!(
125 result_content,
126 "\n(...truncated) {tokens_count} tokens exceeded limit {max_tokens}."
127 );
128 truncated = true;
129 }
130
131 let canon_path = path.to_string_lossy().to_string();
132
133 Ok((result_content, truncated, tokens_count, canon_path, (effective_start, effective_end)))
134}
135
136fn count_tokens(content: &str) -> usize {
137 static TOKENIZER: OnceLock<Option<tiktoken_rs::CoreBPE>> = OnceLock::new();
138
139 TOKENIZER.get_or_init(|| tiktoken_rs::cl100k_base().ok()).as_ref().map_or_else(
140 || estimate_tokens(content),
141 |encoder| encoder.encode_with_special_tokens(content).len(),
142 )
143}
144
/// Heuristic token count: the larger of "one token per four characters, rounded up"
/// and the number of whitespace-separated words.
fn estimate_tokens(content: &str) -> usize {
    let by_chars = content.chars().count().div_ceil(4);
    let by_words = content.split_whitespace().count();

    std::cmp::max(by_chars, by_words)
}
148
149fn truncate_to_token_budget(content: &mut String, max_tokens: usize) {
150 let mut low = 0;
151 let mut high = content.chars().count();
152
153 while low < high {
154 let mid = (low + high).div_ceil(2);
155 let byte_idx = byte_index_for_char_count(content, mid);
156
157 if count_tokens(&content[..byte_idx]) <= max_tokens {
158 low = mid;
159 } else {
160 high = mid.saturating_sub(1);
161 }
162 }
163
164 let byte_idx = byte_index_for_char_count(content, low);
165 content.truncate(byte_idx);
166}
167
/// Returns the byte offset at which the first `char_count` characters of `content`
/// end, or `content.len()` when the string has no more than `char_count` characters.
fn byte_index_for_char_count(content: &str, char_count: usize) -> usize {
    match content.char_indices().nth(char_count) {
        Some((offset, _)) => offset,
        None => content.len(),
    }
}
171
172fn select_max_tokens(file_path: &str) -> usize {
173 if is_source_code_file(file_path) {
174 CODING_MAX_TOKENS
175 } else {
176 NONCODING_MAX_TOKENS
177 }
178}
179
/// Reports whether `file_path` names a file that is treated as source code.
///
/// A file qualifies when its file name is one of a few well-known extension-less
/// names, or when its extension is in the list below. Both comparisons are
/// case-sensitive.
fn is_source_code_file(file_path: &str) -> bool {
    /// File names that count as source code regardless of extension.
    const KNOWN_FILE_NAMES: &[&str] = &["Makefile", "Dockerfile", "Jenkinsfile"];
    /// Extensions (without the leading dot) that count as source code.
    const KNOWN_EXTENSIONS: &[&str] = &[
        "py", "pyx", "pyi", "pyw", "js", "jsx", "ts", "tsx", "mjs", "cjs", "html", "css", "scss",
        "sass", "less", "c", "h", "cpp", "cxx", "cc", "hpp", "java", "kt", "go", "rs", "rb",
        "php", "sh", "bash", "zsh", "sql", "xml", "json", "yaml", "yml", "toml", "md", "ex",
        "exs",
    ];

    let path = Path::new(file_path);

    let has_known_name = path
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| KNOWN_FILE_NAMES.contains(&name));
    if has_known_name {
        return true;
    }

    path.extension().and_then(|ext| ext.to_str()).is_some_and(|ext| KNOWN_EXTENSIONS.contains(&ext))
}
228
229pub async fn handle_tool_call(
230 bash_state_arc: &Arc<Mutex<Option<BashState>>>,
231 read_files: ReadFiles,
232) -> Result<String> {
233 let (cwd, workspace_root) = {
234 let bash_state_guard = bash_state_arc.lock().await;
235 let bash_state = bash_state_guard.as_ref().ok_or(WinxError::BashStateNotInitialized)?;
236 (bash_state.cwd.clone(), bash_state.workspace_root.clone())
237 };
238
239 let mut message = String::new();
240 let cache = FileCache::global();
241 let mut file_ranges_dict: HashMap<String, Vec<(usize, usize)>> = HashMap::new();
242
243 for (index, file_path) in read_files.file_paths.iter().enumerate() {
244 let clean_path = read_files.get_clean_path(index);
245 let start_line_num = read_files.start_line_nums.get(index).copied().flatten();
246 let end_line_num = read_files.end_line_nums.get(index).copied().flatten();
247
248 match read_file(
249 &clean_path,
250 Some(select_max_tokens(&clean_path)),
251 &cwd,
252 &workspace_root,
253 read_files.show_line_numbers(),
254 start_line_num,
255 end_line_num,
256 )
257 .await
258 {
259 Ok((content, truncated, _, canon_path, line_range)) => {
260 file_ranges_dict.entry(canon_path.clone()).or_default().push(line_range);
261 let _ = write!(
262 message,
263 "\n{}{}\n```\n{content}\n```",
264 clean_path,
265 range_format(start_line_num, end_line_num)
266 );
267
268 let _ = cache.record_read_range(Path::new(&canon_path), line_range.0, line_range.1);
269
270 if truncated {
271 break;
272 }
273 }
274 Err(e) => {
275 let _ = write!(message, "\nError reading {file_path}: {e}");
276 }
277 }
278 }
279
280 let mut bash_state_guard = bash_state_arc.lock().await;
281 if let Some(bash_state) = bash_state_guard.as_mut() {
282 for (path, ranges) in file_ranges_dict {
283 let file_hash = cache.get_cached_hash(Path::new(&path)).unwrap_or_default();
284 let total_lines = cache
285 .get_unread_ranges(Path::new(&path))
286 .iter()
287 .map(|&(_, end)| end)
288 .max()
289 .unwrap_or(0);
290
291 bash_state.whitelist_for_overwrite.insert(
292 path.clone(),
293 crate::state::bash_state::FileWhitelistData::new(file_hash, ranges, total_lines),
294 );
295 }
296 }
297
298 Ok(message)
299}