Skip to main content

winx_code_agent/tools/
read_files.rs

//! Implementation of the `ReadFiles` tool.
//!
//! This module provides the implementation for the `ReadFiles` tool, which is used
//! to read and display the contents of files, optionally with line numbers and
//! line range filtering.
6
7use sha2::{Digest, Sha256};
8use std::collections::HashMap;
9use std::fmt::Write as FmtWrite;
10use std::path::{Path, PathBuf};
11use std::sync::{Arc, OnceLock};
12use tokio::sync::Mutex;
13use tracing::{debug, error, info, instrument, warn};
14
15use crate::errors::{ErrorRecovery, Result, WinxError};
16use crate::state::bash_state::BashState;
17use crate::types::ReadFiles;
18use crate::utils::file_cache::FileCache;
19use crate::utils::mmap::read_file_to_string;
20use crate::utils::path::{expand_user, validate_path_in_workspace};
21
/// Token budget for files that `is_source_code_file` classifies as source code.
const CODING_MAX_TOKENS: usize = 24_000;
/// Token budget for every other file.
const NONCODING_MAX_TOKENS: usize = 8_000;

/// Size limit handed to `read_file_to_string` when a file is loaded
/// (presumably a byte count — confirm against that helper).
const MAX_FILE_SIZE: u64 = 50_000_000;

/// What `read_file` reports for a single file, in tuple order:
/// rendered content, whether it was truncated, token count of the rendering before
/// truncation, canonical path, `(start, end)` line range, hex SHA-256 of the whole file,
/// and the file's total line count.
type FileReadResult = (String, bool, usize, String, (usize, usize), String, usize);

/// Per-file bookkeeping gathered during one tool call, in tuple order:
/// the line ranges read, the file's content hash, and its total line count.
type ReadCoverage = (Vec<(usize, usize)>, String, usize);
32
/// Renders an optional line range as the `:start-end` suffix shown after a file path.
///
/// A missing bound is left blank (`:3-`, `:-7`); when neither bound is given the
/// result is the empty string.
fn range_format(start_line_num: Option<usize>, end_line_num: Option<usize>) -> String {
    match (start_line_num, end_line_num) {
        (None, None) => String::new(),
        (Some(first), None) => format!(":{first}-"),
        (None, Some(last)) => format!(":-{last}"),
        (Some(first), Some(last)) => format!(":{first}-{last}"),
    }
}
43
44#[instrument(level = "debug", skip(file_path))]
45async fn read_file(
46    file_path: &str,
47    max_tokens: Option<usize>,
48    cwd: &Path,
49    workspace_root: &Path,
50    show_line_numbers: bool,
51    start_line_num: Option<usize>,
52    end_line_num: Option<usize>,
53) -> Result<FileReadResult> {
54    let file_path_expanded = expand_user(file_path);
55    let path = if Path::new(&file_path_expanded).is_absolute() {
56        PathBuf::from(&file_path_expanded)
57    } else {
58        cwd.join(&file_path_expanded)
59    };
60
61    if !path.exists() {
62        return Err(WinxError::FileAccessError {
63            path: path.clone(),
64            message: "File does not exist".to_string(),
65        });
66    }
67
68    let path = match validate_path_in_workspace(&path, workspace_root) {
69        Ok(canonical) => canonical,
70        Err(security_err) => {
71            return Err(WinxError::PathSecurityError {
72                path: path.clone(),
73                message: security_err.to_string(),
74            });
75        }
76    };
77
78    if !path.is_file() {
79        return Err(WinxError::FileAccessError {
80            path: path.clone(),
81            message: "Path exists but is not a file".to_string(),
82        });
83    }
84
85    let content = read_file_to_string(&path, MAX_FILE_SIZE)?;
86    let file_hash = hash_content(&content);
87    let lines: Vec<&str> = content.lines().collect();
88    let total_lines = lines.len();
89
90    let start_idx = start_line_num.map_or(0, |n| n.saturating_sub(1).min(lines.len()));
91    let end_idx = end_line_num.map_or(lines.len(), |n| n.min(lines.len()));
92
93    if start_idx > lines.len() || (end_idx > 0 && start_idx > end_idx) {
94        return Err(ErrorRecovery::param_error(
95            "line_range",
96            &format!("Invalid line range for file with {} lines", lines.len()),
97        ));
98    }
99
100    let effective_start = start_line_num.unwrap_or(1);
101    let effective_end = end_line_num.unwrap_or(total_lines);
102
103    let filtered_lines =
104        if lines.is_empty() { &[] } else { &lines[start_idx..end_idx.min(lines.len())] };
105    let mut result_content = String::new();
106
107    if show_line_numbers {
108        for (i, line) in filtered_lines.iter().enumerate() {
109            let line_num = start_idx + i + 1;
110            let _ = writeln!(result_content, "{line_num} {line}");
111        }
112    } else {
113        for line in filtered_lines {
114            result_content.push_str(line);
115            result_content.push('\n');
116        }
117    }
118
119    let mut truncated = false;
120    let tokens_count = count_tokens(&result_content);
121    let max_tokens = max_tokens.unwrap_or_else(|| select_max_tokens(file_path));
122
123    if tokens_count > max_tokens {
124        truncate_to_token_budget(&mut result_content, max_tokens);
125        let _ = write!(
126            result_content,
127            "\n(...truncated) {tokens_count} tokens exceeded limit {max_tokens}."
128        );
129        truncated = true;
130    }
131
132    let canon_path = path.to_string_lossy().to_string();
133
134    Ok((
135        result_content,
136        truncated,
137        tokens_count,
138        canon_path,
139        (effective_start, effective_end.min(total_lines.max(1))),
140        file_hash,
141        total_lines,
142    ))
143}
144
145fn hash_content(content: &str) -> String {
146    let digest = Sha256::digest(content.as_bytes());
147    digest.iter().fold(String::with_capacity(digest.len() * 2), |mut hash, byte| {
148        let _ = write!(hash, "{byte:02x}");
149        hash
150    })
151}
152
153fn count_tokens(content: &str) -> usize {
154    crate::utils::encoder::count_tokens(content)
155}
156
157fn truncate_to_token_budget(content: &mut String, max_tokens: usize) {
158    let mut low = 0;
159    let mut high = content.chars().count();
160
161    while low < high {
162        let mid = (low + high).div_ceil(2);
163        let byte_idx = byte_index_for_char_count(content, mid);
164
165        if count_tokens(&content[..byte_idx]) <= max_tokens {
166            low = mid;
167        } else {
168            high = mid.saturating_sub(1);
169        }
170    }
171
172    let byte_idx = byte_index_for_char_count(content, low);
173    content.truncate(byte_idx);
174}
175
/// Returns the byte offset at which the first `char_count` characters of `content` end.
///
/// If `content` has `char_count` characters or fewer, the result is `content.len()`.
fn byte_index_for_char_count(content: &str, char_count: usize) -> usize {
    match content.char_indices().nth(char_count) {
        Some((byte_offset, _)) => byte_offset,
        None => content.len(),
    }
}
179
180fn select_max_tokens(file_path: &str) -> usize {
181    if is_source_code_file(file_path) {
182        CODING_MAX_TOKENS
183    } else {
184        NONCODING_MAX_TOKENS
185    }
186}
187
/// Reports whether `file_path` names a file this module treats as source code.
///
/// A path qualifies when its final component is exactly one of a few well-known
/// extensionless build files, or when its extension is in the list below. Both
/// comparisons are case-sensitive.
fn is_source_code_file(file_path: &str) -> bool {
    /// File names that count as code despite having no extension.
    const EXTENSIONLESS_NAMES: &[&str] = &["Makefile", "Dockerfile", "Jenkinsfile"];
    /// Extensions (without the leading dot) that count as code.
    const CODE_EXTENSIONS: &[&str] = &[
        "py", "pyx", "pyi", "pyw", "js", "jsx", "ts", "tsx", "mjs", "cjs", "html", "css", "scss",
        "sass", "less", "c", "h", "cpp", "cxx", "cc", "hpp", "java", "kt", "go", "rs", "rb",
        "php", "sh", "bash", "zsh", "sql", "xml", "json", "yaml", "yml", "toml", "md", "ex",
        "exs",
    ];

    let path = Path::new(file_path);

    // Non-UTF-8 or missing components fall back to "", which matches nothing.
    let file_name = path.file_name().and_then(|name| name.to_str()).unwrap_or("");
    if EXTENSIONLESS_NAMES.iter().any(|known| *known == file_name) {
        return true;
    }

    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    CODE_EXTENSIONS.iter().any(|known| *known == extension)
}
236
237pub async fn handle_tool_call(
238    bash_state_arc: &Arc<Mutex<Option<BashState>>>,
239    read_files: ReadFiles,
240) -> Result<String> {
241    let (cwd, workspace_root) = {
242        let bash_state_guard = bash_state_arc.lock().await;
243        let bash_state = bash_state_guard.as_ref().ok_or(WinxError::BashStateNotInitialized)?;
244        (bash_state.cwd.clone(), bash_state.workspace_root.clone())
245    };
246
247    let mut message = String::new();
248    let cache = FileCache::global();
249    let mut file_ranges_dict: HashMap<String, ReadCoverage> = HashMap::new();
250
251    for (index, file_path) in read_files.file_paths.iter().enumerate() {
252        let clean_path = read_files.get_clean_path(index);
253        let start_line_num = read_files.start_line_nums.get(index).copied().flatten();
254        let end_line_num = read_files.end_line_nums.get(index).copied().flatten();
255
256        match read_file(
257            &clean_path,
258            Some(select_max_tokens(&clean_path)),
259            &cwd,
260            &workspace_root,
261            read_files.show_line_numbers(),
262            start_line_num,
263            end_line_num,
264        )
265        .await
266        {
267            Ok((content, truncated, _, canon_path, line_range, file_hash, total_lines)) => {
268                let entry = file_ranges_dict
269                    .entry(canon_path.clone())
270                    .or_insert_with(|| (Vec::new(), file_hash.clone(), total_lines));
271                entry.0.push(line_range);
272                entry.1 = file_hash;
273                entry.2 = total_lines;
274                let _ = write!(
275                    message,
276                    "\n{}{}\n```\n{content}\n```",
277                    clean_path,
278                    range_format(start_line_num, end_line_num)
279                );
280
281                let _ = cache.record_read_range(Path::new(&canon_path), line_range.0, line_range.1);
282                let _ = cache.record_file_hash(Path::new(&canon_path), &entry.1);
283                let _ = crate::utils::workspace_stats::record_read(
284                    &workspace_root,
285                    Path::new(&canon_path),
286                );
287
288                if truncated {
289                    break;
290                }
291            }
292            Err(e) => {
293                let _ = write!(message, "\nError reading {file_path}: {e}");
294            }
295        }
296    }
297
298    let mut bash_state_guard = bash_state_arc.lock().await;
299    if let Some(bash_state) = bash_state_guard.as_mut() {
300        for (path, (ranges, file_hash, total_lines)) in file_ranges_dict {
301            bash_state
302                .whitelist_for_overwrite
303                .entry(path)
304                .and_modify(|existing| {
305                    existing.file_hash.clone_from(&file_hash);
306                    existing.total_lines = total_lines;
307                    existing.line_ranges_read.extend(ranges.iter().copied());
308                })
309                .or_insert_with(|| {
310                    crate::state::bash_state::FileWhitelistData::new(file_hash, ranges, total_lines)
311                });
312        }
313    }
314
315    Ok(message)
316}