winx-code-agent 0.2.308

//! Implementation of the `ReadFiles` tool.
//!
//! This module provides the implementation for the `ReadFiles` tool, which is used
//! to read and display the contents of files, optionally with line numbers and
//! line range filtering.

use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fmt::Write as FmtWrite;
use std::fs;
use std::path::{Path, PathBuf};
use std::sync::{Arc, OnceLock};
use tokio::sync::Mutex;
use tracing::{debug, error, info, instrument, warn};

use crate::errors::{ErrorRecovery, Result, WinxError};
use crate::state::bash_state::BashState;
use crate::types::ReadFiles;
use crate::utils::file_cache::FileCache;
use crate::utils::mmap::read_file_to_string;
use crate::utils::path::{expand_user, validate_path_in_workspace};

/// Default token budget for files that `is_source_code_file` recognizes as source code.
const CODING_MAX_TOKENS: usize = 24_000;
/// Default token budget for every other file.
const NONCODING_MAX_TOKENS: usize = 8_000;

/// Result tuple produced by `read_file`, in order: the rendered content, whether that content
/// was truncated to fit the token budget, the token count measured before truncation, the path
/// returned by workspace validation as a string, and the `(start, end)` line range that was
/// requested (missing bounds default to line 1 and the file's line total).
type FileReadResult = (String, bool, usize, String, (usize, usize));

/// Size cap handed to `read_file_to_string` when loading a file
/// (presumably a byte count — confirm against that helper).
const MAX_FILE_SIZE: u64 = 50_000_000;

/// Renders an optional line range as a `:start-end` suffix for display.
///
/// Returns an empty string when neither bound is given. When only one bound is
/// given, the missing side of the dash is left empty (for example `:3-` or `:-7`).
fn range_format(start_line_num: Option<usize>, end_line_num: Option<usize>) -> String {
    match (start_line_num, end_line_num) {
        (None, None) => String::new(),
        (Some(first), Some(last)) => format!(":{first}-{last}"),
        (Some(first), None) => format!(":{first}-"),
        (None, Some(last)) => format!(":-{last}"),
    }
}

/// Reads a single file and renders the requested line range as text.
///
/// `file_path` is user-expanded and, when relative, resolved against `cwd`. The path must
/// exist, pass `validate_path_in_workspace` against `workspace_root`, and be a regular file.
/// `start_line_num` and `end_line_num` are 1-based and inclusive; either may be omitted, and
/// both are clamped to the number of lines in the file. When `show_line_numbers` is set, every
/// output line is prefixed with its 1-based line number. Output that exceeds `max_tokens` (or
/// the per-file-type default when `None`) is cut down and a truncation notice is appended.
///
/// Returns `(content, truncated, tokens_count, canonical_path, (start, end))`, where
/// `tokens_count` is measured before truncation and `(start, end)` echoes the requested
/// bounds, defaulting to `1` and the file's line total.
///
/// # Errors
///
/// * `WinxError::FileAccessError` if the path does not exist or is not a regular file.
/// * `WinxError::PathSecurityError` if workspace validation rejects the path.
/// * Any error returned by `read_file_to_string`.
/// * A `line_range` parameter error if the clamped start lies after the clamped end.
#[instrument(level = "debug", skip(file_path))]
async fn read_file(
    file_path: &str,
    max_tokens: Option<usize>,
    cwd: &Path,
    workspace_root: &Path,
    show_line_numbers: bool,
    start_line_num: Option<usize>,
    end_line_num: Option<usize>,
) -> Result<FileReadResult> {
    let file_path_expanded = expand_user(file_path);
    let path = if Path::new(&file_path_expanded).is_absolute() {
        PathBuf::from(&file_path_expanded)
    } else {
        cwd.join(&file_path_expanded)
    };

    if !path.exists() {
        return Err(WinxError::FileAccessError {
            path: path.clone(),
            message: "File does not exist".to_string(),
        });
    }

    // From here on, work with the path returned by workspace validation.
    let path = match validate_path_in_workspace(&path, workspace_root) {
        Ok(canonical) => canonical,
        Err(security_err) => {
            return Err(WinxError::PathSecurityError {
                path: path.clone(),
                message: security_err.to_string(),
            });
        }
    };

    if !path.is_file() {
        return Err(WinxError::FileAccessError {
            path: path.clone(),
            message: "Path exists but is not a file".to_string(),
        });
    }

    let content = read_file_to_string(&path, MAX_FILE_SIZE)?;
    let lines: Vec<&str> = content.lines().collect();
    let line_count = lines.len();
    // A trailing newline is counted as one extra (empty) line in the reported total.
    let total_lines = line_count + usize::from(content.ends_with('\n'));

    // Convert the 1-based inclusive bounds into a half-open index range, clamped to the file.
    let start_idx = start_line_num.map_or(0, |n| n.saturating_sub(1).min(line_count));
    let end_idx = end_line_num.map_or(line_count, |n| n.min(line_count));

    // Reject every inverted range, including an end of 0 paired with a start past line 1;
    // letting that through would make the slice below panic.
    if start_idx > end_idx {
        return Err(ErrorRecovery::param_error(
            "line_range",
            &format!("Invalid line range for file with {line_count} lines"),
        ));
    }

    let effective_start = start_line_num.unwrap_or(1);
    let effective_end = end_line_num.unwrap_or(total_lines);

    // In bounds by construction: start_idx <= end_idx <= line_count.
    let filtered_lines = &lines[start_idx..end_idx];
    let mut result_content = String::new();

    if show_line_numbers {
        for (i, line) in filtered_lines.iter().enumerate() {
            let line_num = start_idx + i + 1;
            let _ = writeln!(result_content, "{line_num} {line}");
        }
    } else {
        for line in filtered_lines {
            result_content.push_str(line);
            result_content.push('\n');
        }
    }

    let mut truncated = false;
    let tokens_count = count_tokens(&result_content);
    let max_tokens = max_tokens.unwrap_or_else(|| select_max_tokens(file_path));

    if tokens_count > max_tokens {
        truncate_to_token_budget(&mut result_content, max_tokens);
        let _ = write!(
            result_content,
            "\n(...truncated) {tokens_count} tokens exceeded limit {max_tokens}."
        );
        truncated = true;
    }

    let canon_path = path.to_string_lossy().to_string();

    Ok((result_content, truncated, tokens_count, canon_path, (effective_start, effective_end)))
}

/// Counts the tokens in `content`.
///
/// The `cl100k_base` encoder from `tiktoken_rs` is built at most once and cached for the
/// lifetime of the process. If it could not be constructed, the count falls back to
/// `estimate_tokens`.
fn count_tokens(content: &str) -> usize {
    static TOKENIZER: OnceLock<Option<tiktoken_rs::CoreBPE>> = OnceLock::new();

    let cached_encoder = TOKENIZER.get_or_init(|| tiktoken_rs::cl100k_base().ok());
    match cached_encoder {
        Some(bpe) => bpe.encode_with_special_tokens(content).len(),
        None => estimate_tokens(content),
    }
}

/// Approximates a token count without a tokenizer.
///
/// Takes the larger of two heuristics: one token per four characters (rounded up) and
/// one token per whitespace-separated word.
fn estimate_tokens(content: &str) -> usize {
    let by_chars = content.chars().count().div_ceil(4);
    let by_words = content.split_whitespace().count();
    by_chars.max(by_words)
}

/// Shortens `content` in place so that its token count fits within `max_tokens`.
///
/// Binary-searches over the number of leading characters to keep, measuring each
/// candidate prefix with `count_tokens`, and cuts only at character boundaries.
fn truncate_to_token_budget(content: &mut String, max_tokens: usize) {
    // `kept` is the best prefix length (in chars) accepted so far; `ceiling` is the
    // longest prefix length still worth trying.
    let (mut kept, mut ceiling) = (0, content.chars().count());

    while kept < ceiling {
        // Upper midpoint, so accepting a candidate always makes progress.
        let candidate = kept + (ceiling - kept).div_ceil(2);
        let cut = byte_index_for_char_count(content, candidate);
        let fits = count_tokens(&content[..cut]) <= max_tokens;

        if fits {
            kept = candidate;
        } else {
            // `candidate` is at least 1 here because `kept < ceiling`.
            ceiling = candidate - 1;
        }
    }

    let final_cut = byte_index_for_char_count(content, kept);
    content.truncate(final_cut);
}

/// Returns the byte offset just past the first `char_count` characters of `content`.
///
/// If `content` has fewer than `char_count` characters, the result is `content.len()`.
fn byte_index_for_char_count(content: &str, char_count: usize) -> usize {
    // Summing the UTF-8 widths of the kept characters gives the offset of the next one.
    content.chars().take(char_count).map(char::len_utf8).sum()
}

/// Picks the default token budget for `file_path`: `CODING_MAX_TOKENS` when
/// `is_source_code_file` recognizes it, `NONCODING_MAX_TOKENS` otherwise.
fn select_max_tokens(file_path: &str) -> usize {
    if !is_source_code_file(file_path) {
        return NONCODING_MAX_TOKENS;
    }
    CODING_MAX_TOKENS
}

/// Reports whether `file_path` names a file that gets the source-code token budget.
///
/// A file qualifies when its file name is one of a few well-known extensionless build
/// files, or when its extension is in the list below. Both comparisons are exact and
/// case-sensitive; names or extensions that are not valid UTF-8 never match.
fn is_source_code_file(file_path: &str) -> bool {
    const SPECIAL_FILE_NAMES: &[&str] = &["Makefile", "Dockerfile", "Jenkinsfile"];
    const SOURCE_EXTENSIONS: &[&str] = &[
        "py", "pyx", "pyi", "pyw", "js", "jsx", "ts", "tsx", "mjs", "cjs", "html", "css", "scss",
        "sass", "less", "c", "h", "cpp", "cxx", "cc", "hpp", "java", "kt", "go", "rs", "rb",
        "php", "sh", "bash", "zsh", "sql", "xml", "json", "yaml", "yml", "toml", "md", "ex",
        "exs",
    ];

    let candidate = Path::new(file_path);

    let has_special_name = candidate
        .file_name()
        .and_then(|name| name.to_str())
        .is_some_and(|name| SPECIAL_FILE_NAMES.contains(&name));
    if has_special_name {
        return true;
    }

    candidate
        .extension()
        .and_then(|ext| ext.to_str())
        .is_some_and(|ext| SOURCE_EXTENSIONS.contains(&ext))
}

/// Handles a `ReadFiles` tool call and returns the combined output for all requested files.
///
/// Each requested path is read with `read_file`, using the current working directory and
/// workspace root taken from the shared bash state. Successful reads are appended to the
/// returned message as a fenced block under the path and line-range heading, and the range
/// is recorded in the global `FileCache`. Per-file failures are appended as an error line
/// and do not abort the batch. If a file's content had to be truncated, processing stops
/// there and the message lists the requested files that were not read.
///
/// After the batch, every file that was read is inserted into the bash state's
/// `whitelist_for_overwrite` together with its cached hash and the ranges read.
///
/// # Errors
///
/// Returns `WinxError::BashStateNotInitialized` if the shared bash state is empty.
pub async fn handle_tool_call(
    bash_state_arc: &Arc<Mutex<Option<BashState>>>,
    read_files: ReadFiles,
) -> Result<String> {
    // Copy out what is needed so the lock is released before any file is read.
    let (cwd, workspace_root) = {
        let bash_state_guard = bash_state_arc.lock().await;
        let bash_state = bash_state_guard.as_ref().ok_or(WinxError::BashStateNotInitialized)?;
        (bash_state.cwd.clone(), bash_state.workspace_root.clone())
    };

    let mut message = String::new();
    let cache = FileCache::global();
    let mut file_ranges_dict: HashMap<String, Vec<(usize, usize)>> = HashMap::new();

    for (index, file_path) in read_files.file_paths.iter().enumerate() {
        let clean_path = read_files.get_clean_path(index);
        let start_line_num = read_files.start_line_nums.get(index).copied().flatten();
        let end_line_num = read_files.end_line_nums.get(index).copied().flatten();

        match read_file(
            &clean_path,
            Some(select_max_tokens(&clean_path)),
            &cwd,
            &workspace_root,
            read_files.show_line_numbers(),
            start_line_num,
            end_line_num,
        )
        .await
        {
            Ok((content, truncated, _, canon_path, line_range)) => {
                file_ranges_dict.entry(canon_path.clone()).or_default().push(line_range);
                let _ = write!(
                    message,
                    "\n{}{}\n```\n{content}\n```",
                    clean_path,
                    range_format(start_line_num, end_line_num)
                );

                // Best-effort bookkeeping: the outcome of recording the range is ignored.
                let _ = cache.record_read_range(Path::new(&canon_path), line_range.0, line_range.1);

                if truncated {
                    // A truncated file ends the batch. Name the requested files that are
                    // being skipped so they are not dropped silently.
                    let mut skipped = read_files.file_paths.iter().skip(index + 1).peekable();
                    if skipped.peek().is_some() {
                        message.push_str("\nNot reading the rest of the files: ");
                        for (position, skipped_path) in skipped.enumerate() {
                            if position > 0 {
                                message.push_str(", ");
                            }
                            let _ = write!(message, "{skipped_path}");
                        }
                    }
                    break;
                }
            }
            Err(e) => {
                // A failure on one file is reported inline; the remaining files are still read.
                let _ = write!(message, "\nError reading {file_path}: {e}");
            }
        }
    }

    let mut bash_state_guard = bash_state_arc.lock().await;
    if let Some(bash_state) = bash_state_guard.as_mut() {
        for (path, ranges) in file_ranges_dict {
            let file_hash = cache.get_cached_hash(Path::new(&path)).unwrap_or_default();
            // NOTE(review): the total is taken from the largest end among the *unread* ranges,
            // so it is 0 whenever the cache reports no unread ranges — confirm this is the
            // intended source for the file's line total.
            let total_lines = cache
                .get_unread_ranges(Path::new(&path))
                .iter()
                .map(|&(_, end)| end)
                .max()
                .unwrap_or(0);

            bash_state.whitelist_for_overwrite.insert(
                path.clone(),
                crate::state::bash_state::FileWhitelistData::new(file_hash, ranges, total_lines),
            );
        }
    }

    Ok(message)
}