Skip to main content

vtcode_core/tools/handlers/
read_file.rs

1use std::collections::VecDeque;
2use std::fmt::Write;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
6
7use anyhow::{Context, Result};
8use async_trait::async_trait;
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Deserializer, Serialize};
11use serde_json::{Value, json};
12use tokio::fs::File;
13use tokio::io::{AsyncBufReadExt, BufReader};
14use tokio::sync::Semaphore;
15use vtcode_commons::diff_paths::looks_like_diff_content;
16
17use crate::tools::error_helpers::deserialize_tool_args;
18use crate::tools::traits::Tool;
19use crate::utils::serde_helpers::{deserialize_maybe_quoted, deserialize_opt_maybe_quoted};
20
/// Stateless handler behind the `read_file` tool.
///
/// It serves both single-file reads (`handle` / `handle_detailed`) and
/// multi-file batch reads (`handle_batch`).
pub struct ReadFileHandler;
22
/// Byte length above which `format_line` truncates a decoded line.
const MAX_LINE_LENGTH: usize = 500;
/// Number of columns a tab counts for when measuring indentation.
const TAB_WIDTH: usize = 4;
/// Prefixes that mark a (whitespace-trimmed) line as a comment.
const COMMENT_PREFIXES: &[&str] = &["#", "//", "--"];
/// Floor applied to `limit` for slice-mode single-file reads that carry no `max_tokens`.
const MIN_BATCH_LIMIT: usize = 200;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// value returned by `defaults::max_concurrency` — confirm.
const DEFAULT_MAX_CONCURRENCY: usize = 8;
/// NOTE(review): not referenced in the visible part of this file; presumably the
/// line count above which batch output is condensed — confirm.
const BATCH_CONDENSED_THRESHOLD: usize = 100;
29
/// Result of a single-file read produced by `ReadFileHandler::handle_detailed`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ReadFileOutcome {
    /// Collected lines joined with `\n`, after optional condensing.
    pub content: String,
    /// Number of lines collected before any condensing was applied.
    pub lines_read: usize,
    /// Whether a slice read stopped before reaching end of file.
    /// Always `false` for indentation-mode reads.
    pub has_more: bool,
}
36
/// JSON arguments accepted by the `read_file` tool handler.
///
/// Numeric and boolean fields go through the `deserialize_*maybe_quoted`
/// helpers. NOTE(review): those helpers are defined elsewhere; by their names
/// they presumably also accept string-quoted values — confirm.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct ReadFileArgs {
    /// Absolute path to the file that will be read.
    ///
    /// Also accepted under the keys `path`, `filepath`, `target_path` and `file`.
    /// Relative paths are rejected by the handler.
    #[serde(
        alias = "path",
        alias = "filepath",
        alias = "target_path",
        alias = "file"
    )]
    pub file_path: String,
    /// 1-indexed line number to start reading from; defaults to 1.
    /// The handler rejects `0`.
    #[serde(
        default = "defaults::offset",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub offset: usize,
    /// Maximum number of lines to return; defaults to 2000.
    /// The handler rejects `0`.
    #[serde(
        default = "defaults::limit",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub limit: usize,
    /// Determines whether the handler reads a simple slice or indentation-aware block.
    #[serde(default, deserialize_with = "deserialize_read_mode")]
    pub mode: ReadMode,
    /// Optional indentation configuration used when `mode` is `Indentation`.
    #[serde(default, deserialize_with = "deserialize_indentation")]
    pub indentation: Option<IndentationArgs>,
    /// Optional token limit for response.
    ///
    /// In the visible handler code this value is only consulted to decide
    /// whether the minimum slice limit is applied; no token counting happens there.
    #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
    pub max_tokens: Option<usize>,
    /// Whether to condense long outputs to head/tail.
    #[serde(
        default = "defaults::condense",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub condense: bool,
}
76
/// Batch read request for reading multiple files or ranges in parallel.
///
/// The tool entry point selects batch mode whenever the incoming JSON has a
/// `reads` key.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct BatchReadArgs {
    /// List of read requests to execute in parallel.
    /// An empty list produces a `success: false` response.
    pub reads: Vec<BatchReadRequest>,
    /// Maximum concurrent file reads (default: 8).
    #[serde(default = "defaults::max_concurrency")]
    pub max_concurrency: usize,
    /// Whether to show progress in UI (default: true).
    /// NOTE(review): not read anywhere in the visible part of this file.
    #[serde(default = "defaults::ui_progress")]
    pub ui_progress: bool,
}
89
/// A single file read request within a batch.
///
/// When both `ranges` and the flattened single range are present, the batch
/// reader uses `ranges` and ignores the single range.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct BatchReadRequest {
    /// Absolute path to the file to read.
    /// Relative paths are reported as an error on the per-file result.
    pub file_path: String,
    /// Single range to read (mutually exclusive with `ranges`).
    ///
    /// Flattened: its `offset` / `limit` / `mode` / `indentation` keys sit
    /// directly on the request object.
    #[serde(flatten)]
    pub range: Option<ReadRange>,
    /// Multiple ranges to read from the same file.
    #[serde(default)]
    pub ranges: Option<Vec<ReadRange>>,
}
102
103/// A range specification for reading.
104#[derive(Deserialize, Serialize, Clone, Debug, Default)]
105pub struct ReadRange {
106    /// 1-indexed line number to start reading from; defaults to 1.
107    #[serde(
108        default = "defaults::offset",
109        deserialize_with = "deserialize_maybe_quoted"
110    )]
111    pub offset: usize,
112    /// Maximum number of lines to return; defaults to 500 for batch.
113    #[serde(
114        default = "defaults::batch_limit",
115        deserialize_with = "deserialize_maybe_quoted"
116    )]
117    pub limit: usize,
118    /// Read mode: slice or indentation.
119    #[serde(default, deserialize_with = "deserialize_read_mode")]
120    pub mode: ReadMode,
121    /// Indentation options when mode is indentation.
122    #[serde(default, deserialize_with = "deserialize_indentation")]
123    pub indentation: Option<IndentationArgs>,
124}
125
/// Result for a single file read in batch mode.
#[derive(Serialize, Clone, Debug)]
pub struct BatchReadResult {
    /// The file path that was read, exactly as given in the request.
    pub file_path: String,
    /// Results for each range read.
    /// When `error` is set, this holds only the ranges read before the failure.
    pub ranges: Vec<RangeResult>,
    /// Error message when path validation or any range read failed.
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
137
/// Result for a single range read.
#[derive(Serialize, Clone, Debug)]
pub struct RangeResult {
    /// Starting line offset (the requested offset, raised to at least 1).
    pub offset: usize,
    /// Lines actually read, counted before any condensing.
    pub lines_read: usize,
    /// Whether content was condensed.
    pub condensed: bool,
    /// Number of lines omitted if condensed.
    /// Omitted from the serialized output when nothing was dropped.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub omitted_lines: Option<usize>,
    /// The content read: collected lines joined with `\n`.
    pub content: String,
}
153
/// Progress tracking for batch reads.
///
/// Every field sits behind an `Arc`, so cloning the tracker yields a handle to
/// the same counters.
#[derive(Clone)]
pub struct BatchProgress {
    /// Total number of files to read.
    pub total_files: Arc<AtomicUsize>,
    /// Number of files completed.
    pub completed_files: Arc<AtomicUsize>,
    /// Current file being read (the path most recently passed to `file_started`).
    pub current_file: Arc<tokio::sync::RwLock<String>>,
    /// Total bytes to read (estimated).
    /// NOTE(review): initialised to 0 and never updated in the visible code.
    pub total_bytes: Arc<AtomicU64>,
    /// Bytes read so far, accumulated through `add_bytes`.
    pub bytes_read: Arc<AtomicU64>,
}
168
169impl BatchProgress {
170    pub fn new(total_files: usize) -> Self {
171        Self {
172            total_files: Arc::new(AtomicUsize::new(total_files)),
173            completed_files: Arc::new(AtomicUsize::new(0)),
174            current_file: Arc::new(tokio::sync::RwLock::new(String::new())),
175            total_bytes: Arc::new(AtomicU64::new(0)),
176            bytes_read: Arc::new(AtomicU64::new(0)),
177        }
178    }
179
180    pub async fn file_started(&self, file_path: &str) {
181        let mut current = self.current_file.write().await;
182        *current = file_path.to_string();
183    }
184
185    pub fn file_completed(&self) {
186        self.completed_files.fetch_add(1, Ordering::Relaxed);
187    }
188
189    pub fn add_bytes(&self, bytes: u64) {
190        self.bytes_read.fetch_add(bytes, Ordering::Relaxed);
191    }
192
193    pub fn progress_percent(&self) -> f64 {
194        let completed = self.completed_files.load(Ordering::Relaxed);
195        let total = self.total_files.load(Ordering::Relaxed);
196        if total == 0 {
197            100.0
198        } else {
199            (completed as f64 / total as f64) * 100.0
200        }
201    }
202
203    pub async fn status_line(&self) -> (String, String) {
204        let completed = self.completed_files.load(Ordering::Relaxed);
205        let total = self.total_files.load(Ordering::Relaxed);
206        let current = self.current_file.read().await;
207        let file_name = PathBuf::from(current.as_str())
208            .file_name()
209            .map(|n| n.to_string_lossy().to_string())
210            .unwrap_or_else(|| current.clone());
211
212        let left = format!("Reading {}/{}: {}", completed + 1, total, file_name);
213        let right = format!("{:.0}%", self.progress_percent());
214        (left, right)
215    }
216}
217
/// Strategy used to pick which lines of a file are returned.
///
/// Serialized in `snake_case` (`"slice"`, `"indentation"`).
#[derive(Deserialize, Serialize, Clone, Debug, Default)]
#[serde(rename_all = "snake_case")]
pub enum ReadMode {
    /// Read a contiguous run of lines starting at `offset` (the default).
    #[default]
    Slice,
    /// Read the indentation-delimited block around an anchor line.
    Indentation,
}
225
226/// Additional configuration for indentation-aware reads.
227#[derive(Deserialize, Serialize, Clone, Debug, Default)]
228pub struct IndentationArgs {
229    /// Optional explicit anchor line; defaults to `offset` when omitted.
230    #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
231    pub anchor_line: Option<usize>,
232    /// Maximum indentation depth to collect; `0` means unlimited.
233    #[serde(
234        default = "defaults::max_levels",
235        deserialize_with = "deserialize_maybe_quoted"
236    )]
237    pub max_levels: usize,
238    /// Whether to include sibling blocks at the same indentation level.
239    #[serde(default = "defaults::include_siblings")]
240    pub include_siblings: bool,
241    /// Whether to include header lines above the anchor block.
242    #[serde(default = "defaults::include_header")]
243    pub include_header: bool,
244    /// Optional hard cap on returned lines; defaults to the global `limit`.
245    #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
246    pub max_lines: Option<usize>,
247}
248
249fn deserialize_read_mode<'de, D>(deserializer: D) -> Result<ReadMode, D::Error>
250where
251    D: Deserializer<'de>,
252{
253    let value = Value::deserialize(deserializer)?;
254    match value {
255        Value::Null => Ok(ReadMode::Slice),
256        Value::String(raw) => {
257            let trimmed = raw.trim();
258            if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("slice") {
259                Ok(ReadMode::Slice)
260            } else if trimmed.eq_ignore_ascii_case("indentation") {
261                Ok(ReadMode::Indentation)
262            } else {
263                Err(serde::de::Error::custom(format!(
264                    "invalid read mode: {trimmed}"
265                )))
266            }
267        }
268        other => Err(serde::de::Error::custom(format!(
269            "invalid read mode type: {other}"
270        ))),
271    }
272}
273
274fn deserialize_indentation<'de, D>(deserializer: D) -> Result<Option<IndentationArgs>, D::Error>
275where
276    D: Deserializer<'de>,
277{
278    let value = Value::deserialize(deserializer)?;
279    match value {
280        Value::Null => Ok(None),
281        Value::Bool(true) => Ok(Some(IndentationArgs::default())),
282        Value::Bool(false) => Ok(None),
283        Value::String(raw) => {
284            let trimmed = raw.trim();
285            if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("false") {
286                Ok(None)
287            } else if trimmed.eq_ignore_ascii_case("true") {
288                Ok(Some(IndentationArgs::default()))
289            } else {
290                Err(serde::de::Error::custom(format!(
291                    "invalid indentation value: {trimmed}"
292                )))
293            }
294        }
295        Value::Object(_) => {
296            let args = IndentationArgs::deserialize(value).map_err(serde::de::Error::custom)?;
297            Ok(Some(args))
298        }
299        other => Err(serde::de::Error::custom(format!(
300            "invalid indentation type: {other}"
301        ))),
302    }
303}
304
/// One line of a file as loaded for indentation-aware reads.
#[derive(Clone, Debug)]
struct LineRecord {
    /// 1-indexed line number within the file.
    number: usize,
    /// Line text decoded lossily as UTF-8, without its line terminator.
    raw: String,
    /// Text shown to the caller, as produced by `format_line`.
    display: String,
    /// Leading-whitespace width in columns (a tab counts as `TAB_WIDTH`).
    indent: usize,
}
312
313impl LineRecord {
314    fn trimmed(&self) -> &str {
315        self.raw.trim_start()
316    }
317
318    fn is_blank(&self) -> bool {
319        self.trimmed().is_empty()
320    }
321
322    fn is_comment(&self) -> bool {
323        COMMENT_PREFIXES
324            .iter()
325            .any(|prefix| self.raw.trim().starts_with(prefix))
326    }
327}
328
329impl ReadFileHandler {
330    /// Execute a batch read of multiple files/ranges in parallel.
331    pub async fn handle_batch(&self, args: BatchReadArgs) -> Result<Value> {
332        if args.reads.is_empty() {
333            return Ok(json!({
334                "success": false,
335                "error": "No read requests provided"
336            }));
337        }
338
339        let progress = BatchProgress::new(args.reads.len());
340        let semaphore = Arc::new(Semaphore::new(args.max_concurrency.min(args.reads.len())));
341
342        let results: Vec<BatchReadResult> = stream::iter(args.reads)
343            .map(|req| {
344                let sem = semaphore.clone();
345                let prog = progress.clone();
346                async move {
347                    let _permit = sem.acquire().await.ok();
348                    prog.file_started(&req.file_path).await;
349                    let result = self.read_single_batch_request(&req).await;
350                    prog.file_completed();
351                    result
352                }
353            })
354            .buffer_unordered(args.max_concurrency)
355            .collect()
356            .await;
357
358        // Build concatenated content for token-efficient response
359        let mut content_parts = Vec::new();
360        let mut buf = String::new();
361        for result in &results {
362            if let Some(ref error) = result.error {
363                buf.clear();
364                let _ = write!(buf, "== {} (ERROR)\n{}", result.file_path, error);
365                content_parts.push(std::mem::take(&mut buf));
366            } else {
367                for range in &result.ranges {
368                    let end_line = range.offset + range.lines_read.saturating_sub(1);
369                    buf.clear();
370                    let _ = write!(
371                        buf,
372                        "== {} (L{}..L{})\n{}",
373                        result.file_path, range.offset, end_line, range.content
374                    );
375                    content_parts.push(std::mem::take(&mut buf));
376                }
377            }
378        }
379
380        let all_success = results.iter().all(|r| r.error.is_none());
381        Ok(json!({
382            "success": all_success,
383            "content": content_parts.join("\n\n"),
384            "items": results,
385            "files_read": results.len(),
386            "files_succeeded": results.iter().filter(|r| r.error.is_none()).count(),
387            "no_spool": true
388        }))
389    }
390
391    /// Read a single batch request (one file, possibly multiple ranges).
392    async fn read_single_batch_request(&self, req: &BatchReadRequest) -> BatchReadResult {
393        let path = PathBuf::from(&req.file_path);
394
395        // Validate path
396        if !path.is_absolute() {
397            return BatchReadResult {
398                file_path: req.file_path.clone(),
399                ranges: vec![],
400                error: Some("file_path must be an absolute path".to_string()),
401            };
402        }
403
404        // Determine ranges to read
405        let ranges_to_read: Vec<ReadRange> = if let Some(ref ranges) = req.ranges {
406            ranges.clone()
407        } else if let Some(ref range) = req.range {
408            vec![range.clone()]
409        } else {
410            vec![ReadRange::default()]
411        };
412
413        let mut range_results = Vec::new();
414        for range in ranges_to_read {
415            match self.read_range(&path, &range).await {
416                Ok(result) => range_results.push(result),
417                Err(e) => {
418                    return BatchReadResult {
419                        file_path: req.file_path.clone(),
420                        ranges: range_results,
421                        error: Some(e.to_string()),
422                    };
423                }
424            }
425        }
426
427        BatchReadResult {
428            file_path: req.file_path.clone(),
429            ranges: range_results,
430            error: None,
431        }
432    }
433
434    /// Read a single range from a file.
435    async fn read_range(&self, path: &Path, range: &ReadRange) -> Result<RangeResult> {
436        let offset = range.offset.max(1);
437        let limit = range.limit.max(1);
438
439        let mut collected = match range.mode {
440            ReadMode::Slice => slice::read(path, offset, limit).await?.lines,
441            ReadMode::Indentation => {
442                let indentation = range.indentation.clone().unwrap_or_default();
443                indentation::read_block(path, offset, limit, indentation).await?
444            }
445        };
446
447        let original_len = collected.len();
448        let (condensed, omitted) = condense_for_batch(&mut collected);
449
450        Ok(RangeResult {
451            offset,
452            lines_read: original_len,
453            condensed,
454            omitted_lines: (omitted > 0).then_some(omitted),
455            content: collected.join("\n"),
456        })
457    }
458
459    pub(crate) async fn handle_detailed(&self, args: ReadFileArgs) -> Result<ReadFileOutcome> {
460        let ReadFileArgs {
461            file_path,
462            offset,
463            limit,
464            mode,
465            indentation,
466            max_tokens,
467            condense,
468        } = args;
469
470        anyhow::ensure!(offset > 0, "offset must be a 1-indexed line number");
471        anyhow::ensure!(limit > 0, "limit must be greater than zero");
472
473        let path = PathBuf::from(&file_path);
474        anyhow::ensure!(path.is_absolute(), "file_path must be an absolute path");
475
476        let effective_limit =
477            if matches!(mode, ReadMode::Slice) && max_tokens.is_none() && limit < MIN_BATCH_LIMIT {
478                MIN_BATCH_LIMIT
479            } else {
480                limit
481            };
482
483        let (mut collected, has_more) = match mode {
484            ReadMode::Slice => {
485                let result = slice::read(&path, offset, effective_limit).await?;
486                (result.lines, result.has_more)
487            }
488            ReadMode::Indentation => {
489                let indentation = indentation.unwrap_or_default();
490                (
491                    indentation::read_block(&path, offset, limit, indentation).await?,
492                    false,
493                )
494            }
495        };
496        let lines_read = collected.len();
497
498        if condense {
499            // Condense large outputs (>100 lines) to head + tail
500            condense_collected_lines(&mut collected);
501        }
502
503        Ok(ReadFileOutcome {
504            content: collected.join("\n"),
505            lines_read,
506            has_more,
507        })
508    }
509
510    /// Legacy handle method for backward compatibility with file_ops.rs
511    pub async fn handle(&self, args: ReadFileArgs) -> Result<String> {
512        Ok(self.handle_detailed(args).await?.content)
513    }
514}
515
516#[async_trait]
517impl Tool for ReadFileHandler {
518    async fn execute(&self, args: Value) -> Result<Value> {
519        // Try batch mode first (has "reads" field)
520        if args.get("reads").is_some() {
521            let batch_args: BatchReadArgs = deserialize_tool_args(&args, "read_file")?;
522            return self.handle_batch(batch_args).await;
523        }
524
525        // Legacy single-file mode
526        let args: ReadFileArgs = deserialize_tool_args(&args, "read_file")?;
527
528        let file_path = args.file_path.clone();
529        let content = self.handle_detailed(args).await?.content;
530
531        Ok(json!({
532            "content": content,
533            "file_path": file_path,
534            "path": file_path,
535            "success": true,
536            "no_spool": true
537        }))
538    }
539
540    fn name(&self) -> &str {
541        "read_file"
542    }
543
544    fn description(&self) -> &str {
545        "Read file contents with optional line range, indentation-aware block selection, or batch multiple files"
546    }
547
548    fn parameter_schema(&self) -> Option<Value> {
549        Some(json!({
550            "type": "object",
551            "properties": {
552                "file_path": {
553                    "type": "string",
554                    "description": "Absolute path to the file to read (for single-file mode)"
555                },
556                "offset": {
557                    "type": "integer",
558                    "description": "1-indexed line number to start from (default: 1)",
559                    "default": 1,
560                    "minimum": 1
561                },
562                "limit": {
563                    "type": "integer",
564                    "description": "Maximum lines to return (default: 2000)",
565                    "default": 2000,
566                    "minimum": 1
567                },
568                "mode": {
569                    "type": "string",
570                    "enum": ["slice", "indentation"],
571                    "description": "Read mode: slice for simple range, indentation for block",
572                    "default": "slice"
573                },
574                "indentation": {
575                    "description": "Indentation settings when mode=indentation",
576                    "anyOf": [
577                        {"type": "boolean"},
578                        {
579                            "type": "object",
580                            "properties": {
581                                "anchor_line": {
582                                    "type": "integer",
583                                    "description": "Line number to anchor on (defaults to offset)"
584                                },
585                                "max_levels": {
586                                    "type": "integer",
587                                    "description": "Max indentation depth (0=unlimited)",
588                                    "default": 0
589                                },
590                                "include_siblings": {
591                                    "type": "boolean",
592                                    "description": "Include sibling blocks",
593                                    "default": false
594                                },
595                                "include_header": {
596                                    "type": "boolean",
597                                    "description": "Include header lines above anchor",
598                                    "default": true
599                                },
600                                "max_lines": {
601                                    "type": "integer",
602                                    "description": "Hard cap on returned lines"
603                                }
604                            }
605                        }
606                    ]
607                },
608                "max_tokens": {
609                    "type": "integer",
610                    "description": "Optional token limit for response (approximate)"
611                },
612                "condense": {
613                    "type": "boolean",
614                    "description": "Condense long outputs to head/tail (default: true)",
615                    "default": true
616                },
617                "reads": {
618                    "type": "array",
619                    "description": "Batch mode: array of file read requests to execute in parallel",
620                    "items": {
621                        "type": "object",
622                        "properties": {
623                            "file_path": {
624                                "type": "string",
625                                "description": "Absolute path to the file"
626                            },
627                            "offset": {
628                                "type": "integer",
629                                "description": "1-indexed start line (default: 1)"
630                            },
631                            "limit": {
632                                "type": "integer",
633                                "description": "Max lines to return (default: 500 for batch)"
634                            },
635                            "ranges": {
636                                "type": "array",
637                                "description": "Multiple ranges from the same file",
638                                "items": {
639                                    "type": "object",
640                                    "properties": {
641                                        "offset": { "type": "integer" },
642                                        "limit": { "type": "integer" },
643                                        "mode": { "type": "string", "enum": ["slice", "indentation"] }
644                                    }
645                                }
646                            }
647                        },
648                        "required": ["file_path"]
649                    }
650                },
651                "max_concurrency": {
652                    "type": "integer",
653                    "description": "Batch mode: max concurrent file reads (default: 8)",
654                    "default": 8
655                }
656            }
657        }))
658    }
659}
660
661mod slice {
662    use super::*;
663
664    #[derive(Clone, Debug, PartialEq, Eq)]
665    pub(super) struct SliceReadResult {
666        pub lines: Vec<String>,
667        pub has_more: bool,
668    }
669
670    pub async fn read(path: &Path, offset: usize, limit: usize) -> Result<SliceReadResult> {
671        let file = File::open(path)
672            .await
673            .context(format!("failed to open file: {}", path.display()))?;
674
675        let mut reader = BufReader::new(file);
676        let mut collected = Vec::new();
677        let mut seen = 0usize;
678        let mut buffer = Vec::new();
679        let mut reached_eof = false;
680
681        loop {
682            buffer.clear();
683            let bytes_read = reader
684                .read_until(b'\n', &mut buffer)
685                .await
686                .context("failed to read file")?;
687
688            if bytes_read == 0 {
689                reached_eof = true;
690                break;
691            }
692
693            // Strip newline characters
694            if buffer.last() == Some(&b'\n') {
695                buffer.pop();
696                if buffer.last() == Some(&b'\r') {
697                    buffer.pop();
698                }
699            }
700
701            seen += 1;
702
703            if seen < offset {
704                continue;
705            }
706
707            if collected.len() >= limit {
708                break;
709            }
710
711            let formatted = format_line(&buffer);
712            collected.push(formatted);
713        }
714
715        if seen < offset {
716            anyhow::bail!("offset exceeds file length");
717        }
718
719        Ok(SliceReadResult {
720            lines: collected,
721            has_more: !reached_eof,
722        })
723    }
724}
725
726mod indentation {
727    use super::*;
728
729    pub async fn read_block(
730        path: &Path,
731        offset: usize,
732        limit: usize,
733        options: IndentationArgs,
734    ) -> Result<Vec<String>> {
735        let anchor_line = options.anchor_line.unwrap_or(offset);
736        anyhow::ensure!(
737            anchor_line > 0,
738            "anchor_line must be a 1-indexed line number"
739        );
740
741        let guard_limit = options.max_lines.unwrap_or(limit);
742        anyhow::ensure!(guard_limit > 0, "max_lines must be greater than zero");
743
744        let collected = collect_file_lines(path).await?;
745        anyhow::ensure!(
746            !collected.is_empty() && anchor_line <= collected.len(),
747            "anchor_line exceeds file length"
748        );
749
750        let anchor_index = anchor_line - 1;
751        let effective_indents = compute_effective_indents(&collected);
752        let anchor_indent = effective_indents[anchor_index];
753
754        // Compute the min indent
755        let min_indent = if options.max_levels == 0 {
756            0
757        } else {
758            anchor_indent.saturating_sub(options.max_levels * TAB_WIDTH)
759        };
760
761        // Cap requested lines by guard_limit and file length
762        let final_limit = limit.min(guard_limit).min(collected.len());
763
764        if final_limit == 1 {
765            return Ok(vec![format!(
766                "{}: {}",
767                collected[anchor_index].number, collected[anchor_index].display
768            )]);
769        }
770
771        // Bidirectional cursors
772        // SAFETY: `i` starts as `anchor_index - 1` (isize) and is guarded by `i >= 0` before being cast to usize.
773        let mut i: isize = anchor_index as isize - 1; // up
774        let mut j: usize = anchor_index + 1; // down
775        let mut i_counter_min_indent = 0;
776        let mut j_counter_min_indent = 0;
777
778        let mut out = VecDeque::with_capacity(limit);
779        out.push_back(&collected[anchor_index]);
780
781        while out.len() < final_limit {
782            let mut progressed = 0;
783
784            // Expand upward
785            if i >= 0 {
786                let iu = i as usize;
787                if effective_indents[iu] >= min_indent {
788                    out.push_front(&collected[iu]);
789                    progressed += 1;
790                    i -= 1;
791
792                    // Control sibling inclusion
793                    if effective_indents[iu] == min_indent && !options.include_siblings {
794                        let allow_header_comment =
795                            options.include_header && collected[iu].is_comment();
796                        let can_take_line = allow_header_comment || i_counter_min_indent == 0;
797
798                        if can_take_line {
799                            i_counter_min_indent += 1;
800                        } else {
801                            out.pop_front();
802                            progressed -= 1;
803                            i = -1;
804                        }
805                    }
806
807                    if out.len() >= final_limit {
808                        break;
809                    }
810                } else {
811                    i = -1;
812                }
813            }
814
815            // Expand downward
816            if j < collected.len() {
817                let ju = j;
818                if effective_indents[ju] >= min_indent {
819                    out.push_back(&collected[ju]);
820                    progressed += 1;
821                    j += 1;
822
823                    // Control sibling inclusion
824                    if effective_indents[ju] == min_indent && !options.include_siblings {
825                        if j_counter_min_indent > 0 {
826                            out.pop_back();
827                            progressed -= 1;
828                            j = collected.len();
829                        }
830                        j_counter_min_indent += 1;
831                    }
832                } else {
833                    j = collected.len();
834                }
835            }
836
837            if progressed == 0 {
838                break;
839            }
840        }
841
842        trim_empty_lines(&mut out);
843
844        Ok(out
845            .into_iter()
846            .map(|record| format!("{}: {}", record.number, record.display))
847            .collect())
848    }
849
850    async fn collect_file_lines(path: &Path) -> Result<Vec<LineRecord>> {
851        let file = File::open(path)
852            .await
853            .context(format!("failed to open file: {}", path.display()))?;
854
855        let mut reader = BufReader::new(file);
856        let mut buffer = Vec::new();
857        let mut lines = Vec::new();
858        let mut number = 0usize;
859
860        loop {
861            buffer.clear();
862            let bytes_read = reader
863                .read_until(b'\n', &mut buffer)
864                .await
865                .context("failed to read file")?;
866
867            if bytes_read == 0 {
868                break;
869            }
870
871            if buffer.last() == Some(&b'\n') {
872                buffer.pop();
873                if buffer.last() == Some(&b'\r') {
874                    buffer.pop();
875                }
876            }
877
878            number += 1;
879            let raw = String::from_utf8_lossy(&buffer).into_owned();
880            let indent = measure_indent(&raw);
881            let display = format_line(&buffer);
882            lines.push(LineRecord {
883                number,
884                raw,
885                display,
886                indent,
887            });
888        }
889
890        Ok(lines)
891    }
892
893    fn compute_effective_indents(records: &[LineRecord]) -> Vec<usize> {
894        let mut effective = Vec::with_capacity(records.len());
895        let mut previous_indent = 0usize;
896        for record in records {
897            if record.is_blank() {
898                effective.push(previous_indent);
899            } else {
900                previous_indent = record.indent;
901                effective.push(previous_indent);
902            }
903        }
904        effective
905    }
906
907    fn measure_indent(line: &str) -> usize {
908        line.chars()
909            .take_while(|c| matches!(c, ' ' | '\t'))
910            .map(|c| if c == '\t' { TAB_WIDTH } else { 1 })
911            .sum()
912    }
913}
914
915fn format_line(bytes: &[u8]) -> String {
916    let decoded = String::from_utf8_lossy(bytes);
917    if decoded.len() > MAX_LINE_LENGTH {
918        take_bytes_at_char_boundary(&decoded, MAX_LINE_LENGTH).to_string()
919    } else {
920        decoded.into_owned()
921    }
922}
923
/// Returns the longest prefix of `s` that is at most `limit` bytes long and
/// ends on a UTF-8 character boundary.
///
/// When `limit` is at least `s.len()` the whole string is returned. Otherwise
/// the cut point moves backwards from `limit` until it lands on a boundary,
/// which may leave an empty prefix.
fn take_bytes_at_char_boundary(s: &str, limit: usize) -> &str {
    if limit >= s.len() {
        return s;
    }

    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=limit)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
934
935fn trim_empty_lines(out: &mut VecDeque<&LineRecord>) {
936    while matches!(out.front(), Some(line) if line.raw.trim().is_empty()) {
937        out.pop_front();
938    }
939    while matches!(out.back(), Some(line) if line.raw.trim().is_empty()) {
940        out.pop_back();
941    }
942}
943
944fn condense_collected_lines(lines: &mut Vec<String>) {
945    if looks_like_diff_lines(lines) {
946        return;
947    }
948    const CONDENSED_THRESHOLD: usize = 200;
949    const HEAD_LINES: usize = 80;
950    const TAIL_LINES: usize = 40;
951
952    // If under threshold, return as-is
953    if lines.len() <= CONDENSED_THRESHOLD {
954        return;
955    }
956
957    // Build condensed output: head + omission indicator + tail
958    let head_count = HEAD_LINES.min(lines.len());
959    let tail_count = TAIL_LINES.min(lines.len() - head_count);
960    let omitted_count = lines.len() - head_count - tail_count;
961    let total_lines = lines.len();
962
963    // Take head lines
964    let mut condensed: Vec<String> = lines[..head_count].to_vec();
965
966    // Add omission indicator with actionable guidance
967    condensed.push(format!(
968        "… [+{} lines omitted ({} total). To read omitted ranges: unified_file offset={} limit={}]",
969        omitted_count,
970        total_lines,
971        head_count + 1,
972        omitted_count
973    ));
974
975    // Add tail lines
976    let tail_start = lines.len() - tail_count;
977    condensed.extend_from_slice(&lines[tail_start..]);
978
979    // Replace original with condensed
980    *lines = condensed;
981}
982
983/// Condense lines for batch mode with stricter threshold.
984/// Returns (was_condensed, omitted_count).
985fn condense_for_batch(lines: &mut Vec<String>) -> (bool, usize) {
986    if looks_like_diff_lines(lines) {
987        return (false, 0);
988    }
989    const HEAD_LINES: usize = 15;
990    const TAIL_LINES: usize = 5;
991
992    if lines.len() <= BATCH_CONDENSED_THRESHOLD {
993        return (false, 0);
994    }
995
996    let head_count = HEAD_LINES.min(lines.len());
997    let tail_count = TAIL_LINES.min(lines.len() - head_count);
998    let omitted_count = lines.len() - head_count - tail_count;
999
1000    let mut condensed: Vec<String> = lines[..head_count].to_vec();
1001    condensed.push(format!(
1002        "… [+{} lines omitted ({} total). To read omitted ranges: unified_file offset={} limit={}]",
1003        omitted_count,
1004        lines.len(),
1005        head_count + 1,
1006        omitted_count
1007    ));
1008
1009    let tail_start = lines.len() - tail_count;
1010    condensed.extend_from_slice(&lines[tail_start..]);
1011
1012    *lines = condensed;
1013    (true, omitted_count)
1014}
1015
1016fn looks_like_diff_lines(lines: &[String]) -> bool {
1017    let joined = lines.join("\n");
1018    looks_like_diff_content(&joined)
1019}
1020
1021mod defaults {
1022    pub fn offset() -> usize {
1023        1
1024    }
1025
1026    pub fn limit() -> usize {
1027        2000
1028    }
1029
1030    pub fn batch_limit() -> usize {
1031        500
1032    }
1033
1034    pub fn max_concurrency() -> usize {
1035        super::DEFAULT_MAX_CONCURRENCY
1036    }
1037
1038    pub fn ui_progress() -> bool {
1039        true
1040    }
1041
1042    pub fn max_levels() -> usize {
1043        0
1044    }
1045
1046    pub fn include_siblings() -> bool {
1047        false
1048    }
1049
1050    pub fn include_header() -> bool {
1051        true
1052    }
1053
1054    pub fn condense() -> bool {
1055        true
1056    }
1057}
1058
// Unit tests for argument parsing, single-file reads, batch reads, and the
// batch condensing helper. File-based tests write to a `NamedTempFile`.
#[cfg(test)]
mod tests {
    #[expect(unused_imports)]
    use super::indentation::*;
    use super::slice::*;
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // offset=2, limit=2 over a three-line file yields lines 2 and 3, which
    // pins the offset as 1-indexed.
    #[tokio::test]
    async fn reads_requested_range() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "alpha")?;
        writeln!(temp, "beta")?;
        writeln!(temp, "gamma")?;

        let lines = read(temp.path(), 2, 2).await?.lines;
        assert_eq!(lines, vec!["beta".to_string(), "gamma".to_string()]);
        Ok(())
    }

    // `"indentation": false` must deserialize and leave `indentation` unset.
    #[test]
    fn read_file_args_accepts_boolean_indentation() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": "slice",
            "indentation": false
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Slice));
        assert!(parsed.indentation.is_none());
    }

    // `"indentation": true` must deserialize into a populated `indentation`.
    #[test]
    fn read_file_args_accepts_true_indentation() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": "indentation",
            "indentation": true
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Indentation));
        assert!(parsed.indentation.is_some());
    }

    // An empty `mode` string is accepted and parsed as `ReadMode::Slice`.
    #[test]
    fn read_file_args_accepts_empty_mode() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": ""
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Slice));
    }

    // With `condense: false` the handler output has no omission marker and
    // keeps all 60 lines.
    // NOTE(review): 60 lines is below the 200-line threshold used by
    // `condense_collected_lines`, so this may also pass if the flag were
    // ignored — confirm whether the handler condenses at a lower threshold.
    #[tokio::test]
    async fn read_file_handler_skips_condense_when_disabled() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        for idx in 0..60 {
            writeln!(temp, "line-{idx}")?;
        }

        let args = ReadFileArgs {
            file_path: temp.path().to_string_lossy().to_string(),
            offset: 1,
            limit: 2000,
            mode: ReadMode::Slice,
            indentation: None,
            max_tokens: None,
            condense: false,
        };
        let handler = ReadFileHandler;
        let content = handler.handle(args).await?;

        assert!(!content.contains("lines omitted"));
        assert_eq!(content.lines().count(), 60);
        Ok(())
    }

    // Requesting offset 3 from a one-line file is an error, not an empty read.
    #[tokio::test]
    async fn errors_when_offset_exceeds_length() {
        let mut temp = NamedTempFile::new().unwrap();
        writeln!(temp, "only").unwrap();

        let err = read(temp.path(), 3, 1).await;
        err.unwrap_err();
    }

    // Invalid UTF-8 bytes are replaced with U+FFFD rather than failing.
    #[tokio::test]
    async fn reads_non_utf8_lines() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        temp.as_file_mut().write_all(b"\xff\xfe\nplain\n")?;

        let lines = read(temp.path(), 1, 2).await?.lines;
        let expected_first = format!("{}{}", '\u{FFFD}', '\u{FFFD}');
        assert_eq!(lines, vec![expected_first, "plain".to_string()]);
        Ok(())
    }

    // CRLF line endings are stripped entirely, leaving no trailing `\r`.
    #[tokio::test]
    async fn trims_crlf_endings() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        write!(temp, "one\r\ntwo\r\n")?;

        let lines = read(temp.path(), 1, 2).await?.lines;
        assert_eq!(lines, vec!["one".to_string(), "two".to_string()]);
        Ok(())
    }

    // When the file has more lines than `limit`, only `limit` lines come back
    // and `has_more` is set.
    #[tokio::test]
    async fn respects_limit_even_with_more_lines() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "first")?;
        writeln!(temp, "second")?;
        writeln!(temp, "third")?;

        let result = read(temp.path(), 1, 2).await?;
        assert_eq!(
            result.lines,
            vec!["first".to_string(), "second".to_string()]
        );
        assert!(result.has_more);
        Ok(())
    }

    // When `limit` lands exactly on the last line, `has_more` stays false.
    #[tokio::test]
    async fn reads_exact_limit_without_continuation_at_eof() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "first")?;
        writeln!(temp, "second")?;

        let result = read(temp.path(), 1, 2).await?;
        assert_eq!(
            result.lines,
            vec!["first".to_string(), "second".to_string()]
        );
        assert!(!result.has_more);
        Ok(())
    }

    // A line longer than `MAX_LINE_LENGTH` is cut to exactly that length.
    #[tokio::test]
    async fn truncates_lines_longer_than_max_length() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        let long_line = "x".repeat(MAX_LINE_LENGTH + 50);
        writeln!(temp, "{long_line}")?;

        let lines = read(temp.path(), 1, 1).await?.lines;
        let expected = "x".repeat(MAX_LINE_LENGTH);
        assert_eq!(lines, vec![expected]);
        Ok(())
    }

    // A batch over two files reports both as read and succeeded, and the
    // combined `content` contains text from each file.
    #[tokio::test]
    async fn batch_reads_multiple_files() -> Result<()> {
        let mut temp1 = NamedTempFile::new()?;
        writeln!(temp1, "file1_line1")?;
        writeln!(temp1, "file1_line2")?;

        let mut temp2 = NamedTempFile::new()?;
        writeln!(temp2, "file2_line1")?;
        writeln!(temp2, "file2_line2")?;

        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![
                BatchReadRequest {
                    file_path: temp1.path().to_string_lossy().to_string(),
                    range: None,
                    ranges: None,
                },
                BatchReadRequest {
                    file_path: temp2.path().to_string_lossy().to_string(),
                    range: None,
                    ranges: None,
                },
            ],
            max_concurrency: 2,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], true);
        assert_eq!(result["files_read"], 2);
        assert_eq!(result["files_succeeded"], 2);

        let content = result["content"].as_str().unwrap();
        assert!(content.contains("file1_line1"));
        assert!(content.contains("file2_line1"));
        Ok(())
    }

    // Two ranges requested for one file produce a single item whose `ranges`
    // array keeps both entries in request order.
    #[tokio::test]
    async fn batch_reads_multiple_ranges_from_same_file() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        for i in 1..=20 {
            writeln!(temp, "line{i}")?;
        }

        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![BatchReadRequest {
                file_path: temp.path().to_string_lossy().to_string(),
                range: None,
                ranges: Some(vec![
                    ReadRange {
                        offset: 1,
                        limit: 3,
                        mode: ReadMode::Slice,
                        indentation: None,
                    },
                    ReadRange {
                        offset: 10,
                        limit: 3,
                        mode: ReadMode::Slice,
                        indentation: None,
                    },
                ]),
            }],
            max_concurrency: 4,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], true);

        let items = result["items"].as_array().unwrap();
        assert_eq!(items.len(), 1);

        let ranges = items[0]["ranges"].as_array().unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges[0]["offset"], 1);
        assert_eq!(ranges[1]["offset"], 10);
        Ok(())
    }

    // A missing file does not fail the whole call: `handle_batch` returns Ok
    // with `success == false` and a per-item `error` string.
    #[tokio::test]
    async fn batch_handles_missing_file_gracefully() -> Result<()> {
        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![BatchReadRequest {
                file_path: "/nonexistent/path/file.txt".to_string(),
                range: None,
                ranges: None,
            }],
            max_concurrency: 1,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], false);

        let items = result["items"].as_array().unwrap();
        assert!(items[0]["error"].as_str().is_some());
        Ok(())
    }

    // 20 lines is under the batch threshold, so nothing is condensed.
    #[test]
    fn condense_for_batch_preserves_small_outputs() {
        let mut lines: Vec<String> = (1..=20).map(|i| format!("line{i}")).collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(!condensed);
        assert_eq!(omitted, 0);
        assert_eq!(lines.len(), 20);
    }

    // 200 lines exceeds the batch threshold: output shrinks and gains an
    // "omitted" marker line.
    #[test]
    fn condense_for_batch_condenses_large_outputs() {
        let mut lines: Vec<String> = (1..=200).map(|i| format!("line{i}")).collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(condensed);
        assert!(omitted > 0);
        assert!(lines.len() < 200);
        assert!(lines.iter().any(|l| l.contains("omitted")));
    }

    // Lines that merely start with "+ " or "- " are not diff output and must
    // still be condensed.
    #[test]
    fn condense_for_batch_does_not_treat_plus_minus_text_as_diff() {
        let mut lines: Vec<String> = (1..=150)
            .map(|i| {
                if i % 2 == 0 {
                    format!("+ normal status line {i}")
                } else {
                    format!("- normal status line {i}")
                }
            })
            .collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(condensed);
        assert!(omitted > 0);
    }

    // A git-style diff is reported as not condensed.
    // NOTE(review): at 7 lines this input is also under the batch threshold,
    // so the diff check itself is not isolated here — consider a longer diff.
    #[test]
    fn condense_for_batch_preserves_actual_diff_output() {
        let mut lines = vec![
            "diff --git a/src/main.rs b/src/main.rs".to_string(),
            "index 1111111..2222222 100644".to_string(),
            "--- a/src/main.rs".to_string(),
            "+++ b/src/main.rs".to_string(),
            "@@ -1 +1 @@".to_string(),
            "-old".to_string(),
            "+new".to_string(),
        ];
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(!condensed);
        assert_eq!(omitted, 0);
    }
}