1use std::collections::VecDeque;
2use std::fmt::Write;
3use std::path::{Path, PathBuf};
4use std::sync::Arc;
5use std::sync::atomic::{AtomicU64, AtomicUsize, Ordering};
6
7use anyhow::{Context, Result};
8use async_trait::async_trait;
9use futures::stream::{self, StreamExt};
10use serde::{Deserialize, Deserializer, Serialize};
11use serde_json::{Value, json};
12use tokio::fs::File;
13use tokio::io::{AsyncBufReadExt, BufReader};
14use tokio::sync::Semaphore;
15use vtcode_commons::diff_paths::looks_like_diff_content;
16
17use crate::tools::error_helpers::deserialize_tool_args;
18use crate::tools::traits::Tool;
19use crate::utils::serde_helpers::{deserialize_maybe_quoted, deserialize_opt_maybe_quoted};
20
/// Stateless handler backing the `read_file` tool.
///
/// The type has no fields; single-file reads go through `handle` /
/// `handle_detailed` and multi-file reads through `handle_batch`.
pub struct ReadFileHandler;
22
/// Maximum size, in bytes, of a formatted line; `format_line` truncates anything longer.
const MAX_LINE_LENGTH: usize = 500;
/// Number of indentation columns a tab character counts for.
const TAB_WIDTH: usize = 4;
/// Prefixes that mark a (whitespace-trimmed) line as a comment.
const COMMENT_PREFIXES: &[&str] = &["#", "//", "--"];
/// Lower bound applied to `limit` for slice-mode single-file reads without `max_tokens`.
const MIN_BATCH_LIMIT: usize = 200;
/// Default value for `BatchReadArgs::max_concurrency`.
const DEFAULT_MAX_CONCURRENCY: usize = 8;
/// Batch range results with more lines than this are condensed to head/tail.
const BATCH_CONDENSED_THRESHOLD: usize = 100;
29
/// Result of a single-file read as produced by `ReadFileHandler::handle_detailed`.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ReadFileOutcome {
    /// The returned lines joined with `\n` (after optional condensing).
    pub content: String,
    /// Number of lines collected from the file before any condensing.
    pub lines_read: usize,
    /// `true` when a slice read stopped at its limit before reaching end of file.
    /// Always `false` for indentation-mode reads.
    pub has_more: bool,
}
36
/// Arguments for a single-file read.
///
/// Numeric and boolean fields go through the `deserialize_maybe_quoted`
/// helpers instead of the plain derived deserializers.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct ReadFileArgs {
    /// Path of the file to read; `handle_detailed` rejects non-absolute paths.
    /// Also accepted under the keys `path`, `filepath`, `target_path` and `file`.
    #[serde(
        alias = "path",
        alias = "filepath",
        alias = "target_path",
        alias = "file"
    )]
    pub file_path: String,
    /// 1-indexed line to start from (default: 1). Zero is rejected by `handle_detailed`.
    #[serde(
        default = "defaults::offset",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub offset: usize,
    /// Maximum number of lines to return (default: 2000). Zero is rejected by `handle_detailed`.
    #[serde(
        default = "defaults::limit",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub limit: usize,
    /// Read strategy; missing, `null` or empty string means `ReadMode::Slice`.
    #[serde(default, deserialize_with = "deserialize_read_mode")]
    pub mode: ReadMode,
    /// Indentation-mode settings; accepts a boolean, a `"true"`/`"false"` string or an object.
    #[serde(default, deserialize_with = "deserialize_indentation")]
    pub indentation: Option<IndentationArgs>,
    /// Optional token budget. In this file it only disables the `MIN_BATCH_LIMIT`
    /// floor that `handle_detailed` otherwise applies to slice reads.
    #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
    pub max_tokens: Option<usize>,
    /// Whether long outputs are condensed to head/tail (default: true).
    #[serde(
        default = "defaults::condense",
        deserialize_with = "deserialize_maybe_quoted"
    )]
    pub condense: bool,
}
76
/// Arguments for a batch read (selected by `Tool::execute` when a `reads` key is present).
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct BatchReadArgs {
    /// The individual file requests to execute.
    pub reads: Vec<BatchReadRequest>,
    /// Upper bound on requests processed at the same time (default: `DEFAULT_MAX_CONCURRENCY`).
    #[serde(default = "defaults::max_concurrency")]
    pub max_concurrency: usize,
    /// Progress-display flag (default: true).
    /// NOTE(review): not read anywhere in this file — presumably consumed by a caller; confirm.
    #[serde(default = "defaults::ui_progress")]
    pub ui_progress: bool,
}
89
/// One file entry of a batch read.
#[derive(Deserialize, Serialize, Clone, Debug)]
pub struct BatchReadRequest {
    /// Path of the file to read; non-absolute paths produce a per-file error.
    pub file_path: String,
    /// A single range whose fields (`offset`, `limit`, `mode`, `indentation`) are
    /// written inline in the request object. Ignored when `ranges` is set.
    #[serde(flatten)]
    pub range: Option<ReadRange>,
    /// Several ranges to read from the same file; takes precedence over `range`.
    #[serde(default)]
    pub ranges: Option<Vec<ReadRange>>,
}
102
103#[derive(Deserialize, Serialize, Clone, Debug, Default)]
105pub struct ReadRange {
106 #[serde(
108 default = "defaults::offset",
109 deserialize_with = "deserialize_maybe_quoted"
110 )]
111 pub offset: usize,
112 #[serde(
114 default = "defaults::batch_limit",
115 deserialize_with = "deserialize_maybe_quoted"
116 )]
117 pub limit: usize,
118 #[serde(default, deserialize_with = "deserialize_read_mode")]
120 pub mode: ReadMode,
121 #[serde(default, deserialize_with = "deserialize_indentation")]
123 pub indentation: Option<IndentationArgs>,
124}
125
/// Outcome of one `BatchReadRequest`.
#[derive(Serialize, Clone, Debug)]
pub struct BatchReadResult {
    /// The path exactly as given in the request.
    pub file_path: String,
    /// Ranges read successfully; when `error` is set these are the ranges
    /// completed before the failure (possibly none).
    pub ranges: Vec<RangeResult>,
    /// Error message when the path was not absolute or a range read failed.
    /// Omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<String>,
}
137
/// Content and metadata for one range read during a batch.
#[derive(Serialize, Clone, Debug)]
pub struct RangeResult {
    /// The offset used for the read (the requested offset raised to at least 1).
    pub offset: usize,
    /// Number of lines collected before condensing.
    pub lines_read: usize,
    /// Whether `content` was condensed to head/tail.
    pub condensed: bool,
    /// Number of lines dropped by condensing; omitted from the output when nothing was dropped.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub omitted_lines: Option<usize>,
    /// The (possibly condensed) lines joined with `\n`.
    pub content: String,
}
153
/// Shared progress counters for a batch read.
///
/// Every field is behind an `Arc`, so clones of a `BatchProgress` observe and
/// update the same counters.
#[derive(Clone)]
pub struct BatchProgress {
    /// Number of file requests in the batch.
    pub total_files: Arc<AtomicUsize>,
    /// Number of file requests finished so far (incremented by `file_completed`).
    pub completed_files: Arc<AtomicUsize>,
    /// Path most recently passed to `file_started`.
    pub current_file: Arc<tokio::sync::RwLock<String>>,
    /// Initialised to 0; not updated anywhere in this file.
    /// NOTE(review): presumably the expected total byte count — confirm intended use.
    pub total_bytes: Arc<AtomicU64>,
    /// Running byte counter, advanced by `add_bytes`.
    pub bytes_read: Arc<AtomicU64>,
}
168
169impl BatchProgress {
170 pub fn new(total_files: usize) -> Self {
171 Self {
172 total_files: Arc::new(AtomicUsize::new(total_files)),
173 completed_files: Arc::new(AtomicUsize::new(0)),
174 current_file: Arc::new(tokio::sync::RwLock::new(String::new())),
175 total_bytes: Arc::new(AtomicU64::new(0)),
176 bytes_read: Arc::new(AtomicU64::new(0)),
177 }
178 }
179
180 pub async fn file_started(&self, file_path: &str) {
181 let mut current = self.current_file.write().await;
182 *current = file_path.to_string();
183 }
184
185 pub fn file_completed(&self) {
186 self.completed_files.fetch_add(1, Ordering::Relaxed);
187 }
188
189 pub fn add_bytes(&self, bytes: u64) {
190 self.bytes_read.fetch_add(bytes, Ordering::Relaxed);
191 }
192
193 pub fn progress_percent(&self) -> f64 {
194 let completed = self.completed_files.load(Ordering::Relaxed);
195 let total = self.total_files.load(Ordering::Relaxed);
196 if total == 0 {
197 100.0
198 } else {
199 (completed as f64 / total as f64) * 100.0
200 }
201 }
202
203 pub async fn status_line(&self) -> (String, String) {
204 let completed = self.completed_files.load(Ordering::Relaxed);
205 let total = self.total_files.load(Ordering::Relaxed);
206 let current = self.current_file.read().await;
207 let file_name = PathBuf::from(current.as_str())
208 .file_name()
209 .map(|n| n.to_string_lossy().to_string())
210 .unwrap_or_else(|| current.clone());
211
212 let left = format!("Reading {}/{}: {}", completed + 1, total, file_name);
213 let right = format!("{:.0}%", self.progress_percent());
214 (left, right)
215 }
216}
217
/// Strategy used to select lines from a file. Serialized in `snake_case`.
#[derive(Deserialize, Serialize, Clone, Debug, Default)]
#[serde(rename_all = "snake_case")]
pub enum ReadMode {
    /// Plain contiguous range starting at `offset` (the default).
    #[default]
    Slice,
    /// Indentation-aware block selection around an anchor line.
    Indentation,
}
225
226#[derive(Deserialize, Serialize, Clone, Debug, Default)]
228pub struct IndentationArgs {
229 #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
231 pub anchor_line: Option<usize>,
232 #[serde(
234 default = "defaults::max_levels",
235 deserialize_with = "deserialize_maybe_quoted"
236 )]
237 pub max_levels: usize,
238 #[serde(default = "defaults::include_siblings")]
240 pub include_siblings: bool,
241 #[serde(default = "defaults::include_header")]
243 pub include_header: bool,
244 #[serde(default, deserialize_with = "deserialize_opt_maybe_quoted")]
246 pub max_lines: Option<usize>,
247}
248
249fn deserialize_read_mode<'de, D>(deserializer: D) -> Result<ReadMode, D::Error>
250where
251 D: Deserializer<'de>,
252{
253 let value = Value::deserialize(deserializer)?;
254 match value {
255 Value::Null => Ok(ReadMode::Slice),
256 Value::String(raw) => {
257 let trimmed = raw.trim();
258 if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("slice") {
259 Ok(ReadMode::Slice)
260 } else if trimmed.eq_ignore_ascii_case("indentation") {
261 Ok(ReadMode::Indentation)
262 } else {
263 Err(serde::de::Error::custom(format!(
264 "invalid read mode: {trimmed}"
265 )))
266 }
267 }
268 other => Err(serde::de::Error::custom(format!(
269 "invalid read mode type: {other}"
270 ))),
271 }
272}
273
274fn deserialize_indentation<'de, D>(deserializer: D) -> Result<Option<IndentationArgs>, D::Error>
275where
276 D: Deserializer<'de>,
277{
278 let value = Value::deserialize(deserializer)?;
279 match value {
280 Value::Null => Ok(None),
281 Value::Bool(true) => Ok(Some(IndentationArgs::default())),
282 Value::Bool(false) => Ok(None),
283 Value::String(raw) => {
284 let trimmed = raw.trim();
285 if trimmed.is_empty() || trimmed.eq_ignore_ascii_case("false") {
286 Ok(None)
287 } else if trimmed.eq_ignore_ascii_case("true") {
288 Ok(Some(IndentationArgs::default()))
289 } else {
290 Err(serde::de::Error::custom(format!(
291 "invalid indentation value: {trimmed}"
292 )))
293 }
294 }
295 Value::Object(_) => {
296 let args = IndentationArgs::deserialize(value).map_err(serde::de::Error::custom)?;
297 Ok(Some(args))
298 }
299 other => Err(serde::de::Error::custom(format!(
300 "invalid indentation type: {other}"
301 ))),
302 }
303}
304
/// One line of a file as loaded for indentation-mode reads.
#[derive(Clone, Debug)]
struct LineRecord {
    /// 1-indexed line number within the file.
    number: usize,
    /// Line text (lossily decoded as UTF-8) without its line terminator.
    raw: String,
    /// Line text as produced by `format_line` (truncated to `MAX_LINE_LENGTH` bytes).
    display: String,
    /// Leading-whitespace width in columns, counting a tab as `TAB_WIDTH`.
    indent: usize,
}
312
313impl LineRecord {
314 fn trimmed(&self) -> &str {
315 self.raw.trim_start()
316 }
317
318 fn is_blank(&self) -> bool {
319 self.trimmed().is_empty()
320 }
321
322 fn is_comment(&self) -> bool {
323 COMMENT_PREFIXES
324 .iter()
325 .any(|prefix| self.raw.trim().starts_with(prefix))
326 }
327}
328
329impl ReadFileHandler {
330 pub async fn handle_batch(&self, args: BatchReadArgs) -> Result<Value> {
332 if args.reads.is_empty() {
333 return Ok(json!({
334 "success": false,
335 "error": "No read requests provided"
336 }));
337 }
338
339 let progress = BatchProgress::new(args.reads.len());
340 let semaphore = Arc::new(Semaphore::new(args.max_concurrency.min(args.reads.len())));
341
342 let results: Vec<BatchReadResult> = stream::iter(args.reads)
343 .map(|req| {
344 let sem = semaphore.clone();
345 let prog = progress.clone();
346 async move {
347 let _permit = sem.acquire().await.ok();
348 prog.file_started(&req.file_path).await;
349 let result = self.read_single_batch_request(&req).await;
350 prog.file_completed();
351 result
352 }
353 })
354 .buffer_unordered(args.max_concurrency)
355 .collect()
356 .await;
357
358 let mut content_parts = Vec::new();
360 let mut buf = String::new();
361 for result in &results {
362 if let Some(ref error) = result.error {
363 buf.clear();
364 let _ = write!(buf, "== {} (ERROR)\n{}", result.file_path, error);
365 content_parts.push(std::mem::take(&mut buf));
366 } else {
367 for range in &result.ranges {
368 let end_line = range.offset + range.lines_read.saturating_sub(1);
369 buf.clear();
370 let _ = write!(
371 buf,
372 "== {} (L{}..L{})\n{}",
373 result.file_path, range.offset, end_line, range.content
374 );
375 content_parts.push(std::mem::take(&mut buf));
376 }
377 }
378 }
379
380 let all_success = results.iter().all(|r| r.error.is_none());
381 Ok(json!({
382 "success": all_success,
383 "content": content_parts.join("\n\n"),
384 "items": results,
385 "files_read": results.len(),
386 "files_succeeded": results.iter().filter(|r| r.error.is_none()).count(),
387 "no_spool": true
388 }))
389 }
390
391 async fn read_single_batch_request(&self, req: &BatchReadRequest) -> BatchReadResult {
393 let path = PathBuf::from(&req.file_path);
394
395 if !path.is_absolute() {
397 return BatchReadResult {
398 file_path: req.file_path.clone(),
399 ranges: vec![],
400 error: Some("file_path must be an absolute path".to_string()),
401 };
402 }
403
404 let ranges_to_read: Vec<ReadRange> = if let Some(ref ranges) = req.ranges {
406 ranges.clone()
407 } else if let Some(ref range) = req.range {
408 vec![range.clone()]
409 } else {
410 vec![ReadRange::default()]
411 };
412
413 let mut range_results = Vec::new();
414 for range in ranges_to_read {
415 match self.read_range(&path, &range).await {
416 Ok(result) => range_results.push(result),
417 Err(e) => {
418 return BatchReadResult {
419 file_path: req.file_path.clone(),
420 ranges: range_results,
421 error: Some(e.to_string()),
422 };
423 }
424 }
425 }
426
427 BatchReadResult {
428 file_path: req.file_path.clone(),
429 ranges: range_results,
430 error: None,
431 }
432 }
433
434 async fn read_range(&self, path: &Path, range: &ReadRange) -> Result<RangeResult> {
436 let offset = range.offset.max(1);
437 let limit = range.limit.max(1);
438
439 let mut collected = match range.mode {
440 ReadMode::Slice => slice::read(path, offset, limit).await?.lines,
441 ReadMode::Indentation => {
442 let indentation = range.indentation.clone().unwrap_or_default();
443 indentation::read_block(path, offset, limit, indentation).await?
444 }
445 };
446
447 let original_len = collected.len();
448 let (condensed, omitted) = condense_for_batch(&mut collected);
449
450 Ok(RangeResult {
451 offset,
452 lines_read: original_len,
453 condensed,
454 omitted_lines: (omitted > 0).then_some(omitted),
455 content: collected.join("\n"),
456 })
457 }
458
459 pub(crate) async fn handle_detailed(&self, args: ReadFileArgs) -> Result<ReadFileOutcome> {
460 let ReadFileArgs {
461 file_path,
462 offset,
463 limit,
464 mode,
465 indentation,
466 max_tokens,
467 condense,
468 } = args;
469
470 anyhow::ensure!(offset > 0, "offset must be a 1-indexed line number");
471 anyhow::ensure!(limit > 0, "limit must be greater than zero");
472
473 let path = PathBuf::from(&file_path);
474 anyhow::ensure!(path.is_absolute(), "file_path must be an absolute path");
475
476 let effective_limit =
477 if matches!(mode, ReadMode::Slice) && max_tokens.is_none() && limit < MIN_BATCH_LIMIT {
478 MIN_BATCH_LIMIT
479 } else {
480 limit
481 };
482
483 let (mut collected, has_more) = match mode {
484 ReadMode::Slice => {
485 let result = slice::read(&path, offset, effective_limit).await?;
486 (result.lines, result.has_more)
487 }
488 ReadMode::Indentation => {
489 let indentation = indentation.unwrap_or_default();
490 (
491 indentation::read_block(&path, offset, limit, indentation).await?,
492 false,
493 )
494 }
495 };
496 let lines_read = collected.len();
497
498 if condense {
499 condense_collected_lines(&mut collected);
501 }
502
503 Ok(ReadFileOutcome {
504 content: collected.join("\n"),
505 lines_read,
506 has_more,
507 })
508 }
509
510 pub async fn handle(&self, args: ReadFileArgs) -> Result<String> {
512 Ok(self.handle_detailed(args).await?.content)
513 }
514}
515
516#[async_trait]
517impl Tool for ReadFileHandler {
518 async fn execute(&self, args: Value) -> Result<Value> {
519 if args.get("reads").is_some() {
521 let batch_args: BatchReadArgs = deserialize_tool_args(&args, "read_file")?;
522 return self.handle_batch(batch_args).await;
523 }
524
525 let args: ReadFileArgs = deserialize_tool_args(&args, "read_file")?;
527
528 let file_path = args.file_path.clone();
529 let content = self.handle_detailed(args).await?.content;
530
531 Ok(json!({
532 "content": content,
533 "file_path": file_path,
534 "path": file_path,
535 "success": true,
536 "no_spool": true
537 }))
538 }
539
540 fn name(&self) -> &str {
541 "read_file"
542 }
543
544 fn description(&self) -> &str {
545 "Read file contents with optional line range, indentation-aware block selection, or batch multiple files"
546 }
547
548 fn parameter_schema(&self) -> Option<Value> {
549 Some(json!({
550 "type": "object",
551 "properties": {
552 "file_path": {
553 "type": "string",
554 "description": "Absolute path to the file to read (for single-file mode)"
555 },
556 "offset": {
557 "type": "integer",
558 "description": "1-indexed line number to start from (default: 1)",
559 "default": 1,
560 "minimum": 1
561 },
562 "limit": {
563 "type": "integer",
564 "description": "Maximum lines to return (default: 2000)",
565 "default": 2000,
566 "minimum": 1
567 },
568 "mode": {
569 "type": "string",
570 "enum": ["slice", "indentation"],
571 "description": "Read mode: slice for simple range, indentation for block",
572 "default": "slice"
573 },
574 "indentation": {
575 "description": "Indentation settings when mode=indentation",
576 "anyOf": [
577 {"type": "boolean"},
578 {
579 "type": "object",
580 "properties": {
581 "anchor_line": {
582 "type": "integer",
583 "description": "Line number to anchor on (defaults to offset)"
584 },
585 "max_levels": {
586 "type": "integer",
587 "description": "Max indentation depth (0=unlimited)",
588 "default": 0
589 },
590 "include_siblings": {
591 "type": "boolean",
592 "description": "Include sibling blocks",
593 "default": false
594 },
595 "include_header": {
596 "type": "boolean",
597 "description": "Include header lines above anchor",
598 "default": true
599 },
600 "max_lines": {
601 "type": "integer",
602 "description": "Hard cap on returned lines"
603 }
604 }
605 }
606 ]
607 },
608 "max_tokens": {
609 "type": "integer",
610 "description": "Optional token limit for response (approximate)"
611 },
612 "condense": {
613 "type": "boolean",
614 "description": "Condense long outputs to head/tail (default: true)",
615 "default": true
616 },
617 "reads": {
618 "type": "array",
619 "description": "Batch mode: array of file read requests to execute in parallel",
620 "items": {
621 "type": "object",
622 "properties": {
623 "file_path": {
624 "type": "string",
625 "description": "Absolute path to the file"
626 },
627 "offset": {
628 "type": "integer",
629 "description": "1-indexed start line (default: 1)"
630 },
631 "limit": {
632 "type": "integer",
633 "description": "Max lines to return (default: 500 for batch)"
634 },
635 "ranges": {
636 "type": "array",
637 "description": "Multiple ranges from the same file",
638 "items": {
639 "type": "object",
640 "properties": {
641 "offset": { "type": "integer" },
642 "limit": { "type": "integer" },
643 "mode": { "type": "string", "enum": ["slice", "indentation"] }
644 }
645 }
646 }
647 },
648 "required": ["file_path"]
649 }
650 },
651 "max_concurrency": {
652 "type": "integer",
653 "description": "Batch mode: max concurrent file reads (default: 8)",
654 "default": 8
655 }
656 }
657 }))
658 }
659}
660
661mod slice {
662 use super::*;
663
664 #[derive(Clone, Debug, PartialEq, Eq)]
665 pub(super) struct SliceReadResult {
666 pub lines: Vec<String>,
667 pub has_more: bool,
668 }
669
670 pub async fn read(path: &Path, offset: usize, limit: usize) -> Result<SliceReadResult> {
671 let file = File::open(path)
672 .await
673 .context(format!("failed to open file: {}", path.display()))?;
674
675 let mut reader = BufReader::new(file);
676 let mut collected = Vec::new();
677 let mut seen = 0usize;
678 let mut buffer = Vec::new();
679 let mut reached_eof = false;
680
681 loop {
682 buffer.clear();
683 let bytes_read = reader
684 .read_until(b'\n', &mut buffer)
685 .await
686 .context("failed to read file")?;
687
688 if bytes_read == 0 {
689 reached_eof = true;
690 break;
691 }
692
693 if buffer.last() == Some(&b'\n') {
695 buffer.pop();
696 if buffer.last() == Some(&b'\r') {
697 buffer.pop();
698 }
699 }
700
701 seen += 1;
702
703 if seen < offset {
704 continue;
705 }
706
707 if collected.len() >= limit {
708 break;
709 }
710
711 let formatted = format_line(&buffer);
712 collected.push(formatted);
713 }
714
715 if seen < offset {
716 anyhow::bail!("offset exceeds file length");
717 }
718
719 Ok(SliceReadResult {
720 lines: collected,
721 has_more: !reached_eof,
722 })
723 }
724}
725
726mod indentation {
727 use super::*;
728
729 pub async fn read_block(
730 path: &Path,
731 offset: usize,
732 limit: usize,
733 options: IndentationArgs,
734 ) -> Result<Vec<String>> {
735 let anchor_line = options.anchor_line.unwrap_or(offset);
736 anyhow::ensure!(
737 anchor_line > 0,
738 "anchor_line must be a 1-indexed line number"
739 );
740
741 let guard_limit = options.max_lines.unwrap_or(limit);
742 anyhow::ensure!(guard_limit > 0, "max_lines must be greater than zero");
743
744 let collected = collect_file_lines(path).await?;
745 anyhow::ensure!(
746 !collected.is_empty() && anchor_line <= collected.len(),
747 "anchor_line exceeds file length"
748 );
749
750 let anchor_index = anchor_line - 1;
751 let effective_indents = compute_effective_indents(&collected);
752 let anchor_indent = effective_indents[anchor_index];
753
754 let min_indent = if options.max_levels == 0 {
756 0
757 } else {
758 anchor_indent.saturating_sub(options.max_levels * TAB_WIDTH)
759 };
760
761 let final_limit = limit.min(guard_limit).min(collected.len());
763
764 if final_limit == 1 {
765 return Ok(vec![format!(
766 "{}: {}",
767 collected[anchor_index].number, collected[anchor_index].display
768 )]);
769 }
770
771 let mut i: isize = anchor_index as isize - 1; let mut j: usize = anchor_index + 1; let mut i_counter_min_indent = 0;
776 let mut j_counter_min_indent = 0;
777
778 let mut out = VecDeque::with_capacity(limit);
779 out.push_back(&collected[anchor_index]);
780
781 while out.len() < final_limit {
782 let mut progressed = 0;
783
784 if i >= 0 {
786 let iu = i as usize;
787 if effective_indents[iu] >= min_indent {
788 out.push_front(&collected[iu]);
789 progressed += 1;
790 i -= 1;
791
792 if effective_indents[iu] == min_indent && !options.include_siblings {
794 let allow_header_comment =
795 options.include_header && collected[iu].is_comment();
796 let can_take_line = allow_header_comment || i_counter_min_indent == 0;
797
798 if can_take_line {
799 i_counter_min_indent += 1;
800 } else {
801 out.pop_front();
802 progressed -= 1;
803 i = -1;
804 }
805 }
806
807 if out.len() >= final_limit {
808 break;
809 }
810 } else {
811 i = -1;
812 }
813 }
814
815 if j < collected.len() {
817 let ju = j;
818 if effective_indents[ju] >= min_indent {
819 out.push_back(&collected[ju]);
820 progressed += 1;
821 j += 1;
822
823 if effective_indents[ju] == min_indent && !options.include_siblings {
825 if j_counter_min_indent > 0 {
826 out.pop_back();
827 progressed -= 1;
828 j = collected.len();
829 }
830 j_counter_min_indent += 1;
831 }
832 } else {
833 j = collected.len();
834 }
835 }
836
837 if progressed == 0 {
838 break;
839 }
840 }
841
842 trim_empty_lines(&mut out);
843
844 Ok(out
845 .into_iter()
846 .map(|record| format!("{}: {}", record.number, record.display))
847 .collect())
848 }
849
850 async fn collect_file_lines(path: &Path) -> Result<Vec<LineRecord>> {
851 let file = File::open(path)
852 .await
853 .context(format!("failed to open file: {}", path.display()))?;
854
855 let mut reader = BufReader::new(file);
856 let mut buffer = Vec::new();
857 let mut lines = Vec::new();
858 let mut number = 0usize;
859
860 loop {
861 buffer.clear();
862 let bytes_read = reader
863 .read_until(b'\n', &mut buffer)
864 .await
865 .context("failed to read file")?;
866
867 if bytes_read == 0 {
868 break;
869 }
870
871 if buffer.last() == Some(&b'\n') {
872 buffer.pop();
873 if buffer.last() == Some(&b'\r') {
874 buffer.pop();
875 }
876 }
877
878 number += 1;
879 let raw = String::from_utf8_lossy(&buffer).into_owned();
880 let indent = measure_indent(&raw);
881 let display = format_line(&buffer);
882 lines.push(LineRecord {
883 number,
884 raw,
885 display,
886 indent,
887 });
888 }
889
890 Ok(lines)
891 }
892
893 fn compute_effective_indents(records: &[LineRecord]) -> Vec<usize> {
894 let mut effective = Vec::with_capacity(records.len());
895 let mut previous_indent = 0usize;
896 for record in records {
897 if record.is_blank() {
898 effective.push(previous_indent);
899 } else {
900 previous_indent = record.indent;
901 effective.push(previous_indent);
902 }
903 }
904 effective
905 }
906
907 fn measure_indent(line: &str) -> usize {
908 line.chars()
909 .take_while(|c| matches!(c, ' ' | '\t'))
910 .map(|c| if c == '\t' { TAB_WIDTH } else { 1 })
911 .sum()
912 }
913}
914
915fn format_line(bytes: &[u8]) -> String {
916 let decoded = String::from_utf8_lossy(bytes);
917 if decoded.len() > MAX_LINE_LENGTH {
918 take_bytes_at_char_boundary(&decoded, MAX_LINE_LENGTH).to_string()
919 } else {
920 decoded.into_owned()
921 }
922}
923
/// Returns the longest prefix of `s` that is at most `limit` bytes long and
/// ends on a UTF-8 character boundary.
fn take_bytes_at_char_boundary(s: &str, limit: usize) -> &str {
    if s.len() <= limit {
        return s;
    }
    // Index 0 is always a boundary, so the search cannot come up empty.
    let cut = (0..=limit)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}
934
935fn trim_empty_lines(out: &mut VecDeque<&LineRecord>) {
936 while matches!(out.front(), Some(line) if line.raw.trim().is_empty()) {
937 out.pop_front();
938 }
939 while matches!(out.back(), Some(line) if line.raw.trim().is_empty()) {
940 out.pop_back();
941 }
942}
943
944fn condense_collected_lines(lines: &mut Vec<String>) {
945 if looks_like_diff_lines(lines) {
946 return;
947 }
948 const CONDENSED_THRESHOLD: usize = 200;
949 const HEAD_LINES: usize = 80;
950 const TAIL_LINES: usize = 40;
951
952 if lines.len() <= CONDENSED_THRESHOLD {
954 return;
955 }
956
957 let head_count = HEAD_LINES.min(lines.len());
959 let tail_count = TAIL_LINES.min(lines.len() - head_count);
960 let omitted_count = lines.len() - head_count - tail_count;
961 let total_lines = lines.len();
962
963 let mut condensed: Vec<String> = lines[..head_count].to_vec();
965
966 condensed.push(format!(
968 "… [+{} lines omitted ({} total). To read omitted ranges: unified_file offset={} limit={}]",
969 omitted_count,
970 total_lines,
971 head_count + 1,
972 omitted_count
973 ));
974
975 let tail_start = lines.len() - tail_count;
977 condensed.extend_from_slice(&lines[tail_start..]);
978
979 *lines = condensed;
981}
982
983fn condense_for_batch(lines: &mut Vec<String>) -> (bool, usize) {
986 if looks_like_diff_lines(lines) {
987 return (false, 0);
988 }
989 const HEAD_LINES: usize = 15;
990 const TAIL_LINES: usize = 5;
991
992 if lines.len() <= BATCH_CONDENSED_THRESHOLD {
993 return (false, 0);
994 }
995
996 let head_count = HEAD_LINES.min(lines.len());
997 let tail_count = TAIL_LINES.min(lines.len() - head_count);
998 let omitted_count = lines.len() - head_count - tail_count;
999
1000 let mut condensed: Vec<String> = lines[..head_count].to_vec();
1001 condensed.push(format!(
1002 "… [+{} lines omitted ({} total). To read omitted ranges: unified_file offset={} limit={}]",
1003 omitted_count,
1004 lines.len(),
1005 head_count + 1,
1006 omitted_count
1007 ));
1008
1009 let tail_start = lines.len() - tail_count;
1010 condensed.extend_from_slice(&lines[tail_start..]);
1011
1012 *lines = condensed;
1013 (true, omitted_count)
1014}
1015
1016fn looks_like_diff_lines(lines: &[String]) -> bool {
1017 let joined = lines.join("\n");
1018 looks_like_diff_content(&joined)
1019}
1020
1021mod defaults {
1022 pub fn offset() -> usize {
1023 1
1024 }
1025
1026 pub fn limit() -> usize {
1027 2000
1028 }
1029
1030 pub fn batch_limit() -> usize {
1031 500
1032 }
1033
1034 pub fn max_concurrency() -> usize {
1035 super::DEFAULT_MAX_CONCURRENCY
1036 }
1037
1038 pub fn ui_progress() -> bool {
1039 true
1040 }
1041
1042 pub fn max_levels() -> usize {
1043 0
1044 }
1045
1046 pub fn include_siblings() -> bool {
1047 false
1048 }
1049
1050 pub fn include_header() -> bool {
1051 true
1052 }
1053
1054 pub fn condense() -> bool {
1055 true
1056 }
1057}
1058
// Unit tests for the slice reader, argument deserialization, batch reads and
// batch condensing. File-based tests write to temporary files.
#[cfg(test)]
mod tests {
    // The attribute states that this glob import is expected to be unused.
    #[expect(unused_imports)]
    use super::indentation::*;
    use super::slice::*;
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    // A slice read starting at line 2 with limit 2 returns lines 2 and 3.
    #[tokio::test]
    async fn reads_requested_range() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "alpha")?;
        writeln!(temp, "beta")?;
        writeln!(temp, "gamma")?;

        let lines = read(temp.path(), 2, 2).await?.lines;
        assert_eq!(lines, vec!["beta".to_string(), "gamma".to_string()]);
        Ok(())
    }

    // `"indentation": false` deserializes to `None`.
    #[test]
    fn read_file_args_accepts_boolean_indentation() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": "slice",
            "indentation": false
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Slice));
        assert!(parsed.indentation.is_none());
    }

    // `"indentation": true` deserializes to `Some(..)`.
    #[test]
    fn read_file_args_accepts_true_indentation() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": "indentation",
            "indentation": true
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Indentation));
        assert!(parsed.indentation.is_some());
    }

    // An empty `mode` string falls back to slice mode.
    #[test]
    fn read_file_args_accepts_empty_mode() {
        let args = json!({
            "file_path": "/tmp/example.txt",
            "mode": ""
        });

        let parsed: ReadFileArgs = serde_json::from_value(args).unwrap();
        assert!(matches!(parsed.mode, ReadMode::Slice));
    }

    // With `condense: false` a 60-line file is returned without an omission marker.
    #[tokio::test]
    async fn read_file_handler_skips_condense_when_disabled() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        for idx in 0..60 {
            writeln!(temp, "line-{idx}")?;
        }

        let args = ReadFileArgs {
            file_path: temp.path().to_string_lossy().to_string(),
            offset: 1,
            limit: 2000,
            mode: ReadMode::Slice,
            indentation: None,
            max_tokens: None,
            condense: false,
        };
        let handler = ReadFileHandler;
        let content = handler.handle(args).await?;

        assert!(!content.contains("lines omitted"));
        assert_eq!(content.lines().count(), 60);
        Ok(())
    }

    // An offset past the last line is an error.
    #[tokio::test]
    async fn errors_when_offset_exceeds_length() {
        let mut temp = NamedTempFile::new().unwrap();
        writeln!(temp, "only").unwrap();

        let err = read(temp.path(), 3, 1).await;
        err.unwrap_err();
    }

    // Invalid UTF-8 bytes are decoded lossily to U+FFFD instead of failing.
    #[tokio::test]
    async fn reads_non_utf8_lines() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        temp.as_file_mut().write_all(b"\xff\xfe\nplain\n")?;

        let lines = read(temp.path(), 1, 2).await?.lines;
        let expected_first = format!("{}{}", '\u{FFFD}', '\u{FFFD}');
        assert_eq!(lines, vec![expected_first, "plain".to_string()]);
        Ok(())
    }

    // `\r\n` terminators are stripped completely.
    #[tokio::test]
    async fn trims_crlf_endings() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        write!(temp, "one\r\ntwo\r\n")?;

        let lines = read(temp.path(), 1, 2).await?.lines;
        assert_eq!(lines, vec!["one".to_string(), "two".to_string()]);
        Ok(())
    }

    // Stopping at the limit with lines remaining sets `has_more`.
    #[tokio::test]
    async fn respects_limit_even_with_more_lines() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "first")?;
        writeln!(temp, "second")?;
        writeln!(temp, "third")?;

        let result = read(temp.path(), 1, 2).await?;
        assert_eq!(
            result.lines,
            vec!["first".to_string(), "second".to_string()]
        );
        assert!(result.has_more);
        Ok(())
    }

    // Reading exactly up to end of file leaves `has_more` false.
    #[tokio::test]
    async fn reads_exact_limit_without_continuation_at_eof() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        writeln!(temp, "first")?;
        writeln!(temp, "second")?;

        let result = read(temp.path(), 1, 2).await?;
        assert_eq!(
            result.lines,
            vec!["first".to_string(), "second".to_string()]
        );
        assert!(!result.has_more);
        Ok(())
    }

    // Lines longer than `MAX_LINE_LENGTH` are cut to exactly that length.
    #[tokio::test]
    async fn truncates_lines_longer_than_max_length() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        let long_line = "x".repeat(MAX_LINE_LENGTH + 50);
        writeln!(temp, "{long_line}")?;

        let lines = read(temp.path(), 1, 1).await?.lines;
        let expected = "x".repeat(MAX_LINE_LENGTH);
        assert_eq!(lines, vec![expected]);
        Ok(())
    }

    // Two files without explicit ranges are both read and reported as successful.
    #[tokio::test]
    async fn batch_reads_multiple_files() -> Result<()> {
        let mut temp1 = NamedTempFile::new()?;
        writeln!(temp1, "file1_line1")?;
        writeln!(temp1, "file1_line2")?;

        let mut temp2 = NamedTempFile::new()?;
        writeln!(temp2, "file2_line1")?;
        writeln!(temp2, "file2_line2")?;

        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![
                BatchReadRequest {
                    file_path: temp1.path().to_string_lossy().to_string(),
                    range: None,
                    ranges: None,
                },
                BatchReadRequest {
                    file_path: temp2.path().to_string_lossy().to_string(),
                    range: None,
                    ranges: None,
                },
            ],
            max_concurrency: 2,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], true);
        assert_eq!(result["files_read"], 2);
        assert_eq!(result["files_succeeded"], 2);

        let content = result["content"].as_str().unwrap();
        assert!(content.contains("file1_line1"));
        assert!(content.contains("file2_line1"));
        Ok(())
    }

    // Several ranges of one file come back as one item with one entry per range, in order.
    #[tokio::test]
    async fn batch_reads_multiple_ranges_from_same_file() -> Result<()> {
        let mut temp = NamedTempFile::new()?;
        for i in 1..=20 {
            writeln!(temp, "line{i}")?;
        }

        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![BatchReadRequest {
                file_path: temp.path().to_string_lossy().to_string(),
                range: None,
                ranges: Some(vec![
                    ReadRange {
                        offset: 1,
                        limit: 3,
                        mode: ReadMode::Slice,
                        indentation: None,
                    },
                    ReadRange {
                        offset: 10,
                        limit: 3,
                        mode: ReadMode::Slice,
                        indentation: None,
                    },
                ]),
            }],
            max_concurrency: 4,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], true);

        let items = result["items"].as_array().unwrap();
        assert_eq!(items.len(), 1);

        let ranges = items[0]["ranges"].as_array().unwrap();
        assert_eq!(ranges.len(), 2);
        assert_eq!(ranges[0]["offset"], 1);
        assert_eq!(ranges[1]["offset"], 10);
        Ok(())
    }

    // A missing file yields `success: false` and a per-item error, not an `Err`.
    #[tokio::test]
    async fn batch_handles_missing_file_gracefully() -> Result<()> {
        let handler = ReadFileHandler;
        let args = BatchReadArgs {
            reads: vec![BatchReadRequest {
                file_path: "/nonexistent/path/file.txt".to_string(),
                range: None,
                ranges: None,
            }],
            max_concurrency: 1,
            ui_progress: false,
        };

        let result = handler.handle_batch(args).await?;
        assert_eq!(result["success"], false);

        let items = result["items"].as_array().unwrap();
        assert!(items[0]["error"].as_str().is_some());
        Ok(())
    }

    // Output at or below the batch threshold is left untouched.
    #[test]
    fn condense_for_batch_preserves_small_outputs() {
        let mut lines: Vec<String> = (1..=20).map(|i| format!("line{i}")).collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(!condensed);
        assert_eq!(omitted, 0);
        assert_eq!(lines.len(), 20);
    }

    // Output above the batch threshold is shortened and gains an omission marker.
    #[test]
    fn condense_for_batch_condenses_large_outputs() {
        let mut lines: Vec<String> = (1..=200).map(|i| format!("line{i}")).collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(condensed);
        assert!(omitted > 0);
        assert!(lines.len() < 200);
        assert!(lines.iter().any(|l| l.contains("omitted")));
    }

    // Lines that merely start with '+' or '-' are not mistaken for a diff.
    #[test]
    fn condense_for_batch_does_not_treat_plus_minus_text_as_diff() {
        let mut lines: Vec<String> = (1..=150)
            .map(|i| {
                if i % 2 == 0 {
                    format!("+ normal status line {i}")
                } else {
                    format!("- normal status line {i}")
                }
            })
            .collect();
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(condensed);
        assert!(omitted > 0);
    }

    // Diff-shaped input is returned uncondensed.
    #[test]
    fn condense_for_batch_preserves_actual_diff_output() {
        let mut lines = vec![
            "diff --git a/src/main.rs b/src/main.rs".to_string(),
            "index 1111111..2222222 100644".to_string(),
            "--- a/src/main.rs".to_string(),
            "+++ b/src/main.rs".to_string(),
            "@@ -1 +1 @@".to_string(),
            "-old".to_string(),
            "+new".to_string(),
        ];
        let (condensed, omitted) = condense_for_batch(&mut lines);
        assert!(!condensed);
        assert_eq!(omitted, 0);
    }
}
1368}