llm_coding_tools_core/operations/
read.rs

1//! File reading operation.
2
3use crate::error::{ToolError, ToolResult};
4use crate::fs;
5use crate::output::ToolOutput;
6use crate::path::PathResolver;
7use crate::util::{truncate_line, ESTIMATED_CHARS_PER_LINE};
8use memchr::memchr;
9use std::borrow::Cow;
10use std::fmt::Write;
11
/// Length limit handed to `truncate_line` for every emitted line.
// NOTE(review): whether this counts bytes or characters depends on
// `truncate_line` in `crate::util` — confirm there.
const MAX_LINE_LENGTH: usize = 2000;
13
/// Removes a single trailing carriage return (`\r`) from `line`, if present.
///
/// Used so that CRLF-terminated lines, once split on `\n`, do not keep the
/// leftover `\r`. At most one byte is removed; a `\r` anywhere else in the
/// slice is left untouched.
#[inline]
fn strip_cr(line: &[u8]) -> &[u8] {
    match line.split_last() {
        Some((&b'\r', head)) => head,
        _ => line,
    }
}
19
20/// Processes a single line, appending it to output with optional line numbers.
21#[inline]
22fn process_line<const LINE_NUMBERS: bool>(
23    line_bytes: &[u8],
24    line_number: usize,
25    output: &mut String,
26    lines_output: &mut usize,
27) {
28    let line_bytes = strip_cr(line_bytes);
29    let content: Cow<'_, str> = String::from_utf8_lossy(line_bytes);
30    let (truncated_content, _) = truncate_line(&content, MAX_LINE_LENGTH);
31
32    if *lines_output > 0 {
33        output.push('\n');
34    }
35
36    if LINE_NUMBERS {
37        let _ = write!(output, "L{}: {}", line_number, truncated_content);
38    } else {
39        output.push_str(truncated_content);
40    }
41
42    *lines_output += 1;
43}
44
45/// Reads a file and returns formatted content, optionally with line numbers.
46///
47/// When `LINE_NUMBERS` is `true`, each line is prefixed with `L{number}: `.
48/// When `false`, raw content is returned without prefixes.
49#[maybe_async::maybe_async]
50pub async fn read_file<R: PathResolver, const LINE_NUMBERS: bool>(
51    resolver: &R,
52    file_path: &str,
53    offset: usize,
54    limit: usize,
55) -> ToolResult<ToolOutput> {
56    // Conditional trait import for consume() method
57    #[cfg(feature = "blocking")]
58    use std::io::BufRead as _;
59    #[cfg(not(feature = "blocking"))]
60    use tokio::io::AsyncBufReadExt as _;
61
62    if offset == 0 {
63        return Err(ToolError::OutOfBounds(
64            "offset must be >= 1 (1-indexed)".into(),
65        ));
66    }
67    if limit == 0 {
68        return Err(ToolError::OutOfBounds("limit must be >= 1".into()));
69    }
70
71    let path = resolver.resolve(file_path)?;
72    let buf_capacity = (limit * ESTIMATED_CHARS_PER_LINE).next_power_of_two();
73    let mut reader = fs::open_buffered(&path, buf_capacity).await?;
74
75    let estimated_capacity = limit * ESTIMATED_CHARS_PER_LINE;
76    let mut output = String::with_capacity(estimated_capacity);
77    // Holds a partial line that spans multiple buffers.
78    let mut overflow: Vec<u8> = Vec::new();
79    let mut line_number = 0usize;
80    let mut lines_output = 0usize;
81
82    // Stream buffered chunks, splitting into lines as we go.
83    loop {
84        let buf = reader.fill_buf().await?;
85        // Flush any trailing partial line at EOF.
86        if buf.is_empty() {
87            if !overflow.is_empty() {
88                line_number += 1;
89                if line_number >= offset && lines_output < limit {
90                    process_line::<LINE_NUMBERS>(
91                        &overflow,
92                        line_number,
93                        &mut output,
94                        &mut lines_output,
95                    );
96                }
97            }
98            break;
99        }
100
101        let mut pos = 0;
102        while pos < buf.len() {
103            // Fast newline search to delimit lines.
104            if let Some(newline_offset) = memchr(b'\n', &buf[pos..]) {
105                let newline_pos = pos + newline_offset;
106                line_number += 1;
107
108                // Only emit lines within the requested window.
109                if line_number >= offset && lines_output < limit {
110                    if overflow.is_empty() {
111                        // Fast path: line is fully in this buffer.
112                        process_line::<LINE_NUMBERS>(
113                            &buf[pos..newline_pos],
114                            line_number,
115                            &mut output,
116                            &mut lines_output,
117                        );
118                    } else {
119                        // Slow path: prepend buffered fragment.
120                        overflow.extend_from_slice(&buf[pos..newline_pos]);
121                        process_line::<LINE_NUMBERS>(
122                            &overflow,
123                            line_number,
124                            &mut output,
125                            &mut lines_output,
126                        );
127                        overflow.clear();
128                    }
129                } else if !overflow.is_empty() {
130                    overflow.clear();
131                }
132
133                pos = newline_pos + 1;
134
135                if lines_output >= limit {
136                    break;
137                }
138            } else {
139                overflow.extend_from_slice(&buf[pos..]);
140                pos = buf.len();
141            }
142        }
143
144        reader.consume(pos);
145
146        if lines_output >= limit {
147            break;
148        }
149    }
150
151    if line_number < offset {
152        return Err(ToolError::OutOfBounds(format!(
153            "offset {} exceeds file length of {} lines",
154            offset, line_number
155        )));
156    }
157
158    Ok(ToolOutput::new(output))
159}
160
#[cfg(test)]
mod tests {
    use super::*;
    use crate::path::AbsolutePathResolver;
    use std::io::Write as _;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and runs `read_file` on it
    /// with the given `offset` and `limit`.
    #[maybe_async::maybe_async]
    async fn read_temp_file<const LINE_NUMBERS: bool>(
        content: &[u8],
        offset: usize,
        limit: usize,
    ) -> ToolResult<ToolOutput> {
        let resolver = AbsolutePathResolver;
        let mut scratch = NamedTempFile::new().unwrap();
        scratch.write_all(content).unwrap();
        let scratch_path = scratch.path().to_str().unwrap();
        read_file::<_, LINE_NUMBERS>(&resolver, scratch_path, offset, limit).await
    }

    // Two newline-terminated lines come back prefixed and newline-joined.
    #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))]
    async fn reads_basic_file_with_line_numbers() {
        let output = read_temp_file::<true>(b"hello\nworld\n", 1, 2000)
            .await
            .unwrap();
        assert_eq!(output.content, "L1: hello\nL2: world");
    }

    // Same input without prefixes: raw lines, no trailing newline.
    #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))]
    async fn reads_basic_file_without_line_numbers() {
        let output = read_temp_file::<false>(b"hello\nworld\n", 1, 2000)
            .await
            .unwrap();
        assert_eq!(output.content, "hello\nworld");
    }

    // Offsets are 1-indexed, so zero must be rejected.
    #[maybe_async::test(feature = "blocking", async(not(feature = "blocking"), tokio::test))]
    async fn errors_on_offset_zero() {
        let failure = read_temp_file::<true>(b"test\n", 0, 10).await.unwrap_err();
        assert!(matches!(failure, ToolError::OutOfBounds(_)));
    }
}