reading_liner/stream.rs
1use crate::{
2 index::{self, Index},
3 location::{Offset, line_column},
4};
5use std::io;
6
7/// A stream which can be used to convert between offsets and line-column locations.
8#[derive(Debug)]
9pub struct Stream<'index, Reader> {
10 reader: Reader,
11
12 base: usize, // For future use
13 index: &'index mut Index,
14 next_offset: Offset,
15 current_line: usize,
16}
17
18impl<'index, R> Stream<'index, R> {
19 pub fn new(reader: R, index: &'index mut Index) -> Self {
20 Self {
21 reader,
22 base: 0,
23 index,
24 next_offset: 0.into(),
25 current_line: 0,
26 }
27 }
28
29 pub fn get_ref(&self) -> &R {
30 &self.reader
31 }
32
33 #[inline]
34 pub fn base(&self) -> usize {
35 self.base
36 }
37
38 /// Immutable query to further query offsets and line-column locations
39 #[inline]
40 pub fn query(&self) -> index::Query<'_> {
41 self.index.query()
42 }
43
44 #[inline]
45 pub fn get_index(&self) -> &Index {
46 &self.index
47 }
48}
49
50impl<'index, R: io::Read> Stream<'index, R> {
51 /// Read length
52 #[inline]
53 pub fn read_len(&self) -> usize {
54 self.next_offset.raw()
55 }
56
57 /// Try to get more bytes and update states
58 fn forward(&mut self, buf: &mut [u8]) -> io::Result<usize> {
59 let n = self.reader.read(buf)?;
60
61 for (offset, b) in buf.iter().take(n).enumerate() {
62 if *b == b'\n' {
63 self.current_line += 1;
64 self.index.add_next_line(self.next_offset + offset + 1); // next line begin
65 continue;
66 }
67 }
68
69 // reached EoF, try to add fake ending
70 if !buf.is_empty() && n == 0 {
71 // TODO
72 match self.index.end() {
73 Some(end) if end != self.next_offset => {
74 self.index.add_next_line(self.next_offset);
75 }
76 None => self.index.add_next_line(self.next_offset),
77 _ => {}
78 }
79 }
80
81 self.next_offset += n;
82 Ok(n)
83 }
84
85 /// Locate the (line, column) position for a given byte `offset`.
86 ///
87 /// NOTE: this method may cause extra reading when the offset input cannot find a location.
88 ///
89 /// This method first resolves the line index via [`locate_line`], then
90 /// computes the column by subtracting the starting offset of that line.
91 ///
92 /// # Parameters
93 /// - `offset`: The target byte offset.
94 /// - `buf`: A temporary buffer used for incremental reading.
95 ///
96 /// # Returns
97 /// - `Ok(ZeroBased(line, column))` if the offset is within bounds.
98 /// - `Err` if the offset exceeds EOF (propagated from [`locate_line`]).
99 ///
100 /// # Invariants
101 /// - The internal index always contains a valid starting offset for every line.
102 /// - Therefore, `line_offset(line)` must succeed for any valid `line`.
103 ///
104 /// # Notes
105 /// - Both line and column are zero-based.
106 /// - Column is computed in **bytes**, not characters (UTF-8 aware handling is not performed here).
107 pub fn locate(&mut self, offset: Offset, buf: &mut [u8]) -> io::Result<line_column::ZeroBased> {
108 let line = self.locate_line(offset, buf)?;
109 let line_offset = self.query().line_offset(line).unwrap();
110 let col = offset - line_offset;
111 Ok((line, col.raw()).into())
112 }
113
114 /// Locate the line index for a given byte `offset`.
115 ///
116 /// This method performs an incremental lookup:
117 /// it first queries the existing line index, and if the offset
118 /// is not covered, it reads more data and extends the index.
119 /// This method may cause extra reading when the offset input cannot find a location.
120 ///
121 /// # Invariants
122 /// - The internal index is non-empty and ends with a sentinel EOF offset.
123 ///
124 /// # Errors
125 /// Returns an error if `offset` exceeds EOF.
126 pub fn locate_line(&mut self, offset: Offset, buf: &mut [u8]) -> io::Result<usize> {
127 let mut begin = 0;
128 loop {
129 // Invariant: index is non-empty and ends with EOF.
130 // Therefore, begin <= query.count() always holds, and range_from(begin..)
131 // is guaranteed to be a valid slice (possibly containing only EOF).
132 if let Some(i) = self.query().range_from(begin..).locate_line(offset) {
133 break Ok(i); // look here the returned `i` is already `begin` based, there's no need to add an extra begin
134 }
135 begin = self.index.count();
136
137 if self.forward(buf)? == 0 {
138 break Err(io_error("Invalid offset, exceed EOF"));
139 }
140 }
141 }
142
143 /// Encode a (line, column) location into a byte `Offset`.
144 ///
145 /// This method may incrementally extend the internal line index by reading
146 /// additional data if the requested line is not yet available.
147 ///
148 /// # Behavior
149 /// - If the line is already indexed, the offset is computed directly.
150 /// - Otherwise, more data is read and the index is extended until the line
151 /// becomes available or EOF is reached.
152 ///
153 /// # Returns
154 /// - `Ok(offset)` if the position can be resolved.
155 /// - `Err` if the line index exceeds EOF.
156 ///
157 /// # Notes
158 /// - Column is interpreted as a **byte offset** relative to the start of the line.
159 /// - This method does **not** validate whether the column lies within the bounds
160 /// of the line.
161 pub fn encode(
162 &mut self,
163 line_index: line_column::ZeroBased,
164 buf: &mut [u8],
165 ) -> io::Result<Offset> {
166 let (line, col) = line_index.raw();
167 loop {
168 if let Some(offset) = self.query().line_offset(line) {
169 break Ok(offset + col);
170 }
171
172 if self.forward(buf)? == 0 {
173 break Err(io_error(format!("Invalid line index: ({}, {})", line, col)));
174 }
175 }
176 }
177
178 /// Drain the reader, consume the reader
179 pub fn drain(&mut self, buf: &mut [u8]) -> io::Result<()> {
180 loop {
181 let n = self.forward(buf)?;
182 if n == 0 {
183 return Ok(());
184 }
185 }
186 }
187}
188
189/// You can use [Stream] as a normal [io::Read] and recording index at the same time.
190impl<'index, R: io::Read> io::Read for Stream<'index, R> {
191 fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
192 self.forward(buf)
193 }
194}
195
/// Builds an [`io::Error`] of kind [`io::ErrorKind::Other`] whose payload is
/// `msg` rendered to a string.
#[inline]
fn io_error<S>(msg: S) -> io::Error
where
    S: ToString,
{
    let text = msg.to_string();
    io::Error::new(io::ErrorKind::Other, text)
}
200
#[cfg(test)]
mod test {
    #![allow(unused_must_use)]
    use std::io::{BufReader, Read};

    use super::*;

    /// Reads a short multi-line string through a buffered [`Stream`] and checks
    /// that byte offset 20 resolves to line 2, column 5 (both zero-based).
    #[test]
    fn test_stream_str() {
        let text = "\nThis is s sim\nple test that\n I have to verify stream reader!";

        let mut index = Index::new();
        // Alternative kept for reference: fill the index by draining the stream directly.
        // let mut stream = Stream::new(text.as_bytes(), &mut index);
        // let mut buf = vec![b'\0'; 10];
        // stream.drain(&mut buf);

        let mut buffered = BufReader::new(Stream::new(text.as_bytes(), &mut index));
        let mut sink = String::new();
        buffered.read_to_string(&mut sink).unwrap();

        let location = buffered.get_ref().query().locate(Offset(20));
        assert!(location.is_some());
        assert_eq!(location.unwrap(), (2, 5).into());
    }

    // Disabled: opens `Cargo.toml` from the working directory and only prints the
    // results with `dbg!` instead of asserting on them.
    //
    // #[test]
    // fn test_stream_file() {
    //     let file = std::fs::File::open("Cargo.toml").expect("Failed to open file");
    //     let mut index = Index::new();
    //     let mut stream = Stream::new(file, &mut index);
    //     let mut buf = vec![b'\0'; 10];
    //     let ans = stream.locate(Offset(50), &mut buf);
    //     dbg!(ans);
    //
    //     let ans = stream.locate(Offset(20), &mut buf);
    //     dbg!(ans);
    // }
}