Skip to main content

reading_liner/
stream.rs

1use crate::{
2    index::{self, Index},
3    location::{Offset, line_column},
4};
5use std::io;
6
7/// A stream which can be used to convert between offsets and line-column locations.
8#[derive(Debug)]
9pub struct Stream<'index, Reader> {
10    reader: Reader,
11
12    base: usize, // For future use
13    index: &'index mut Index,
14    next_offset: Offset,
15    current_line: usize,
16}
17
18impl<'index, R> Stream<'index, R> {
19    pub fn new(reader: R, index: &'index mut Index) -> Self {
20        Self {
21            reader,
22            base: 0,
23            index,
24            next_offset: 0.into(),
25            current_line: 0,
26        }
27    }
28
29    pub fn get_ref(&self) -> &R {
30        &self.reader
31    }
32
33    #[inline]
34    pub fn base(&self) -> usize {
35        self.base
36    }
37
38    /// Immutable query to further query offsets and line-column locations
39    #[inline]
40    pub fn query(&self) -> index::Query<'_> {
41        self.index.query()
42    }
43
44    #[inline]
45    pub fn get_index(&self) -> &Index {
46        &self.index
47    }
48}
49
50impl<'index, R: io::Read> Stream<'index, R> {
51    /// Read length
52    #[inline]
53    pub fn read_len(&self) -> usize {
54        self.next_offset.raw()
55    }
56
57    /// Try to get more bytes and update states
58    fn forward(&mut self, buf: &mut [u8]) -> io::Result<usize> {
59        let n = self.reader.read(buf)?;
60
61        for (offset, b) in buf.iter().take(n).enumerate() {
62            if *b == b'\n' {
63                self.current_line += 1;
64                self.index.add_next_line(self.next_offset + offset + 1); // next line begin
65                continue;
66            }
67        }
68
69        // reached EoF, try to add fake ending
70        if !buf.is_empty() && n == 0 {
71            // TODO
72            match self.index.end() {
73                Some(end) if end != self.next_offset => {
74                    self.index.add_next_line(self.next_offset);
75                }
76                None => self.index.add_next_line(self.next_offset),
77                _ => {}
78            }
79        }
80
81        self.next_offset += n;
82        Ok(n)
83    }
84
85    /// Locate the (line, column) position for a given byte `offset`.  
86    ///
87    /// NOTE: this method may cause extra reading when the offset input cannot find a location.
88    ///
89    /// This method first resolves the line index via [`locate_line`], then
90    /// computes the column by subtracting the starting offset of that line.
91    ///
92    /// # Parameters
93    /// - `offset`: The target byte offset.
94    /// - `buf`: A temporary buffer used for incremental reading.
95    ///
96    /// # Returns
97    /// - `Ok(ZeroBased(line, column))` if the offset is within bounds.
98    /// - `Err` if the offset exceeds EOF (propagated from [`locate_line`]).
99    ///
100    /// # Invariants
101    /// - The internal index always contains a valid starting offset for every line.
102    /// - Therefore, `line_offset(line)` must succeed for any valid `line`.
103    ///
104    /// # Notes
105    /// - Both line and column are zero-based.
106    /// - Column is computed in **bytes**, not characters (UTF-8 aware handling is not performed here).
107    pub fn locate(&mut self, offset: Offset, buf: &mut [u8]) -> io::Result<line_column::ZeroBased> {
108        let line = self.locate_line(offset, buf)?;
109        let line_offset = self.query().line_offset(line).unwrap();
110        let col = offset - line_offset;
111        Ok((line, col.raw()).into())
112    }
113
114    /// Locate the line index for a given byte `offset`.
115    ///
116    /// This method performs an incremental lookup:
117    /// it first queries the existing line index, and if the offset
118    /// is not covered, it reads more data and extends the index.
119    /// This method may cause extra reading when the offset input cannot find a location.
120    ///
121    /// # Invariants
122    /// - The internal index is non-empty and ends with a sentinel EOF offset.
123    ///
124    /// # Errors
125    /// Returns an error if `offset` exceeds EOF.
126    pub fn locate_line(&mut self, offset: Offset, buf: &mut [u8]) -> io::Result<usize> {
127        let mut begin = 0;
128        loop {
129            // Invariant: index is non-empty and ends with EOF.
130            // Therefore, begin <= query.count() always holds, and range_from(begin..)
131            // is guaranteed to be a valid slice (possibly containing only EOF).
132            if let Some(i) = self.query().range_from(begin..).locate_line(offset) {
133                break Ok(i); // look here the returned `i` is already `begin` based, there's no need to add an extra begin
134            }
135            begin = self.index.count();
136
137            if self.forward(buf)? == 0 {
138                break Err(io_error("Invalid offset, exceed EOF"));
139            }
140        }
141    }
142
143    /// Encode a (line, column) location into a byte `Offset`.
144    ///
145    /// This method may incrementally extend the internal line index by reading
146    /// additional data if the requested line is not yet available.
147    ///
148    /// # Behavior
149    /// - If the line is already indexed, the offset is computed directly.
150    /// - Otherwise, more data is read and the index is extended until the line
151    ///   becomes available or EOF is reached.
152    ///
153    /// # Returns
154    /// - `Ok(offset)` if the position can be resolved.
155    /// - `Err` if the line index exceeds EOF.
156    ///
157    /// # Notes
158    /// - Column is interpreted as a **byte offset** relative to the start of the line.
159    /// - This method does **not** validate whether the column lies within the bounds
160    ///   of the line.
161    pub fn encode(
162        &mut self,
163        line_index: line_column::ZeroBased,
164        buf: &mut [u8],
165    ) -> io::Result<Offset> {
166        let (line, col) = line_index.raw();
167        loop {
168            if let Some(offset) = self.query().line_offset(line) {
169                break Ok(offset + col);
170            }
171
172            if self.forward(buf)? == 0 {
173                break Err(io_error(format!("Invalid line index: ({}, {})", line, col)));
174            }
175        }
176    }
177
178    /// Drain the reader, consume the reader
179    pub fn drain(&mut self, buf: &mut [u8]) -> io::Result<()> {
180        loop {
181            let n = self.forward(buf)?;
182            if n == 0 {
183                return Ok(());
184            }
185        }
186    }
187}
188
189/// You can use [Stream] as a normal [io::Read] and recording index at the same time.
190impl<'index, R: io::Read> io::Read for Stream<'index, R> {
191    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
192        self.forward(buf)
193    }
194}
195
/// Build an [`io::Error`] of kind [`io::ErrorKind::Other`] whose payload is the
/// string form of `msg`.
#[inline]
fn io_error<S>(msg: S) -> io::Error
where
    S: ToString,
{
    let message = msg.to_string();
    io::Error::new(io::ErrorKind::Other, message)
}
200
#[cfg(test)]
mod test {
    #![allow(unused_must_use)]
    use std::io::{BufReader, Read};

    use super::*;

    /// Reads an in-memory text through a `BufReader`-wrapped [`Stream`] and checks
    /// that a known offset resolves to the expected zero-based location.
    #[test]
    fn test_stream_str() {
        let text = "\nThis is s sim\nple test that\n I have to verify stream reader!";

        let mut index = Index::new();
        // Alternative setup that drives the stream directly:
        // let mut stream = Stream::new(text.as_bytes(), &mut index);
        // let mut buf = vec![b'\0'; 10];
        // stream.drain(&mut buf);

        let mut buffered = BufReader::new(Stream::new(text.as_bytes(), &mut index));
        let mut contents = String::new();
        buffered.read_to_string(&mut contents).unwrap();

        // Newlines sit at offsets 0 and 14, so line 2 starts at offset 15 and
        // offset 20 is column 5 of that line.
        let located = buffered.get_ref().query().locate(Offset(20));
        assert!(located.is_some());
        assert_eq!(located.unwrap(), (2, 5).into());
    }

    // NOTE(review): disabled in the original; the reason is not recorded.
    // #[test]
    // fn test_stream_file() {
    //     let file = std::fs::File::open("Cargo.toml").expect("Failed to open file");
    //     let mut index = Index::new();
    //     let mut stream = Stream::new(file, &mut index);
    //     let mut buf = vec![b'\0'; 10];
    //     let ans = stream.locate(Offset(50), &mut buf);
    //     dbg!(ans);

    //     let ans = stream.locate(Offset(20), &mut buf);
    //     dbg!(ans);
    // }
}