hedl_stream/
reader.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Line reader for streaming parser.
19//!
20//! Provides buffered line-by-line reading with line number tracking, peek support,
21//! and the ability to push back lines for re-parsing.
22//!
23//! This module is primarily an internal implementation detail of the streaming
24//! parser, but is exposed for advanced use cases.
25
26use crate::error::{StreamError, StreamResult};
27use std::io::{BufRead, BufReader, Read};
28
/// Buffered line reader with line number tracking.
///
/// Reads input line-by-line, stripping the line terminator (`\n` or `\r\n`)
/// from each returned line and tracking the current line number for error
/// reporting. A `\r` that is not immediately followed by `\n` is kept as part
/// of the line content.
///
/// # Features
///
/// - **Buffered I/O**: Efficient reading with configurable buffer size
/// - **Line Number Tracking**: Automatic tracking for error messages
/// - **Peek Support**: Look ahead one line without consuming it
/// - **Push Back**: Re-read a previously consumed line
/// - **Iterator**: Standard Rust iterator interface
///
/// Peeking and pushing back share a single one-line slot: pushing a line back
/// while another line is pending (peeked or pushed back) replaces the pending
/// line.
///
/// # Examples
///
/// ## Basic Line Reading
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2\nline3";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
/// assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
/// assert_eq!(reader.next_line().unwrap(), None);
/// ```
///
/// ## Peeking Ahead
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// // Peek without consuming
/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
///
/// // Now consume it
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// ```
///
/// ## Push Back for Re-parsing
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// let line = reader.next_line().unwrap().unwrap();
/// assert_eq!(line, (1, "line1".to_string()));
///
/// // Push it back
/// reader.push_back(line.0, line.1);
///
/// // Read it again
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// ```
#[derive(Debug)]
pub struct LineReader<R: Read> {
    /// Buffered wrapper around the caller-supplied input.
    reader: BufReader<R>,
    /// Number of lines read from `reader` so far (0 before the first read).
    line_number: usize,
    /// Scratch buffer reused for every read to avoid regrowing per line.
    buffer: String,
    /// One-line slot holding a peeked or pushed-back line, if any.
    peeked: Option<(usize, String)>,
}
100
101impl<R: Read> LineReader<R> {
102    /// Create a new line reader.
103    pub fn new(reader: R) -> Self {
104        Self {
105            reader: BufReader::new(reader),
106            line_number: 0,
107            buffer: String::new(),
108            peeked: None,
109        }
110    }
111
112    /// Create with a specific buffer capacity.
113    pub fn with_capacity(reader: R, capacity: usize) -> Self {
114        Self {
115            reader: BufReader::with_capacity(capacity, reader),
116            line_number: 0,
117            buffer: String::new(),
118            peeked: None,
119        }
120    }
121
122    /// Get the current line number.
123    #[inline]
124    pub fn line_number(&self) -> usize {
125        self.line_number
126    }
127
128    /// Read the next line.
129    pub fn next_line(&mut self) -> StreamResult<Option<(usize, String)>> {
130        // Return peeked line if available
131        if let Some(peeked) = self.peeked.take() {
132            return Ok(Some(peeked));
133        }
134
135        self.buffer.clear();
136
137        match self.reader.read_line(&mut self.buffer) {
138            Ok(0) => Ok(None), // EOF
139            Ok(_) => {
140                self.line_number += 1;
141
142                // Remove trailing newline
143                if self.buffer.ends_with('\n') {
144                    self.buffer.pop();
145                    if self.buffer.ends_with('\r') {
146                        self.buffer.pop();
147                    }
148                }
149
150                Ok(Some((self.line_number, self.buffer.clone())))
151            }
152            Err(e) => Err(StreamError::Io(e)),
153        }
154    }
155
156    /// Peek at the next line without consuming it.
157    pub fn peek_line(&mut self) -> StreamResult<Option<&(usize, String)>> {
158        if self.peeked.is_none() {
159            self.peeked = self.read_line_internal()?;
160        }
161        Ok(self.peeked.as_ref())
162    }
163
164    /// Push a line back to be read again.
165    #[inline]
166    pub fn push_back(&mut self, line_num: usize, line: String) {
167        self.peeked = Some((line_num, line));
168    }
169
170    fn read_line_internal(&mut self) -> StreamResult<Option<(usize, String)>> {
171        self.buffer.clear();
172
173        match self.reader.read_line(&mut self.buffer) {
174            Ok(0) => Ok(None),
175            Ok(_) => {
176                self.line_number += 1;
177
178                if self.buffer.ends_with('\n') {
179                    self.buffer.pop();
180                    if self.buffer.ends_with('\r') {
181                        self.buffer.pop();
182                    }
183                }
184
185                Ok(Some((self.line_number, self.buffer.clone())))
186            }
187            Err(e) => Err(StreamError::Io(e)),
188        }
189    }
190}
191
192impl<R: Read> Iterator for LineReader<R> {
193    type Item = StreamResult<(usize, String)>;
194
195    fn next(&mut self) -> Option<Self::Item> {
196        match self.next_line() {
197            Ok(Some(line)) => Some(Ok(line)),
198            Ok(None) => None,
199            Err(e) => Some(Err(e)),
200        }
201    }
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Builds a reader over an in-memory string.
    fn reader_over(input: &str) -> LineReader<Cursor<&str>> {
        LineReader::new(Cursor::new(input))
    }

    /// Shorthand for the `Some((number, text))` value returned for a line.
    fn line(number: usize, text: &str) -> Option<(usize, String)> {
        Some((number, text.to_string()))
    }

    /// Drains a reader through its `Iterator` impl, failing the test on any
    /// error instead of silently skipping it.
    fn collect_lines(reader: LineReader<Cursor<&str>>) -> Vec<(usize, String)> {
        reader
            .map(|item| item.expect("reading from an in-memory cursor should not fail"))
            .collect()
    }

    #[test]
    fn test_read_lines() {
        let mut reader = reader_over("line1\nline2\nline3");

        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), line(3, "line3"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_peek_and_push_back() {
        let mut reader = reader_over("line1\nline2");

        let peeked = reader.peek_line().unwrap().cloned();
        assert_eq!(peeked, line(1, "line1"));

        // Consuming after a peek returns the peeked line.
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));

        // A pushed-back line is returned again.
        reader.push_back(1, "line1".to_string());
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));

        // Reading then resumes with the rest of the input.
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Empty input tests ====================

    #[test]
    fn test_empty_input() {
        let mut reader = reader_over("");
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_single_empty_line() {
        let mut reader = reader_over("\n");
        assert_eq!(reader.next_line().unwrap(), line(1, ""));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_multiple_empty_lines() {
        let mut reader = reader_over("\n\n\n");
        assert_eq!(reader.next_line().unwrap(), line(1, ""));
        assert_eq!(reader.next_line().unwrap(), line(2, ""));
        assert_eq!(reader.next_line().unwrap(), line(3, ""));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Line ending tests ====================

    #[test]
    fn test_crlf_line_endings() {
        let mut reader = reader_over("line1\r\nline2\r\nline3");
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), line(3, "line3"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_mixed_line_endings() {
        let mut reader = reader_over("line1\nline2\r\nline3\nline4");
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), line(3, "line3"));
        assert_eq!(reader.next_line().unwrap(), line(4, "line4"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_trailing_newline() {
        let mut reader = reader_over("line1\n");
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_no_trailing_newline() {
        let mut reader = reader_over("line1");
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Line number tests ====================

    #[test]
    fn test_line_number_initial() {
        let reader = reader_over("test");
        assert_eq!(reader.line_number(), 0);
    }

    #[test]
    fn test_line_number_after_read() {
        let mut reader = reader_over("line1\nline2\nline3");

        for expected in 1..=3 {
            reader.next_line().unwrap();
            assert_eq!(reader.line_number(), expected);
        }
    }

    #[test]
    fn test_line_number_after_eof() {
        let mut reader = reader_over("line1");

        reader.next_line().unwrap();
        assert_eq!(reader.line_number(), 1);

        // Hitting EOF must not advance the counter.
        assert_eq!(reader.next_line().unwrap(), None);
        assert_eq!(reader.line_number(), 1);
    }

    // ==================== Peek tests ====================

    #[test]
    fn test_peek_empty_input() {
        let mut reader = reader_over("");
        assert_eq!(reader.peek_line().unwrap(), None);
    }

    #[test]
    fn test_peek_multiple_times() {
        let mut reader = reader_over("line1\nline2");

        // Peeking repeatedly keeps returning the same line.
        for _ in 0..3 {
            assert_eq!(reader.peek_line().unwrap().cloned(), line(1, "line1"));
        }

        // Consume it.
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));

        // The next peek moves on to the second line.
        assert_eq!(reader.peek_line().unwrap().cloned(), line(2, "line2"));
    }

    #[test]
    fn test_peek_then_read() {
        let mut reader = reader_over("line1\nline2");

        reader.peek_line().unwrap();
        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
    }

    // ==================== Push back tests ====================

    #[test]
    fn test_push_back_with_different_line_number() {
        let mut reader = reader_over("line1\nline2");

        reader.next_line().unwrap(); // line1
        reader.push_back(99, "pushed".to_string());

        // The pushed line keeps the number it was pushed with.
        assert_eq!(reader.next_line().unwrap(), line(99, "pushed"));

        // The reader's own numbering is unaffected by the pushed line.
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
    }

    #[test]
    fn test_push_back_overwrites_peek() {
        let mut reader = reader_over("line1\nline2");

        reader.peek_line().unwrap(); // Peek line1
        reader.push_back(42, "pushed".to_string());

        assert_eq!(reader.next_line().unwrap(), line(42, "pushed"));

        // The peeked line1 was replaced, so reading resumes at line2.
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
    }

    // ==================== Iterator tests ====================

    #[test]
    fn test_iterator() {
        let lines = collect_lines(reader_over("line1\nline2\nline3"));

        assert_eq!(
            lines,
            vec![
                (1, "line1".to_string()),
                (2, "line2".to_string()),
                (3, "line3".to_string()),
            ]
        );
    }

    #[test]
    fn test_iterator_empty() {
        let lines = collect_lines(reader_over(""));
        assert!(lines.is_empty());
    }

    #[test]
    fn test_iterator_single_line() {
        let lines = collect_lines(reader_over("single"));
        assert_eq!(lines, vec![(1, "single".to_string())]);
    }

    // ==================== With capacity tests ====================

    #[test]
    fn test_with_capacity() {
        let mut reader = LineReader::with_capacity(Cursor::new("line1\nline2"), 1024);

        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_with_small_capacity() {
        // A one-byte buffer forces every line to span many refills.
        let mut reader = LineReader::with_capacity(Cursor::new("line1\nline2"), 1);

        assert_eq!(reader.next_line().unwrap(), line(1, "line1"));
        assert_eq!(reader.next_line().unwrap(), line(2, "line2"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Unicode tests ====================

    #[test]
    fn test_unicode_content() {
        let mut reader = reader_over("你好\n世界\n🎉");

        assert_eq!(reader.next_line().unwrap(), line(1, "你好"));
        assert_eq!(reader.next_line().unwrap(), line(2, "世界"));
        assert_eq!(reader.next_line().unwrap(), line(3, "🎉"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_unicode_line_with_emoji() {
        let mut reader = reader_over("Hello 🌍 World");
        assert_eq!(reader.next_line().unwrap(), line(1, "Hello 🌍 World"));
    }

    // ==================== Whitespace tests ====================

    #[test]
    fn test_line_with_spaces() {
        let mut reader = reader_over("  indented  \n\ttabbed\t");

        // Leading and trailing whitespace is content, not terminator.
        assert_eq!(reader.next_line().unwrap(), line(1, "  indented  "));
        assert_eq!(reader.next_line().unwrap(), line(2, "\ttabbed\t"));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_only_whitespace_lines() {
        let mut reader = reader_over("   \n\t\t\n  \t  ");

        assert_eq!(reader.next_line().unwrap(), line(1, "   "));
        assert_eq!(reader.next_line().unwrap(), line(2, "\t\t"));
        assert_eq!(reader.next_line().unwrap(), line(3, "  \t  "));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Long line tests ====================

    #[test]
    fn test_long_line() {
        // Longer than the default buffer, so the line spans several refills.
        let long_line = "a".repeat(10000);
        let mut reader = LineReader::new(Cursor::new(long_line.clone()));
        assert_eq!(reader.next_line().unwrap(), Some((1, long_line)));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_many_lines() {
        let lines: Vec<String> = (0..1000).map(|i| format!("line{}", i)).collect();
        let input = lines.join("\n");
        let mut reader = LineReader::new(Cursor::new(input));

        for (index, expected) in lines.iter().enumerate() {
            let result = reader.next_line().unwrap();
            assert_eq!(result, Some((index + 1, expected.clone())));
        }
        assert_eq!(reader.next_line().unwrap(), None);
    }
}