hedl_stream/
reader.rs

1// Dweve HEDL - Hierarchical Entity Data Language
2//
3// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
4//
5// SPDX-License-Identifier: Apache-2.0
6//
7// Licensed under the Apache License, Version 2.0 (the "License");
8// you may not use this file except in compliance with the License.
9// You may obtain a copy of the License in the LICENSE file at the
10// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
11//
12// Unless required by applicable law or agreed to in writing, software
13// distributed under the License is distributed on an "AS IS" BASIS,
14// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15// See the License for the specific language governing permissions and
16// limitations under the License.
17
18//! Line reader for streaming parser.
19//!
20//! Provides buffered line-by-line reading with line number tracking, peek support,
21//! and the ability to push back lines for re-parsing.
22//!
23//! This module is primarily an internal implementation detail of the streaming
24//! parser, but is exposed for advanced use cases.
25
26use crate::error::{StreamError, StreamResult};
27use std::io::{BufRead, BufReader, Read};
28
/// Locates `needle` in `haystack`.
///
/// Returns the index of the first byte equal to `needle`, or `None` when the
/// slice does not contain it (including when the slice is empty).
fn memchr_byte(needle: u8, haystack: &[u8]) -> Option<usize> {
    for (index, &byte) in haystack.iter().enumerate() {
        if byte == needle {
            return Some(index);
        }
    }
    None
}
33
34/// Buffered line reader with line number tracking.
35///
36/// Reads input line-by-line, automatically handling different line endings
37/// (LF, CRLF) and tracking the current line number for error reporting.
38///
39/// # Features
40///
41/// - **Buffered I/O**: Efficient reading with configurable buffer size
42/// - **Line Number Tracking**: Automatic tracking for error messages
43/// - **Peek Support**: Look ahead without consuming lines
44/// - **Push Back**: Re-read previously consumed lines
45/// - **Iterator**: Standard Rust iterator interface
46///
47/// # Examples
48///
49/// ## Basic Line Reading
50///
51/// ```rust
52/// use hedl_stream::LineReader;
53/// use std::io::Cursor;
54///
55/// let input = "line1\nline2\nline3";
56/// let mut reader = LineReader::new(Cursor::new(input));
57///
58/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
59/// assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
60/// assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
61/// assert_eq!(reader.next_line().unwrap(), None);
62/// ```
63///
64/// ## Peeking Ahead
65///
66/// ```rust
67/// use hedl_stream::LineReader;
68/// use std::io::Cursor;
69///
70/// let input = "line1\nline2";
71/// let mut reader = LineReader::new(Cursor::new(input));
72///
73/// // Peek without consuming
74/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
75/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
76///
77/// // Now consume it
78/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
79/// ```
80///
81/// ## Push Back for Re-parsing
82///
83/// ```rust
84/// use hedl_stream::LineReader;
85/// use std::io::Cursor;
86///
87/// let input = "line1\nline2";
88/// let mut reader = LineReader::new(Cursor::new(input));
89///
90/// let line = reader.next_line().unwrap().unwrap();
91/// assert_eq!(line, (1, "line1".to_string()));
92///
93/// // Push it back
94/// reader.push_back(line.0, line.1);
95///
96/// // Read it again
97/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
98/// ```
pub struct LineReader<R: Read> {
    /// Buffered wrapper around the caller-supplied input.
    reader: BufReader<R>,
    /// Count of lines read from `reader` so far; starts at 0 and is used as the
    /// 1-based number of the line most recently read.
    line_number: usize,
    /// Scratch storage reused between reads while a line is being assembled.
    buffer: String,
    /// Line queued by `peek_line` or `push_back`; handed out by the next
    /// `next_line` call before any new input is read.
    peeked: Option<(usize, String)>,
    /// Maximum accepted line length in bytes; longer lines are reported as
    /// `StreamError::LineTooLong`.
    max_line_length: usize,
}
106
107impl<R: Read> LineReader<R> {
108    /// Create a new line reader with default max line length (1MB).
109    pub fn new(reader: R) -> Self {
110        Self {
111            reader: BufReader::new(reader),
112            line_number: 0,
113            buffer: String::new(),
114            peeked: None,
115            max_line_length: 1_000_000,
116        }
117    }
118
119    /// Create with a specific buffer capacity and default max line length (1MB).
120    pub fn with_capacity(reader: R, capacity: usize) -> Self {
121        Self {
122            reader: BufReader::with_capacity(capacity, reader),
123            line_number: 0,
124            buffer: String::new(),
125            peeked: None,
126            max_line_length: 1_000_000,
127        }
128    }
129
130    /// Create with a specific max line length.
131    pub fn with_max_length(reader: R, max_line_length: usize) -> Self {
132        Self {
133            reader: BufReader::new(reader),
134            line_number: 0,
135            buffer: String::new(),
136            peeked: None,
137            max_line_length,
138        }
139    }
140
141    /// Create with a specific buffer capacity and max line length.
142    pub fn with_capacity_and_max_length(
143        reader: R,
144        capacity: usize,
145        max_line_length: usize,
146    ) -> Self {
147        Self {
148            reader: BufReader::with_capacity(capacity, reader),
149            line_number: 0,
150            buffer: String::new(),
151            peeked: None,
152            max_line_length,
153        }
154    }
155
156    /// Get the current line number.
157    #[inline]
158    pub fn line_number(&self) -> usize {
159        self.line_number
160    }
161
162    /// Read the next line.
163    pub fn next_line(&mut self) -> StreamResult<Option<(usize, String)>> {
164        // Return peeked line if available
165        if let Some(peeked) = self.peeked.take() {
166            return Ok(Some(peeked));
167        }
168
169        self.read_line_internal()
170    }
171
172    /// Peek at the next line without consuming it.
173    pub fn peek_line(&mut self) -> StreamResult<Option<&(usize, String)>> {
174        if self.peeked.is_none() {
175            self.peeked = self.read_line_internal()?;
176        }
177        Ok(self.peeked.as_ref())
178    }
179
180    /// Push a line back to be read again.
181    #[inline]
182    pub fn push_back(&mut self, line_num: usize, line: String) {
183        self.peeked = Some((line_num, line));
184    }
185
186    fn read_line_internal(&mut self) -> StreamResult<Option<(usize, String)>> {
187        self.buffer.clear();
188
189        loop {
190            // Read from BufReader's internal buffer (zero-copy)
191            let available = match self.reader.fill_buf() {
192                Ok(buf) => buf,
193                Err(e) => return Err(StreamError::Io(e)),
194            };
195
196            if available.is_empty() {
197                // EOF
198                if self.buffer.is_empty() {
199                    return Ok(None);
200                }
201                // Return partial line (no trailing newline)
202                self.line_number += 1;
203                return Ok(Some((self.line_number, self.buffer.clone())));
204            }
205
206            // Find newline in available data
207            if let Some(newline_pos) = memchr_byte(b'\n', available) {
208                // Check limit BEFORE appending
209                if self.buffer.len() + newline_pos > self.max_line_length {
210                    // CRITICAL: Consume the oversized line data to prevent infinite loop
211                    // Consume up to and including the newline character
212                    self.reader.consume(newline_pos + 1);
213                    let total_length = self.buffer.len() + newline_pos;
214                    self.line_number += 1;
215                    self.buffer.clear();
216                    return Err(StreamError::LineTooLong {
217                        line: self.line_number,
218                        length: total_length,
219                        limit: self.max_line_length,
220                    });
221                }
222
223                // Append up to newline (excluding the newline itself)
224                let _line_data = &available[..newline_pos];
225                let mut line_end = newline_pos;
226
227                // Handle CRLF: if newline is preceded by CR, exclude it too
228                if newline_pos > 0 && available[newline_pos - 1] == b'\r' {
229                    line_end = newline_pos - 1;
230                }
231
232                let to_append = &available[..line_end];
233
234                // Validate UTF-8 before appending
235                let line_str =
236                    std::str::from_utf8(to_append).map_err(|e| StreamError::InvalidUtf8 {
237                        line: self.line_number + 1,
238                        error: e,
239                    })?;
240
241                self.buffer.push_str(line_str);
242
243                // Consume bytes including newline
244                self.reader.consume(newline_pos + 1);
245
246                self.line_number += 1;
247                return Ok(Some((self.line_number, self.buffer.clone())));
248            } else {
249                // No newline yet, check if adding entire buffer exceeds limit
250                if self.buffer.len() + available.len() > self.max_line_length {
251                    // CRITICAL: Consume all available data and skip to end of line
252                    // to prevent infinite loop on subsequent reads
253                    let accumulated = self.buffer.len() + available.len();
254                    let consumed = available.len();
255                    self.reader.consume(consumed);
256
257                    // Continue reading and discarding until we find the end of line
258                    self.skip_to_end_of_line()?;
259
260                    self.line_number += 1;
261                    self.buffer.clear();
262                    return Err(StreamError::LineTooLong {
263                        line: self.line_number,
264                        length: accumulated,
265                        limit: self.max_line_length,
266                    });
267                }
268
269                // Validate UTF-8 before appending
270                let chunk_str =
271                    std::str::from_utf8(available).map_err(|e| StreamError::InvalidUtf8 {
272                        line: self.line_number + 1,
273                        error: e,
274                    })?;
275
276                // Append entire buffer and continue reading
277                self.buffer.push_str(chunk_str);
278
279                let len = available.len();
280                self.reader.consume(len);
281            }
282        }
283    }
284
285    /// Skip to end of line when handling oversized line errors.
286    /// Consumes data until a newline is found or EOF is reached.
287    fn skip_to_end_of_line(&mut self) -> StreamResult<()> {
288        loop {
289            let available = match self.reader.fill_buf() {
290                Ok(buf) => buf,
291                Err(e) => return Err(StreamError::Io(e)),
292            };
293
294            if available.is_empty() {
295                // EOF reached, line is done
296                return Ok(());
297            }
298
299            if let Some(newline_pos) = memchr_byte(b'\n', available) {
300                // Found newline, consume up to and including it
301                self.reader.consume(newline_pos + 1);
302                return Ok(());
303            } else {
304                // No newline, consume all and continue
305                let len = available.len();
306                self.reader.consume(len);
307            }
308        }
309    }
310}
311
312impl<R: Read> Iterator for LineReader<R> {
313    type Item = StreamResult<(usize, String)>;
314
315    fn next(&mut self) -> Option<Self::Item> {
316        match self.next_line() {
317            Ok(Some(line)) => Some(Ok(line)),
318            Ok(None) => None,
319            Err(e) => Some(Err(e)),
320        }
321    }
322}
323
324#[cfg(test)]
325mod tests {
326    use super::*;
327    use std::io::Cursor;
328
329    #[test]
330    fn test_read_lines() {
331        let input = "line1\nline2\nline3";
332        let mut reader = LineReader::new(Cursor::new(input));
333
334        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
335        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
336        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
337        assert_eq!(reader.next_line().unwrap(), None);
338    }
339
340    #[test]
341    fn test_peek_and_push_back() {
342        let input = "line1\nline2";
343        let mut reader = LineReader::new(Cursor::new(input));
344
345        let peeked = reader.peek_line().unwrap().cloned();
346        assert_eq!(peeked, Some((1, "line1".to_string())));
347
348        // Should still return the same line
349        let line = reader.next_line().unwrap();
350        assert_eq!(line, Some((1, "line1".to_string())));
351
352        // Push back
353        reader.push_back(1, "line1".to_string());
354        let line = reader.next_line().unwrap();
355        assert_eq!(line, Some((1, "line1".to_string())));
356    }
357
358    // ==================== Empty input tests ====================
359
360    #[test]
361    fn test_empty_input() {
362        let input = "";
363        let mut reader = LineReader::new(Cursor::new(input));
364        assert_eq!(reader.next_line().unwrap(), None);
365    }
366
367    #[test]
368    fn test_single_empty_line() {
369        let input = "\n";
370        let mut reader = LineReader::new(Cursor::new(input));
371        assert_eq!(reader.next_line().unwrap(), Some((1, String::new())));
372        assert_eq!(reader.next_line().unwrap(), None);
373    }
374
375    #[test]
376    fn test_multiple_empty_lines() {
377        let input = "\n\n\n";
378        let mut reader = LineReader::new(Cursor::new(input));
379        assert_eq!(reader.next_line().unwrap(), Some((1, String::new())));
380        assert_eq!(reader.next_line().unwrap(), Some((2, String::new())));
381        assert_eq!(reader.next_line().unwrap(), Some((3, String::new())));
382        assert_eq!(reader.next_line().unwrap(), None);
383    }
384
385    // ==================== Line ending tests ====================
386
387    #[test]
388    fn test_crlf_line_endings() {
389        let input = "line1\r\nline2\r\nline3";
390        let mut reader = LineReader::new(Cursor::new(input));
391        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
392        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
393        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
394    }
395
396    #[test]
397    fn test_mixed_line_endings() {
398        let input = "line1\nline2\r\nline3\nline4";
399        let mut reader = LineReader::new(Cursor::new(input));
400        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
401        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
402        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
403        assert_eq!(reader.next_line().unwrap(), Some((4, "line4".to_string())));
404    }
405
406    #[test]
407    fn test_trailing_newline() {
408        let input = "line1\n";
409        let mut reader = LineReader::new(Cursor::new(input));
410        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
411        assert_eq!(reader.next_line().unwrap(), None);
412    }
413
414    #[test]
415    fn test_no_trailing_newline() {
416        let input = "line1";
417        let mut reader = LineReader::new(Cursor::new(input));
418        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
419        assert_eq!(reader.next_line().unwrap(), None);
420    }
421
422    // ==================== Line number tests ====================
423
424    #[test]
425    fn test_line_number_initial() {
426        let reader: LineReader<Cursor<&str>> = LineReader::new(Cursor::new("test"));
427        assert_eq!(reader.line_number(), 0);
428    }
429
430    #[test]
431    fn test_line_number_after_read() {
432        let input = "line1\nline2\nline3";
433        let mut reader = LineReader::new(Cursor::new(input));
434
435        reader.next_line().unwrap();
436        assert_eq!(reader.line_number(), 1);
437
438        reader.next_line().unwrap();
439        assert_eq!(reader.line_number(), 2);
440
441        reader.next_line().unwrap();
442        assert_eq!(reader.line_number(), 3);
443    }
444
445    #[test]
446    fn test_line_number_after_eof() {
447        let input = "line1";
448        let mut reader = LineReader::new(Cursor::new(input));
449
450        reader.next_line().unwrap();
451        assert_eq!(reader.line_number(), 1);
452
453        reader.next_line().unwrap(); // EOF
454        assert_eq!(reader.line_number(), 1); // Line number unchanged
455    }
456
457    // ==================== Peek tests ====================
458
459    #[test]
460    fn test_peek_empty_input() {
461        let mut reader = LineReader::new(Cursor::new(""));
462        assert_eq!(reader.peek_line().unwrap(), None);
463    }
464
465    #[test]
466    fn test_peek_multiple_times() {
467        let input = "line1\nline2";
468        let mut reader = LineReader::new(Cursor::new(input));
469
470        // Peek multiple times should return the same line
471        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
472        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
473        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
474
475        // Consume it
476        reader.next_line().unwrap();
477
478        // Next peek should be the second line
479        assert_eq!(reader.peek_line().unwrap(), Some(&(2, "line2".to_string())));
480    }
481
482    #[test]
483    fn test_peek_then_read() {
484        let input = "line1\nline2";
485        let mut reader = LineReader::new(Cursor::new(input));
486
487        reader.peek_line().unwrap();
488        let line = reader.next_line().unwrap();
489        assert_eq!(line, Some((1, "line1".to_string())));
490    }
491
492    // ==================== Push back tests ====================
493
494    #[test]
495    fn test_push_back_with_different_line_number() {
496        let input = "line1\nline2";
497        let mut reader = LineReader::new(Cursor::new(input));
498
499        reader.next_line().unwrap(); // line1
500        reader.push_back(99, "pushed".to_string());
501
502        let line = reader.next_line().unwrap();
503        assert_eq!(line, Some((99, "pushed".to_string())));
504    }
505
506    #[test]
507    fn test_push_back_overwrites_peek() {
508        let input = "line1\nline2";
509        let mut reader = LineReader::new(Cursor::new(input));
510
511        reader.peek_line().unwrap(); // Peek line1
512        reader.push_back(42, "pushed".to_string());
513
514        let line = reader.next_line().unwrap();
515        assert_eq!(line, Some((42, "pushed".to_string())));
516    }
517
518    // ==================== Iterator tests ====================
519
520    #[test]
521    fn test_iterator() {
522        let input = "line1\nline2\nline3";
523        let reader = LineReader::new(Cursor::new(input));
524
525        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();
526
527        assert_eq!(lines.len(), 3);
528        assert_eq!(lines[0], (1, "line1".to_string()));
529        assert_eq!(lines[1], (2, "line2".to_string()));
530        assert_eq!(lines[2], (3, "line3".to_string()));
531    }
532
533    #[test]
534    fn test_iterator_empty() {
535        let reader = LineReader::new(Cursor::new(""));
536        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();
537        assert!(lines.is_empty());
538    }
539
540    #[test]
541    fn test_iterator_single_line() {
542        let reader = LineReader::new(Cursor::new("single"));
543        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();
544        assert_eq!(lines, vec![(1, "single".to_string())]);
545    }
546
547    // ==================== With capacity tests ====================
548
549    #[test]
550    fn test_with_capacity() {
551        let input = "line1\nline2";
552        let mut reader = LineReader::with_capacity(Cursor::new(input), 1024);
553
554        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
555        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
556    }
557
558    #[test]
559    fn test_with_small_capacity() {
560        let input = "line1\nline2";
561        let mut reader = LineReader::with_capacity(Cursor::new(input), 1);
562
563        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
564        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
565    }
566
567    // ==================== Unicode tests ====================
568
569    #[test]
570    fn test_unicode_content() {
571        let input = "δ½ ε₯½\nδΈ–η•Œ\nπŸŽ‰";
572        let mut reader = LineReader::new(Cursor::new(input));
573
574        assert_eq!(reader.next_line().unwrap(), Some((1, "δ½ ε₯½".to_string())));
575        assert_eq!(reader.next_line().unwrap(), Some((2, "δΈ–η•Œ".to_string())));
576        assert_eq!(reader.next_line().unwrap(), Some((3, "πŸŽ‰".to_string())));
577    }
578
579    #[test]
580    fn test_unicode_line_with_emoji() {
581        let input = "Hello 🌍 World";
582        let mut reader = LineReader::new(Cursor::new(input));
583        assert_eq!(
584            reader.next_line().unwrap(),
585            Some((1, "Hello 🌍 World".to_string()))
586        );
587    }
588
589    // ==================== Whitespace tests ====================
590
591    #[test]
592    fn test_line_with_spaces() {
593        let input = "  indented  \n\ttabbed\t";
594        let mut reader = LineReader::new(Cursor::new(input));
595
596        assert_eq!(
597            reader.next_line().unwrap(),
598            Some((1, "  indented  ".to_string()))
599        );
600        assert_eq!(
601            reader.next_line().unwrap(),
602            Some((2, "\ttabbed\t".to_string()))
603        );
604    }
605
606    #[test]
607    fn test_only_whitespace_lines() {
608        let input = "   \n\t\t\n  \t  ";
609        let mut reader = LineReader::new(Cursor::new(input));
610
611        assert_eq!(reader.next_line().unwrap(), Some((1, "   ".to_string())));
612        assert_eq!(reader.next_line().unwrap(), Some((2, "\t\t".to_string())));
613        assert_eq!(reader.next_line().unwrap(), Some((3, "  \t  ".to_string())));
614    }
615
616    // ==================== Long line tests ====================
617
618    #[test]
619    fn test_long_line() {
620        let long_line = "a".repeat(10000);
621        let mut reader = LineReader::new(Cursor::new(long_line.clone()));
622        assert_eq!(reader.next_line().unwrap(), Some((1, long_line)));
623    }
624
625    #[test]
626    fn test_many_lines() {
627        let lines: Vec<String> = (0..1000).map(|i| format!("line{i}")).collect();
628        let input = lines.join("\n");
629        let mut reader = LineReader::new(Cursor::new(input));
630
631        for (i, expected) in lines.iter().enumerate() {
632            let result = reader.next_line().unwrap();
633            assert_eq!(result, Some((i + 1, expected.clone())));
634        }
635        assert_eq!(reader.next_line().unwrap(), None);
636    }
637
638    // ==================== Security: Line length enforcement tests ====================
639
640    #[test]
641    fn test_line_length_limit_enforced() {
642        let config_max = 100;
643
644        // Create a line with 101 characters (exceeds limit of 100)
645        let long_line = format!("data: {}\n", "A".repeat(95)); // "data: " (6 chars) + 95 A's + newline = 102 total
646        let input = Cursor::new(long_line.as_str());
647        let mut reader = LineReader::with_max_length(input, config_max);
648
649        let result = reader.next_line();
650        assert!(result.is_err());
651
652        let err = result.unwrap_err();
653        assert!(matches!(err, StreamError::LineTooLong { .. }));
654
655        if let StreamError::LineTooLong {
656            line,
657            length,
658            limit,
659        } = err
660        {
661            assert_eq!(line, 1);
662            assert!(length > 100);
663            assert_eq!(limit, 100);
664        }
665    }
666
667    #[test]
668    fn test_line_length_limit_exactly_at_boundary() {
669        let config_max = 100;
670        // Exactly 100 characters (should succeed)
671        let line = format!("data: {}\n", "A".repeat(93)); // "data: " + 93 A's + newline = 100 chars
672        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);
673
674        let result = reader.next_line();
675        assert!(result.is_ok());
676        let (line_num, content) = result.unwrap().unwrap();
677        assert_eq!(line_num, 1);
678        assert_eq!(content.len(), 99); // "data: " + 93 A's (without newline)
679    }
680
681    #[test]
682    fn test_line_length_limit_one_over_boundary() {
683        let config_max = 100;
684        // Exactly 101 characters WITHOUT newline (should fail)
685        // "data: " (6 chars) + 95 A's = 101 chars (no newline yet)
686        let line = format!("data: {}", "A".repeat(95));
687        let mut reader =
688            LineReader::with_capacity_and_max_length(Cursor::new(line), 64, config_max);
689
690        let result = reader.next_line();
691        assert!(result.is_err());
692        assert!(matches!(
693            result.unwrap_err(),
694            StreamError::LineTooLong { .. }
695        ));
696    }
697
698    #[test]
699    fn test_default_limit_allows_reasonable_lines() {
700        // 100KB line (well under 1MB default)
701        let line = format!("data: {}\n", "A".repeat(100_000));
702        let mut reader = LineReader::new(Cursor::new(line));
703
704        let result = reader.next_line();
705        assert!(result.is_ok());
706    }
707
708    #[test]
709    fn test_default_limit_rejects_huge_lines() {
710        // 2MB line (exceeds 1MB default)
711        let line = format!("data: {}\n", "A".repeat(2_000_000));
712        let mut reader = LineReader::new(Cursor::new(line));
713
714        let result = reader.next_line();
715        assert!(result.is_err());
716        assert!(matches!(
717            result.unwrap_err(),
718            StreamError::LineTooLong { .. }
719        ));
720    }
721
722    #[test]
723    fn test_multiple_long_lines() {
724        let config_max = 50;
725        let input = format!(
726            "version: 1.0\nid: 1\ndata: {}\nid: 2\ndata: {}\n",
727            "A".repeat(60), // First line OK, second line exceeds
728            "B".repeat(60)
729        );
730        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);
731
732        // Should successfully read lines that are within the limit
733        assert!(reader.next_line().is_ok()); // version
734        assert!(reader.next_line().is_ok()); // id: 1
735
736        // Should fail on first overly long line
737        let result = reader.next_line();
738        assert!(result.is_err());
739        if let Err(StreamError::LineTooLong { line, .. }) = result {
740            assert_eq!(line, 3); // Third line (the first long data line)
741        }
742    }
743
744    #[test]
745    fn test_line_without_newline_checked() {
746        let config_max = 100;
747        // No trailing newline, 101 chars
748        let input = format!("data: {}", "A".repeat(95));
749        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);
750
751        let result = reader.next_line();
752        assert!(result.is_err());
753        assert!(matches!(
754            result.unwrap_err(),
755            StreamError::LineTooLong { .. }
756        ));
757    }
758
759    #[test]
760    fn test_very_long_single_line_without_newline() {
761        let config_max = 1000;
762        // Create a line that's way over the limit without any newline
763        let input = "A".repeat(10_000);
764        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);
765
766        let result = reader.next_line();
767        assert!(result.is_err());
768
769        if let Err(StreamError::LineTooLong {
770            line,
771            length,
772            limit,
773        }) = result
774        {
775            assert_eq!(line, 1);
776            // The actual length detected will be at least the limit
777            // It could be detected at the buffer boundary (8192) or the full length
778            assert!(length >= config_max);
779            assert_eq!(limit, 1000);
780        }
781    }
782
783    #[test]
784    fn test_line_length_limit_with_crlf() {
785        let config_max = 50;
786        // Test with CRLF line endings
787        let line = format!("data: {}\r\n", "A".repeat(60)); // Exceeds limit
788        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);
789
790        let result = reader.next_line();
791        assert!(result.is_err());
792        assert!(matches!(
793            result.unwrap_err(),
794            StreamError::LineTooLong { .. }
795        ));
796    }
797
798    #[test]
799    fn test_line_length_across_buffer_boundaries() {
800        let config_max = 1000;
801        // Create a line that's larger than typical buffer sizes
802        let long_line = format!("data: {}\n", "A".repeat(2000));
803        let mut reader =
804            LineReader::with_capacity_and_max_length(Cursor::new(long_line), 64, config_max);
805
806        let result = reader.next_line();
807        assert!(result.is_err());
808        assert!(matches!(
809            result.unwrap_err(),
810            StreamError::LineTooLong { .. }
811        ));
812    }
813
814    #[test]
815    fn test_unlimited_config_allows_any_length() {
816        // Use usize::MAX to simulate unlimited config
817        let config_max = usize::MAX;
818        let line = format!("data: {}\n", "A".repeat(1_000_000));
819        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);
820
821        let result = reader.next_line();
822        assert!(result.is_ok());
823    }
824
825    #[test]
826    fn test_empty_line_respects_limit() {
827        let config_max = 10;
828        let input = "\n"; // Empty line should be fine
829        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);
830
831        let result = reader.next_line();
832        assert!(result.is_ok());
833        assert_eq!(result.unwrap(), Some((1, String::new())));
834    }
835
836    #[test]
837    fn test_zero_length_limit() {
838        let config_max = 0;
839        let input = "x\n";
840        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);
841
842        let result = reader.next_line();
843        assert!(result.is_err());
844        assert!(matches!(
845            result.unwrap_err(),
846            StreamError::LineTooLong { .. }
847        ));
848    }
849
850    #[test]
851    fn test_line_too_long_error_message() {
852        let config_max = 100;
853        let line = format!("data: {}\n", "A".repeat(200));
854        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);
855
856        let result = reader.next_line();
857        let err = result.unwrap_err();
858        let msg = format!("{err}");
859
860        assert!(msg.contains("exceeds maximum length"));
861        assert!(msg.contains("100"));
862    }
863
#[test]
fn test_mixed_valid_and_invalid_lines() {
    let config_max = 50;
    let input = format!(
        "short\n{}\nanother short\n{}\n",
        "A".repeat(100), // Too long
        "B".repeat(200)  // Also too long
    );
    let mut reader = LineReader::with_max_length(Cursor::new(input.as_str()), config_max);

    // The first line fits within the limit.
    assert!(reader.next_line().is_ok());

    // The second line exceeds the limit. Match on the full result so that any
    // other outcome (a successful read or a different error variant) fails
    // the test instead of silently skipping the line-number check.
    match reader.next_line() {
        Err(StreamError::LineTooLong { line, .. }) => assert_eq!(line, 2),
        other => panic!("Expected LineTooLong error on line 2, got {other:?}"),
    }
}
884
#[test]
fn test_line_length_checked_before_memory_allocation() {
    // Intent: the length limit should be enforced while the line is being
    // read in chunks, not after the entire line has been buffered. This test
    // only observes the resulting error; it does not measure allocations.
    let config_max = 100;

    // One 10 MB line, far over the limit. Appending the newline in place
    // avoids building a second 10 MB copy of the fixture.
    let mut input = "A".repeat(10_000_000);
    input.push('\n');

    // A small capacity (64) makes the oversized line span many reads.
    let mut reader =
        LineReader::with_capacity_and_max_length(Cursor::new(input), 64, config_max);

    // Match on the full result so that a successful read or a different error
    // variant fails the test instead of silently skipping the field checks.
    match reader.next_line() {
        Err(StreamError::LineTooLong { length, limit, .. }) => {
            assert!(length > limit);
            assert_eq!(limit, 100);
        }
        other => panic!("Expected LineTooLong error, got {other:?}"),
    }
}
908
909    // ==================== Security: UTF-8 validation tests ====================
910
#[test]
fn test_invalid_utf8_rejected() {
    // 0xFF, 0xFE and 0xFD never appear in well-formed UTF-8; 0x0A ends the line.
    let raw_bytes = vec![0xFF, 0xFE, 0xFD, 0x0A];
    let mut reader = LineReader::new(Cursor::new(raw_bytes));

    let outcome = reader.next_line();
    assert!(matches!(outcome, Err(StreamError::InvalidUtf8 { .. })));
}
922
#[test]
fn test_invalid_utf8_in_middle_of_line() {
    // Valid ASCII on both sides of two bytes that are invalid UTF-8.
    let segments: [&[u8]; 3] = [b"valid start", &[0xFF, 0xFE], b" end\n"];
    let raw_bytes = segments.concat();

    let mut reader = LineReader::new(Cursor::new(raw_bytes));

    let outcome = reader.next_line();
    assert!(matches!(outcome, Err(StreamError::InvalidUtf8 { .. })));
}
939
#[test]
fn test_invalid_utf8_error_includes_line_number() {
    let raw_bytes = vec![0xFF, 0xFE, 0x0A];
    let mut reader = LineReader::new(Cursor::new(raw_bytes));

    // The invalid bytes are on the first line, so the error must report line 1.
    match reader.next_line() {
        Err(StreamError::InvalidUtf8 { line, .. }) => assert_eq!(line, 1),
        _ => panic!("Expected InvalidUtf8 error"),
    }
}
951
#[test]
fn test_valid_utf8_multibyte_characters() {
    // Well-formed multibyte UTF-8 must pass through unchanged: the CJK
    // characters are 3-byte sequences and the emoji is a 4-byte sequence.
    // (The literal had been corrupted into mojibake, which dropped the
    // 4-byte case; it is restored here.)
    let expected = "Hello 世界 🎉";
    let input = format!("{expected}\n");
    let mut reader = LineReader::new(Cursor::new(input));

    let result = reader.next_line();
    assert!(result.is_ok());
    assert_eq!(result.unwrap(), Some((1, expected.to_string())));
}
962
963    // ==================== With max_length constructors tests ====================
964
#[test]
fn test_with_max_length_constructor() {
    // A freshly constructed reader has not consumed any line yet.
    let reader = LineReader::with_max_length(Cursor::new("test\n"), 500);

    let lines_read = reader.line_number();
    assert_eq!(lines_read, 0);
}
971
#[test]
fn test_with_capacity_and_max_length_constructor() {
    // A freshly constructed reader has not consumed any line yet.
    let source = Cursor::new("test\n");
    let reader = LineReader::with_capacity_and_max_length(source, 1024, 500);

    let lines_read = reader.line_number();
    assert_eq!(lines_read, 0);
}
978}