Skip to main content

hedl_stream/
reader.rs

// Dweve HEDL - Hierarchical Entity Data Language
//
// Copyright (c) 2025 Dweve IP B.V. and individual contributors.
//
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License in the LICENSE file at the
// root of this repository or at: http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//! Line reader for streaming parser.
//!
//! Provides buffered line-by-line reading with line number tracking, peek support,
//! and the ability to push back lines for re-parsing.
//!
//! This module is primarily an internal implementation detail of the streaming
//! parser, but is exposed for advanced use cases.
25
26use crate::error::{StreamError, StreamResult};
27use memchr::memchr;
28use std::io::{BufRead, BufReader, Read};
29
/// Buffered line reader with line number tracking.
///
/// Reads input line-by-line, automatically handling different line endings
/// (LF, CRLF) and tracking the current line number for error reporting.
///
/// # Features
///
/// - **Buffered I/O**: Efficient reading with configurable buffer size
/// - **Line Number Tracking**: Automatic tracking for error messages
/// - **Peek Support**: Look ahead without consuming lines
/// - **Push Back**: Re-read previously consumed lines
/// - **Iterator**: Standard Rust iterator interface
/// - **Line Length Limit**: Lines longer than the configured maximum are rejected
///   with `StreamError::LineTooLong`
/// - **UTF-8 Validation**: Input that is not valid UTF-8 is rejected with
///   `StreamError::InvalidUtf8`
///
/// # Examples
///
/// ## Basic Line Reading
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2\nline3";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
/// assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
/// assert_eq!(reader.next_line().unwrap(), None);
/// ```
///
/// ## Peeking Ahead
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// // Peek without consuming
/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
/// assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
///
/// // Now consume it
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// ```
///
/// ## Push Back for Re-parsing
///
/// ```rust
/// use hedl_stream::LineReader;
/// use std::io::Cursor;
///
/// let input = "line1\nline2";
/// let mut reader = LineReader::new(Cursor::new(input));
///
/// let line = reader.next_line().unwrap().unwrap();
/// assert_eq!(line, (1, "line1".to_string()));
///
/// // Push it back
/// reader.push_back(line.0, line.1);
///
/// // Read it again
/// assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
/// ```
#[derive(Debug)]
pub struct LineReader<R: Read> {
    /// Buffered wrapper around the underlying input.
    reader: BufReader<R>,
    /// Number of the line most recently read from `reader` (0 before the first read).
    line_number: usize,
    /// Scratch buffer reused between reads while a line is being assembled.
    buffer: String,
    /// Line held back by `peek_line` or `push_back`; handed out first by the next read.
    peeked: Option<(usize, String)>,
    /// Maximum accepted line length in bytes; longer lines yield `StreamError::LineTooLong`.
    max_line_length: usize,
}
102
103impl<R: Read> LineReader<R> {
104    /// Create a new line reader with default max line length (1MB).
105    pub fn new(reader: R) -> Self {
106        Self {
107            reader: BufReader::new(reader),
108            line_number: 0,
109            buffer: String::new(),
110            peeked: None,
111            max_line_length: 1_000_000,
112        }
113    }
114
115    /// Create with a specific buffer capacity and default max line length (1MB).
116    pub fn with_capacity(reader: R, capacity: usize) -> Self {
117        Self {
118            reader: BufReader::with_capacity(capacity, reader),
119            line_number: 0,
120            buffer: String::new(),
121            peeked: None,
122            max_line_length: 1_000_000,
123        }
124    }
125
126    /// Create with a specific max line length.
127    pub fn with_max_length(reader: R, max_line_length: usize) -> Self {
128        Self {
129            reader: BufReader::new(reader),
130            line_number: 0,
131            buffer: String::new(),
132            peeked: None,
133            max_line_length,
134        }
135    }
136
137    /// Create with a specific buffer capacity and max line length.
138    pub fn with_capacity_and_max_length(
139        reader: R,
140        capacity: usize,
141        max_line_length: usize,
142    ) -> Self {
143        Self {
144            reader: BufReader::with_capacity(capacity, reader),
145            line_number: 0,
146            buffer: String::new(),
147            peeked: None,
148            max_line_length,
149        }
150    }
151
152    /// Get the current line number.
153    #[inline]
154    pub fn line_number(&self) -> usize {
155        self.line_number
156    }
157
158    /// Read the next line.
159    pub fn next_line(&mut self) -> StreamResult<Option<(usize, String)>> {
160        // Return peeked line if available
161        if let Some(peeked) = self.peeked.take() {
162            return Ok(Some(peeked));
163        }
164
165        self.read_line_internal()
166    }
167
168    /// Peek at the next line without consuming it.
169    pub fn peek_line(&mut self) -> StreamResult<Option<&(usize, String)>> {
170        if self.peeked.is_none() {
171            self.peeked = self.read_line_internal()?;
172        }
173        Ok(self.peeked.as_ref())
174    }
175
176    /// Push a line back to be read again.
177    #[inline]
178    pub fn push_back(&mut self, line_num: usize, line: String) {
179        self.peeked = Some((line_num, line));
180    }
181
182    fn read_line_internal(&mut self) -> StreamResult<Option<(usize, String)>> {
183        self.buffer.clear();
184
185        loop {
186            // Read from BufReader's internal buffer (zero-copy)
187            let available = match self.reader.fill_buf() {
188                Ok(buf) => buf,
189                Err(e) => return Err(StreamError::Io(e)),
190            };
191
192            if available.is_empty() {
193                // EOF
194                if self.buffer.is_empty() {
195                    return Ok(None);
196                }
197                // Return partial line (no trailing newline)
198                self.line_number += 1;
199                return Ok(Some((self.line_number, self.buffer.clone())));
200            }
201
202            // Find newline in available data
203            if let Some(newline_pos) = memchr(b'\n', available) {
204                // Check limit BEFORE appending
205                if self.buffer.len() + newline_pos > self.max_line_length {
206                    // CRITICAL: Consume the oversized line data to prevent infinite loop
207                    // Consume up to and including the newline character
208                    self.reader.consume(newline_pos + 1);
209                    let total_length = self.buffer.len() + newline_pos;
210                    self.line_number += 1;
211                    self.buffer.clear();
212                    return Err(StreamError::LineTooLong {
213                        line: self.line_number,
214                        length: total_length,
215                        limit: self.max_line_length,
216                    });
217                }
218
219                // Append up to newline (excluding the newline itself)
220                let _line_data = &available[..newline_pos];
221                let mut line_end = newline_pos;
222
223                // Handle CRLF: if newline is preceded by CR, exclude it too
224                if newline_pos > 0 && available[newline_pos - 1] == b'\r' {
225                    line_end = newline_pos - 1;
226                }
227
228                let to_append = &available[..line_end];
229
230                // Validate UTF-8 before appending
231                let line_str =
232                    std::str::from_utf8(to_append).map_err(|e| StreamError::InvalidUtf8 {
233                        line: self.line_number + 1,
234                        error: e,
235                    })?;
236
237                self.buffer.push_str(line_str);
238
239                // Consume bytes including newline
240                self.reader.consume(newline_pos + 1);
241
242                self.line_number += 1;
243                return Ok(Some((self.line_number, self.buffer.clone())));
244            } else {
245                // No newline yet, check if adding entire buffer exceeds limit
246                if self.buffer.len() + available.len() > self.max_line_length {
247                    // CRITICAL: Consume all available data and skip to end of line
248                    // to prevent infinite loop on subsequent reads
249                    let accumulated = self.buffer.len() + available.len();
250                    let consumed = available.len();
251                    self.reader.consume(consumed);
252
253                    // Continue reading and discarding until we find the end of line
254                    self.skip_to_end_of_line()?;
255
256                    self.line_number += 1;
257                    self.buffer.clear();
258                    return Err(StreamError::LineTooLong {
259                        line: self.line_number,
260                        length: accumulated,
261                        limit: self.max_line_length,
262                    });
263                }
264
265                // Validate UTF-8 before appending
266                let chunk_str =
267                    std::str::from_utf8(available).map_err(|e| StreamError::InvalidUtf8 {
268                        line: self.line_number + 1,
269                        error: e,
270                    })?;
271
272                // Append entire buffer and continue reading
273                self.buffer.push_str(chunk_str);
274
275                let len = available.len();
276                self.reader.consume(len);
277            }
278        }
279    }
280
281    /// Skip to end of line when handling oversized line errors.
282    /// Consumes data until a newline is found or EOF is reached.
283    fn skip_to_end_of_line(&mut self) -> StreamResult<()> {
284        loop {
285            let available = match self.reader.fill_buf() {
286                Ok(buf) => buf,
287                Err(e) => return Err(StreamError::Io(e)),
288            };
289
290            if available.is_empty() {
291                // EOF reached, line is done
292                return Ok(());
293            }
294
295            if let Some(newline_pos) = memchr(b'\n', available) {
296                // Found newline, consume up to and including it
297                self.reader.consume(newline_pos + 1);
298                return Ok(());
299            } else {
300                // No newline, consume all and continue
301                let len = available.len();
302                self.reader.consume(len);
303            }
304        }
305    }
306}
307
308impl<R: Read> Iterator for LineReader<R> {
309    type Item = StreamResult<(usize, String)>;
310
311    fn next(&mut self) -> Option<Self::Item> {
312        match self.next_line() {
313            Ok(Some(line)) => Some(Ok(line)),
314            Ok(None) => None,
315            Err(e) => Some(Err(e)),
316        }
317    }
318}
319
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Unwraps a `LineTooLong` error into `(line, length, limit)`.
    ///
    /// Panics on any other outcome, so a test cannot pass silently when a different
    /// error variant (or a successful read) is produced.
    fn expect_line_too_long(
        result: StreamResult<Option<(usize, String)>>,
    ) -> (usize, usize, usize) {
        match result {
            Err(StreamError::LineTooLong {
                line,
                length,
                limit,
            }) => (line, length, limit),
            other => panic!("expected LineTooLong error, got {other:?}"),
        }
    }

    #[test]
    fn test_read_lines() {
        let input = "line1\nline2\nline3";
        let mut reader = LineReader::new(Cursor::new(input));

        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_peek_and_push_back() {
        let input = "line1\nline2";
        let mut reader = LineReader::new(Cursor::new(input));

        let peeked = reader.peek_line().unwrap().cloned();
        assert_eq!(peeked, Some((1, "line1".to_string())));

        // Should still return the same line
        let line = reader.next_line().unwrap();
        assert_eq!(line, Some((1, "line1".to_string())));

        // Push back
        reader.push_back(1, "line1".to_string());
        let line = reader.next_line().unwrap();
        assert_eq!(line, Some((1, "line1".to_string())));
    }

    // ==================== Empty input tests ====================

    #[test]
    fn test_empty_input() {
        let input = "";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_single_empty_line() {
        let input = "\n";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, String::new())));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_multiple_empty_lines() {
        let input = "\n\n\n";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, String::new())));
        assert_eq!(reader.next_line().unwrap(), Some((2, String::new())));
        assert_eq!(reader.next_line().unwrap(), Some((3, String::new())));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Line ending tests ====================

    #[test]
    fn test_crlf_line_endings() {
        let input = "line1\r\nline2\r\nline3";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
    }

    #[test]
    fn test_mixed_line_endings() {
        let input = "line1\nline2\r\nline3\nline4";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((3, "line3".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((4, "line4".to_string())));
    }

    #[test]
    fn test_trailing_newline() {
        let input = "line1\n";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_no_trailing_newline() {
        let input = "line1";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Line number tests ====================

    #[test]
    fn test_line_number_initial() {
        let reader: LineReader<Cursor<&str>> = LineReader::new(Cursor::new("test"));
        assert_eq!(reader.line_number(), 0);
    }

    #[test]
    fn test_line_number_after_read() {
        let input = "line1\nline2\nline3";
        let mut reader = LineReader::new(Cursor::new(input));

        reader.next_line().unwrap();
        assert_eq!(reader.line_number(), 1);

        reader.next_line().unwrap();
        assert_eq!(reader.line_number(), 2);

        reader.next_line().unwrap();
        assert_eq!(reader.line_number(), 3);
    }

    #[test]
    fn test_line_number_after_eof() {
        let input = "line1";
        let mut reader = LineReader::new(Cursor::new(input));

        reader.next_line().unwrap();
        assert_eq!(reader.line_number(), 1);

        reader.next_line().unwrap(); // EOF
        assert_eq!(reader.line_number(), 1); // Line number unchanged
    }

    // ==================== Peek tests ====================

    #[test]
    fn test_peek_empty_input() {
        let mut reader = LineReader::new(Cursor::new(""));
        assert_eq!(reader.peek_line().unwrap(), None);
    }

    #[test]
    fn test_peek_multiple_times() {
        let input = "line1\nline2";
        let mut reader = LineReader::new(Cursor::new(input));

        // Peek multiple times should return the same line
        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));
        assert_eq!(reader.peek_line().unwrap(), Some(&(1, "line1".to_string())));

        // Consume it
        reader.next_line().unwrap();

        // Next peek should be the second line
        assert_eq!(reader.peek_line().unwrap(), Some(&(2, "line2".to_string())));
    }

    #[test]
    fn test_peek_then_read() {
        let input = "line1\nline2";
        let mut reader = LineReader::new(Cursor::new(input));

        reader.peek_line().unwrap();
        let line = reader.next_line().unwrap();
        assert_eq!(line, Some((1, "line1".to_string())));
    }

    // ==================== Push back tests ====================

    #[test]
    fn test_push_back_with_different_line_number() {
        let input = "line1\nline2";
        let mut reader = LineReader::new(Cursor::new(input));

        reader.next_line().unwrap(); // line1
        reader.push_back(99, "pushed".to_string());

        let line = reader.next_line().unwrap();
        assert_eq!(line, Some((99, "pushed".to_string())));
    }

    #[test]
    fn test_push_back_overwrites_peek() {
        let input = "line1\nline2";
        let mut reader = LineReader::new(Cursor::new(input));

        reader.peek_line().unwrap(); // Peek line1
        reader.push_back(42, "pushed".to_string());

        let line = reader.next_line().unwrap();
        assert_eq!(line, Some((42, "pushed".to_string())));
    }

    // ==================== Iterator tests ====================

    #[test]
    fn test_iterator() {
        let input = "line1\nline2\nline3";
        let reader = LineReader::new(Cursor::new(input));

        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();

        assert_eq!(lines.len(), 3);
        assert_eq!(lines[0], (1, "line1".to_string()));
        assert_eq!(lines[1], (2, "line2".to_string()));
        assert_eq!(lines[2], (3, "line3".to_string()));
    }

    #[test]
    fn test_iterator_empty() {
        let reader = LineReader::new(Cursor::new(""));
        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();
        assert!(lines.is_empty());
    }

    #[test]
    fn test_iterator_single_line() {
        let reader = LineReader::new(Cursor::new("single"));
        let lines: Vec<_> = reader.filter_map(std::result::Result::ok).collect();
        assert_eq!(lines, vec![(1, "single".to_string())]);
    }

    // ==================== With capacity tests ====================

    #[test]
    fn test_with_capacity() {
        let input = "line1\nline2";
        let mut reader = LineReader::with_capacity(Cursor::new(input), 1024);

        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
    }

    #[test]
    fn test_with_small_capacity() {
        let input = "line1\nline2";
        let mut reader = LineReader::with_capacity(Cursor::new(input), 1);

        assert_eq!(reader.next_line().unwrap(), Some((1, "line1".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "line2".to_string())));
    }

    // ==================== Unicode tests ====================

    #[test]
    fn test_unicode_content() {
        let input = "你好\n世界\n🎉";
        let mut reader = LineReader::new(Cursor::new(input));

        assert_eq!(reader.next_line().unwrap(), Some((1, "你好".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "世界".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((3, "🎉".to_string())));
    }

    #[test]
    fn test_unicode_line_with_emoji() {
        let input = "Hello 🌍 World";
        let mut reader = LineReader::new(Cursor::new(input));
        assert_eq!(
            reader.next_line().unwrap(),
            Some((1, "Hello 🌍 World".to_string()))
        );
    }

    // ==================== Whitespace tests ====================

    #[test]
    fn test_line_with_spaces() {
        let input = "  indented  \n\ttabbed\t";
        let mut reader = LineReader::new(Cursor::new(input));

        assert_eq!(
            reader.next_line().unwrap(),
            Some((1, "  indented  ".to_string()))
        );
        assert_eq!(
            reader.next_line().unwrap(),
            Some((2, "\ttabbed\t".to_string()))
        );
    }

    #[test]
    fn test_only_whitespace_lines() {
        let input = "   \n\t\t\n  \t  ";
        let mut reader = LineReader::new(Cursor::new(input));

        assert_eq!(reader.next_line().unwrap(), Some((1, "   ".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((2, "\t\t".to_string())));
        assert_eq!(reader.next_line().unwrap(), Some((3, "  \t  ".to_string())));
    }

    // ==================== Long line tests ====================

    #[test]
    fn test_long_line() {
        let long_line = "a".repeat(10000);
        let mut reader = LineReader::new(Cursor::new(long_line.clone()));
        assert_eq!(reader.next_line().unwrap(), Some((1, long_line)));
    }

    #[test]
    fn test_many_lines() {
        let lines: Vec<String> = (0..1000).map(|i| format!("line{i}")).collect();
        let input = lines.join("\n");
        let mut reader = LineReader::new(Cursor::new(input));

        for (i, expected) in lines.iter().enumerate() {
            let result = reader.next_line().unwrap();
            assert_eq!(result, Some((i + 1, expected.clone())));
        }
        assert_eq!(reader.next_line().unwrap(), None);
    }

    // ==================== Security: Line length enforcement tests ====================

    #[test]
    fn test_line_length_limit_enforced() {
        let config_max = 100;

        // Create a line with 101 characters (exceeds limit of 100)
        let long_line = format!("data: {}\n", "A".repeat(95)); // "data: " (6 chars) + 95 A's + newline = 102 total
        let input = Cursor::new(long_line.as_str());
        let mut reader = LineReader::with_max_length(input, config_max);

        let (line, length, limit) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 1);
        assert!(length > 100);
        assert_eq!(limit, 100);
    }

    #[test]
    fn test_line_length_limit_exactly_at_boundary() {
        let config_max = 100;
        // Exactly 100 characters (should succeed)
        let line = format!("data: {}\n", "A".repeat(93)); // "data: " + 93 A's + newline = 100 chars
        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);

        let result = reader.next_line();
        assert!(result.is_ok());
        let (line_num, content) = result.unwrap().unwrap();
        assert_eq!(line_num, 1);
        assert_eq!(content.len(), 99); // "data: " + 93 A's (without newline)
    }

    #[test]
    fn test_line_length_limit_one_over_boundary() {
        let config_max = 100;
        // Exactly 101 characters WITHOUT newline (should fail)
        // "data: " (6 chars) + 95 A's = 101 chars (no newline yet)
        let line = format!("data: {}", "A".repeat(95));
        let mut reader =
            LineReader::with_capacity_and_max_length(Cursor::new(line), 64, config_max);

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_default_limit_allows_reasonable_lines() {
        // 100KB line (well under 1MB default)
        let line = format!("data: {}\n", "A".repeat(100_000));
        let mut reader = LineReader::new(Cursor::new(line));

        let result = reader.next_line();
        assert!(result.is_ok());
    }

    #[test]
    fn test_default_limit_rejects_huge_lines() {
        // 2MB line (exceeds 1MB default)
        let line = format!("data: {}\n", "A".repeat(2_000_000));
        let mut reader = LineReader::new(Cursor::new(line));

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_multiple_long_lines() {
        let config_max = 50;
        let input = format!(
            "version: 1.0\nid: 1\ndata: {}\nid: 2\ndata: {}\n",
            "A".repeat(60), // Line 3: "data: " + 60 A's exceeds the limit
            "B".repeat(60)  // Line 5: exceeds the limit as well
        );
        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);

        // Should successfully read lines that are within the limit
        assert!(reader.next_line().is_ok()); // version
        assert!(reader.next_line().is_ok()); // id: 1

        // Should fail on first overly long line
        let (line, _, _) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 3); // Third line (the first long data line)

        // The oversized line was skipped, so reading continues normally
        assert_eq!(reader.next_line().unwrap(), Some((4, "id: 2".to_string())));
        let (line, _, _) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 5);
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_line_without_newline_checked() {
        let config_max = 100;
        // No trailing newline, 101 chars
        let input = format!("data: {}", "A".repeat(95));
        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_very_long_single_line_without_newline() {
        let config_max = 1000;
        // Create a line that's way over the limit without any newline
        let input = "A".repeat(10_000);
        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);

        let (line, length, limit) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 1);
        // The actual length detected will be at least the limit
        // It could be detected at the buffer boundary (8192) or the full length
        assert!(length >= config_max);
        assert_eq!(limit, 1000);
    }

    #[test]
    fn test_line_length_limit_with_crlf() {
        let config_max = 50;
        // Test with CRLF line endings
        let line = format!("data: {}\r\n", "A".repeat(60)); // Exceeds limit
        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_line_length_across_buffer_boundaries() {
        let config_max = 1000;
        // Create a line that's larger than typical buffer sizes
        let long_line = format!("data: {}\n", "A".repeat(2000));
        let mut reader =
            LineReader::with_capacity_and_max_length(Cursor::new(long_line), 64, config_max);

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_unlimited_config_allows_any_length() {
        // Use usize::MAX to simulate unlimited config
        let config_max = usize::MAX;
        let line = format!("data: {}\n", "A".repeat(1_000_000));
        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);

        let result = reader.next_line();
        assert!(result.is_ok());
    }

    #[test]
    fn test_empty_line_respects_limit() {
        let config_max = 10;
        let input = "\n"; // Empty line should be fine
        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);

        let result = reader.next_line();
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), Some((1, String::new())));
    }

    #[test]
    fn test_zero_length_limit() {
        let config_max = 0;
        let input = "x\n";
        let mut reader = LineReader::with_max_length(Cursor::new(input), config_max);

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::LineTooLong { .. }
        ));
    }

    #[test]
    fn test_line_too_long_error_message() {
        let config_max = 100;
        let line = format!("data: {}\n", "A".repeat(200));
        let mut reader = LineReader::with_max_length(Cursor::new(line), config_max);

        let result = reader.next_line();
        let err = result.unwrap_err();
        let msg = format!("{err}");

        assert!(msg.contains("exceeds maximum length"));
        assert!(msg.contains("100"));
    }

    #[test]
    fn test_mixed_valid_and_invalid_lines() {
        let config_max = 50;
        let input = format!(
            "short\n{}\nanother short\n{}\n",
            "A".repeat(100), // Too long
            "B".repeat(200)  // Also too long
        );
        let mut reader = LineReader::with_max_length(Cursor::new(input.as_str()), config_max);

        // First line OK
        assert_eq!(reader.next_line().unwrap(), Some((1, "short".to_string())));

        // Second line should fail
        let (line, _, _) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 2);

        // The reader recovers: the valid third line is still delivered
        assert_eq!(
            reader.next_line().unwrap(),
            Some((3, "another short".to_string()))
        );

        // Fourth line fails again, then the input is exhausted
        let (line, _, _) = expect_line_too_long(reader.next_line());
        assert_eq!(line, 4);
        assert_eq!(reader.next_line().unwrap(), None);
    }

    #[test]
    fn test_line_length_checked_before_memory_allocation() {
        // This test verifies that we check the limit BEFORE allocating excessive memory.
        // The key insight is that with chunk-based reading using fill_buf, we check
        // the length as we read chunks, not after reading everything into memory.
        let config_max = 100;

        // Create a line that's way over the limit
        let huge_line = "A".repeat(10_000_000); // 10MB
        let input = format!("{huge_line}\n");

        let mut reader =
            LineReader::with_capacity_and_max_length(Cursor::new(input), 64, config_max);

        // This should fail quickly without allocating 10MB
        let (_, length, limit) = expect_line_too_long(reader.next_line());
        assert!(length > limit);
        assert_eq!(limit, 100);
    }

    // ==================== Security: UTF-8 validation tests ====================

    #[test]
    fn test_invalid_utf8_rejected() {
        let mut reader = LineReader::new(Cursor::new(vec![0xFF, 0xFE, 0xFD, 0x0A]));

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::InvalidUtf8 { .. }
        ));
    }

    #[test]
    fn test_invalid_utf8_in_middle_of_line() {
        let mut input = vec![];
        input.extend_from_slice(b"valid start");
        input.extend_from_slice(&[0xFF, 0xFE]); // Invalid UTF-8
        input.extend_from_slice(b" end\n");

        let mut reader = LineReader::new(Cursor::new(input));

        let result = reader.next_line();
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            StreamError::InvalidUtf8 { .. }
        ));
    }

    #[test]
    fn test_invalid_utf8_error_includes_line_number() {
        let mut reader = LineReader::new(Cursor::new(vec![0xFF, 0xFE, 0x0A]));

        let result = reader.next_line();
        if let Err(StreamError::InvalidUtf8 { line, .. }) = result {
            assert_eq!(line, 1);
        } else {
            panic!("Expected InvalidUtf8 error");
        }
    }

    #[test]
    fn test_valid_utf8_multibyte_characters() {
        // Test with valid multibyte UTF-8
        let input = "Hello 世界 🎉\n";
        let mut reader = LineReader::new(Cursor::new(input));

        let result = reader.next_line();
        assert!(result.is_ok());
        assert_eq!(result.unwrap(), Some((1, "Hello 世界 🎉".to_string())));
    }

    // ==================== With max_length constructors tests ====================

    #[test]
    fn test_with_max_length_constructor() {
        let input = "test\n";
        let reader = LineReader::with_max_length(Cursor::new(input), 500);
        assert_eq!(reader.line_number(), 0);
    }

    #[test]
    fn test_with_capacity_and_max_length_constructor() {
        let input = "test\n";
        let reader = LineReader::with_capacity_and_max_length(Cursor::new(input), 1024, 500);
        assert_eq!(reader.line_number(), 0);
    }
}