copybook_codec/
iterator.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2//! Record iterator for streaming access to decoded records
3//!
4//! This module provides iterator-based access to records for programmatic processing,
5//! allowing users to process records one at a time without loading entire files into memory.
6//!
7//! # Overview
8//!
9//! The iterator module implements streaming record processing with bounded memory usage.
10//! It provides low-level iterator primitives for reading COBOL data files sequentially,
11//! supporting both fixed-length and RDW (Record Descriptor Word) variable-length formats.
12//!
13//! Key capabilities:
14//!
15//! 1. **Streaming iteration** ([`RecordIterator`]) - Process records one at a time
16//! 2. **Format flexibility** - Handle both fixed-length and RDW variable-length records
17//! 3. **Raw access** ([`RecordIterator::read_raw_record`]) - Access undecoded record bytes
18//! 4. **Convenience functions** ([`iter_records_from_file`], [`iter_records`]) - Simplified creation
19//!
20//! # Performance Characteristics
21//!
22//! The iterator uses buffered I/O and maintains bounded memory usage:
23//! - **Memory**: One record buffer (typically <32 KiB per record)
24//! - **Throughput**: Depends on decode complexity (DISPLAY vs COMP-3)
25//! - **Latency**: Sequential I/O optimized with `BufReader`
26//!
27//! For high-throughput parallel processing, consider using [`crate::decode_file_to_jsonl`]
28//! which provides parallel worker pools and streaming output.
29//!
30//! # Examples
31//!
32//! ## Basic Fixed-Length Record Iteration
33//!
34//! ```rust
35//! use copybook_codec::{iter_records_from_file, DecodeOptions, Codepage, RecordFormat};
36//! use copybook_core::parse_copybook;
37//!
38//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
39//! // Parse copybook schema
40//! let copybook_text = r#"
41//!     01 CUSTOMER-RECORD.
42//!        05 CUSTOMER-ID    PIC 9(5).
43//!        05 CUSTOMER-NAME  PIC X(20).
44//!        05 BALANCE        PIC S9(7)V99 COMP-3.
45//! "#;
46//! let schema = parse_copybook(copybook_text)?;
47//!
48//! // Configure decoding options
49//! let options = DecodeOptions::new()
50//!     .with_codepage(Codepage::CP037)
51//!     .with_format(RecordFormat::Fixed);
52//!
53//! // Create iterator from file
54//! # #[cfg(not(test))]
55//! let iterator = iter_records_from_file("customers.bin", &schema, &options)?;
56//!
57//! // Process records one at a time
58//! # #[cfg(not(test))]
59//! for (index, result) in iterator.enumerate() {
60//!     match result {
61//!         Ok(json_value) => {
62//!             println!("Record {}: {}", index + 1, json_value);
63//!         }
64//!         Err(error) => {
65//!             eprintln!("Error in record {}: {}", index + 1, error);
66//!             break; // Stop on first error
67//!         }
68//!     }
69//! }
70//! # Ok(())
71//! # }
72//! ```
73//!
74//! ## RDW Variable-Length Records
75//!
76//! ```rust
77//! use copybook_codec::{RecordIterator, DecodeOptions, RecordFormat};
78//! use copybook_core::parse_copybook;
79//! use std::fs::File;
80//!
81//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
82//! let copybook_text = r#"
83//!     01 TRANSACTION.
84//!        05 TRAN-ID       PIC 9(10).
85//!        05 TRAN-AMOUNT   PIC S9(9)V99 COMP-3.
86//!        05 TRAN-DESC     PIC X(100).
87//! "#;
88//! let schema = parse_copybook(copybook_text)?;
89//!
90//! let options = DecodeOptions::new()
91//!     .with_format(RecordFormat::RDW);  // RDW variable-length format
92//!
93//! # #[cfg(not(test))]
94//! let file = File::open("transactions.dat")?;
95//! # #[cfg(test)]
96//! # let file = std::io::Cursor::new(vec![]);
97//! let mut iterator = RecordIterator::new(file, &schema, &options)?;
98//!
99//! // Process with error recovery
100//! let mut processed = 0;
101//! let mut errors = 0;
102//!
103//! for (index, result) in iterator.enumerate() {
104//!     match result {
105//!         Ok(json_value) => {
106//!             processed += 1;
107//!             // Process record...
108//!         }
109//!         Err(error) => {
110//!             errors += 1;
111//!             eprintln!("Record {}: {}", index + 1, error);
112//!
113//!             if errors > 10 {
114//!                 eprintln!("Too many errors, stopping");
115//!                 break;
116//!             }
117//!         }
118//!     }
119//! }
120//!
121//! println!("Processed: {}, Errors: {}", processed, errors);
122//! # Ok(())
123//! # }
124//! ```
125//!
126//! ## Raw Record Access (No Decoding)
127//!
128//! ```rust
129//! use copybook_codec::{RecordIterator, DecodeOptions, RecordFormat};
130//! use copybook_core::parse_copybook;
131//! use std::io::Cursor;
132//!
133//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
134//! let copybook_text = "01 RECORD.\n   05 DATA PIC X(10).";
135//! let schema = parse_copybook(copybook_text)?;
136//!
137//! let options = DecodeOptions::new()
138//!     .with_format(RecordFormat::Fixed);
139//!
140//! let data = b"RECORD0001RECORD0002";
141//! let mut iterator = RecordIterator::new(Cursor::new(data), &schema, &options)?;
142//!
143//! // Read raw bytes without JSON decoding
144//! while let Some(raw_bytes) = iterator.read_raw_record()? {
145//!     println!("Raw record {}: {} bytes",
146//!              iterator.current_record_index(),
147//!              raw_bytes.len());
148//!
149//!     // Process raw bytes directly...
150//!     // (useful for binary analysis, checksums, etc.)
151//! }
152//! # Ok(())
153//! # }
154//! ```
155//!
156//! ## Collecting Records into a Vec
157//!
158//! ```rust
159//! use copybook_codec::{iter_records, DecodeOptions};
160//! use copybook_core::parse_copybook;
161//! use serde_json::Value;
162//! use std::io::Cursor;
163//!
164//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
165//! let copybook_text = "01 RECORD.\n   05 ID PIC 9(5).";
166//! let schema = parse_copybook(copybook_text)?;
167//! let options = DecodeOptions::default();
168//!
169//! let data = b"0000100002";
170//! let iterator = iter_records(Cursor::new(data), &schema, &options)?;
171//!
172//! // Collect all successful records
173//! let records: Vec<Value> = iterator
174//!     .filter_map(Result::ok)  // Skip errors
175//!     .collect();
176//!
177//! println!("Collected {} records", records.len());
178//! # Ok(())
179//! # }
180//! ```
181//!
182//! ## Using with `DecodeOptions` and Metadata
183//!
184//! ```rust
185//! use copybook_codec::{iter_records_from_file, DecodeOptions, Codepage, JsonNumberMode};
186//! use copybook_core::parse_copybook;
187//!
188//! # fn example() -> Result<(), Box<dyn std::error::Error>> {
189//! let copybook_text = r#"
190//!     01 RECORD.
191//!        05 AMOUNT PIC S9(9)V99 COMP-3.
192//! "#;
193//! let schema = parse_copybook(copybook_text)?;
194//!
195//! // Configure with lossless numbers and metadata
196//! let options = DecodeOptions::new()
197//!     .with_codepage(Codepage::CP037)
198//!     .with_json_number_mode(JsonNumberMode::Lossless)
199//!     .with_emit_meta(true);  // Include field metadata
200//!
201//! # #[cfg(not(test))]
202//! let iterator = iter_records_from_file("data.bin", &schema, &options)?;
203//!
204//! # #[cfg(not(test))]
205//! for result in iterator {
206//!     let json_value = result?;
207//!     // JSON includes metadata: {"AMOUNT": "123.45", "_meta": {...}}
208//!     println!("{}", serde_json::to_string_pretty(&json_value)?);
209//! }
210//! # Ok(())
211//! # }
212//! ```
213
214use crate::options::{DecodeOptions, RecordFormat};
215use copybook_core::{Error, ErrorCode, Result, Schema};
216use copybook_rdw::RdwHeader;
217use serde_json::Value;
218use std::io::{BufReader, Read};
219
/// Message reported when `RecordFormat::Fixed` is selected but the schema has no
/// fixed record length. The suggested alternative names `RecordFormat::RDW`, the
/// only other format this module handles.
const FIXED_FORMAT_LRECL_MISSING: &str = "Fixed format requires a fixed record length (LRECL). \
     Set `schema.lrecl_fixed` or use `RecordFormat::RDW`.";
222
223/// Iterator over records in a data file, yielding decoded JSON values
224///
225/// This iterator provides streaming access to records, processing them one at a time
226/// to maintain bounded memory usage even for very large files.
227///
228/// # Examples
229///
230/// ```rust,no_run
231/// use copybook_codec::{RecordIterator, DecodeOptions};
232/// use copybook_core::parse_copybook;
233/// # use std::io::Cursor;
234///
235/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
236/// let copybook_text = "01 RECORD.\n   05 ID PIC 9(5).\n   05 NAME PIC X(20).";
237/// let mut schema = parse_copybook(copybook_text)?;
238/// schema.lrecl_fixed = Some(25);
239/// let options = DecodeOptions::default();
240/// # let record_bytes = b"00001ALICE               ";
241/// # let file = Cursor::new(&record_bytes[..]);
242/// // let file = std::fs::File::open("data.bin")?;
243///
244/// let mut iterator = RecordIterator::new(file, &schema, &options)?;
245///
246/// for (record_index, result) in iterator.enumerate() {
247///     match result {
248///         Ok(json_value) => {
249///             println!("Record {}: {}", record_index + 1, json_value);
250///         }
251///         Err(error) => {
252///             eprintln!("Error in record {}: {}", record_index + 1, error);
253///         }
254///     }
255/// }
256/// # Ok(())
257/// # }
258/// ```
pub struct RecordIterator<R: Read> {
    /// Buffered wrapper around the caller-supplied input stream
    reader: BufReader<R>,
    /// Owned copy of the schema used to decode each record
    schema: Schema,
    /// Owned copy of the decoding options (record format, codepage, ...)
    options: DecodeOptions,
    /// Number of records read so far; equals the 1-based index of the most
    /// recently read record and is 0 before the first successful read
    record_index: u64,
    /// Set once no further records will be produced; later reads return immediately
    eof_reached: bool,
    /// Scratch buffer the bytes of the current record are read into
    buffer: Vec<u8>,
}
273
274impl<R: Read> RecordIterator<R> {
275    /// Create a new record iterator
276    ///
277    /// # Arguments
278    ///
279    /// * `reader` - The input stream to read from
280    /// * `schema` - The parsed copybook schema
281    /// * `options` - Decoding options
282    ///
283    /// # Errors
284    /// Returns an error if the record format is incompatible with the schema.
285    #[inline]
286    #[must_use = "Handle the Result or propagate the error"]
287    pub fn new(reader: R, schema: &Schema, options: &DecodeOptions) -> Result<Self> {
288        Ok(Self {
289            reader: BufReader::new(reader),
290            schema: schema.clone(),
291            options: options.clone(),
292            record_index: 0,
293            eof_reached: false,
294            buffer: Vec::new(),
295        })
296    }
297
298    /// Get the current record index (1-based)
299    ///
300    /// This returns the index of the last record that was successfully read,
301    /// or 0 if no records have been read yet.
302    #[inline]
303    #[must_use]
304    pub fn current_record_index(&self) -> u64 {
305        self.record_index
306    }
307
308    /// Check if the iterator has reached the end of the file
309    #[inline]
310    #[must_use]
311    pub fn is_eof(&self) -> bool {
312        self.eof_reached
313    }
314
315    /// Get a reference to the schema being used
316    #[inline]
317    #[must_use]
318    pub fn schema(&self) -> &Schema {
319        &self.schema
320    }
321
322    /// Get a reference to the decode options being used
323    #[inline]
324    #[must_use]
325    pub fn options(&self) -> &DecodeOptions {
326        &self.options
327    }
328
329    /// Read the next record without decoding it
330    ///
331    /// This method reads the raw bytes of the next record without performing
332    /// JSON decoding. Useful for applications that need access to raw record data
333    /// for binary analysis, checksums, or custom processing.
334    ///
335    /// # Returns
336    ///
337    /// * `Ok(Some(bytes))` - The raw record bytes
338    /// * `Ok(None)` - End of file reached
339    /// * `Err(error)` - An error occurred while reading
340    ///
341    /// # Errors
342    /// Returns an error if underlying I/O operations fail or the record format is invalid.
343    ///
344    /// # Examples
345    ///
346    /// ```rust
347    /// use copybook_codec::{RecordIterator, DecodeOptions, RecordFormat};
348    /// use copybook_core::parse_copybook;
349    /// use std::io::Cursor;
350    ///
351    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
352    /// let copybook_text = "01 RECORD.\n   05 DATA PIC X(8).";
353    /// let schema = parse_copybook(copybook_text)?;
354    ///
355    /// let options = DecodeOptions::new()
356    ///     .with_format(RecordFormat::Fixed);
357    ///
358    /// let data = b"RECORD01RECORD02";
359    /// let mut iterator = RecordIterator::new(Cursor::new(data), &schema, &options)?;
360    ///
361    /// // Read raw bytes
362    /// if let Some(raw_bytes) = iterator.read_raw_record()? {
363    ///     assert_eq!(raw_bytes, b"RECORD01");
364    ///     assert_eq!(iterator.current_record_index(), 1);
365    /// }
366    ///
367    /// if let Some(raw_bytes) = iterator.read_raw_record()? {
368    ///     assert_eq!(raw_bytes, b"RECORD02");
369    ///     assert_eq!(iterator.current_record_index(), 2);
370    /// }
371    ///
372    /// // End of file
373    /// assert!(iterator.read_raw_record()?.is_none());
374    /// assert!(iterator.is_eof());
375    /// # Ok(())
376    /// # }
377    /// ```
378    #[inline]
379    #[must_use = "Handle the Result or propagate the error"]
380    pub fn read_raw_record(&mut self) -> Result<Option<Vec<u8>>> {
381        if self.eof_reached {
382            return Ok(None);
383        }
384
385        self.buffer.clear();
386
387        let record_data = match self.options.format {
388            RecordFormat::Fixed => {
389                let lrecl = self.schema.lrecl_fixed.ok_or_else(|| {
390                    Error::new(ErrorCode::CBKI001_INVALID_STATE, FIXED_FORMAT_LRECL_MISSING)
391                })? as usize;
392                self.buffer.resize(lrecl, 0);
393
394                match self.reader.read_exact(&mut self.buffer) {
395                    Ok(()) => {
396                        self.record_index += 1;
397                        Some(self.buffer.clone())
398                    }
399                    Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
400                        self.eof_reached = true;
401                        return Ok(None);
402                    }
403                    Err(e) => {
404                        return Err(Error::new(
405                            ErrorCode::CBKD301_RECORD_TOO_SHORT,
406                            format!("Failed to read fixed record: {e}"),
407                        ));
408                    }
409                }
410            }
411            RecordFormat::RDW => {
412                // Read RDW header
413                let mut rdw_header = [0u8; 4];
414                match self.reader.read_exact(&mut rdw_header) {
415                    Ok(()) => {}
416                    Err(e) if e.kind() == std::io::ErrorKind::UnexpectedEof => {
417                        self.eof_reached = true;
418                        return Ok(None);
419                    }
420                    Err(e) => {
421                        return Err(Error::new(
422                            ErrorCode::CBKF221_RDW_UNDERFLOW,
423                            format!("Failed to read RDW header: {e}"),
424                        ));
425                    }
426                }
427
428                // Parse length (payload bytes only)
429                let length = usize::from(RdwHeader::from_bytes(rdw_header).length());
430
431                // Read payload
432                self.buffer.resize(length, 0);
433                match self.reader.read_exact(&mut self.buffer) {
434                    Ok(()) => {
435                        self.record_index += 1;
436                        Some(self.buffer.clone())
437                    }
438                    Err(e) => {
439                        return Err(Error::new(
440                            ErrorCode::CBKF221_RDW_UNDERFLOW,
441                            format!("Failed to read RDW payload: {e}"),
442                        ));
443                    }
444                }
445            }
446        };
447
448        Ok(record_data)
449    }
450
451    /// Decode the next record to JSON
452    ///
453    /// This is the main method used by the Iterator implementation.
454    /// It reads and decodes the next record in one operation.
455    #[inline]
456    fn decode_next_record(&mut self) -> Result<Option<Value>> {
457        match self.read_raw_record()? {
458            Some(record_bytes) => {
459                let json_value = crate::decode_record(&self.schema, &record_bytes, &self.options)?;
460                Ok(Some(json_value))
461            }
462            None => Ok(None),
463        }
464    }
465}
466
467impl<R: Read> Iterator for RecordIterator<R> {
468    type Item = Result<Value>;
469
470    #[inline]
471    fn next(&mut self) -> Option<Self::Item> {
472        if self.eof_reached {
473            return None;
474        }
475
476        match self.decode_next_record() {
477            Ok(Some(value)) => Some(Ok(value)),
478            Ok(None) => {
479                self.eof_reached = true;
480                None
481            }
482            Err(error) => {
483                // On error, we still advance the record index if we were able to read something
484                Some(Err(error))
485            }
486        }
487    }
488}
489
490/// Convenience function to create a record iterator from a file path
491///
492/// This is the most common way to create an iterator for processing COBOL data files.
493/// It handles file opening and iterator creation in a single call.
494///
495/// # Arguments
496///
497/// * `file_path` - Path to the data file
498/// * `schema` - The parsed copybook schema
499/// * `options` - Decoding options
500///
501/// # Errors
502/// Returns an error if the file cannot be opened or the iterator cannot be created.
503///
504/// # Examples
505///
506/// ## Basic Usage with Fixed-Length Records
507///
508/// ```rust,no_run
509/// use copybook_codec::{iter_records_from_file, DecodeOptions, Codepage, RecordFormat};
510/// use copybook_core::parse_copybook;
511///
512/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
513/// let copybook_text = r#"
514///     01 EMPLOYEE-RECORD.
515///        05 EMP-ID        PIC 9(6).
516///        05 EMP-NAME      PIC X(30).
517///        05 EMP-SALARY    PIC S9(7)V99 COMP-3.
518/// "#;
519/// let schema = parse_copybook(copybook_text)?;
520///
521/// let options = DecodeOptions::new()
522///     .with_codepage(Codepage::CP037)
523///     .with_format(RecordFormat::Fixed);
524///
525/// let iterator = iter_records_from_file("employees.dat", &schema, &options)?;
526///
527/// for (index, result) in iterator.enumerate() {
528///     match result {
529///         Ok(employee) => println!("Employee {}: {}", index + 1, employee),
530///         Err(e) => eprintln!("Error at record {}: {}", index + 1, e),
531///     }
532/// }
533/// # Ok(())
534/// # }
535/// ```
536///
537/// ## Processing with Error Limits
538///
539/// ```rust,no_run
540/// use copybook_codec::{iter_records_from_file, DecodeOptions};
541/// use copybook_core::parse_copybook;
542///
543/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
544/// # let schema = parse_copybook("01 R.\n   05 F PIC X(1).")?;
545/// # let options = DecodeOptions::default();
546/// let iterator = iter_records_from_file("data.bin", &schema, &options)?;
547///
548/// let mut success_count = 0;
549/// let mut error_count = 0;
550/// const MAX_ERRORS: usize = 100;
551///
552/// for result in iterator {
553///     match result {
554///         Ok(_) => success_count += 1,
555///         Err(e) => {
556///             error_count += 1;
557///             eprintln!("Error: {}", e);
558///
559///             if error_count >= MAX_ERRORS {
560///                 eprintln!("Too many errors, aborting");
561///                 break;
562///             }
563///         }
564///     }
565/// }
566///
567/// println!("Success: {}, Errors: {}", success_count, error_count);
568/// # Ok(())
569/// # }
570/// ```
571#[inline]
572#[must_use = "Handle the Result or propagate the error"]
573pub fn iter_records_from_file<P: AsRef<std::path::Path>>(
574    file_path: P,
575    schema: &Schema,
576    options: &DecodeOptions,
577) -> Result<RecordIterator<std::fs::File>> {
578    let file = std::fs::File::open(file_path)
579        .map_err(|e| Error::new(ErrorCode::CBKF104_RDW_SUSPECT_ASCII, e.to_string()))?;
580
581    RecordIterator::new(file, schema, options)
582}
583
584/// Convenience function to create a record iterator from any readable source
585///
586/// This function provides maximum flexibility by accepting any type that implements
587/// the `Read` trait, including files, cursors, network streams, or custom readers.
588///
589/// # Arguments
590///
591/// * `reader` - Any type implementing Read (File, Cursor, `TcpStream`, etc.)
592/// * `schema` - The parsed copybook schema
593/// * `options` - Decoding options
594///
595/// # Errors
596/// Returns an error if the iterator cannot be created.
597///
598/// # Examples
599///
600/// ## Using with In-Memory Data (Cursor)
601///
602/// ```rust
603/// use copybook_codec::{iter_records, DecodeOptions, RecordFormat};
604/// use copybook_core::parse_copybook;
605/// use std::io::Cursor;
606///
607/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
608/// let copybook_text = "01 RECORD.\n   05 ID PIC 9(3).\n   05 NAME PIC X(5).";
609/// let schema = parse_copybook(copybook_text)?;
610///
611/// let options = DecodeOptions::new()
612///     .with_format(RecordFormat::Fixed);
613///
614/// // Create iterator from in-memory data
615/// let data = b"001ALICE002BOB  003CAROL";
616/// let iterator = iter_records(Cursor::new(data), &schema, &options)?;
617///
618/// let records: Vec<_> = iterator.collect::<Result<Vec<_>, _>>()?;
619/// assert_eq!(records.len(), 3);
620/// # Ok(())
621/// # }
622/// ```
623///
624/// ## Using with File
625///
626/// ```rust,no_run
627/// use copybook_codec::{iter_records, DecodeOptions};
628/// use copybook_core::parse_copybook;
629/// use std::fs::File;
630///
631/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
632/// let schema = parse_copybook("01 RECORD.\n   05 DATA PIC X(10).")?;
633/// let options = DecodeOptions::default();
634///
635/// let file = File::open("data.bin")?;
636/// let iterator = iter_records(file, &schema, &options)?;
637///
638/// for result in iterator {
639///     let record = result?;
640///     println!("{}", record);
641/// }
642/// # Ok(())
643/// # }
644/// ```
645///
646/// ## Using with Compressed Data
647///
648/// ```text
649/// use copybook_codec::{iter_records, DecodeOptions};
650/// use copybook_core::parse_copybook;
651/// use std::fs::File;
652/// use flate2::read::GzDecoder;
653///
654/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
655/// let schema = parse_copybook("01 RECORD.\n   05 DATA PIC X(10).")?;
656/// let options = DecodeOptions::default();
657///
658/// // Read from gzipped file
659/// let file = File::open("data.bin.gz")?;
660/// let decoder = GzDecoder::new(file);
661/// let iterator = iter_records(decoder, &schema, &options)?;
662///
663/// for result in iterator {
664///     let record = result?;
665///     // Process decompressed record...
666/// }
667/// # Ok(())
668/// # }
669/// ```
670#[inline]
671#[must_use = "Handle the Result or propagate the error"]
672pub fn iter_records<R: Read>(
673    reader: R,
674    schema: &Schema,
675    options: &DecodeOptions,
676) -> Result<RecordIterator<R>> {
677    RecordIterator::new(reader, schema, options)
678}
679
680#[cfg(test)]
681#[allow(clippy::expect_used)]
682#[allow(clippy::unwrap_used)]
683#[allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)]
684mod tests {
685    use super::*;
686    use crate::Codepage;
687    use copybook_core::parse_copybook;
688    use std::io::Cursor;
689
690    #[test]
691    fn test_record_iterator_basic() {
692        let copybook_text = r"
693            01 RECORD.
694               05 ID PIC 9(3).
695               05 NAME PIC X(5).
696        ";
697
698        let schema = parse_copybook(copybook_text).unwrap();
699
700        // Create test data: two 8-byte fixed records
701        let test_data = b"001ALICE002BOB  ";
702        let cursor = Cursor::new(test_data);
703
704        let options = DecodeOptions {
705            format: RecordFormat::Fixed,
706            ..DecodeOptions::default()
707        };
708
709        let iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
710
711        // Just test that the iterator can be created successfully
712        assert_eq!(iterator.current_record_index(), 0);
713        assert!(!iterator.is_eof());
714    }
715
716    #[test]
717    fn test_record_iterator_rdw() {
718        let copybook_text = r"
719            01 RECORD.
720               05 ID PIC 9(3).
721               05 NAME PIC X(5).
722        ";
723
724        let schema = parse_copybook(copybook_text).unwrap();
725
726        // Create RDW test data:
727        // Record 1: length=8, reserved=0, data="001ALICE"
728        // Record 2: length=6, reserved=0, data="002BOB"
729        let test_data = vec![
730            0x00, 0x08, 0x00, 0x00, // RDW header: length=8, reserved=0
731            b'0', b'0', b'1', b'A', b'L', b'I', b'C', b'E', // Record 1 data
732            0x00, 0x06, 0x00, 0x00, // RDW header: length=6, reserved=0
733            b'0', b'0', b'2', b'B', b'O', b'B', // Record 2 data
734        ];
735
736        let cursor = Cursor::new(test_data);
737
738        let options = DecodeOptions {
739            format: RecordFormat::RDW,
740            ..DecodeOptions::default()
741        };
742
743        let iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
744
745        // Just test that the iterator can be created successfully
746        assert_eq!(iterator.current_record_index(), 0);
747        assert!(!iterator.is_eof());
748    }
749
750    #[test]
751    fn test_raw_record_reading() {
752        let copybook_text = r"
753            01 RECORD.
754               05 ID PIC 9(3).
755               05 NAME PIC X(5).
756        ";
757
758        let schema = parse_copybook(copybook_text).unwrap();
759
760        let test_data = b"001ALICE";
761        let cursor = Cursor::new(test_data);
762
763        let options = DecodeOptions {
764            format: RecordFormat::Fixed,
765            ..DecodeOptions::default()
766        };
767
768        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
769
770        // Read raw record
771        let raw_record = iterator.read_raw_record().unwrap().unwrap();
772        assert_eq!(raw_record, b"001ALICE");
773        assert_eq!(iterator.current_record_index(), 1);
774
775        // End of file
776        assert!(iterator.read_raw_record().unwrap().is_none());
777    }
778
779    #[test]
780    fn test_iterator_error_handling() {
781        let copybook_text = r"
782            01 RECORD.
783               05 ID PIC 9(3).
784               05 NAME PIC X(5).
785        ";
786
787        let schema = parse_copybook(copybook_text).unwrap();
788
789        // Create incomplete record (only 4 bytes instead of 8)
790        let test_data = b"001A";
791        let cursor = Cursor::new(test_data);
792
793        let options = DecodeOptions {
794            format: RecordFormat::Fixed,
795            ..DecodeOptions::default()
796        };
797
798        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
799
800        // Should yield EOF (Ok(None)) when encountering truncated fixed-length data
801        assert!(iterator.next().is_none());
802    }
803
804    #[test]
805    fn test_iterator_fixed_format_missing_lrecl_errors_on_next() {
806        // A schema without a fixed record length
807        let copybook_text = "01 SOME-GROUP. 05 SOME-FIELD PIC X(1).";
808        let mut schema = parse_copybook(copybook_text).unwrap();
809        schema.lrecl_fixed = None; // Ensure it's None
810
811        let test_data = b"";
812        let cursor = Cursor::new(test_data);
813
814        let options = DecodeOptions {
815            format: RecordFormat::Fixed,
816            ..DecodeOptions::default()
817        };
818
819        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
820
821        let first = iterator.next().unwrap();
822        assert!(first.is_err());
823        if let Err(e) = first {
824            assert_eq!(e.code, ErrorCode::CBKI001_INVALID_STATE);
825            assert_eq!(e.message, FIXED_FORMAT_LRECL_MISSING);
826        }
827    }
828
829    #[test]
830    fn test_iterator_schema_and_options_accessors() {
831        let copybook_text = r"
832            01 RECORD.
833               05 ID PIC 9(3).
834               05 NAME PIC X(5).
835        ";
836
837        let mut schema = parse_copybook(copybook_text).unwrap();
838        schema.lrecl_fixed = Some(8);
839        let test_data = b"001ALICE";
840        let cursor = Cursor::new(test_data);
841
842        let options = DecodeOptions {
843            format: RecordFormat::Fixed,
844            codepage: Codepage::ASCII,
845            ..DecodeOptions::default()
846        };
847
848        let iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
849
850        // Test schema accessor
851        assert_eq!(iterator.schema().fields[0].name, "RECORD");
852
853        // Test options accessor
854        assert_eq!(iterator.options().format, RecordFormat::Fixed);
855    }
856
857    #[test]
858    fn test_iterator_multiple_fixed_records() {
859        let copybook_text = r"
860            01 RECORD.
861               05 ID PIC 9(3).
862               05 NAME PIC X(5).
863        ";
864
865        let mut schema = parse_copybook(copybook_text).unwrap();
866        schema.lrecl_fixed = Some(8);
867
868        // Create test data: three 8-byte fixed records
869        let test_data = b"001ALICE002BOB  003CAROL";
870        let cursor = Cursor::new(test_data);
871
872        let options = DecodeOptions {
873            format: RecordFormat::Fixed,
874            codepage: Codepage::ASCII,
875            ..DecodeOptions::default()
876        };
877
878        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
879
880        // Read all records
881        let mut count = 0;
882        for result in iterator.by_ref() {
883            assert!(result.is_ok(), "Record {count} should decode successfully");
884            count += 1;
885        }
886
887        assert_eq!(count, 3);
888        assert_eq!(iterator.current_record_index(), 3);
889        assert!(iterator.is_eof());
890    }
891
892    #[test]
893    fn test_iterator_rdw_multiple_records() {
894        let copybook_text = r"
895            01 RECORD.
896               05 ID PIC 9(3).
897               05 NAME PIC X(5).
898        ";
899
900        let schema = parse_copybook(copybook_text).unwrap();
901
902        // Create RDW test data with three records
903        let test_data = vec![
904            // Record 1
905            0x00, 0x08, 0x00, 0x00, // RDW header: length=8
906            b'0', b'0', b'1', b'A', b'L', b'I', b'C', b'E', // Record 2
907            0x00, 0x06, 0x00, 0x00, // RDW header: length=6
908            b'0', b'0', b'2', b'B', b'O', b'B', // Record 3
909            0x00, 0x08, 0x00, 0x00, // RDW header: length=8
910            b'0', b'0', b'3', b'C', b'A', b'R', b'O', b'L',
911        ];
912
913        let cursor = Cursor::new(test_data);
914
915        let options = DecodeOptions {
916            format: RecordFormat::RDW,
917            codepage: Codepage::ASCII,
918            ..DecodeOptions::default()
919        };
920
921        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
922
923        // Read all records
924        let mut count = 0;
925        for result in iterator.by_ref() {
926            assert!(result.is_ok(), "Record {count} should decode successfully");
927            count += 1;
928        }
929
930        assert_eq!(count, 3);
931        assert_eq!(iterator.current_record_index(), 3);
932        assert!(iterator.is_eof());
933    }
934
935    #[test]
936    fn test_iter_records_convenience() {
937        let copybook_text = r"
938            01 RECORD.
939               05 ID PIC 9(3).
940               05 NAME PIC X(5).
941        ";
942
943        let schema = parse_copybook(copybook_text).unwrap();
944
945        let test_data = b"001ALICE002BOB  ";
946        let cursor = Cursor::new(test_data);
947
948        let options = DecodeOptions {
949            format: RecordFormat::Fixed,
950            ..DecodeOptions::default()
951        };
952
953        let iterator = iter_records(cursor, &schema, &options).unwrap();
954
955        assert_eq!(iterator.current_record_index(), 0);
956        assert!(!iterator.is_eof());
957    }
958
959    #[test]
960    fn test_iterator_with_empty_data() {
961        let copybook_text = r"
962            01 RECORD.
963               05 ID PIC 9(3).
964               05 NAME PIC X(5).
965        ";
966
967        let mut schema = parse_copybook(copybook_text).unwrap();
968        schema.lrecl_fixed = Some(8);
969
970        let test_data = b"";
971        let cursor = Cursor::new(test_data);
972
973        let options = DecodeOptions {
974            format: RecordFormat::Fixed,
975            ..DecodeOptions::default()
976        };
977
978        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
979
980        // Should immediately return None for empty data
981        assert!(iterator.next().is_none());
982        assert!(iterator.is_eof());
983        assert_eq!(iterator.current_record_index(), 0);
984    }
985
986    #[test]
987    fn test_iterator_raw_record_eof() {
988        let copybook_text = r"
989            01 RECORD.
990               05 ID PIC 9(3).
991               05 NAME PIC X(5).
992        ";
993
994        let schema = parse_copybook(copybook_text).unwrap();
995
996        let test_data = b"001ALICE";
997        let cursor = Cursor::new(test_data);
998
999        let options = DecodeOptions {
1000            format: RecordFormat::Fixed,
1001            ..DecodeOptions::default()
1002        };
1003
1004        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
1005
1006        // Read first record
1007        assert!(iterator.read_raw_record().unwrap().is_some());
1008        assert_eq!(iterator.current_record_index(), 1);
1009
1010        // Read second record (should be None)
1011        assert!(iterator.read_raw_record().unwrap().is_none());
1012        assert!(iterator.is_eof());
1013    }
1014
1015    #[test]
1016    fn test_iterator_collect_results() {
1017        let copybook_text = r"
1018            01 RECORD.
1019               05 ID PIC 9(3).
1020               05 NAME PIC X(5).
1021        ";
1022
1023        let mut schema = parse_copybook(copybook_text).unwrap();
1024        schema.lrecl_fixed = Some(8);
1025
1026        let test_data = b"001ALICE002BOB  003CAROL";
1027        let cursor = Cursor::new(test_data);
1028
1029        let options = DecodeOptions {
1030            format: RecordFormat::Fixed,
1031            codepage: Codepage::ASCII,
1032            ..DecodeOptions::default()
1033        };
1034
1035        let iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
1036
1037        // Collect all results
1038        let results: Vec<Result<Value>> = iterator.collect();
1039
1040        assert_eq!(results.len(), 3);
1041        for result in results {
1042            assert!(result.is_ok());
1043        }
1044    }
1045
1046    #[test]
1047    fn test_iterator_with_decode_error() {
1048        let copybook_text = r"
1049            01 RECORD.
1050               05 ID PIC 9(3).
1051               05 NAME PIC X(5).
1052        ";
1053
1054        let mut schema = parse_copybook(copybook_text).unwrap();
1055        schema.lrecl_fixed = Some(8);
1056
1057        // Create data that will decode successfully for first record
1058        let test_data = b"001ALICE";
1059        let cursor = Cursor::new(test_data);
1060
1061        let options = DecodeOptions {
1062            format: RecordFormat::Fixed,
1063            codepage: Codepage::ASCII,
1064            ..DecodeOptions::default()
1065        };
1066
1067        let mut iterator = RecordIterator::new(cursor, &schema, &options).unwrap();
1068
1069        // First record should decode successfully
1070        let first = iterator.next();
1071        assert!(first.is_some());
1072        assert!(first.unwrap().is_ok());
1073
1074        // Second call should return None (EOF)
1075        assert!(iterator.next().is_none());
1076    }
1077}
copybook_codec/iterator.rs

copybook_codec/
iterator.rs