use super::column::Column;
use super::column_anchor::ColumnAnchor;
use super::dct_error::{DctError, Result};
use super::dct_reader_state::{RelativeOffsetCache, parse_field, resolve_runtime_offset};
use super::value::Value;
/// A borrowed view of one observation whose columns are decoded on demand.
///
/// Building the record only stores the borrows below; a column is parsed when
/// `value` is called for its index.
#[derive(Debug)]
pub struct LazyRecord<'a> {
/// Text lines for this observation; `value` selects one with the column's
/// `line_offset()`.
lines: &'a [String],
/// Column definitions; the index passed to `value` indexes this slice.
columns: &'a [Column],
/// Observation number, forwarded to `parse_field` and
/// `resolve_runtime_offset` (presumably for error reporting — confirm there).
observation: usize,
/// Shared cache of already-resolved offsets for cursor-relative columns,
/// indexed by column index and accessed through `borrow`/`borrow_mut`.
relative_offset_cache: &'a RelativeOffsetCache,
}
impl<'a> LazyRecord<'a> {
#[must_use]
pub(crate) fn new(
lines: &'a [String],
columns: &'a [Column],
observation: usize,
relative_offset_cache: &'a RelativeOffsetCache,
) -> Self {
Self {
lines,
columns,
observation,
relative_offset_cache,
}
}
#[must_use]
#[inline]
pub fn len(&self) -> usize {
self.columns.len()
}
#[must_use]
#[inline]
pub fn is_empty(&self) -> bool {
self.columns.is_empty()
}
pub fn value(&self, index: usize) -> Result<Value<'a>> {
let column = self
.columns
.get(index)
.ok_or_else(|| DctError::Io(std::io::Error::other("column index out of bounds")))?;
let line = self.lines.get(column.line_offset()).ok_or_else(|| {
DctError::Io(std::io::Error::other(
"internal invariant violated: line_offset exceeds lines_per_observation",
))
})?;
let runtime_offset = match column.anchor() {
ColumnAnchor::Absolute(offset) => offset,
ColumnAnchor::RelativeToCursor { .. } => self.resolve_relative(index, line)?,
};
parse_field(line, runtime_offset, column, self.observation, None)
}
fn resolve_relative(&self, index: usize, line: &str) -> Result<usize> {
if let Some(cached) = self.relative_offset_cache.borrow()[index] {
return Ok(cached);
}
let resolved = resolve_runtime_offset(line, self.columns, index, self.observation)?;
self.relative_offset_cache.borrow_mut()[index] = Some(resolved);
Ok(resolved)
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::stata::dct::dct_reader::DctReader;
    use crate::stata::dct::dct_source::DctSource;
    use crate::stata::stata_byte::StataByte;
    use crate::stata::stata_int::StataInt;
    use std::io::Cursor;

    /// Parses a dictionary with embedded data and returns a reader positioned
    /// on that data. Panics if the source is not the embedded variant.
    fn parse_with_data(input: &[u8]) -> DctReader<Cursor<&[u8]>> {
        let DctSource::Embedded { schema, reader } = DctSource::options()
            .from_reader(Cursor::new(input))
            .unwrap()
        else {
            panic!("expected embedded data")
        };
        DctReader::options(schema).from_reader(reader)
    }

    #[test]
    fn lazy_record_decodes_individual_columns() {
        const DATA: &[u8] = b"dictionary {\n\
            _column(1) byte b1 %3.0f\n\
            _column(4) int i1 %5.0f\n\
            _column(9) str s1 %5s\n\
            }\n\
            04212345hello\n";

        let mut reader = parse_with_data(DATA);
        let record = reader.read_lazy_record().unwrap().unwrap();
        assert_eq!(record.len(), 3);

        // Columns are requested out of declaration order on purpose.
        match record.value(2).unwrap() {
            Value::String(text) => assert_eq!(text.as_ref(), "hello"),
            other => panic!("expected string, got {other:?}"),
        }

        let first = record.value(0).unwrap();
        assert!(matches!(first, Value::Byte(StataByte::Present(42))));

        let second = record.value(1).unwrap();
        assert!(matches!(second, Value::Int(StataInt::Present(12345))));
    }

    #[test]
    fn lazy_record_index_out_of_bounds_errors() {
        const DATA: &[u8] = b"dictionary {\n\
            _column(1) byte b1 %3.0f\n\
            }\n\
            042\n";

        let mut reader = parse_with_data(DATA);
        let record = reader.read_lazy_record().unwrap().unwrap();

        let outcome = record.value(99);
        assert!(outcome.is_err());
    }

    #[test]
    fn lazy_reader_iterates_to_eof() {
        const DATA: &[u8] = b"dictionary {\n\
            _column(1) byte b1 %3.0f\n\
            }\n\
            001\n\
            002\n\
            003\n";

        let mut reader = parse_with_data(DATA);
        let mut seen = 0;
        while let Some(record) = reader.read_lazy_record().unwrap() {
            let decoded = record.value(0).unwrap();
            assert!(matches!(decoded, Value::Byte(_)));
            seen += 1;
        }
        assert_eq!(seen, 3);
    }

    #[test]
    fn lazy_record_resolves_free_format_chain_at_runtime() {
        const DATA: &[u8] = b"dictionary {\n\
            _column(1) byte b1 %f\n\
            byte b2 %f\n\
            byte b3 %f\n\
            }\n\
            10 20 30\n";

        let mut reader = parse_with_data(DATA);
        let record = reader.read_lazy_record().unwrap().unwrap();

        // Each column after the first is positioned relative to the previous one.
        for (index, expected) in [10, 20, 30].into_iter().enumerate() {
            assert!(matches!(
                record.value(index).unwrap(),
                Value::Byte(StataByte::Present(actual)) if actual == expected
            ));
        }
    }

    #[test]
    fn lazy_record_resolves_skip_after_free_format_at_runtime() {
        const DATA: &[u8] = b"dictionary {\n\
            _column(1) byte b1 %f\n\
            _skip(2) byte b2 %f\n\
            }\n\
            10 20\n";

        let mut reader = parse_with_data(DATA);
        let record = reader.read_lazy_record().unwrap().unwrap();

        let first = record.value(0).unwrap();
        assert!(matches!(first, Value::Byte(StataByte::Present(10))));

        let second = record.value(1).unwrap();
        assert!(matches!(second, Value::Byte(StataByte::Present(20))));
    }
}