use std::fs::File;
use std::io::{BufReader, Read, Seek, SeekFrom};
use std::path::Path;
use crate::config::ReadOptions;
use crate::dataset::{Column, ColumnData, Dataset, Format};
use crate::error::{Error, Result};
use super::obs::ObservationReader;
use super::parse::{XptMemberInfo, parse_header};
/// File-level metadata for a transport file: the members it describes plus
/// optional library-level fields.
///
/// This is the value `parse_header` yields in `XptReader::new`, and it is what
/// `XptReader::file_info` hands back to callers.
#[derive(Debug, Clone)]
pub struct XptInfo {
/// Metadata for each member; searched by name through `find_member`.
pub members: Vec<XptMemberInfo>,
/// Optional library label.
/// NOTE(review): presumably taken from the library header — confirm in `parse_header`.
pub library_label: Option<String>,
/// Optional creation stamp, kept as text.
/// NOTE(review): the exact text format is not visible here — confirm in `parse_header`.
pub created: Option<String>,
/// Optional modification stamp, kept as text.
/// NOTE(review): the exact text format is not visible here — confirm in `parse_header`.
pub modified: Option<String>,
}
impl XptInfo {
    /// Iterates over the name of every member, in the order they are stored.
    pub fn member_names(&self) -> impl Iterator<Item = &str> {
        let members = self.members.iter();
        members.map(|member| member.name.as_str())
    }

    /// Looks up a member by name, ignoring ASCII case.
    ///
    /// Returns the first member whose name matches, or `None` when no member
    /// matches.
    #[must_use]
    pub fn find_member(&self, name: &str) -> Option<&XptMemberInfo> {
        for member in &self.members {
            if member.name.eq_ignore_ascii_case(name) {
                return Some(member);
            }
        }
        None
    }
}
/// Reader that pairs a buffered, seekable input source with the [`XptInfo`]
/// parsed from its header.
///
/// `R` must implement both [`Read`] and [`Seek`]; member data is located by
/// seeking to each member's recorded observation offset.
pub struct XptReader<R: Read + Seek> {
/// Buffered wrapper around the underlying source.
reader: BufReader<R>,
/// Header metadata, populated by `parse_header` when the reader is created.
file_info: XptInfo,
}
impl<R: Read + Seek> XptReader<R> {
pub fn new(reader: R) -> Result<Self> {
let mut buf_reader = BufReader::new(reader);
let file_info = parse_header(&mut buf_reader)?;
Ok(Self {
reader: buf_reader,
file_info,
})
}
#[must_use]
pub fn file_info(&self) -> &XptInfo {
&self.file_info
}
pub(crate) fn read_member(&mut self, name: &str, options: &ReadOptions) -> Result<Dataset> {
let member = self
.file_info
.find_member(name)
.ok_or_else(|| Error::MemberNotFound {
domain_code: name.to_string(),
})?
.clone();
self.read_member_data(&member, options)
}
pub(crate) fn read_all(&mut self, options: &ReadOptions) -> Result<Vec<Dataset>> {
let members: Vec<_> = self.file_info.members.clone();
let mut datasets = Vec::with_capacity(members.len());
for member in members {
let ds = self.read_member_data(&member, options)?;
datasets.push(ds);
}
Ok(datasets)
}
fn read_member_data(
&mut self,
member: &XptMemberInfo,
options: &ReadOptions,
) -> Result<Dataset> {
self.reader
.seek(SeekFrom::Start(member.obs_offset))
.map_err(Error::Io)?;
let mut obs_reader = ObservationReader::new(&mut self.reader, &member.variables, options)?;
let row_limit = options.row_limit.unwrap_or(usize::MAX);
let mut rows_read = 0;
let mut columns: Vec<ColumnData> = member
.variables
.iter()
.map(|v| {
if v.xpt_type().is_numeric() {
ColumnData::F64(Vec::new())
} else {
ColumnData::String(Vec::new())
}
})
.collect();
while rows_read < row_limit {
match obs_reader.read_observation()? {
Some(row) => {
for (i, value) in row.into_iter().enumerate() {
match (&mut columns[i], value) {
(ColumnData::F64(vec), ObsValue::Numeric(v)) => vec.push(v),
(ColumnData::String(vec), ObsValue::Character(v)) => vec.push(v),
_ => {
return Err(Error::corrupt("type mismatch in observation data"));
}
}
}
rows_read += 1;
}
None => break,
}
}
let cols: Vec<Column> = member
.variables
.iter()
.zip(columns)
.map(|(var, data)| {
let mut col = Column::new(&var.nname, data);
if !var.nlabel.is_empty() {
col = col.with_label(var.nlabel.as_str());
}
if !var.nform.is_empty() {
col = col
.with_format(Format::from_namestr(&var.nform, var.nfl, var.nfd, var.nfj));
}
if !var.niform.is_empty() {
col = col.with_informat(Format::from_namestr(
&var.niform,
var.nifl,
var.nifd,
0, ));
}
if var.xpt_type().is_character() {
col = col.with_length(var.length());
}
col
})
.collect();
let mut dataset = Dataset::new(member.name.clone(), cols)?;
if let Some(ref label) = member.label {
dataset.set_label(label.as_str());
}
Ok(dataset)
}
}
impl XptReader<BufReader<File>> {
pub fn open(path: impl AsRef<Path>) -> Result<Self> {
let file = File::open(path.as_ref()).map_err(Error::Io)?;
Self::new(BufReader::new(file))
}
}
/// One cell of an observation row, as matched against a column's storage kind
/// when a member is read.
#[derive(Debug, Clone)]
pub enum ObsValue {
/// Value destined for an `F64` column.
/// NOTE(review): `None` presumably denotes a missing value — confirm against `ObservationReader`.
Numeric(Option<f64>),
/// Value destined for a `String` column.
/// NOTE(review): `None` presumably denotes a missing/blank value — confirm against `ObservationReader`.
Character(Option<String>),
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Member lookup matches regardless of ASCII case and reports unknown
    /// names as absent.
    #[test]
    fn test_xpt_file_find_member() {
        let adverse_events = XptMemberInfo {
            name: "AE".into(),
            label: Some("Adverse Events".into()),
            variables: vec![],
            obs_offset: 0,
            obs_count: 0,
            row_len: 0,
        };
        let info = XptInfo {
            members: vec![adverse_events],
            library_label: None,
            created: None,
            modified: None,
        };

        // Exact-case hit.
        assert!(info.find_member("AE").is_some());
        // Lower-case query still resolves to the same member.
        assert!(info.find_member("ae").is_some());
        // A name that is not present yields nothing.
        assert!(info.find_member("DM").is_none());
    }
}