use memchr::memchr;
use failure::Fail;
use std::marker::PhantomData;
use std::io;
/// Splits the first field off `record`.
///
/// `record` is trimmed of surrounding whitespace before it is searched for
/// the first occurrence of the byte `delim`.
///
/// Returns `(field, rest)`:
/// * when the delimiter is found, `field` is the text in front of it with
///   surrounding whitespace trimmed, and `rest` is everything after it
///   (leading whitespace in `rest` is kept);
/// * otherwise `field` is the whole trimmed record and `rest` is `""`.
///
/// # Panics
///
/// Panics if `delim` is a non-ASCII byte that occurs in `record`, because the
/// split would then fall inside a multi-byte UTF-8 character.
pub fn next_field(delim: u8, record: &str) -> (&str, &str) {
    let trimmed = record.trim();
    match memchr(delim, trimmed.as_bytes()) {
        Some(idx) => {
            let head = &trimmed[..idx];
            let tail = &trimmed[idx + 1..];
            (head.trim(), tail)
        }
        None => (trimmed, ""),
    }
}
/// Creates an iterator over the fields of `record` separated by the byte
/// `delim`.
///
/// Surrounding whitespace is trimmed from `record` once, up front; the
/// returned iterator borrows from `record` and does not allocate.
pub fn enum_fields<'a>(delim: u8, record: &'a str) -> EnumFields<'a> {
    let record = record.trim();
    EnumFields {
        delim,
        record,
        _phantom: PhantomData,
    }
}
/// Iterator over the delimiter-separated fields of a single record.
///
/// Each item is a `&'a str` slice borrowed from the record the iterator was
/// created from.
pub struct EnumFields<'a> {
/// Byte that separates one field from the next.
delim: u8,
/// Part of the record that has not been handed out yet.
record: &'a str,
/// Marker carrying the `'a` lifetime of the borrowed record.
_phantom: PhantomData<&'a str>,
}
impl<'a> Iterator for EnumFields<'a> {
    type Item = &'a str;

    /// Yields the next field, or `None` once the remaining record is empty.
    ///
    /// Because iteration stops as soon as nothing is left, a delimiter at the
    /// very end of the record does not produce a final empty field.
    fn next(&mut self) -> Option<Self::Item> {
        if self.record.is_empty() {
            return None;
        }
        let (head, tail) = next_field(self.delim, self.record);
        self.record = tail;
        Some(head)
    }
}
/// Builder for a `DataRecordReader`.
///
/// The two type parameters track which delimiters have been chosen: each one
/// is `()` until the matching setter is called and `u8` afterwards. `build`
/// is only implemented for `DataRecordReaderBuilder<u8, u8>`, i.e. once both
/// delimiters are set.
#[derive(Debug)]
pub struct DataRecordReaderBuilder<RdType, FdType> {
/// Record delimiter, or `()` while it has not been chosen.
record_delimiter: RdType,
/// Field delimiter, or `()` while it has not been chosen.
field_delimiter: FdType,
}
impl DataRecordReaderBuilder<(), ()> {
pub fn new() -> Self {
DataRecordReaderBuilder {
record_delimiter: (),
field_delimiter: (),
}
}
}
impl DataRecordReaderBuilder<u8, u8> {
    /// Consumes the builder and wraps `stream` in a `DataRecordReader` that
    /// uses the configured record and field delimiters.
    ///
    /// The reader starts with an empty line buffer and no peeked record.
    pub fn build<R: io::BufRead>(self, stream: R) -> DataRecordReader<R> {
        let Self {
            record_delimiter,
            field_delimiter,
        } = self;
        DataRecordReader {
            stream,
            record_delimiter,
            field_delimiter,
            buffer: Vec::new(),
            peek_buf: None,
        }
    }
}
impl<RdType, FdType> DataRecordReaderBuilder<RdType, FdType> {
    /// Sets the byte that terminates a record, keeping the current field
    /// delimiter state.
    pub fn record_delimiter(self, delim: u8) -> DataRecordReaderBuilder<u8, FdType> {
        let Self { field_delimiter, .. } = self;
        DataRecordReaderBuilder {
            record_delimiter: delim,
            field_delimiter,
        }
    }

    /// Sets the byte that separates fields within a record, keeping the
    /// current record delimiter state.
    pub fn field_delimiter(self, delim: u8) -> DataRecordReaderBuilder<RdType, u8> {
        let Self { record_delimiter, .. } = self;
        DataRecordReaderBuilder {
            record_delimiter,
            field_delimiter: delim,
        }
    }
}
/// Errors raised by `DataRecordReader` while reading and decoding a record.
#[derive(Debug, Fail)]
pub enum ReaderError {
/// Reading from the underlying stream failed.
#[fail(display = "IO error: {}", _0)]
Io(#[cause] std::io::Error),
/// The bytes of a record were not valid UTF-8.
#[fail(display = "From UTF-8 error: {}", _0)]
FromUTF8(#[cause] std::string::FromUtf8Error),
}
/// Reads delimiter-terminated records from a buffered stream and classifies
/// each one as a `DataRecord`, with support for peeking one record ahead.
#[derive(Debug)]
pub struct DataRecordReader<R: io::BufRead> {
/// Source the records are read from.
stream: R,
/// Byte that terminates a record.
record_delimiter: u8,
/// Byte that separates fields inside a record.
field_delimiter: u8,
/// Scratch space receiving the raw bytes of the record being read.
buffer: Vec<u8>,
/// Record fetched by `peek_record` that `next_record` has not returned yet.
peek_buf: Option<DataRecord>,
}
/// One record as classified by `DataRecordReader::next_record`.
#[derive(Debug)]
pub enum DataRecord {
/// The fields of a non-comment record, split on the field delimiter.
Fields(Vec<String>),
/// A record whose first byte is `#`, stored as the raw record text.
Comment(String),
/// A non-comment record that produced no fields.
Blank,
/// The stream returned no more bytes.
EOF,
}
impl<R: io::BufRead> DataRecordReader<R> {
    /// Creates a reader over `stream` that uses `\n` as the record delimiter
    /// and `,` as the field delimiter.
    pub fn new(stream: R) -> Self {
        DataRecordReader {
            stream,
            record_delimiter: b'\n',
            field_delimiter: b',',
            buffer: Vec::new(),
            peek_buf: None,
        }
    }

    /// Returns the byte currently used to separate fields.
    pub fn field_delimiter(&self) -> &u8 {
        &self.field_delimiter
    }

    /// Changes the byte used to separate fields.
    ///
    /// A record that has already been peeked was split with the previous
    /// delimiter and is not re-parsed.
    pub fn set_field_delimiter(&mut self, delim: u8) {
        self.field_delimiter = delim;
    }

    /// Returns a reference to the next record without consuming it.
    ///
    /// The record is read at most once and cached; the following call to
    /// [`next_record`](Self::next_record) hands out the cached value.
    ///
    /// # Errors
    ///
    /// Propagates any error from [`next_record`](Self::next_record).
    pub fn peek_record(&mut self) -> Result<&DataRecord, failure::Error> {
        if self.peek_buf.is_none() {
            let record = self.next_record()?;
            self.peek_buf = Some(record);
        }
        Ok(self
            .peek_buf
            .as_ref()
            .expect("peek_buf is always populated by the branch above"))
    }

    /// Reads and classifies the next record.
    ///
    /// A previously peeked record is returned first. Otherwise bytes are read
    /// up to and including the record delimiter (or to the end of the stream)
    /// and classified:
    ///
    /// * nothing read: [`DataRecord::EOF`];
    /// * first byte is `#`: [`DataRecord::Comment`] holding the raw text;
    /// * no fields after splitting: [`DataRecord::Blank`];
    /// * otherwise [`DataRecord::Fields`] with the trimmed fields.
    ///
    /// # Errors
    ///
    /// Returns [`ReaderError::Io`] when reading from the stream fails and
    /// [`ReaderError::FromUTF8`] when the record is not valid UTF-8. After a
    /// UTF-8 error the offending record has been discarded, so the next call
    /// continues with the record that follows it.
    pub fn next_record(&mut self) -> Result<DataRecord, failure::Error> {
        if let Some(record) = self.peek_buf.take() {
            return Ok(record);
        }

        let read = self
            .stream
            .read_until(self.record_delimiter, &mut self.buffer)
            .map_err(ReaderError::Io)?;
        if read == 0 {
            return Ok(DataRecord::EOF);
        }

        // Move the bytes out *before* decoding. This leaves `self.buffer`
        // empty even when decoding fails (previously the invalid bytes stayed
        // behind and corrupted every later record) and avoids copying the
        // record just to convert it.
        let raw = std::mem::replace(&mut self.buffer, Vec::new());
        let is_comment = raw.first() == Some(&b'#');
        let text = String::from_utf8(raw).map_err(ReaderError::FromUTF8)?;

        if is_comment {
            return Ok(DataRecord::Comment(text));
        }

        let fields: Vec<String> = enum_fields(self.field_delimiter, &text)
            .map(str::to_owned)
            .collect();
        if fields.is_empty() {
            Ok(DataRecord::Blank)
        } else {
            Ok(DataRecord::Fields(fields))
        }
    }
}
/// Groups the records of a `DataRecordReader` into blocks of rows whose
/// fields are parsed as `T`.
#[derive(Debug)]
pub struct DataBlockReader<T, R>
where
R: io::BufRead,
T: std::str::FromStr
{
/// Record source the blocks are assembled from.
reader: DataRecordReader<R>,
/// Marks the element type `T` without storing a value of it.
_phantom: PhantomData<fn() -> T>,
}
impl<T, R> DataBlockReader<T, R>
where
    R: io::BufRead,
    T: std::str::FromStr,
    <T as std::str::FromStr>::Err: failure::Fail,
{
    /// Wraps `reader` so that its records can be consumed block by block.
    pub fn new(reader: DataRecordReader<R>) -> Self {
        DataBlockReader {
            reader,
            _phantom: PhantomData,
        }
    }

    /// Gives back the underlying record reader.
    pub fn into_inner(self) -> DataRecordReader<R> {
        self.reader
    }

    /// Reads consecutive records up to (but not including) the next blank
    /// record or the end of the stream and parses every field as `T`.
    ///
    /// Comment records inside the block are skipped. The terminating blank
    /// record is left in the reader. Returns `Ok(None)` when no field record
    /// was read, otherwise `Ok(Some(rows))` with one `Vec<T>` per record.
    ///
    /// # Errors
    ///
    /// Propagates reader errors and the `FromStr` error of the first field
    /// that fails to parse; the record containing that field has already been
    /// consumed.
    pub fn next_block(&mut self) -> Result<Option<Vec<Vec<T>>>, failure::Error> {
        let mut block: Option<Vec<Vec<T>>> = None;
        loop {
            // Look before consuming so that the block terminator stays in the
            // reader for the caller.
            match self.reader.peek_record()? {
                DataRecord::EOF | DataRecord::Blank => break,
                DataRecord::Comment(_) | DataRecord::Fields(_) => {}
            }
            match self.reader.next_record()? {
                DataRecord::Comment(_) => continue,
                DataRecord::Fields(fields) => {
                    let row = fields
                        .iter()
                        .map(|field| T::from_str(field))
                        .collect::<Result<Vec<T>, _>>()?;
                    block.get_or_insert_with(Vec::new).push(row);
                }
                DataRecord::EOF | DataRecord::Blank => {
                    unreachable!("peek_record already ruled out EOF and Blank")
                }
            }
        }
        Ok(block)
    }

    /// Discards blank and comment records until a field record or the end of
    /// the stream is next, and returns how many records were discarded.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the underlying reader.
    pub fn consume_blanks(&mut self) -> Result<usize, failure::Error> {
        let mut count = 0;
        loop {
            match self.reader.peek_record()? {
                DataRecord::Blank | DataRecord::Comment(_) => {}
                DataRecord::Fields(_) | DataRecord::EOF => break,
            }
            // Drop the record that was just peeked; propagate instead of
            // panicking should the reader ever report an error here.
            self.reader.next_record()?;
            count += 1;
        }
        Ok(count)
    }
}