use crate::{error::Error, LineBreak, Result};
use std::{
fs,
io::{self, Read},
path::Path,
};
/// Capacity, in bytes, of the `io::BufReader` that wraps the underlying source (8 KiB).
const BUFFER_SIZE: usize = 8 * (1 << 10);
/// Iterator adapter that yields each record of a [`Reader`] as an owned `Vec<u8>`.
///
/// Obtained from [`Reader::byte_reader`]; it holds a mutable borrow of the
/// reader for the lifetime `'a`.
pub struct ByteReader<'a, R> {
    // The `R: 'a` outlives requirement is inferred from this reference, so it
    // does not need to be spelled out on the struct.
    r: &'a mut Reader<R>,
}
/// Iterator adapter that yields each record of a [`Reader`] as an owned `String`.
///
/// Obtained from [`Reader::string_reader`]; it holds a mutable borrow of the
/// reader for the lifetime `'a`.
pub struct StringReader<'a, R> {
    // The `R: 'a` outlives requirement is inferred from this reference, so it
    // does not need to be spelled out on the struct.
    r: &'a mut Reader<R>,
}
/// Reads fixed-width records from an underlying source of type `R`.
///
/// A reader is created with `from_reader` (or one of the convenience
/// constructors) and then configured with the `width` and `linebreak` builder
/// methods before records are pulled with `next_record`.
pub struct Reader<R> {
// Buffered wrapper around the underlying source.
rdr: io::BufReader<R>,
// Holds the bytes of the most recently read record; sized by `width`.
buf: Vec<u8>,
// Scratch space the separator bytes after each record are read into; sized by `linebreak`.
linebreak_buf: Vec<u8>,
// Set once the source runs out of data; `next_record` returns `None` afterwards.
eof: bool,
/// Record width in bytes, as stored by the `width` builder method.
///
/// NOTE(review): assigning this field directly does not resize the internal
/// record buffer; only `width` does. Prefer the builder method.
pub record_width: usize,
/// Line break expected after each record, as stored by the `linebreak` builder method.
///
/// NOTE(review): assigning this field directly does not resize the internal
/// line-break buffer; only `linebreak` does. Prefer the builder method.
pub linebreak: LineBreak,
}
impl<R> Reader<R>
where
R: Read,
{
pub fn from_reader(rdr: R) -> Self {
Reader {
rdr: io::BufReader::with_capacity(BUFFER_SIZE, rdr),
record_width: 0,
buf: Vec::new(),
linebreak: LineBreak::None,
linebreak_buf: Vec::new(),
eof: false,
}
}
pub fn string_reader(&mut self) -> StringReader<R> {
StringReader { r: self }
}
pub fn byte_reader(&mut self) -> ByteReader<R> {
ByteReader { r: self }
}
pub fn next_record(&mut self) -> Option<Result<&[u8]>> {
if self.eof {
return None;
}
match self.fill_buf() {
Ok(0) => return None,
Ok(_) => {}
Err(e) => return Some(Err(e)),
}
if let Err(e) = self.read_linebreak() {
return Some(Err(e));
}
Some(Ok(&self.buf))
}
pub fn width(mut self, width: usize) -> Self {
self.buf = vec![0; width];
self.record_width = width;
self
}
pub fn linebreak(mut self, linebreak: LineBreak) -> Self {
self.linebreak_buf = vec![0; linebreak.byte_width()];
self.linebreak = linebreak;
self
}
#[inline]
fn has_linebreak(&self) -> bool {
!matches!(self.linebreak, LineBreak::None)
}
#[inline]
fn fill_buf(&mut self) -> Result<usize> {
match self.rdr.read_exact(&mut self.buf) {
Ok(_) => Ok(self.record_width),
Err(e) => match e.kind() {
io::ErrorKind::UnexpectedEof => {
self.eof = true;
Ok(0)
}
_ => Err(Error::from(e)),
},
}
}
#[inline]
fn read_linebreak(&mut self) -> Result<()> {
if !self.has_linebreak() {
return Ok(());
}
if let Err(e) = self.rdr.read_exact(&mut self.linebreak_buf) {
match e.kind() {
io::ErrorKind::UnexpectedEof => self.eof = true,
_ => return Err(Error::from(e)),
}
}
Ok(())
}
}
impl Reader<fs::File> {
    /// Opens the file at `path` and creates a reader over it.
    ///
    /// # Errors
    ///
    /// Returns an error when the file cannot be opened.
    pub fn from_file<P: AsRef<Path>>(path: P) -> Result<Self> {
        let file = fs::File::open(path)?;
        Ok(Self::from_reader(file))
    }
}
impl Reader<io::Cursor<Vec<u8>>> {
    /// Creates a reader over an in-memory byte buffer.
    ///
    /// `bytes` is converted into a `Vec<u8>` and wrapped in an `io::Cursor`.
    pub fn from_bytes<T>(bytes: T) -> Self
    where
        T: Into<Vec<u8>>,
    {
        let data: Vec<u8> = bytes.into();
        let cursor = io::Cursor::new(data);
        Self::from_reader(cursor)
    }

    /// Creates a reader over the bytes of an in-memory string.
    ///
    /// `s` is converted into a `String` whose bytes are handed to
    /// `from_bytes`.
    pub fn from_string<T>(s: T) -> Self
    where
        T: Into<String>,
    {
        let text: String = s.into();
        Self::from_bytes(text.into_bytes())
    }
}
impl<R> Read for Reader<R>
where
    R: Read,
{
    /// Reads raw bytes straight from the internal buffered reader into `buf`.
    ///
    /// This bypasses record framing: no record width or line break is applied,
    /// and the bytes are taken from the same stream `next_record` reads from.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let inner = &mut self.rdr;
        inner.read(buf)
    }
}
impl<'a, R> Iterator for ByteReader<'a, R>
where
    R: Read,
{
    type Item = Result<Vec<u8>>;

    /// Pulls the next record from the wrapped reader and copies it into a
    /// freshly allocated `Vec<u8>`; errors are passed through unchanged.
    fn next(&mut self) -> Option<Self::Item> {
        match self.r.next_record()? {
            Ok(bytes) => Some(Ok(bytes.to_vec())),
            Err(err) => Some(Err(err)),
        }
    }
}
impl<'a, R> Iterator for StringReader<'a, R>
where
    R: Read,
{
    type Item = Result<String>;

    /// Pulls the next record from the wrapped reader and converts it into an
    /// owned `String`; errors are passed through unchanged.
    ///
    /// The conversion is lossy: byte sequences that are not valid UTF-8 are
    /// replaced with U+FFFD.
    fn next(&mut self) -> Option<Self::Item> {
        self.r.next_record().map(|record| {
            // `into_owned` reuses the `String` that `from_utf8_lossy` already
            // allocated when it had to replace invalid bytes, instead of
            // copying it a second time.
            record.map(|bytes| String::from_utf8_lossy(bytes).into_owned())
        })
    }
}
#[cfg(test)]
#[allow(dead_code)]
mod test {
    use super::*;
    use crate::{FieldSet, FixedWidth};
    use serde_derive::Deserialize;
    use std::result;

    /// The single 16-byte record every fixture below repeats three times.
    const RECORD: &str = "1111222233334444";

    /// `next_record` hands out each record once and then stops.
    #[test]
    fn read_next_record() {
        let input = "111122223333444411112222333344441111222233334444";
        let mut reader = Reader::from_string(input).width(16);
        let mut seen = 0;
        while let Some(record) = reader.next_record() {
            seen += 1;
            assert_eq!(RECORD.as_bytes(), record.unwrap());
        }
        assert_eq!(3, seen);
    }

    /// Records without separators are split purely by width.
    #[test]
    fn read_from_string() {
        let input = "111122223333444411112222333344441111222233334444";
        let mut reader = Reader::from_string(input).width(16);
        let records: Vec<String> = reader
            .string_reader()
            .filter_map(result::Result::ok)
            .collect();
        assert_eq!(records.len(), 3);
        for record in records {
            assert_eq!(RECORD, record);
        }
    }

    /// A one-byte newline between records is skipped.
    #[test]
    fn read_from_string_with_newlines() {
        let input = "1111222233334444\n1111222233334444\n1111222233334444";
        let mut reader = Reader::from_string(input)
            .width(16)
            .linebreak(LineBreak::Newline);
        let records: Vec<String> = reader
            .string_reader()
            .filter_map(result::Result::ok)
            .collect();
        assert_eq!(records.len(), 3);
        for record in records {
            assert_eq!(RECORD, record);
        }
    }

    /// A two-byte CRLF between records is skipped.
    #[test]
    fn read_from_string_with_crlf() {
        let input = "1111222233334444\r\n1111222233334444\r\n1111222233334444";
        let mut reader = Reader::from_string(input)
            .width(16)
            .linebreak(LineBreak::CRLF);
        let records: Vec<String> = reader
            .string_reader()
            .filter_map(result::Result::ok)
            .collect();
        assert_eq!(records.len(), 3);
        for record in records {
            assert_eq!(RECORD, record);
        }
    }

    /// A byte-slice source behaves like a string source.
    #[test]
    fn read_from_bytes() {
        let input = "111122223333444411112222333344441111222233334444".as_bytes();
        let mut reader = Reader::from_bytes(input).width(16);
        let records: Vec<String> = reader
            .string_reader()
            .filter_map(result::Result::ok)
            .collect();
        assert_eq!(records.len(), 3);
        for record in records {
            assert_eq!(RECORD, record);
        }
    }

    /// The byte iterator yields owned byte vectors and skips CRLF separators.
    #[test]
    fn read_from_bytes_with_crlf() {
        let input = "1111222233334444\r\n1111222233334444\r\n1111222233334444".as_bytes();
        let mut reader = Reader::from_bytes(input)
            .width(16)
            .linebreak(LineBreak::CRLF);
        let records: Vec<Vec<u8>> = reader
            .byte_reader()
            .filter_map(result::Result::ok)
            .collect();
        assert_eq!(records.len(), 3);
        for record in records {
            assert_eq!(RECORD.as_bytes().to_vec(), record);
        }
    }

    // Fixture type with a fixed-width layout; not referenced by the tests in
    // this module.
    #[derive(Deserialize)]
    struct Test {
        a: String,
        b: String,
        c: usize,
    }

    impl FixedWidth for Test {
        fn fields() -> FieldSet {
            FieldSet::Seq(vec![
                FieldSet::new_field(0..4),
                FieldSet::new_field(4..8),
                FieldSet::new_field(8..16),
            ])
        }
    }

    /// The `Read` impl passes raw bytes through without record framing.
    #[test]
    fn test_read() {
        let input = "111122223333444411112222333344441111222233334444".as_bytes();
        let mut reader = Reader::from_bytes(input);
        let mut chunk = vec![0; 16];
        let bytes_read = reader.read(&mut chunk).unwrap();
        assert_eq!(chunk, input[..bytes_read].to_vec());
    }
}