use std::io;
use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt};
use csv;
/// A simple index giving random access to the records of CSV data.
///
/// The index is a flat sequence of big-endian `u64` values: one byte offset
/// per indexed record, followed by a trailing `u64` holding the number of
/// offsets. `create` writes this layout; `open` and `get` read it back.
pub struct RandomAccessSimple<R> {
/// The underlying reader from which index entries are read.
rdr: R,
/// The number of record offsets stored in the index, as read from its
/// trailing 8 bytes when the index is opened.
len: u64,
}
impl<W: io::Write> RandomAccessSimple<W> {
    /// Write a simple index to `wtr` for the CSV data produced by `rdr`.
    ///
    /// The index consists of one big-endian `u64` per record, holding the
    /// byte offset at which that record starts, followed by a final
    /// big-endian `u64` holding the number of offsets written.
    ///
    /// If `rdr` is configured to have headers and the header row is not
    /// empty, the header row is indexed as the first entry and is included
    /// in the count.
    ///
    /// The writer is flushed before returning, so a buffered writer passed
    /// by value reports a failed final write instead of silently dropping it.
    ///
    /// # Errors
    ///
    /// Returns an error if reading from `rdr` fails or if writing to or
    /// flushing `wtr` fails.
    ///
    /// # Panics
    ///
    /// Panics if the reader yields a record without position information.
    pub fn create<R: io::Read>(
        rdr: &mut csv::Reader<R>,
        mut wtr: W,
    ) -> csv::Result<()> {
        let mut len: u64 = 0;

        // The header row is handled separately from the data rows: it is
        // fetched through `byte_headers` rather than `read_byte_record`.
        if rdr.has_headers() {
            let header = rdr.byte_headers()?;
            if !header.is_empty() {
                let pos = header.position().expect("position on header row");
                wtr.write_u64::<BigEndian>(pos.byte())?;
                len += 1;
            }
        }

        // A single record buffer is reused for every row.
        let mut record = csv::ByteRecord::new();
        while rdr.read_byte_record(&mut record)? {
            let pos = record.position().expect("position on row");
            wtr.write_u64::<BigEndian>(pos.byte())?;
            len += 1;
        }

        // The trailing count lets `open` discover the number of entries by
        // reading only the last 8 bytes of the index.
        wtr.write_u64::<BigEndian>(len)?;

        // `wtr` is dropped when this function returns. Flush explicitly so
        // that any error from a buffered writer is surfaced to the caller
        // rather than being swallowed on drop.
        wtr.flush()?;
        Ok(())
    }
}
impl<R: io::Read + io::Seek> RandomAccessSimple<R> {
    /// Open an existing simple index from `rdr`.
    ///
    /// Only the final 8 bytes are read here: they hold the number of record
    /// offsets stored in the index as a big-endian `u64`.
    ///
    /// # Errors
    ///
    /// Returns an error if seeking to the last 8 bytes or reading them fails
    /// (for example, when the index is shorter than 8 bytes).
    pub fn open(mut rdr: R) -> csv::Result<RandomAccessSimple<R>> {
        rdr.seek(io::SeekFrom::End(-8))?;
        let len = rdr.read_u64::<BigEndian>()?;
        Ok(RandomAccessSimple { rdr, len })
    }

    /// Look up the position of the `i`th indexed record.
    ///
    /// The returned position has its byte offset set to the value stored in
    /// the index and its record number set to `i`.
    ///
    /// # Errors
    ///
    /// Returns an error if `i` is not less than `len()`, if the location of
    /// entry `i` cannot be represented as a `u64` (only possible when the
    /// stored count is bogus), or if seeking or reading the index fails.
    pub fn get(&mut self, i: u64) -> csv::Result<csv::Position> {
        if i >= self.len {
            let msg = format!(
                "invalid record index {} (there are {} records)",
                i, self.len
            );
            let err = io::Error::new(io::ErrorKind::Other, msg);
            return Err(csv::Error::from(err));
        }

        // Each entry is 8 bytes wide. `len` comes from the index itself, so
        // a corrupt trailer could admit an `i` for which `i * 8` overflows;
        // report that as an error instead of panicking or wrapping around.
        let entry_start = i.checked_mul(8).ok_or_else(|| {
            io::Error::new(
                io::ErrorKind::InvalidData,
                format!("record index {} is too large for a valid index", i),
            )
        })?;

        self.rdr.seek(io::SeekFrom::Start(entry_start))?;
        let offset = self.rdr.read_u64::<BigEndian>()?;
        let mut pos = csv::Position::new();
        pos.set_byte(offset).set_record(i);
        Ok(pos)
    }

    /// Return the number of record offsets stored in this index.
    pub fn len(&self) -> u64 {
        self.len
    }

    /// Return true if and only if this index contains no record offsets.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
}
#[cfg(test)]
mod tests {
    use std::io;

    use csv;

    use super::RandomAccessSimple;

    /// A CSV reader paired with an in-memory index built from the same data.
    struct Indexed<'a> {
        csv: csv::Reader<io::Cursor<&'a str>>,
        idx: RandomAccessSimple<io::Cursor<Vec<u8>>>,
    }

    impl<'a> Indexed<'a> {
        /// Build an index over `csv_data`, then open it for lookups.
        fn new(headers: bool, csv_data: &'a str) -> Indexed<'a> {
            let mut index_bytes = io::Cursor::new(Vec::<u8>::new());
            let mut reader = csv::ReaderBuilder::new()
                .has_headers(headers)
                .from_reader(io::Cursor::new(csv_data));
            RandomAccessSimple::create(&mut reader, &mut index_bytes).unwrap();
            let idx = RandomAccessSimple::open(index_bytes).unwrap();
            Indexed { csv: reader, idx }
        }

        /// Seek the CSV reader to the indexed record and read it back.
        fn read_at(&mut self, record: u64) -> csv::StringRecord {
            let position = self.idx.get(record).unwrap();
            self.csv.seek(position).unwrap();
            let first = self.csv.records().next();
            first.unwrap().unwrap()
        }
    }

    /// Assert that reading entries `0..expected.len()` through the index
    /// yields exactly the rows in `expected`, in order.
    fn assert_rows(indexed: &mut Indexed, expected: &[Vec<&str>]) {
        for (i, row) in (0u64..).zip(expected) {
            assert_eq!(indexed.read_at(i), *row);
        }
    }

    #[test]
    fn headers_empty() {
        let indexed = Indexed::new(true, "");
        assert_eq!(indexed.idx.len(), 0);
    }

    #[test]
    fn headers_one_field() {
        let mut indexed = Indexed::new(true, "h1\na\nb\nc\n");
        assert_eq!(indexed.idx.len(), 4);
        assert_rows(
            &mut indexed,
            &[vec!["h1"], vec!["a"], vec!["b"], vec!["c"]],
        );
    }

    #[test]
    fn headers_many_fields() {
        let mut indexed = Indexed::new(
            true,
            "\
h1,h2,h3
a,b,c
d,e,f
g,h,i
",
        );
        assert_eq!(indexed.idx.len(), 4);
        assert_rows(
            &mut indexed,
            &[
                vec!["h1", "h2", "h3"],
                vec!["a", "b", "c"],
                vec!["d", "e", "f"],
                vec!["g", "h", "i"],
            ],
        );
    }

    #[test]
    fn no_headers_one_field() {
        let mut indexed = Indexed::new(false, "h1\na\nb\nc\n");
        assert_eq!(indexed.idx.len(), 4);
        assert_rows(
            &mut indexed,
            &[vec!["h1"], vec!["a"], vec!["b"], vec!["c"]],
        );
    }

    #[test]
    fn no_headers_many_fields() {
        let mut indexed = Indexed::new(
            false,
            "\
h1,h2,h3
a,b,c
d,e,f
g,h,i
",
        );
        assert_eq!(indexed.idx.len(), 4);
        assert_rows(
            &mut indexed,
            &[
                vec!["h1", "h2", "h3"],
                vec!["a", "b", "c"],
                vec!["d", "e", "f"],
                vec!["g", "h", "i"],
            ],
        );
    }

    #[test]
    fn headers_one_field_newlines() {
        // The data deliberately begins with a newline before the header row.
        let mut indexed = Indexed::new(
            true,
            "
h1
a
b
c
",
        );
        assert_eq!(indexed.idx.len(), 4);
        assert_rows(
            &mut indexed,
            &[vec!["h1"], vec!["a"], vec!["b"], vec!["c"]],
        );
    }
}