use std::cmp;
use std::fmt;
use std::iter::FromIterator;
use std::ops::{self, Range};
use std::result;
use bstr::{BString, ByteSlice};
use crate::error::{new_utf8_error, Utf8Error};
use crate::string_record::StringRecord;
/// A record made up of zero or more fields, each field being an arbitrary
/// byte string.
///
/// All state lives in a boxed `ByteRecordInner`: one contiguous byte buffer
/// holding every field back to back, the end offset of each field, and an
/// optional `Position`.
///
/// `Clone` and `Eq` are derived, while `PartialEq` is written by hand so that
/// equality is decided by field contents only.
#[derive(Clone, Eq)]
pub struct ByteRecord(Box<ByteRecordInner>);
/// Two records are equal when they hold the same number of fields and every
/// pair of corresponding fields contains the same bytes.
///
/// Only field contents take part in the comparison; the stored position and
/// any unused buffer space are ignored.
impl PartialEq for ByteRecord {
    fn eq(&self, other: &ByteRecord) -> bool {
        // Compare field by field (not the raw buffers) so that records with
        // identical bytes but different field boundaries are unequal. The
        // length check comes first because `zip` stops at the shorter side.
        self.len() == other.len()
            && self.iter().zip(other.iter()).all(|(lhs, rhs)| lhs == rhs)
    }
}
/// Compares a record against a vector of byte-like values, field by field.
///
/// Equal only when both sides have the same number of elements and each
/// field's bytes match the corresponding element's `as_ref()` bytes.
impl<T> PartialEq<Vec<T>> for ByteRecord
where
    T: AsRef<[u8]>,
{
    fn eq(&self, rhs: &Vec<T>) -> bool {
        self.iter_eq(rhs.iter())
    }
}
/// Same comparison as `ByteRecord == Vec<T>`, provided for a borrowed record
/// on the left-hand side.
impl<'a, T> PartialEq<Vec<T>> for &'a ByteRecord
where
    T: AsRef<[u8]>,
{
    fn eq(&self, rhs: &Vec<T>) -> bool {
        self.iter_eq(rhs.iter())
    }
}
/// Compares a record against a slice of byte-like values, field by field.
///
/// Equal only when both sides have the same number of elements and each
/// field's bytes match the corresponding element's `as_ref()` bytes.
impl<T> PartialEq<[T]> for ByteRecord
where
    T: AsRef<[u8]>,
{
    fn eq(&self, rhs: &[T]) -> bool {
        self.iter_eq(rhs.iter())
    }
}
/// Same comparison as `ByteRecord == [T]`, provided for a borrowed record on
/// the left-hand side.
impl<'a, T> PartialEq<[T]> for &'a ByteRecord
where
    T: AsRef<[u8]>,
{
    fn eq(&self, rhs: &[T]) -> bool {
        self.iter_eq(rhs.iter())
    }
}
impl fmt::Debug for ByteRecord {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let mut fields = vec![];
for field in self {
fields.push(BString::from(field.to_vec()));
}
write!(f, "ByteRecord({:?})", fields)
}
}
/// The heap-allocated state behind a `ByteRecord`.
#[derive(Clone, Debug, Eq, PartialEq)]
struct ByteRecordInner {
// Optional position associated with this record; `None` until one is set
// through `ByteRecord::set_position`.
pos: Option<Position>,
// The bytes of all fields stored back to back. The vector's *length* (not
// its capacity) is the space available for writing, so it may extend past
// the last byte that belongs to a field.
fields: Vec<u8>,
// End offset (into `fields`) of every field, plus the number of fields
// currently in use.
bounds: Bounds,
}
impl Default for ByteRecord {
#[inline]
fn default() -> ByteRecord {
ByteRecord::new()
}
}
impl ByteRecord {
    /// Creates an empty record with no pre-sized storage.
    ///
    /// Equivalent to `ByteRecord::with_capacity(0, 0)`.
    #[inline]
    pub fn new() -> ByteRecord {
        Self::with_capacity(0, 0)
    }

    /// Creates an empty record with pre-sized storage.
    ///
    /// `buffer` is the number of bytes and `fields` the number of field
    /// slots made available up front. The byte buffer is zero-filled to
    /// that length (not merely reserved), because the rest of this type
    /// treats the vector's length as the writable space.
    #[inline]
    pub fn with_capacity(buffer: usize, fields: usize) -> ByteRecord {
        let inner = ByteRecordInner {
            pos: None,
            fields: vec![0; buffer],
            bounds: Bounds::with_capacity(fields),
        };
        ByteRecord(Box::new(inner))
    }

    /// Returns a double-ended iterator over the fields of this record.
    #[inline]
    pub fn iter(&self) -> ByteRecordIter {
        IntoIterator::into_iter(self)
    }

    /// Returns the bytes of field `i`, or `None` when `i` is not a valid
    /// field index.
    #[inline]
    pub fn get(&self, i: usize) -> Option<&[u8]> {
        let range = self.0.bounds.get(i)?;
        Some(&self.0.fields[range])
    }

    /// Returns `true` when the record has no fields.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.bounds.len() == 0
    }

    /// Returns the number of fields in this record.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.bounds.len()
    }

    /// Shortens the record to its first `n` fields.
    ///
    /// Does nothing when `n` is greater than the current number of fields.
    /// Only the field count is changed; the underlying buffers are neither
    /// shrunk nor cleared.
    #[inline]
    pub fn truncate(&mut self, n: usize) {
        if n > self.len() {
            return;
        }
        self.0.bounds.len = n;
    }

    /// Removes every field from the record. Same as `truncate(0)`.
    #[inline]
    pub fn clear(&mut self) {
        self.truncate(0);
    }

    /// Strips leading and trailing whitespace from every field, where
    /// "whitespace" is whatever `bstr`'s `ByteSlice::trim` removes.
    ///
    /// The record's position is preserved. A record without fields is left
    /// untouched.
    pub fn trim(&mut self) {
        if self.len() == 0 {
            return;
        }
        // Build a replacement record rather than editing in place. The old
        // sizes are an upper bound on what the trimmed record needs.
        let mut replacement =
            ByteRecord::with_capacity(self.as_slice().len(), self.len());
        replacement.set_position(self.position().cloned());
        for field in self.iter() {
            replacement.push_field(field.trim());
        }
        *self = replacement;
    }

    /// Appends `field` as a new last field of the record.
    #[inline]
    pub fn push_field(&mut self, field: &[u8]) {
        let start = self.0.bounds.end();
        let end = start + field.len();
        // Grow the byte buffer (by doubling) until the new field fits.
        while self.0.fields.len() < end {
            self.expand_fields();
        }
        self.0.fields[start..end].copy_from_slice(field);
        self.0.bounds.add(end);
    }

    /// Returns the position stored on this record, if any.
    #[inline]
    pub fn position(&self) -> Option<&Position> {
        let pos = &self.0.pos;
        pos.as_ref()
    }

    /// Replaces the position stored on this record; `None` removes it.
    #[inline]
    pub fn set_position(&mut self, pos: Option<Position>) {
        self.0.pos = pos;
    }

    /// Returns the byte range that field `i` occupies within `as_slice()`,
    /// or `None` when `i` is not a valid field index.
    #[inline]
    pub fn range(&self, i: usize) -> Option<Range<usize>> {
        self.0.bounds.get(i)
    }

    /// Returns the bytes of all fields concatenated, with no separators.
    ///
    /// The slice ends at the end of the last field, so spare buffer space is
    /// never exposed.
    #[inline]
    pub fn as_slice(&self) -> &[u8] {
        let used = self.0.bounds.end();
        &self.0.fields[..used]
    }

    /// Hands out mutable access to the raw byte buffer and the raw vector of
    /// field end offsets, so crate-internal code can fill them directly.
    ///
    /// Writing through these does not update the field count; see `set_len`.
    #[inline]
    pub(crate) fn as_parts(&mut self) -> (&mut Vec<u8>, &mut Vec<usize>) {
        let ByteRecordInner { fields, bounds, .. } = &mut *self.0;
        (fields, &mut bounds.ends)
    }

    /// Sets the number of fields without any validation.
    ///
    /// The caller must make sure the first `len` end offsets are meaningful.
    #[inline]
    pub(crate) fn set_len(&mut self, len: usize) {
        self.0.bounds.len = len;
    }

    /// Doubles the length of the byte buffer (to at least 4 bytes), filling
    /// the new space with zeros.
    ///
    /// # Panics
    ///
    /// Panics if doubling the length overflows `usize`.
    #[inline]
    pub(crate) fn expand_fields(&mut self) {
        let doubled = self.0.fields.len().checked_mul(2).unwrap();
        let target = cmp::max(4, doubled);
        self.0.fields.resize(target, 0);
    }

    /// Doubles the number of available field end-offset slots.
    #[inline]
    pub(crate) fn expand_ends(&mut self) {
        self.0.bounds.expand();
    }

    /// Checks that every field is valid UTF-8.
    ///
    /// On failure the error carries the index of the first invalid field and
    /// the number of leading bytes of that field that were valid.
    #[inline]
    pub(crate) fn validate(&self) -> result::Result<(), Utf8Error> {
        // Fast path: pure ASCII is always valid UTF-8, however it is split
        // into fields.
        if self.as_slice().is_ascii() {
            return Ok(());
        }
        // Otherwise each field has to be valid on its own, so check them
        // one at a time and stop at the first failure.
        for (i, field) in self.iter().enumerate() {
            field
                .to_str()
                .map_err(|err| new_utf8_error(i, err.valid_up_to()))?;
        }
        Ok(())
    }

    /// Returns `true` when `other` yields exactly as many items as this
    /// record has fields and every item's bytes equal the matching field.
    pub(crate) fn iter_eq<I, T>(&self, other: I) -> bool
    where
        I: IntoIterator<Item = T>,
        T: AsRef<[u8]>,
    {
        let mut ours = self.iter();
        let mut theirs = other.into_iter();
        loop {
            let (mine, yours) = match (ours.next(), theirs.next()) {
                // Both sides ran out together: everything matched.
                (None, None) => return true,
                (Some(mine), Some(yours)) => (mine, yours),
                // One side is longer than the other.
                _ => return false,
            };
            if mine != yours.as_ref() {
                return false;
            }
        }
    }
}
/// A position made up of a byte value, a line value and a record value.
///
/// Presumably this describes where a record starts in some input; the code
/// that produces positions is not part of this file. The line value is
/// 1-based: it starts at 1 and can never be set to 0.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Position {
    byte: u64,
    line: u64,
    record: u64,
}

impl Position {
    /// Returns the initial position: byte `0`, line `1`, record `0`.
    #[inline]
    pub fn new() -> Position {
        Position { record: 0, line: 1, byte: 0 }
    }

    /// Returns the byte value of this position.
    #[inline]
    pub fn byte(&self) -> u64 {
        let Position { byte, .. } = *self;
        byte
    }

    /// Returns the line value of this position (always at least 1).
    #[inline]
    pub fn line(&self) -> u64 {
        let Position { line, .. } = *self;
        line
    }

    /// Returns the record value of this position.
    #[inline]
    pub fn record(&self) -> u64 {
        let Position { record, .. } = *self;
        record
    }

    /// Sets the byte value and returns `self` so calls can be chained.
    #[inline]
    pub fn set_byte(&mut self, byte: u64) -> &mut Position {
        self.byte = byte;
        self
    }

    /// Sets the line value and returns `self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics when `line` is `0`.
    #[inline]
    pub fn set_line(&mut self, line: u64) -> &mut Position {
        assert!(line > 0);
        self.line = line;
        self
    }

    /// Sets the record value and returns `self` so calls can be chained.
    #[inline]
    pub fn set_record(&mut self, record: u64) -> &mut Position {
        self.record = record;
        self
    }
}
/// The end offsets of a sequence of back-to-back fields.
///
/// Field `i` spans from the end of field `i - 1` (or `0` for the first
/// field) up to `ends[i]`.
#[derive(Clone, Debug, Eq, PartialEq)]
struct Bounds {
    /// End offset of each field. The vector may be longer than `len`; slots
    /// at index `len` and beyond are spare and hold no meaningful value.
    ends: Vec<usize>,
    /// Number of leading entries of `ends` that are in use.
    len: usize,
}

impl Default for Bounds {
    /// Returns empty bounds with no spare slots.
    #[inline]
    fn default() -> Self {
        Self::with_capacity(0)
    }
}

impl Bounds {
    /// Creates empty bounds with `capacity` zero-filled spare slots.
    #[inline]
    fn with_capacity(capacity: usize) -> Bounds {
        let ends = vec![0; capacity];
        Bounds { ends, len: 0 }
    }

    /// Returns the byte range of field `i`, or `None` when `i` is not the
    /// index of a field in use.
    #[inline]
    fn get(&self, i: usize) -> Option<Range<usize>> {
        if i >= self.len {
            return None;
        }
        let end = *self.ends.get(i)?;
        // A field starts where its predecessor ends; the first starts at 0.
        let start = match i {
            0 => 0,
            _ => self.ends.get(i - 1).map_or(0, |&prev| prev),
        };
        Some(ops::Range { start, end })
    }

    /// Returns the end offsets that are in use.
    #[inline]
    fn ends(&self) -> &[usize] {
        let in_use = self.len;
        &self.ends[..in_use]
    }

    /// Returns the end offset of the last field, or `0` with no fields.
    #[inline]
    fn end(&self) -> usize {
        match self.ends().last() {
            Some(&last) => last,
            None => 0,
        }
    }

    /// Returns the number of fields in use.
    #[inline]
    fn len(&self) -> usize {
        self.len
    }

    /// Doubles the number of slots (to at least 4), zero-filling new ones.
    ///
    /// # Panics
    ///
    /// Panics if doubling the slot count overflows `usize`.
    #[inline]
    fn expand(&mut self) {
        let doubled = self.ends.len().checked_mul(2).unwrap();
        let target = cmp::max(4, doubled);
        self.ends.resize(target, 0);
    }

    /// Records a new field ending at offset `pos`, growing if necessary.
    #[inline]
    fn add(&mut self, pos: usize) {
        if self.ends.len() <= self.len {
            self.expand();
        }
        let slot = self.len;
        self.ends[slot] = pos;
        self.len = slot + 1;
    }
}
/// Allows `record[i]` as a shorthand for `record.get(i).unwrap()`.
///
/// # Panics
///
/// Panics when `i` is not a valid field index.
impl ops::Index<usize> for ByteRecord {
    type Output = [u8];

    #[inline]
    fn index(&self, i: usize) -> &[u8] {
        let field = self.get(i);
        field.unwrap()
    }
}
impl From<StringRecord> for ByteRecord {
#[inline]
fn from(record: StringRecord) -> ByteRecord {
record.into_byte_record()
}
}
impl<T: AsRef<[u8]>> From<Vec<T>> for ByteRecord {
#[inline]
fn from(xs: Vec<T>) -> ByteRecord {
ByteRecord::from_iter(&xs)
}
}
impl<'a, T: AsRef<[u8]>> From<&'a [T]> for ByteRecord {
#[inline]
fn from(xs: &'a [T]) -> ByteRecord {
ByteRecord::from_iter(xs)
}
}
/// Collects an iterator of byte-like values into a new record, one field per
/// item, in iteration order.
impl<T> FromIterator<T> for ByteRecord
where
    T: AsRef<[u8]>,
{
    #[inline]
    fn from_iter<I: IntoIterator<Item = T>>(iter: I) -> ByteRecord {
        let mut collected = ByteRecord::new();
        Extend::extend(&mut collected, iter);
        collected
    }
}
/// Appends every item of an iterator to the record as a new field, in
/// iteration order.
impl<T> Extend<T> for ByteRecord
where
    T: AsRef<[u8]>,
{
    #[inline]
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) {
        iter.into_iter().for_each(|field| self.push_field(field.as_ref()));
    }
}
/// A double-ended iterator over the fields of a `ByteRecord`.
///
/// The `'r` lifetime is the lifetime of the record being iterated over.
///
/// The iterator is `Clone`: it holds only a shared reference and a few
/// cursors, so cloning is cheap and yields an independent iterator that
/// continues from the same point.
#[derive(Clone)]
pub struct ByteRecordIter<'r> {
    /// The record whose fields are yielded.
    r: &'r ByteRecord,
    /// Start offset of the field most recently yielded from the back; it is
    /// the end offset of the next field taken from the back.
    last_start: usize,
    /// End offset of the field most recently yielded from the front; it is
    /// the start offset of the next field taken from the front.
    last_end: usize,
    /// Index of the next field to yield from the front.
    i_forward: usize,
    /// One past the index of the next field to yield from the back.
    i_reverse: usize,
}
/// Iterating over `&ByteRecord` yields each field as a byte slice, in order.
impl<'r> IntoIterator for &'r ByteRecord {
    type IntoIter = ByteRecordIter<'r>;
    type Item = &'r [u8];

    #[inline]
    fn into_iter(self) -> ByteRecordIter<'r> {
        let field_count = self.len();
        // The back cursor starts at the end of the last field.
        let used_bytes = self.as_slice().len();
        ByteRecordIter {
            r: self,
            i_forward: 0,
            i_reverse: field_count,
            last_end: 0,
            last_start: used_bytes,
        }
    }
}
// `size_hint` reports identical lower and upper bounds
// (`i_reverse - i_forward`), so the provided `len()` is exact.
impl<'r> ExactSizeIterator for ByteRecordIter<'r> {}
impl<'r> Iterator for ByteRecordIter<'r> {
    type Item = &'r [u8];

    /// Yields the next field from the front, or `None` once the front and
    /// back cursors have met.
    #[inline]
    fn next(&mut self) -> Option<&'r [u8]> {
        if self.i_forward == self.i_reverse {
            return None;
        }
        // The field begins where the previously yielded front field ended.
        let start = self.last_end;
        let end = self.r.0.bounds.ends()[self.i_forward];
        self.last_end = end;
        self.i_forward += 1;
        Some(&self.r.0.fields[start..end])
    }

    /// Returns the exact number of fields not yet yielded from either end.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.i_reverse - self.i_forward;
        (remaining, Some(remaining))
    }

    /// Returns the number of remaining fields without walking them.
    #[inline]
    fn count(self) -> usize {
        self.len()
    }
}
impl<'r> DoubleEndedIterator for ByteRecordIter<'r> {
    /// Yields the next field from the back, or `None` once the front and
    /// back cursors have met.
    #[inline]
    fn next_back(&mut self) -> Option<&'r [u8]> {
        if self.i_forward == self.i_reverse {
            return None;
        }
        self.i_reverse -= 1;
        // A field starts where its predecessor ends; the first starts at 0.
        let start = match self.i_reverse {
            0 => 0,
            i => self.r.0.bounds.ends()[i - 1],
        };
        // It ends where the previously yielded back field started.
        let end = self.last_start;
        self.last_start = start;
        Some(&self.r.0.fields[start..end])
    }
}
// Unit tests for `ByteRecord`: field access, trimming, UTF-8 validation
// (through `StringRecord::from_byte_record`), iteration and equality.
#[cfg(test)]
mod tests {
use crate::string_record::StringRecord;
use super::ByteRecord;
// Shorthand for viewing a string literal as a byte slice.
fn b(s: &str) -> &[u8] {
s.as_bytes()
}
// A single pushed field is retrievable; indices past it give `None`.
#[test]
fn record_1() {
let mut rec = ByteRecord::new();
rec.push_field(b"foo");
assert_eq!(rec.len(), 1);
assert_eq!(rec.get(0), Some(b("foo")));
assert_eq!(rec.get(1), None);
assert_eq!(rec.get(2), None);
}
// Two pushed fields keep their order and their own boundaries.
#[test]
fn record_2() {
let mut rec = ByteRecord::new();
rec.push_field(b"foo");
rec.push_field(b"quux");
assert_eq!(rec.len(), 2);
assert_eq!(rec.get(0), Some(b("foo")));
assert_eq!(rec.get(1), Some(b("quux")));
assert_eq!(rec.get(2), None);
assert_eq!(rec.get(3), None);
}
// A new record has no fields at all.
#[test]
fn empty_record() {
let rec = ByteRecord::new();
assert_eq!(rec.len(), 0);
assert_eq!(rec.get(0), None);
assert_eq!(rec.get(1), None);
}
// A field made only of whitespace trims down to an empty field (the field
// itself is kept).
#[test]
fn trim_whitespace_only() {
let mut rec = ByteRecord::from(vec![b" \t\n\r\x0c"]);
rec.trim();
assert_eq!(rec.get(0), Some(b("")));
}
// Leading whitespace is removed, for one field and for several.
#[test]
fn trim_front() {
let mut rec = ByteRecord::from(vec![b" abc"]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
let mut rec = ByteRecord::from(vec![b(" abc"), b(" xyz")]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
assert_eq!(rec.get(1), Some(b("xyz")));
}
// Trailing whitespace is removed, for one field and for several.
#[test]
fn trim_back() {
let mut rec = ByteRecord::from(vec![b"abc "]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
let mut rec = ByteRecord::from(vec![b("abc "), b("xyz ")]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
assert_eq!(rec.get(1), Some(b("xyz")));
}
// Whitespace on both sides is removed, for one field and for several.
#[test]
fn trim_both() {
let mut rec = ByteRecord::from(vec![b" abc "]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
let mut rec = ByteRecord::from(vec![b(" abc "), b(" xyz ")]);
rec.trim();
assert_eq!(rec.get(0), Some(b("abc")));
assert_eq!(rec.get(1), Some(b("xyz")));
}
// Trimming a record with a single empty field must not panic.
#[test]
fn trim_does_not_panic_on_empty_records_1() {
let mut rec = ByteRecord::from(vec![b""]);
rec.trim();
assert_eq!(rec.get(0), Some(b("")));
}
// Trimming a record with two empty fields must not panic.
#[test]
fn trim_does_not_panic_on_empty_records_2() {
let mut rec = ByteRecord::from(vec![b"", b""]);
rec.trim();
assert_eq!(rec.get(0), Some(b("")));
assert_eq!(rec.get(1), Some(b("")));
}
// Trimming a record with no fields must not panic.
#[test]
fn trim_does_not_panic_on_empty_records_3() {
let mut rec = ByteRecord::new();
rec.trim();
assert_eq!(rec.as_slice().len(), 0);
}
// An empty field still counts as a field.
#[test]
fn empty_field_1() {
let mut rec = ByteRecord::new();
rec.push_field(b"");
assert_eq!(rec.len(), 1);
assert_eq!(rec.get(0), Some(b("")));
assert_eq!(rec.get(1), None);
assert_eq!(rec.get(2), None);
}
// Consecutive empty fields are kept as distinct fields.
#[test]
fn empty_field_2() {
let mut rec = ByteRecord::new();
rec.push_field(b"");
rec.push_field(b"");
assert_eq!(rec.len(), 2);
assert_eq!(rec.get(0), Some(b("")));
assert_eq!(rec.get(1), Some(b("")));
assert_eq!(rec.get(2), None);
assert_eq!(rec.get(3), None);
}
// An empty field between two non-empty fields keeps its slot.
#[test]
fn empty_surround_1() {
let mut rec = ByteRecord::new();
rec.push_field(b"foo");
rec.push_field(b"");
rec.push_field(b"quux");
assert_eq!(rec.len(), 3);
assert_eq!(rec.get(0), Some(b("foo")));
assert_eq!(rec.get(1), Some(b("")));
assert_eq!(rec.get(2), Some(b("quux")));
assert_eq!(rec.get(3), None);
assert_eq!(rec.get(4), None);
}
// Empty fields in the middle and at the end both keep their slots.
#[test]
fn empty_surround_2() {
let mut rec = ByteRecord::new();
rec.push_field(b"foo");
rec.push_field(b"");
rec.push_field(b"quux");
rec.push_field(b"");
assert_eq!(rec.len(), 4);
assert_eq!(rec.get(0), Some(b("foo")));
assert_eq!(rec.get(1), Some(b("")));
assert_eq!(rec.get(2), Some(b("quux")));
assert_eq!(rec.get(3), Some(b("")));
assert_eq!(rec.get(4), None);
assert_eq!(rec.get(5), None);
}
// An invalid byte in the second field is reported with field index 1 and
// the offset of the bad byte within that field.
#[test]
fn utf8_error_1() {
let mut rec = ByteRecord::new();
rec.push_field(b"foo");
rec.push_field(b"b\xFFar");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 1);
assert_eq!(err.utf8_error().valid_up_to(), 1);
}
// A field consisting of a single invalid byte fails at offset 0.
#[test]
fn utf8_error_2() {
let mut rec = ByteRecord::new();
rec.push_field(b"\xFF");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 0);
assert_eq!(err.utf8_error().valid_up_to(), 0);
}
// An invalid byte after one valid byte fails at offset 1.
#[test]
fn utf8_error_3() {
let mut rec = ByteRecord::new();
rec.push_field(b"a\xFF");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 0);
assert_eq!(err.utf8_error().valid_up_to(), 1);
}
// The reported offset is relative to the failing field, not the record:
// the bad byte ends the fifth field after three valid bytes.
#[test]
fn utf8_error_4() {
let mut rec = ByteRecord::new();
rec.push_field(b"a");
rec.push_field(b"b");
rec.push_field(b"c");
rec.push_field(b"d");
rec.push_field(b"xyz\xFF");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 4);
assert_eq!(err.utf8_error().valid_up_to(), 3);
}
// Same as above, but the bad byte starts the fifth field.
#[test]
fn utf8_error_5() {
let mut rec = ByteRecord::new();
rec.push_field(b"a");
rec.push_field(b"b");
rec.push_field(b"c");
rec.push_field(b"d");
rec.push_field(b"\xFFxyz");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 4);
assert_eq!(err.utf8_error().valid_up_to(), 0);
}
// `\xC9\x91` is a valid two-byte UTF-8 sequence, but here it is split
// across a field boundary, so the first field is invalid on its own.
#[test]
fn utf8_error_6() {
let mut rec = ByteRecord::new();
rec.push_field(b"a\xc9");
rec.push_field(b"\x91b");
let err = StringRecord::from_byte_record(rec).unwrap_err();
assert_eq!(err.utf8_error().field(), 0);
assert_eq!(err.utf8_error().valid_up_to(), 1);
}
// Invalid bytes left in the buffer after `clear` must not cause a UTF-8
// failure, because they no longer belong to any field.
#[test]
fn utf8_clear_ok() {
let mut rec = ByteRecord::new();
rec.push_field(b"\xFF");
assert!(StringRecord::from_byte_record(rec).is_err());
let mut rec = ByteRecord::new();
rec.push_field(b"\xFF");
rec.clear();
assert!(StringRecord::from_byte_record(rec).is_ok());
}
// Forward iteration yields the fields in insertion order.
#[test]
fn iter() {
let data = vec!["foo", "bar", "baz", "quux", "wat"];
let rec = ByteRecord::from(&*data);
let got: Vec<&str> =
rec.iter().map(|x| ::std::str::from_utf8(x).unwrap()).collect();
assert_eq!(data, got);
}
// Reverse iteration yields the fields in reverse insertion order.
#[test]
fn iter_reverse() {
let mut data = vec!["foo", "bar", "baz", "quux", "wat"];
let rec = ByteRecord::from(&*data);
let got: Vec<&str> = rec
.iter()
.rev()
.map(|x| ::std::str::from_utf8(x).unwrap())
.collect();
data.reverse();
assert_eq!(data, got);
}
// Mixing `next` and `next_back` yields each field exactly once, and both
// ends return `None` once the cursors meet.
#[test]
fn iter_forward_and_reverse() {
let data = vec!["foo", "bar", "baz", "quux", "wat"];
let rec = ByteRecord::from(data);
let mut it = rec.iter();
assert_eq!(it.next_back(), Some(b("wat")));
assert_eq!(it.next(), Some(b("foo")));
assert_eq!(it.next(), Some(b("bar")));
assert_eq!(it.next_back(), Some(b("quux")));
assert_eq!(it.next(), Some(b("baz")));
assert_eq!(it.next_back(), None);
assert_eq!(it.next(), None);
}
// Records with the same concatenated bytes but different field boundaries
// are not equal.
#[test]
fn eq_field_boundaries() {
let test1 = ByteRecord::from(vec!["12", "34"]);
let test2 = ByteRecord::from(vec!["123", "4"]);
assert_ne!(test1, test2);
}
// A record is not equal to another that is only a prefix of it.
#[test]
fn eq_record_len() {
let test1 = ByteRecord::from(vec!["12", "34", "56"]);
let test2 = ByteRecord::from(vec!["12", "34"]);
assert_ne!(test1, test2);
}
}