use crate::{
csv_headers::{Headers, HeadersParsed},
csv_parse_result::{CsvByteRecordWithHash, CsvLeftRightParseResult, Position, RecordHash},
csv_parser_hasher::HashMapValue,
diff_row::*,
};
use ahash::AHashMap as HashMap;
use crossbeam_channel::{Receiver, Sender};
use std::{
cmp::{max, Ordering},
collections::{hash_map::IntoIter, VecDeque},
convert::TryInto,
};
use thiserror::Error;
/// A fully materialized collection of differences, stored together with the
/// header information and the column count that belong to them.
#[derive(Debug, PartialEq, Clone)]
pub struct DiffByteRecords {
/// The individual differences (`Add`, `Delete` or `Modify` records).
inner: Vec<DiffByteRecord>,
/// Header information that accompanies the differences.
headers: Headers,
/// Number of columns, or `None` when it is not known.
num_columns: Option<usize>,
}
impl DiffByteRecords {
pub(crate) fn new(
inner: Vec<DiffByteRecord>,
headers: Headers,
num_columns: Option<usize>,
) -> Self {
Self {
inner,
headers,
num_columns,
}
}
pub fn headers(&self) -> &Headers {
&self.headers
}
pub fn num_columns(&self) -> Option<usize> {
self.num_columns
}
pub fn sort_by_line(&mut self) {
self.inner.sort_by(DiffByteRecord::cmp_by_line)
}
pub fn sort_by_columns<E: Into<ColumnIdx>, I: IntoIterator<Item = E>>(
&mut self,
cols: I,
) -> Result<(), ColumnIdxError> {
let cols_to_sort = cols.into_iter().map(|e| e.into()).collect::<Vec<_>>();
let mut error_maybe: Result<(), ColumnIdxError> = Ok(());
if !cols_to_sort.is_empty() {
self.inner.sort_by(|a, b| match (a, b) {
(DiffByteRecord::Add(add_l), DiffByteRecord::Add(add_r)) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (add_l, add_r)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord))
{
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Equal),
(
DiffByteRecord::Add(left),
DiffByteRecord::Modify {
delete: mod_del,
add: mod_add,
field_indices: _field_indices,
},
) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (left, mod_del)
.cmp_by_col(col_idx)
.and_then(|ord| match ord {
Ordering::Equal => (left, mod_add)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord)),
_ => Ok(Some(ord)),
}) {
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Greater),
(DiffByteRecord::Add(add), DiffByteRecord::Delete(del)) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (add, del)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord))
{
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Greater),
(
DiffByteRecord::Modify {
delete: mod_del,
add: mod_add,
field_indices: _field_indices,
},
DiffByteRecord::Add(add),
) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (mod_del, add)
.cmp_by_col(col_idx)
.and_then(|ord| match ord {
Ordering::Equal => (mod_add, add)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord)),
_ => Ok(Some(ord)),
}) {
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Less),
(
DiffByteRecord::Modify {
delete: delete_l,
add: add_l,
field_indices: _field_indices_l,
},
DiffByteRecord::Modify {
delete: delete_r,
add: add_r,
field_indices: _field_indices_r,
},
) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (delete_l, delete_r)
.cmp_by_col(col_idx)
.and_then(|ord| match ord {
Ordering::Equal => (add_l, add_r)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord)),
_ => Ok(Some(ord)),
}) {
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Equal),
(
DiffByteRecord::Modify {
delete: mod_del,
add: mod_add,
field_indices: _field_indices,
},
DiffByteRecord::Delete(del),
) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (mod_del, del)
.cmp_by_col(col_idx)
.and_then(|ord| match ord {
Ordering::Equal => (mod_add, del)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord)),
_ => Ok(Some(ord)),
}) {
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Greater),
(DiffByteRecord::Delete(del), DiffByteRecord::Add(add)) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (del, add)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord))
{
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Less),
(
DiffByteRecord::Delete(del),
DiffByteRecord::Modify {
delete: mod_del,
add: mod_add,
field_indices: _field_indices,
},
) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (del, mod_del)
.cmp_by_col(col_idx)
.and_then(|ord| match ord {
Ordering::Equal => (del, mod_add)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord)),
_ => Ok(Some(ord)),
}) {
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Less),
(DiffByteRecord::Delete(del_l), DiffByteRecord::Delete(del_r)) => cols_to_sort
.iter()
.find_map(|col_idx| {
match (del_l, del_r)
.cmp_by_col(col_idx)
.map(|ord| (!ord.is_eq()).then_some(ord))
{
Ok(ord) => ord,
Err(e) => {
if error_maybe.is_ok() {
error_maybe = Err(e);
}
None
}
}
})
.unwrap_or(Ordering::Equal),
});
}
error_maybe
}
#[cfg_attr(
feature = "rayon-threads",
doc = r##"
use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
use std::collections::HashSet;
use std::iter::FromIterator;
# fn main() -> Result<(), Box<dyn std::error::Error>> {
// some csv data with a header, where the first column is a unique id
let csv_data_left = "id,name,kind\n\
1,lemon,fruit\n\
2,strawberry,fruit";
let csv_data_right = "id,name,kind\n\
1,lemon,fruit\n\
2,strawberry,nut\n\
3,cherry,fruit";
let csv_byte_diff = CsvByteDiffLocal::new()?;
let mut diff_byte_records = csv_byte_diff.diff(
Csv::with_reader_seek(csv_data_left.as_bytes()),
Csv::with_reader_seek(csv_data_right.as_bytes()),
)?;
let diff_byte_record_slice = diff_byte_records.as_slice();
assert_eq!(
diff_byte_record_slice.len(),
2
);
Ok(())
# }
"##
)]
pub fn as_slice(&self) -> &[DiffByteRecord] {
self.inner.as_slice()
}
pub fn iter(&self) -> core::slice::Iter<'_, DiffByteRecord> {
self.inner.iter()
}
}
/// Comparison of two values by the content of a single column.
trait CmpByColumn {
/// Compares by the column identified through `col_idx`; returns a
/// `ColumnIdxError` when the column cannot be resolved.
fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError>;
}
impl CmpByColumn for (&ByteRecordLineInfo, &ByteRecordLineInfo) {
    /// Compares the field at `col_idx` of the left record (`self.0`) with the
    /// field at the same index of the right record (`self.1`), byte-wise.
    ///
    /// # Errors
    ///
    /// Returns [`ColumnIdxError::IdxOutOfBounds`] when at least one of the two
    /// records has no field at that index. The reported `len` is the field
    /// count of the shorter record, i.e. the bound the index actually violated.
    #[inline]
    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError> {
        let idx_for_both = col_idx
            .idx_for_both()
            .expect("idx, because it is the only enum variant");
        let (left_record, right_record) = (self.0.byte_record(), self.1.byte_record());
        match (
            left_record.get(idx_for_both),
            right_record.get(idx_for_both),
        ) {
            (Some(left_field), Some(right_field)) => Ok(left_field.cmp(right_field)),
            _ => Err(ColumnIdxError::IdxOutOfBounds {
                idx: idx_for_both,
                // Either side may be the one that is too short; reporting the
                // smaller length keeps the error message truthful.
                len: left_record.len().min(right_record.len()),
            }),
        }
    }
}
/// Identifies a column that records are sorted by.
pub enum ColumnIdx {
/// One index that is applied to both records of a comparison.
IdxForBoth(usize),
}
impl ColumnIdx {
    /// Returns the index that applies to both compared records.
    ///
    /// With `IdxForBoth` being the only variant, this is always `Some`.
    #[inline]
    fn idx_for_both(&self) -> Option<usize> {
        let Self::IdxForBoth(idx) = self;
        Some(*idx)
    }
}
impl From<usize> for ColumnIdx {
fn from(value: usize) -> Self {
Self::IdxForBoth(value)
}
}
/// Error produced when a column index cannot be resolved while comparing records.
#[derive(Debug, Error, PartialEq)]
pub enum ColumnIdxError {
/// No field exists at `idx`; `len` is the field count that was reported for
/// the record(s) the index was checked against.
#[error("the column index `{idx}` exceeds the total number of columns ({len})")]
IdxOutOfBounds { idx: usize, len: usize },
}
impl IntoIterator for DiffByteRecords {
type Item = DiffByteRecord;
type IntoIter = DiffByteRecordsIntoIterator;
fn into_iter(self) -> Self::IntoIter {
let num_columns = self.num_columns();
DiffByteRecordsIntoIterator {
inner: self.inner.into_iter(),
headers: self.headers,
num_columns,
}
}
}
/// Owning iterator over the differences of a `DiffByteRecords`, which keeps
/// the headers and the column count accessible while iterating.
pub struct DiffByteRecordsIntoIterator {
/// The remaining differences.
inner: std::vec::IntoIter<DiffByteRecord>,
/// Header information taken over from the consumed `DiffByteRecords`.
headers: Headers,
/// Number of columns, or `None` when it is not known.
num_columns: Option<usize>,
}
impl Iterator for DiffByteRecordsIntoIterator {
    type Item = DiffByteRecord;

    /// Yields the next difference, or `None` once all have been handed out.
    fn next(&mut self) -> Option<Self::Item> {
        self.inner.next()
    }

    /// Forwards the exact bounds of the underlying vector iterator, so that
    /// consumers such as `collect` can reserve the right capacity up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.inner.size_hint()
    }
}
impl DiffByteRecordsIntoIterator {
    /// Returns the header information that belongs to the iterated differences.
    pub fn headers(&self) -> &Headers {
        let Self { headers, .. } = self;
        headers
    }

    /// Returns the number of columns, or `None` when it is not known.
    pub fn num_columns(&self) -> Option<usize> {
        let Self { num_columns, .. } = self;
        *num_columns
    }
}
/// Map from a `u128` key to a `HashMapValue` over `Position` and `RecordHash`.
// NOTE(review): not used in the part of the file reviewed here — presumably the
// position-based counterpart of `CsvByteRecordValueMap`; confirm against callers.
pub(crate) type CsvHashValueMap = HashMap<u128, HashMapValue<Position, RecordHash>>;
/// Map from a record's `RecordHash::key` to the comparison state
/// (`HashMapValue`) of the byte record(s) stored for that key.
pub(crate) type CsvByteRecordValueMap = HashMap<u128, HashMapValue<csv::ByteRecord>>;
/// Line interval that decides how often a record map is flushed.
struct MaxCapacityThreshold(usize);

impl MaxCapacityThreshold {
    /// Returns the current threshold.
    #[inline]
    fn value(&self) -> usize {
        let Self(threshold) = self;
        *threshold
    }

    /// Re-computes the threshold, but only on lines that are a multiple of 100.
    ///
    /// The new value is one hundredth of `current_line` (saturating at
    /// `usize::MAX` should the conversion fail) and never drops below 10.
    fn calc_new(&mut self, current_line: u64) {
        if current_line % 100 != 0 {
            return;
        }
        let hundredth: usize = (current_line / 100).try_into().unwrap_or(usize::MAX);
        self.0 = hundredth.max(10);
    }
}
/// Iterator that computes differences lazily from a stream of parsed left and
/// right records.
pub struct DiffByteRecordsIterator {
/// Results that are already computed but have not been handed out by `next` yet.
buf: VecDeque<csv::Result<DiffByteRecord>>,
/// Header information as it was parsed.
headers: HeadersParsed,
/// Number of columns, or `None` when it is not known.
num_columns: Option<usize>,
/// Source of parsed records: a vector of already received results, chained
/// with the channel that delivers the remaining ones.
csv_left_right_parse_results: std::iter::Chain<
std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
>,
/// Left records keyed by `RecordHash::key`; an entry starts as `Initial` and
/// becomes `Equal` or `Modified` once a right record with the same key arrives.
csv_records_left_map: CsvByteRecordValueMap,
/// Draining iterator over the left map; created once the record source is exhausted.
csv_records_left_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
/// Right records keyed by `RecordHash::key`; mirrors `csv_records_left_map`.
csv_records_right_map: CsvByteRecordValueMap,
/// Draining iterator over the right map; created after the left map has been drained.
csv_records_right_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
/// Line interval at which already compared entries are removed from the left map.
max_capacity_left_map: MaxCapacityThreshold,
/// Line interval at which already compared entries are removed from the right map.
max_capacity_right_map: MaxCapacityThreshold,
/// Channel over which the byte records of equal pairs are sent back
/// (presumably for reuse by the producer of the records — confirm against caller).
sender_csv_records_recycle: Sender<csv::ByteRecord>,
}
impl DiffByteRecordsIterator {
pub(crate) fn new(
csv_left_right_parse_results: std::iter::Chain<
std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
>,
sender_csv_records_recycle: Sender<csv::ByteRecord>,
headers: HeadersParsed,
num_columns: Option<usize>,
) -> Self {
Self {
buf: Default::default(),
headers,
num_columns,
csv_left_right_parse_results,
csv_records_left_map: HashMap::new(),
csv_records_left_map_iter: None,
csv_records_right_map: HashMap::new(),
csv_records_right_map_iter: None,
max_capacity_left_map: MaxCapacityThreshold(10),
max_capacity_right_map: MaxCapacityThreshold(10),
sender_csv_records_recycle,
}
}
pub fn headers(&self) -> &HeadersParsed {
&self.headers
}
pub fn num_columns(&self) -> Option<usize> {
self.num_columns
}
pub fn try_to_diff_byte_records(mut self) -> csv::Result<DiffByteRecords> {
let num_cols = self.num_columns();
let headers_parsed = std::mem::take(&mut self.headers);
let headers: Headers = headers_parsed.try_into()?;
let diff_records = self.collect::<csv::Result<_>>()?;
Ok(DiffByteRecords::new(diff_records, headers, num_cols))
}
}
impl Iterator for DiffByteRecordsIterator {
type Item = csv::Result<DiffByteRecord>;
fn next(&mut self) -> Option<Self::Item> {
if !self.buf.is_empty() {
return self.buf.pop_front();
}
for csv_left_right_parse_result in &mut self.csv_left_right_parse_results {
match csv_left_right_parse_result {
CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
byte_record: Ok(byte_record_left),
record_hash: record_hash_left,
}) => {
let byte_record_left_line =
byte_record_left.position().map_or(0, |pos| pos.line());
match self.csv_records_right_map.get_mut(&record_hash_left.key) {
Some(hash_map_val) => {
if let HashMapValue::Initial(record_hash_right, byte_record_right) =
hash_map_val
{
if record_hash_left.record_hash != *record_hash_right {
*hash_map_val = HashMapValue::Modified(
byte_record_left,
std::mem::take(byte_record_right),
);
} else {
*hash_map_val = HashMapValue::Equal(
byte_record_left,
std::mem::take(byte_record_right),
);
}
}
}
None => {
self.csv_records_left_map.insert(
record_hash_left.key,
HashMapValue::Initial(
record_hash_left.record_hash,
byte_record_left,
),
);
}
}
if self.max_capacity_right_map.value() > 0
&& byte_record_left_line % self.max_capacity_right_map.value() as u64 == 0
{
self.max_capacity_right_map.calc_new(byte_record_left_line);
for (_k, v) in self
.csv_records_right_map
.extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
{
match v {
HashMapValue::Equal(byte_record_left, byte_record_right) => {
let _ = self.sender_csv_records_recycle.send(byte_record_left);
let _ = self.sender_csv_records_recycle.send(byte_record_right);
}
HashMapValue::Modified(left_byte_record, right_byte_record) => {
let fields_modified = left_byte_record
.iter()
.enumerate()
.zip(right_byte_record.iter())
.fold(
Vec::new(),
|mut acc, ((idx, field_left), field_right)| {
if field_left != field_right {
acc.push(idx);
}
acc
},
);
let left_byte_record_line = left_byte_record
.position()
.expect("a record position")
.line();
let right_byte_record_line = right_byte_record
.position()
.expect("a record position")
.line();
self.buf.push_back(Ok(DiffByteRecord::Modify {
add: ByteRecordLineInfo::new(
right_byte_record,
right_byte_record_line,
),
delete: ByteRecordLineInfo::new(
left_byte_record,
left_byte_record_line,
),
field_indices: fields_modified,
}));
}
HashMapValue::Initial(..) => {
unreachable!("reached a hashmap value that shouldn't be there")
}
}
}
if !self.buf.is_empty() {
break;
}
}
}
CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
byte_record: Err(byte_record_left_err),
..
}) => {
self.buf.push_back(Err(byte_record_left_err));
break;
}
CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
byte_record: Ok(byte_record_right),
record_hash: record_hash_right,
}) => {
let byte_record_right_line =
byte_record_right.position().map_or(0, |pos| pos.line());
match self.csv_records_left_map.get_mut(&record_hash_right.key) {
Some(hash_map_val) => {
if let HashMapValue::Initial(record_hash_left, byte_record_left) =
hash_map_val
{
if *record_hash_left != record_hash_right.record_hash {
*hash_map_val = HashMapValue::Modified(
std::mem::take(byte_record_left),
byte_record_right,
);
} else {
*hash_map_val = HashMapValue::Equal(
std::mem::take(byte_record_left),
byte_record_right,
);
}
}
}
None => {
self.csv_records_right_map.insert(
record_hash_right.key,
HashMapValue::Initial(
record_hash_right.record_hash,
byte_record_right,
),
);
}
}
if self.max_capacity_left_map.value() > 0
&& byte_record_right_line % self.max_capacity_left_map.value() as u64 == 0
{
self.max_capacity_left_map.calc_new(byte_record_right_line);
for (_k, v) in self
.csv_records_left_map
.extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
{
match v {
HashMapValue::Equal(byte_record_left, byte_record_right) => {
let _ = self.sender_csv_records_recycle.send(byte_record_left);
let _ = self.sender_csv_records_recycle.send(byte_record_right);
}
HashMapValue::Modified(left_byte_record, right_byte_record) => {
let fields_modified = left_byte_record
.iter()
.enumerate()
.zip(right_byte_record.iter())
.fold(
Vec::new(),
|mut acc, ((idx, field_left), field_right)| {
if field_left != field_right {
acc.push(idx);
}
acc
},
);
let left_byte_record_line = left_byte_record
.position()
.expect("a record position")
.line();
let right_byte_record_line = right_byte_record
.position()
.expect("a record position")
.line();
self.buf.push_back(Ok(DiffByteRecord::Modify {
add: ByteRecordLineInfo::new(
right_byte_record,
right_byte_record_line,
),
delete: ByteRecordLineInfo::new(
left_byte_record,
left_byte_record_line,
),
field_indices: fields_modified,
}));
}
HashMapValue::Initial(..) => {
unreachable!("reached a hashmap value that shouldn't be there")
}
}
}
if !self.buf.is_empty() {
break;
}
}
}
CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
byte_record: Err(e),
..
}) => {
self.buf.push_back(Err(e));
break;
}
}
}
if !self.buf.is_empty() {
return self.buf.pop_front();
}
let iter_left_map = self
.csv_records_left_map_iter
.get_or_insert(std::mem::take(&mut self.csv_records_left_map).into_iter());
let iter_left_map =
iter_left_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
if let Some(value) = get_next_diff(iter_left_map, DiffByteRecord::Delete) {
return value;
}
let iter_right_map = self
.csv_records_right_map_iter
.get_or_insert(std::mem::take(&mut self.csv_records_right_map).into_iter());
let iter_right_map =
iter_right_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
if let Some(value) = get_next_diff(iter_right_map, DiffByteRecord::Add) {
return value;
}
None
}
}
/// Takes the next entry from `iter_map` and turns it into a diff result.
///
/// An `Initial` entry (a record without counterpart) is wrapped with
/// `diff_byte_record_add_or_delete`; a `Modified` entry becomes a `Modify`
/// record that lists the indices of the differing fields. Returns `None` when
/// the iterator is exhausted or yields any other kind of entry.
#[inline]
fn get_next_diff<I: Iterator<Item = (u128, HashMapValue<csv::ByteRecord>)>>(
    mut iter_map: I,
    diff_byte_record_add_or_delete: fn(ByteRecordLineInfo) -> DiffByteRecord,
) -> Option<Option<Result<DiffByteRecord, csv::Error>>> {
    let (_key, entry) = iter_map.next()?;
    let diff = match entry {
        HashMapValue::Initial(_hash, byte_record) => {
            let line = byte_record.position().expect("a record position").line();
            diff_byte_record_add_or_delete(ByteRecordLineInfo::new(byte_record, line))
        }
        HashMapValue::Modified(left_byte_record, right_byte_record) => {
            let mut fields_modified = Vec::new();
            for (idx, (field_left, field_right)) in left_byte_record
                .iter()
                .zip(right_byte_record.iter())
                .enumerate()
            {
                if field_left != field_right {
                    fields_modified.push(idx);
                }
            }
            let left_line = left_byte_record
                .position()
                .expect("a record position")
                .line();
            let right_line = right_byte_record
                .position()
                .expect("a record position")
                .line();
            DiffByteRecord::Modify {
                add: ByteRecordLineInfo::new(right_byte_record, right_line),
                delete: ByteRecordLineInfo::new(left_byte_record, left_line),
                field_indices: fields_modified,
            }
        }
        _ => return None,
    };
    Some(Some(Ok(diff)))
}
/// Intermediate state from which a `DiffByteRecordsIterator` is built; used to
/// determine the number of columns before iteration starts.
pub(crate) struct DiffByteRecordFirstRow {
/// Receiving end of the channel that delivers the parsed left and right records.
csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
/// Channel that is handed on to the iterator for sending byte records back.
sender_csv_records_recycle: Sender<csv::ByteRecord>,
/// Header information as it was parsed.
headers: HeadersParsed,
}
impl DiffByteRecordFirstRow {
    /// Bundles the receiving end of the record channel, the recycle channel
    /// and the parsed headers.
    pub(crate) fn new(
        csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
        sender_csv_records_recycle: Sender<csv::ByteRecord>,
        headers: HeadersParsed,
    ) -> Self {
        Self {
            headers,
            sender_csv_records_recycle,
            csv_left_right_parse_results,
        }
    }

    /// Converts into the iterator that computes the differences.
    ///
    /// The number of columns is taken from the headers when they provide it.
    /// Otherwise up to two results are received from the channel, the larger
    /// field count of the successfully parsed ones is used, and the received
    /// results are put in front of the channel again so that none is lost.
    pub(crate) fn into_diff_byte_record_iter(self) -> DiffByteRecordsIterator {
        type ParseResult = CsvLeftRightParseResult<CsvByteRecordWithHash>;

        // Field count of a parse result; `None` if the record failed to parse.
        let field_count =
            |parsed: &ParseResult| parsed.byte_record_result().ok().map(|record| record.len());

        let (num_cols, first_few): (Option<usize>, Vec<ParseResult>) =
            if let known @ Some(_) = self.headers.max_num_cols() {
                (known, Vec::new())
            } else {
                let receiver = &self.csv_left_right_parse_results;
                let first = receiver.recv();
                let second = receiver.recv();
                match (first, second) {
                    (Ok(first), Ok(second)) => (
                        max(field_count(&first), field_count(&second)),
                        vec![first, second],
                    ),
                    (Ok(only), Err(_)) | (Err(_), Ok(only)) => (field_count(&only), vec![only]),
                    (Err(_), Err(_)) => (None, Vec::new()),
                }
            };

        let parse_results = first_few
            .into_iter()
            .chain(self.csv_left_right_parse_results);
        DiffByteRecordsIterator::new(
            parse_results,
            self.sender_csv_records_recycle,
            self.headers,
            num_cols,
        )
    }
}
/// Borrowing access to the byte record (or the parse error) of a parse result.
trait ByteRecordResultFromParseResult {
/// Returns a reference to the parsed byte record, or to the error if parsing failed.
fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error>;
}
impl ByteRecordResultFromParseResult for CsvLeftRightParseResult<CsvByteRecordWithHash> {
    /// Borrows the parse outcome, regardless of whether it belongs to the
    /// left or the right side.
    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error> {
        let (CsvLeftRightParseResult::Left(parsed) | CsvLeftRightParseResult::Right(parsed)) =
            self;
        parsed.byte_record.as_ref()
    }
}
#[cfg(test)]
mod tests {
use crate::{
diff_result::{ColumnIdx, ColumnIdxError},
diff_row::{ByteRecordLineInfo, DiffByteRecord},
};
use pretty_assertions::assert_eq;
use std::error::Error;
use super::DiffByteRecords;
#[test]
fn sort_by_line_delete_then_add_already_sorted() -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the line differs.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(blank(3)),
            DiffByteRecord::Add(blank(4)),
        ],
        Default::default(),
        None,
    );
    let expected = diff_records.clone();

    diff_records.sort_by_line();

    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_line_delete_then_add_not_sorted() -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the line differs.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(blank(4)),
            DiffByteRecord::Delete(blank(3)),
        ],
        Default::default(),
        None,
    );
    let expected = vec![
        DiffByteRecord::Delete(blank(3)),
        DiffByteRecord::Add(blank(4)),
    ];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_line_modify_not_sorted_simple_one_sided() -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the lines differ.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);
    let modify = |delete_line, add_line| DiffByteRecord::Modify {
        delete: blank(delete_line),
        add: blank(add_line),
        field_indices: vec![],
    };

    let mut diff_records =
        DiffByteRecords::new(vec![modify(6, 6), modify(5, 5)], Default::default(), None);
    let expected = vec![modify(5, 5), modify(6, 6)];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_line_modify_lines_equal_on_opposite_side_prefer_smaller_delete_side_first(
) -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the lines differ.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);
    let modify = |delete_line, add_line| DiffByteRecord::Modify {
        delete: blank(delete_line),
        add: blank(add_line),
        field_indices: vec![],
    };

    let mut diff_records =
        DiffByteRecords::new(vec![modify(6, 5), modify(5, 6)], Default::default(), None);
    let expected = vec![modify(5, 6), modify(6, 5)];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_line_modify_sum_lines_equal_minimum_on_add_side_prefer_smaller_add_side_first(
) -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the lines differ.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);
    let modify = |delete_line, add_line| DiffByteRecord::Modify {
        delete: blank(delete_line),
        add: blank(add_line),
        field_indices: vec![],
    };

    let mut diff_records =
        DiffByteRecords::new(vec![modify(1, 4), modify(5, 0)], Default::default(), None);
    let expected = vec![modify(5, 0), modify(1, 4)];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_line_modify_not_sort_by_sum_lines_but_by_smallest() -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the lines differ.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);
    let modify = |delete_line, add_line| DiffByteRecord::Modify {
        delete: blank(delete_line),
        add: blank(add_line),
        field_indices: vec![],
    };

    let mut diff_records =
        DiffByteRecords::new(vec![modify(1, 3), modify(5, 0)], Default::default(), None);
    let expected = vec![modify(5, 0), modify(1, 3)];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_line_modify_complex_interleaved() -> Result<(), Box<dyn Error>> {
    // All records share the same placeholder fields; only the lines differ.
    let blank = |line| ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), line);
    let modify = |delete_line, add_line| DiffByteRecord::Modify {
        delete: blank(delete_line),
        add: blank(add_line),
        field_indices: vec![],
    };

    let mut diff_records = DiffByteRecords::new(
        vec![modify(8, 6), modify(6, 7), modify(7, 8)],
        Default::default(),
        None,
    );
    let expected = vec![modify(6, 7), modify(8, 6), modify(7, 8)];

    diff_records.sort_by_line();

    assert_eq!(diff_records.as_slice(), expected);
    Ok(())
}
#[test]
fn sort_by_col_selection_of_cols_is_empty_order_does_not_change() -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["d", "e", "f"], 3)),
            DiffByteRecord::Add(row(["a", "b", "c"], 4)),
        ],
        Default::default(),
        None,
    );
    let expected = diff_records.clone();

    diff_records.sort_by_columns::<ColumnIdx, _>(vec![])?;

    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_all_equal_delete_before_add_order_does_not_change() -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["a", "x", "y"], 3)),
            DiffByteRecord::Add(row(["a", "b", "c"], 4)),
        ],
        Default::default(),
        None,
    );
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_second_col_a_in_add_is_less_than_b_in_modify_delete() -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["_", "b", "_"], 3)),
            DiffByteRecord::Add(row(["_", "a", "_"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![1])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["_", "a", "_"], 4)),
            DiffByteRecord::Delete(row(["_", "b", "_"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_certain_col_idx_twice_is_ok() -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["az", "_", "_"], 3)),
            DiffByteRecord::Add(row(["a", "_", "_"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![0, 0])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["a", "_", "_"], 4)),
            DiffByteRecord::Delete(row(["az", "_", "_"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_first_and_second_col_first_col_val_is_equal_so_second_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["x", "b", "_"], 3)),
            DiffByteRecord::Add(row(["x", "a", "_"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![0, 1])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["x", "a", "_"], 4)),
            DiffByteRecord::Delete(row(["x", "b", "_"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_first_second_and_third_col_first_and_second_col_val_is_equal_so_third_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["x", "a", "z"], 3)),
            DiffByteRecord::Add(row(["x", "a", "i"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![0, 1, 2])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["x", "a", "i"], 4)),
            DiffByteRecord::Delete(row(["x", "a", "z"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_first_second_and_third_col_back_to_front_third_and_second_col_val_is_equal_so_first_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["2", "a", "z"], 3)),
            DiffByteRecord::Add(row(["1", "a", "z"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![2, 1, 0])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["1", "a", "z"], 4)),
            DiffByteRecord::Delete(row(["2", "a", "z"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_delete_must_be_smaller_than_add_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["same", "_", "_"], 4)),
            DiffByteRecord::Delete(row(["same", "_", "_"], 5)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![0])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["same", "_", "_"], 5)),
            DiffByteRecord::Add(row(["same", "_", "_"], 4)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_with_three_items_first_and_second_by_first_col_second_and_third_by_second_col(
) -> Result<(), Box<dyn Error>> {
    let row = |fields: [&str; 3], line| {
        ByteRecordLineInfo::new(csv::ByteRecord::from(fields.to_vec()), line)
    };

    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(row(["1", "b", "_"], 3)),
            DiffByteRecord::Add(row(["1", "a", "_"], 4)),
            DiffByteRecord::Add(row(["0", "a", "_"], 4)),
        ],
        Default::default(),
        None,
    );

    diff_records.sort_by_columns(vec![0, 1])?;

    let expected = DiffByteRecords::new(
        vec![
            DiffByteRecord::Add(row(["0", "a", "_"], 4)),
            DiffByteRecord::Add(row(["1", "a", "_"], 4)),
            DiffByteRecord::Delete(row(["1", "b", "_"], 3)),
        ],
        Default::default(),
        None,
    );
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    // In column 0 the `Modify` record's deleted side holds "a" and the
    // standalone `Delete` record holds "b"; the test expects the input order
    // to survive the sort unchanged.
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );
    // Snapshot the expected order BEFORE sorting. Cloning after the sort would
    // compare the sorted value with itself, so the assertion could never fail.
    let expected = diff_records.clone();
    diff_records.sort_by_columns(vec![0])?;
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    // The standalone `Delete` record and the `Modify` record's deleted side
    // both hold "c" in column 0, while the `Modify` record's added side holds
    // "a"; the test expects the input order to survive the sort unchanged.
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );
    // Snapshot the expected order BEFORE sorting. Cloning after the sort would
    // compare the sorted value with itself, so the assertion could never fail.
    let expected = diff_records.clone();
    diff_records.sort_by_columns(vec![0])?;
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_delete_must_be_smaller_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    // Every record involved holds "c" in the sorted column.
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["c", "_", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![modified.clone(), deleted.clone()],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // The deletion is expected to be placed in front of the modification.
    let expected = DiffByteRecords::new(vec![deleted, modified], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_modify_delete_compared_with_add() -> Result<(), Box<dyn Error>> {
    // In column 0 the `Modify` record's deleted side holds "a" and the `Add`
    // record holds "b"; the test expects the input order to survive the sort
    // unchanged.
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );
    // Snapshot the expected order BEFORE sorting. Cloning after the sort would
    // compare the sorted value with itself, so the assertion could never fail.
    let expected = diff_records.clone();
    diff_records.sort_by_columns(vec![0])?;
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_add_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["b", "_", "_"]),
        4,
    ));
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let mut actual = DiffByteRecords::new(
        vec![added.clone(), modified.clone()],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // The modification (deleted side "a") is expected ahead of the addition ("b").
    let expected = DiffByteRecords::new(vec![modified, added], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_modify_delete_compared_with_add_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    // The addition and the modification's deleted side both hold "a" in
    // column 0; the modification's added side holds "c".
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![modified.clone(), added.clone()],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // The addition is expected to end up in front of the modification.
    let expected = DiffByteRecords::new(vec![added, modified], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_add_must_be_greater_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    // Every record involved holds "c" in column 0; the test expects the
    // modification to stay in front of the addition, i.e. the input order.
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );
    // Snapshot the expected order BEFORE sorting. Cloning after the sort would
    // compare the sorted value with itself, so the assertion could never fail.
    let expected = diff_records.clone();
    diff_records.sort_by_columns(vec![0])?;
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let modified_d_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modified_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let mut actual = DiffByteRecords::new(
        vec![modified_d_to_a.clone(), modified_c_to_b.clone()],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // The modification whose deleted side is "c" is expected ahead of the one
    // whose deleted side is "d".
    let expected = DiffByteRecords::new(
        vec![modified_c_to_b, modified_d_to_a],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_modify_add_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    // Both modifications share "c" on their deleted side and differ only on
    // their added side ("b" vs. "a").
    let modified_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modified_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let mut actual = DiffByteRecords::new(
        vec![modified_c_to_b.clone(), modified_c_to_a.clone()],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // The modification with added side "a" is expected first.
    let expected = DiffByteRecords::new(
        vec![modified_c_to_a, modified_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_modify_cmp_with_add_cmp_with_modify_cmp_with_delete(
) -> Result<(), Box<dyn Error>> {
    // A mix of all three variants, sorted by column 0.
    let modified_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let added_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let modified_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let deleted_a = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![
            modified_c_to_b.clone(),
            added_a.clone(),
            modified_c_to_a.clone(),
            deleted_a.clone(),
        ],
        Default::default(),
        None,
    );

    actual.sort_by_columns(vec![0])?;

    // Expected order: deletion, addition, then the two modifications with the
    // one whose added side is "a" ahead of the one whose added side is "b".
    let expected = DiffByteRecords::new(
        vec![deleted_a, added_a, modified_c_to_a, modified_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_idx_out_of_bounds_err() -> Result<(), Box<dyn Error>> {
    // Both records have three fields, so column index 3 does not exist.
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "b", "c"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "x", "y"]),
        4,
    ));
    let mut records = DiffByteRecords::new(vec![deleted, added], Default::default(), None);

    let outcome = records.sort_by_columns(vec![3]);

    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));
    Ok(())
}
#[test]
fn sort_by_col_first_idx_ok_and_cmp_as_equal_second_idx_out_of_bounds_err_order_stays_the_same(
) -> Result<(), Box<dyn Error>> {
    // Both records hold "same" in column 1, and column index 3 does not exist
    // in these three-field records.
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                3,
            )),
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );
    // Snapshot the original order BEFORE sorting. Cloning after the sort would
    // compare the value with itself, so the "order stays the same" check could
    // never fail.
    let expected = diff_records.clone();
    let res = diff_records.sort_by_columns(vec![1, 3]);
    // The out-of-bounds index must be reported ...
    assert_eq!(res, Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }));
    // ... and the records must still be in their original order.
    assert_eq!(diff_records, expected);
    Ok(())
}
#[test]
fn sort_by_col_first_idx_ok_and_cmp_not_equal_second_idx_out_of_bounds_but_no_err_because_first_idx_already_sorted(
) -> Result<(), Box<dyn Error>> {
    // Column 1 already differs ("b" vs. "a"); index 3 is out of range for
    // these three-field records.
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![1, 3]);

    // No error is expected, and the records are expected ordered by column 1.
    assert_eq!(outcome, Ok(()));
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    // Index 3 is out of range for these three-field records; index 1 is valid.
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1]);

    // The error for index 3 is expected, yet the records are still expected
    // to come out ordered by column 1.
    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_third_idx_out_of_bounds_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    // Indices 3 and 4 are out of range for these three-field records; only
    // index 1 is valid.
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1, 4]);

    // The reported error is expected to be the one for index 3, and the
    // records are expected to come out ordered by column 1.
    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);
    Ok(())
}
}