1use crate::{
2 csv_headers::{Headers, HeadersParsed},
3 csv_parse_result::{CsvByteRecordWithHash, CsvLeftRightParseResult, Position, RecordHash},
4 csv_parser_hasher::HashMapValue,
5 diff_row::*,
6};
7use ahash::AHashMap as HashMap;
8use crossbeam_channel::{Receiver, Sender};
9use std::{
10 cmp::{max, Ordering},
11 collections::{hash_map::IntoIter, VecDeque},
12 convert::TryInto,
13};
14use thiserror::Error;
15
/// An owned list of [`DiffByteRecord`]s, stored together with the headers and
/// the column count that were handed to the constructor.
#[derive(Debug, PartialEq, Clone)]
pub struct DiffByteRecords {
    // The individual differences; reordered in place by the `sort_by_*` methods.
    inner: Vec<DiffByteRecord>,
    // Header information, exposed through `headers()`.
    headers: Headers,
    // Column count, exposed through `num_columns()`; may be `None`.
    num_columns: Option<usize>,
}
30
31impl DiffByteRecords {
    /// Creates a `DiffByteRecords` from its parts without any validation.
    ///
    /// * `inner` - the differences, kept in the given order.
    /// * `headers` - header information returned later by [`Self::headers`].
    /// * `num_columns` - column count returned later by [`Self::num_columns`].
    pub(crate) fn new(
        inner: Vec<DiffByteRecord>,
        headers: Headers,
        num_columns: Option<usize>,
    ) -> Self {
        Self {
            inner,
            headers,
            num_columns,
        }
    }
43
    /// Returns the header information stored alongside the differences.
    pub fn headers(&self) -> &Headers {
        &self.headers
    }
47
    /// Returns the stored number of columns, or `None` if none was recorded.
    pub fn num_columns(&self) -> Option<usize> {
        self.num_columns
    }
51
52 pub fn sort_by_line(&mut self) {
57 self.inner.sort_by(DiffByteRecord::cmp_by_line)
58 }
59
60 pub fn sort_by_columns<E: Into<ColumnIdx>, I: IntoIterator<Item = E>>(
63 &mut self,
64 cols: I,
65 ) -> Result<(), ColumnIdxError> {
66 let cols_to_sort = cols.into_iter().map(|e| e.into()).collect::<Vec<_>>();
67 let mut error_maybe: Result<(), ColumnIdxError> = Ok(());
68 if !cols_to_sort.is_empty() {
69 self.inner.sort_by(|a, b| match (a, b) {
70 (DiffByteRecord::Add(add_l), DiffByteRecord::Add(add_r)) => cols_to_sort
71 .iter()
72 .find_map(|col_idx| {
73 match (add_l, add_r)
74 .cmp_by_col(col_idx)
75 .map(|ord| (!ord.is_eq()).then_some(ord))
76 {
77 Ok(ord) => ord,
78 Err(e) => {
79 if error_maybe.is_ok() {
80 error_maybe = Err(e);
81 }
82 None
83 }
84 }
85 })
86 .unwrap_or(Ordering::Equal),
87 (
88 DiffByteRecord::Add(left),
89 DiffByteRecord::Modify {
90 delete: mod_del,
91 add: mod_add,
92 field_indices: _field_indices,
93 },
94 ) => cols_to_sort
95 .iter()
96 .find_map(|col_idx| {
97 match (left, mod_del)
98 .cmp_by_col(col_idx)
99 .and_then(|ord| match ord {
100 Ordering::Equal => (left, mod_add)
101 .cmp_by_col(col_idx)
102 .map(|ord| (!ord.is_eq()).then_some(ord)),
103 _ => Ok(Some(ord)),
104 }) {
105 Ok(ord) => ord,
106 Err(e) => {
107 if error_maybe.is_ok() {
108 error_maybe = Err(e);
109 }
110 None
111 }
112 }
113 })
114 .unwrap_or(Ordering::Greater),
116 (DiffByteRecord::Add(add), DiffByteRecord::Delete(del)) => cols_to_sort
117 .iter()
118 .find_map(|col_idx| {
119 match (add, del)
120 .cmp_by_col(col_idx)
121 .map(|ord| (!ord.is_eq()).then_some(ord))
122 {
123 Ok(ord) => ord,
124 Err(e) => {
125 if error_maybe.is_ok() {
126 error_maybe = Err(e);
127 }
128 None
129 }
130 }
131 })
132 .unwrap_or(Ordering::Greater),
134 (
135 DiffByteRecord::Modify {
136 delete: mod_del,
137 add: mod_add,
138 field_indices: _field_indices,
139 },
140 DiffByteRecord::Add(add),
141 ) => cols_to_sort
142 .iter()
143 .find_map(|col_idx| {
144 match (mod_del, add)
145 .cmp_by_col(col_idx)
146 .and_then(|ord| match ord {
147 Ordering::Equal => (mod_add, add)
148 .cmp_by_col(col_idx)
149 .map(|ord| (!ord.is_eq()).then_some(ord)),
150 _ => Ok(Some(ord)),
151 }) {
152 Ok(ord) => ord,
153 Err(e) => {
154 if error_maybe.is_ok() {
155 error_maybe = Err(e);
156 }
157 None
158 }
159 }
160 })
161 .unwrap_or(Ordering::Less),
163 (
164 DiffByteRecord::Modify {
165 delete: delete_l,
166 add: add_l,
167 field_indices: _field_indices_l,
168 },
169 DiffByteRecord::Modify {
170 delete: delete_r,
171 add: add_r,
172 field_indices: _field_indices_r,
173 },
174 ) => cols_to_sort
175 .iter()
176 .find_map(|col_idx| {
177 match (delete_l, delete_r)
178 .cmp_by_col(col_idx)
179 .and_then(|ord| match ord {
180 Ordering::Equal => (add_l, add_r)
181 .cmp_by_col(col_idx)
182 .map(|ord| (!ord.is_eq()).then_some(ord)),
183 _ => Ok(Some(ord)),
184 }) {
185 Ok(ord) => ord,
186 Err(e) => {
187 if error_maybe.is_ok() {
188 error_maybe = Err(e);
189 }
190 None
191 }
192 }
193 })
194 .unwrap_or(Ordering::Equal),
195 (
196 DiffByteRecord::Modify {
197 delete: mod_del,
198 add: mod_add,
199 field_indices: _field_indices,
200 },
201 DiffByteRecord::Delete(del),
202 ) => cols_to_sort
203 .iter()
204 .find_map(|col_idx| {
205 match (mod_del, del)
206 .cmp_by_col(col_idx)
207 .and_then(|ord| match ord {
208 Ordering::Equal => (mod_add, del)
209 .cmp_by_col(col_idx)
210 .map(|ord| (!ord.is_eq()).then_some(ord)),
211 _ => Ok(Some(ord)),
212 }) {
213 Ok(ord) => ord,
214 Err(e) => {
215 if error_maybe.is_ok() {
216 error_maybe = Err(e);
217 }
218 None
219 }
220 }
221 })
222 .unwrap_or(Ordering::Greater),
224 (DiffByteRecord::Delete(del), DiffByteRecord::Add(add)) => cols_to_sort
225 .iter()
226 .find_map(|col_idx| {
227 match (del, add)
228 .cmp_by_col(col_idx)
229 .map(|ord| (!ord.is_eq()).then_some(ord))
230 {
231 Ok(ord) => ord,
232 Err(e) => {
233 if error_maybe.is_ok() {
234 error_maybe = Err(e);
235 }
236 None
237 }
238 }
239 })
240 .unwrap_or(Ordering::Less),
242 (
243 DiffByteRecord::Delete(del),
244 DiffByteRecord::Modify {
245 delete: mod_del,
246 add: mod_add,
247 field_indices: _field_indices,
248 },
249 ) => cols_to_sort
250 .iter()
251 .find_map(|col_idx| {
252 match (del, mod_del)
253 .cmp_by_col(col_idx)
254 .and_then(|ord| match ord {
255 Ordering::Equal => (del, mod_add)
256 .cmp_by_col(col_idx)
257 .map(|ord| (!ord.is_eq()).then_some(ord)),
258 _ => Ok(Some(ord)),
259 }) {
260 Ok(ord) => ord,
261 Err(e) => {
262 if error_maybe.is_ok() {
263 error_maybe = Err(e);
264 }
265 None
266 }
267 }
268 })
269 .unwrap_or(Ordering::Less),
271 (DiffByteRecord::Delete(del_l), DiffByteRecord::Delete(del_r)) => cols_to_sort
272 .iter()
273 .find_map(|col_idx| {
274 match (del_l, del_r)
275 .cmp_by_col(col_idx)
276 .map(|ord| (!ord.is_eq()).then_some(ord))
277 {
278 Ok(ord) => ord,
279 Err(e) => {
280 if error_maybe.is_ok() {
281 error_maybe = Err(e);
282 }
283 None
284 }
285 }
286 })
287 .unwrap_or(Ordering::Equal),
288 });
289 }
290 error_maybe
291 }
292
293 #[cfg_attr(
296 feature = "rayon-threads",
297 doc = r##"
298 use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
299 use std::collections::HashSet;
300 use std::iter::FromIterator;
301 # fn main() -> Result<(), Box<dyn std::error::Error>> {
302 // some csv data with a header, where the first column is a unique id
303 let csv_data_left = "id,name,kind\n\
304 1,lemon,fruit\n\
305 2,strawberry,fruit";
306 let csv_data_right = "id,name,kind\n\
307 1,lemon,fruit\n\
308 2,strawberry,nut\n\
309 3,cherry,fruit";
310
311 let csv_byte_diff = CsvByteDiffLocal::new()?;
312
313 let mut diff_byte_records = csv_byte_diff.diff(
314 Csv::with_reader_seek(csv_data_left.as_bytes()),
315 Csv::with_reader_seek(csv_data_right.as_bytes()),
316 )?;
317
318 let diff_byte_record_slice = diff_byte_records.as_slice();
319
320 assert_eq!(
321 diff_byte_record_slice.len(),
322 2
323 );
324 Ok(())
325 # }
326 "##
327 )]
328 pub fn as_slice(&self) -> &[DiffByteRecord] {
329 self.inner.as_slice()
330 }
331
332 pub fn iter(&self) -> core::slice::Iter<'_, DiffByteRecord> {
334 self.inner.iter()
335 }
336}
337
/// Comparison of two values by the content of a single column.
trait CmpByColumn {
    /// Compares `self` on the column identified by `col_idx`, or returns a
    /// [`ColumnIdxError`] if the column cannot be looked up.
    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError>;
}
341
342impl CmpByColumn for (&ByteRecordLineInfo, &ByteRecordLineInfo) {
343 #[inline]
344 fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError> {
345 let idx_for_both = col_idx
346 .idx_for_both()
347 .expect("idx, because it is the only enum variant");
348 let &(brli_left, brli_right) = self;
349 brli_left
350 .byte_record()
351 .get(idx_for_both)
352 .zip(brli_right.byte_record().get(idx_for_both))
353 .map(|(a, b)| a.cmp(b))
354 .ok_or(ColumnIdxError::IdxOutOfBounds {
355 idx: idx_for_both,
356 len: brli_left.byte_record().len(),
357 })
358 }
359}
360
/// Identifies the column that records are compared on when sorting by column.
pub enum ColumnIdx {
    /// One zero-based column index that is applied to both compared records.
    IdxForBoth(usize),
}
371
372impl ColumnIdx {
373 #[inline]
374 fn idx_for_both(&self) -> Option<usize> {
375 match self {
376 &Self::IdxForBoth(idx) => Some(idx),
377 }
378 }
379}
380
381impl From<usize> for ColumnIdx {
395 fn from(value: usize) -> Self {
396 Self::IdxForBoth(value)
397 }
398}
399
/// Error produced when a column cannot be looked up in a record.
#[derive(Debug, Error, PartialEq)]
pub enum ColumnIdxError {
    /// The requested index `idx` has no corresponding field; `len` is the
    /// record length reported by the comparison that failed.
    #[error("the column index `{idx}` exceeds the total number of columns ({len})")]
    IdxOutOfBounds { idx: usize, len: usize },
}
408
409impl IntoIterator for DiffByteRecords {
410 type Item = DiffByteRecord;
411 type IntoIter = DiffByteRecordsIntoIterator;
412
413 fn into_iter(self) -> Self::IntoIter {
414 let num_columns = self.num_columns();
415 DiffByteRecordsIntoIterator {
416 inner: self.inner.into_iter(),
417 headers: self.headers,
418 num_columns,
419 }
420 }
421}
422
/// Owning iterator over the differences of a [`DiffByteRecords`]; created by
/// its `IntoIterator` implementation.
pub struct DiffByteRecordsIntoIterator {
    // The remaining differences.
    inner: std::vec::IntoIter<DiffByteRecord>,
    // Header information taken over from the consumed `DiffByteRecords`.
    headers: Headers,
    // Column count taken over from the consumed `DiffByteRecords`.
    num_columns: Option<usize>,
}
429
430impl Iterator for DiffByteRecordsIntoIterator {
431 type Item = DiffByteRecord;
432
433 fn next(&mut self) -> Option<Self::Item> {
434 self.inner.next()
435 }
436}
437
impl DiffByteRecordsIntoIterator {
    /// Returns the header information carried over from the consumed
    /// `DiffByteRecords`.
    pub fn headers(&self) -> &Headers {
        &self.headers
    }

    /// Returns the column count carried over from the consumed
    /// `DiffByteRecords`, if one was recorded.
    pub fn num_columns(&self) -> Option<usize> {
        self.num_columns
    }
}
447
// Map from a 128-bit record key to the match state of position/hash entries.
pub(crate) type CsvHashValueMap = HashMap<u128, HashMapValue<Position, RecordHash>>;
// Map from a 128-bit record key to the match state of whole byte records.
pub(crate) type CsvByteRecordValueMap = HashMap<u128, HashMapValue<csv::ByteRecord>>;
450
/// Line interval that decides how often a records map is flushed while
/// iterating; it never drops below 10 once recalculated.
struct MaxCapacityThreshold(usize);

impl MaxCapacityThreshold {
    /// Returns the current threshold.
    #[inline]
    fn value(&self) -> usize {
        let MaxCapacityThreshold(threshold) = *self;
        threshold
    }

    /// Recalculates the threshold as one hundredth of `current_line`, with a
    /// lower bound of 10.
    ///
    /// Nothing happens unless `current_line` is a multiple of 100. A quotient
    /// that does not fit into `usize` saturates at `usize::MAX`.
    fn calc_new(&mut self, current_line: u64) {
        if current_line % 100 != 0 {
            return;
        }
        let one_hundredth: usize = (current_line / 100).try_into().unwrap_or(usize::MAX);
        self.0 = max(10, one_hundredth);
    }
}
469
/// Iterator that consumes left/right parse results and yields the differences
/// between both sides as `csv::Result<DiffByteRecord>`.
pub struct DiffByteRecordsIterator {
    // Results that are ready to be handed out by `next`.
    buf: VecDeque<csv::Result<DiffByteRecord>>,
    // Parsed header information, exposed through `headers()`.
    headers: HeadersParsed,
    // Column count, exposed through `num_columns()`.
    num_columns: Option<usize>,
    // Incoming parse results: an already received prefix followed by the
    // remaining items of the channel.
    csv_left_right_parse_results: std::iter::Chain<
        std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
        crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
    >,
    // Left records by key, together with their match state.
    csv_records_left_map: CsvByteRecordValueMap,
    // Draining iterator over the left map; set once the input is exhausted.
    csv_records_left_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // Right records by key, together with their match state.
    csv_records_right_map: CsvByteRecordValueMap,
    // Draining iterator over the right map; set once the input is exhausted.
    csv_records_right_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // Line interval at which matched entries of the left map are flushed.
    max_capacity_left_map: MaxCapacityThreshold,
    // Line interval at which matched entries of the right map are flushed.
    max_capacity_right_map: MaxCapacityThreshold,
    // Channel that receives the records of equal pairs
    // (presumably so they can be reused by the producer - confirm).
    sender_csv_records_recycle: Sender<csv::ByteRecord>,
}
496
497impl DiffByteRecordsIterator {
498 pub(crate) fn new(
499 csv_left_right_parse_results: std::iter::Chain<
500 std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
501 crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
502 >,
503 sender_csv_records_recycle: Sender<csv::ByteRecord>,
504 headers: HeadersParsed,
505 num_columns: Option<usize>,
506 ) -> Self {
507 Self {
508 buf: Default::default(),
509 headers,
510 num_columns,
511 csv_left_right_parse_results,
512 csv_records_left_map: HashMap::new(),
513 csv_records_left_map_iter: None,
514 csv_records_right_map: HashMap::new(),
515 csv_records_right_map_iter: None,
516 max_capacity_left_map: MaxCapacityThreshold(10),
517 max_capacity_right_map: MaxCapacityThreshold(10),
518 sender_csv_records_recycle,
519 }
520 }
521
522 pub fn headers(&self) -> &HeadersParsed {
523 &self.headers
524 }
525
526 pub fn num_columns(&self) -> Option<usize> {
535 self.num_columns
536 }
537
538 pub fn try_to_diff_byte_records(mut self) -> csv::Result<DiffByteRecords> {
539 let num_cols = self.num_columns();
540 let headers_parsed = std::mem::take(&mut self.headers);
541 let headers: Headers = headers_parsed.try_into()?;
542 let diff_records = self.collect::<csv::Result<_>>()?;
543 Ok(DiffByteRecords::new(diff_records, headers, num_cols))
544 }
545}
546
547impl Iterator for DiffByteRecordsIterator {
548 type Item = csv::Result<DiffByteRecord>;
549
550 fn next(&mut self) -> Option<Self::Item> {
551 if !self.buf.is_empty() {
552 return self.buf.pop_front();
553 }
554 for csv_left_right_parse_result in &mut self.csv_left_right_parse_results {
555 match csv_left_right_parse_result {
556 CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
557 byte_record: Ok(byte_record_left),
558 record_hash: record_hash_left,
559 }) => {
560 let byte_record_left_line =
561 byte_record_left.position().map_or(0, |pos| pos.line());
563 match self.csv_records_right_map.get_mut(&record_hash_left.key) {
564 Some(hash_map_val) => {
565 if let HashMapValue::Initial(record_hash_right, byte_record_right) =
566 hash_map_val
567 {
568 if record_hash_left.record_hash != *record_hash_right {
569 *hash_map_val = HashMapValue::Modified(
570 byte_record_left,
571 std::mem::take(byte_record_right),
572 );
573 } else {
574 *hash_map_val = HashMapValue::Equal(
575 byte_record_left,
576 std::mem::take(byte_record_right),
577 );
578 }
579 }
580 }
581 None => {
582 self.csv_records_left_map.insert(
583 record_hash_left.key,
584 HashMapValue::Initial(
585 record_hash_left.record_hash,
586 byte_record_left,
587 ),
588 );
589 }
590 }
591 if self.max_capacity_right_map.value() > 0
592 && byte_record_left_line % self.max_capacity_right_map.value() as u64 == 0
593 {
594 self.max_capacity_right_map.calc_new(byte_record_left_line);
595 for (_k, v) in self
596 .csv_records_right_map
597 .extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
598 {
599 match v {
600 HashMapValue::Equal(byte_record_left, byte_record_right) => {
601 let _ = self.sender_csv_records_recycle.send(byte_record_left);
606 let _ = self.sender_csv_records_recycle.send(byte_record_right);
607 }
608 HashMapValue::Modified(left_byte_record, right_byte_record) => {
609 let fields_modified = left_byte_record
610 .iter()
611 .enumerate()
612 .zip(right_byte_record.iter())
613 .fold(
614 Vec::new(),
615 |mut acc, ((idx, field_left), field_right)| {
616 if field_left != field_right {
617 acc.push(idx);
618 }
619 acc
620 },
621 );
622 let left_byte_record_line = left_byte_record
623 .position()
624 .expect("a record position")
626 .line();
627 let right_byte_record_line = right_byte_record
628 .position()
629 .expect("a record position")
631 .line();
632 self.buf.push_back(Ok(DiffByteRecord::Modify {
633 add: ByteRecordLineInfo::new(
634 right_byte_record,
635 right_byte_record_line,
636 ),
637 delete: ByteRecordLineInfo::new(
638 left_byte_record,
639 left_byte_record_line,
640 ),
641 field_indices: fields_modified,
642 }));
643 }
644 HashMapValue::Initial(..) => {
645 unreachable!("reached a hashmap value that shouldn't be there")
646 }
647 }
648 }
649 if !self.buf.is_empty() {
650 break;
651 }
652 }
653 }
654 CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
655 byte_record: Err(byte_record_left_err),
656 ..
657 }) => {
658 self.buf.push_back(Err(byte_record_left_err));
659 break;
660 }
661 CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
662 byte_record: Ok(byte_record_right),
663 record_hash: record_hash_right,
664 }) => {
665 let byte_record_right_line =
667 byte_record_right.position().map_or(0, |pos| pos.line());
668 match self.csv_records_left_map.get_mut(&record_hash_right.key) {
669 Some(hash_map_val) => {
670 if let HashMapValue::Initial(record_hash_left, byte_record_left) =
671 hash_map_val
672 {
673 if *record_hash_left != record_hash_right.record_hash {
674 *hash_map_val = HashMapValue::Modified(
675 std::mem::take(byte_record_left),
676 byte_record_right,
677 );
678 } else {
679 *hash_map_val = HashMapValue::Equal(
680 std::mem::take(byte_record_left),
681 byte_record_right,
682 );
683 }
684 }
685 }
686 None => {
687 self.csv_records_right_map.insert(
688 record_hash_right.key,
689 HashMapValue::Initial(
690 record_hash_right.record_hash,
691 byte_record_right,
692 ),
693 );
694 }
695 }
696 if self.max_capacity_left_map.value() > 0
697 && byte_record_right_line % self.max_capacity_left_map.value() as u64 == 0
698 {
699 self.max_capacity_left_map.calc_new(byte_record_right_line);
700 for (_k, v) in self
701 .csv_records_left_map
702 .extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
703 {
704 match v {
705 HashMapValue::Equal(byte_record_left, byte_record_right) => {
706 let _ = self.sender_csv_records_recycle.send(byte_record_left);
711 let _ = self.sender_csv_records_recycle.send(byte_record_right);
712 }
713 HashMapValue::Modified(left_byte_record, right_byte_record) => {
714 let fields_modified = left_byte_record
715 .iter()
716 .enumerate()
717 .zip(right_byte_record.iter())
718 .fold(
719 Vec::new(),
720 |mut acc, ((idx, field_left), field_right)| {
721 if field_left != field_right {
722 acc.push(idx);
723 }
724 acc
725 },
726 );
727 let left_byte_record_line = left_byte_record
728 .position()
729 .expect("a record position")
730 .line();
731 let right_byte_record_line = right_byte_record
732 .position()
733 .expect("a record position")
734 .line();
735 self.buf.push_back(Ok(DiffByteRecord::Modify {
736 add: ByteRecordLineInfo::new(
737 right_byte_record,
738 right_byte_record_line,
739 ),
740 delete: ByteRecordLineInfo::new(
741 left_byte_record,
742 left_byte_record_line,
743 ),
744 field_indices: fields_modified,
745 }));
746 }
747 HashMapValue::Initial(..) => {
748 unreachable!("reached a hashmap value that shouldn't be there")
749 }
750 }
751 }
752 if !self.buf.is_empty() {
753 break;
754 }
755 }
756 }
757 CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
758 byte_record: Err(e),
759 ..
760 }) => {
761 self.buf.push_back(Err(e));
762 break;
763 }
764 }
765 }
766
767 if !self.buf.is_empty() {
768 return self.buf.pop_front();
769 }
770
771 let iter_left_map = self
772 .csv_records_left_map_iter
773 .get_or_insert(std::mem::take(&mut self.csv_records_left_map).into_iter());
774
775 let iter_left_map =
776 iter_left_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
777 if let Some(value) = get_next_diff(iter_left_map, DiffByteRecord::Delete) {
778 return value;
779 }
780
781 let iter_right_map = self
782 .csv_records_right_map_iter
783 .get_or_insert(std::mem::take(&mut self.csv_records_right_map).into_iter());
784
785 let iter_right_map =
786 iter_right_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
787 if let Some(value) = get_next_diff(iter_right_map, DiffByteRecord::Add) {
788 return value;
789 }
790 None
791 }
792}
793
794#[inline]
795fn get_next_diff<I: Iterator<Item = (u128, HashMapValue<csv::ByteRecord>)>>(
796 mut iter_map: I,
797 diff_byte_record_add_or_delete: fn(ByteRecordLineInfo) -> DiffByteRecord,
798) -> Option<Option<Result<DiffByteRecord, csv::Error>>> {
799 match iter_map.next() {
800 Some((_, HashMapValue::Initial(_hash, byte_record))) => {
801 let line = byte_record.position().expect("a record position").line();
802 return Some(Some(Ok(diff_byte_record_add_or_delete(
803 ByteRecordLineInfo::new(byte_record, line),
804 ))));
805 }
806 Some((_, HashMapValue::Modified(left_byte_record, right_byte_record))) => {
807 let fields_modified = left_byte_record
808 .iter()
809 .enumerate()
810 .zip(right_byte_record.iter())
811 .fold(Vec::new(), |mut acc, ((idx, field_left), field_right)| {
812 if field_left != field_right {
813 acc.push(idx);
814 }
815 acc
816 });
817 let left_byte_record_line = left_byte_record
818 .position()
819 .expect("a record position")
820 .line();
821 let right_byte_record_line = right_byte_record
822 .position()
823 .expect("a record position")
824 .line();
825 return Some(Some(Ok(DiffByteRecord::Modify {
826 add: ByteRecordLineInfo::new(right_byte_record, right_byte_record_line),
827 delete: ByteRecordLineInfo::new(left_byte_record, left_byte_record_line),
828 field_indices: fields_modified,
829 })));
830 }
831 _ => (),
832 }
833 None
834}
835
/// Intermediate state before iteration starts: holds everything needed to
/// build a [`DiffByteRecordsIterator`].
pub(crate) struct DiffByteRecordFirstRow {
    // Channel end that delivers the parse results of both sides.
    csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
    // Channel end that is passed on to the iterator for recycling records.
    sender_csv_records_recycle: Sender<csv::ByteRecord>,
    // Parsed header information; also consulted for the column count.
    headers: HeadersParsed,
}
841
842impl DiffByteRecordFirstRow {
843 pub(crate) fn new(
844 csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
845 sender_csv_records_recycle: Sender<csv::ByteRecord>,
846 headers: HeadersParsed,
847 ) -> Self {
848 Self {
849 csv_left_right_parse_results,
850 sender_csv_records_recycle,
851 headers,
852 }
853 }
854
855 pub(crate) fn into_diff_byte_record_iter(self) -> DiffByteRecordsIterator {
856 let parse_result = &self.csv_left_right_parse_results;
857
858 let (num_cols, first_few) = match self.headers.max_num_cols() {
859 nc @ Some(_) => (nc, Default::default()),
860 None => match (parse_result.recv(), parse_result.recv()) {
861 (Ok(csv_left_right_parse_result_first), Ok(csv_left_right_parse_result_second)) => {
862 let len_first = csv_left_right_parse_result_first
863 .byte_record_result()
864 .map(|csv| csv.len())
865 .ok();
866 let len_second = csv_left_right_parse_result_second
867 .byte_record_result()
868 .map(|csv| csv.len())
869 .ok();
870
871 (
872 max(len_first, len_second),
873 vec![
874 csv_left_right_parse_result_first,
875 csv_left_right_parse_result_second,
876 ],
877 )
878 }
879 (Ok(csv_left_right_parse_result), Err(_))
880 | (Err(_), Ok(csv_left_right_parse_result)) => {
881 let num_cols = csv_left_right_parse_result
882 .byte_record_result()
883 .map(|csv| csv.len())
884 .ok();
885 (num_cols, vec![csv_left_right_parse_result])
886 }
887 (Err(_), Err(_)) => {
888 Default::default()
890 }
891 },
892 };
893 DiffByteRecordsIterator::new(
894 first_few
895 .into_iter()
896 .chain(self.csv_left_right_parse_results),
897 self.sender_csv_records_recycle,
898 self.headers,
899 num_cols,
900 )
901 }
902}
903
/// Access to the byte record (or the parse error) inside a parse result,
/// regardless of the side it belongs to.
trait ByteRecordResultFromParseResult {
    /// Borrows the contained record on success, or the contained error.
    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error>;
}
907
908impl ByteRecordResultFromParseResult for CsvLeftRightParseResult<CsvByteRecordWithHash> {
909 fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error> {
910 match self {
911 CsvLeftRightParseResult::Left(CsvByteRecordWithHash { byte_record, .. })
912 | CsvLeftRightParseResult::Right(CsvByteRecordWithHash { byte_record, .. }) => {
913 byte_record.as_ref()
914 }
915 }
916 }
917}
918
919#[cfg(test)]
920mod tests {
921 use crate::{
922 diff_result::{ColumnIdx, ColumnIdxError},
923 diff_row::{ByteRecordLineInfo, DiffByteRecord},
924 };
925 use pretty_assertions::assert_eq;
926 use std::error::Error;
927
928 use super::DiffByteRecords;
929
930 #[test]
931 fn sort_by_line_delete_then_add_already_sorted() -> Result<(), Box<dyn Error>> {
932 let mut diff_records = DiffByteRecords::new(
933 vec![
934 DiffByteRecord::Delete(ByteRecordLineInfo::new(
935 csv::ByteRecord::from(vec!["_", "_", "_"]),
936 3,
937 )),
938 DiffByteRecord::Add(ByteRecordLineInfo::new(
939 csv::ByteRecord::from(vec!["_", "_", "_"]),
940 4,
941 )),
942 ],
943 Default::default(),
944 None,
945 );
946
947 let expected = diff_records.clone();
948
949 diff_records.sort_by_line();
950
951 assert_eq!(diff_records, expected);
952
953 Ok(())
954 }
955
956 #[test]
957 fn sort_by_line_delete_then_add_not_sorted() -> Result<(), Box<dyn Error>> {
958 let mut diff_records = DiffByteRecords::new(
959 vec![
960 DiffByteRecord::Add(ByteRecordLineInfo::new(
961 csv::ByteRecord::from(vec!["_", "_", "_"]),
962 4,
963 )),
964 DiffByteRecord::Delete(ByteRecordLineInfo::new(
965 csv::ByteRecord::from(vec!["_", "_", "_"]),
966 3,
967 )),
968 ],
969 Default::default(),
970 None,
971 );
972
973 let expected = vec![
974 DiffByteRecord::Delete(ByteRecordLineInfo::new(
975 csv::ByteRecord::from(vec!["_", "_", "_"]),
976 3,
977 )),
978 DiffByteRecord::Add(ByteRecordLineInfo::new(
979 csv::ByteRecord::from(vec!["_", "_", "_"]),
980 4,
981 )),
982 ];
983
984 diff_records.sort_by_line();
985
986 assert_eq!(diff_records.as_slice(), expected);
987
988 Ok(())
989 }
990
991 #[test]
992 fn sort_by_line_modify_not_sorted_simple_one_sided() -> Result<(), Box<dyn Error>> {
993 let mut diff_records = DiffByteRecords::new(
994 vec![
995 DiffByteRecord::Modify {
996 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
997 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
998 field_indices: vec![],
999 },
1000 DiffByteRecord::Modify {
1001 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1002 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1003 field_indices: vec![],
1004 },
1005 ],
1006 Default::default(),
1007 None,
1008 );
1009
1010 let expected = vec![
1011 DiffByteRecord::Modify {
1012 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1013 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1014 field_indices: vec![],
1015 },
1016 DiffByteRecord::Modify {
1017 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1018 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1019 field_indices: vec![],
1020 },
1021 ];
1022
1023 diff_records.sort_by_line();
1024
1025 assert_eq!(diff_records.as_slice(), expected);
1026
1027 Ok(())
1028 }
1029
1030 #[test]
1031 fn sort_by_line_modify_lines_equal_on_opposite_side_prefer_smaller_delete_side_first(
1032 ) -> Result<(), Box<dyn Error>> {
1033 let mut diff_records = DiffByteRecords::new(
1034 vec![
1035 DiffByteRecord::Modify {
1036 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1037 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1038 field_indices: vec![],
1039 },
1040 DiffByteRecord::Modify {
1041 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1042 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1043 field_indices: vec![],
1044 },
1045 ],
1046 Default::default(),
1047 None,
1048 );
1049
1050 let expected = vec![
1051 DiffByteRecord::Modify {
1052 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1053 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1054 field_indices: vec![],
1055 },
1056 DiffByteRecord::Modify {
1057 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1058 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1059 field_indices: vec![],
1060 },
1061 ];
1062
1063 diff_records.sort_by_line();
1064
1065 assert_eq!(diff_records.as_slice(), expected);
1066
1067 Ok(())
1068 }
1069
1070 #[test]
1071 fn sort_by_line_modify_sum_lines_equal_minimum_on_add_side_prefer_smaller_add_side_first(
1072 ) -> Result<(), Box<dyn Error>> {
1073 let mut diff_records = DiffByteRecords::new(
1074 vec![
1075 DiffByteRecord::Modify {
1076 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1077 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1078 field_indices: vec![],
1079 },
1080 DiffByteRecord::Modify {
1081 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1082 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1083 field_indices: vec![],
1084 },
1085 ],
1086 Default::default(),
1087 None,
1088 );
1089
1090 let expected = vec![
1091 DiffByteRecord::Modify {
1092 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1093 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1094 field_indices: vec![],
1095 },
1096 DiffByteRecord::Modify {
1097 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1098 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1099 field_indices: vec![],
1100 },
1101 ];
1102
1103 diff_records.sort_by_line();
1104
1105 assert_eq!(diff_records.as_slice(), expected);
1106
1107 Ok(())
1108 }
1109
1110 #[test]
1111 fn sort_by_line_modify_not_sort_by_sum_lines_but_by_smallest() -> Result<(), Box<dyn Error>> {
1112 let mut diff_records = DiffByteRecords::new(
1113 vec![
1114 DiffByteRecord::Modify {
1115 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1116 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1117 field_indices: vec![],
1118 },
1119 DiffByteRecord::Modify {
1120 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1121 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1122 field_indices: vec![],
1123 },
1124 ],
1125 Default::default(),
1126 None,
1127 );
1128
1129 let expected = vec![
1130 DiffByteRecord::Modify {
1131 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1132 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1133 field_indices: vec![],
1134 },
1135 DiffByteRecord::Modify {
1136 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1137 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1138 field_indices: vec![],
1139 },
1140 ];
1141
1142 diff_records.sort_by_line();
1143
1144 assert_eq!(diff_records.as_slice(), expected);
1145
1146 Ok(())
1147 }
1148
1149 #[test]
1150 fn sort_by_line_modify_complex_interleaved() -> Result<(), Box<dyn Error>> {
1151 let mut diff_records = DiffByteRecords::new(
1152 vec![
1153 DiffByteRecord::Modify {
1154 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1155 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1156 field_indices: vec![],
1157 },
1158 DiffByteRecord::Modify {
1159 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1160 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1161 field_indices: vec![],
1162 },
1163 DiffByteRecord::Modify {
1164 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1165 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1166 field_indices: vec![],
1167 },
1168 ],
1169 Default::default(),
1170 None,
1171 );
1172
1173 let expected = vec![
1174 DiffByteRecord::Modify {
1175 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1176 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1177 field_indices: vec![],
1178 },
1179 DiffByteRecord::Modify {
1180 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1181 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1182 field_indices: vec![],
1183 },
1184 DiffByteRecord::Modify {
1185 delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1186 add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1187 field_indices: vec![],
1188 },
1189 ];
1190
1191 diff_records.sort_by_line();
1192
1193 assert_eq!(diff_records.as_slice(), expected);
1194
1195 Ok(())
1196 }
1197
1198 #[test]
1199 fn sort_by_col_selection_of_cols_is_empty_order_does_not_change() -> Result<(), Box<dyn Error>>
1200 {
1201 let mut diff_records = DiffByteRecords::new(
1202 vec![
1203 DiffByteRecord::Delete(ByteRecordLineInfo::new(
1204 csv::ByteRecord::from(vec!["d", "e", "f"]),
1205 3,
1206 )),
1207 DiffByteRecord::Add(ByteRecordLineInfo::new(
1208 csv::ByteRecord::from(vec!["a", "b", "c"]),
1209 4,
1210 )),
1211 ],
1212 Default::default(),
1213 None,
1214 );
1215
1216 let expected = diff_records.clone();
1217
1218 diff_records.sort_by_columns::<ColumnIdx, _>(vec![])?;
1219
1220 assert_eq!(diff_records, expected);
1221
1222 Ok(())
1223 }
1224
/// Both records hold "a" in column 0 and the `Delete` already precedes the
/// `Add`, so sorting by column 0 must not reorder anything.
#[test]
fn sort_by_col_all_equal_delete_before_add_order_does_not_change() -> Result<(), Box<dyn Error>>
{
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "x", "y"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "b", "c"]),
        4,
    ));
    let unsorted = DiffByteRecords::new(vec![deleted, added], Default::default(), None);

    // Sort a copy so the untouched original serves as the expectation.
    let mut records = unsorted.clone();
    records.sort_by_columns(vec![0])?;

    assert_eq!(records, unsorted);

    Ok(())
}
1251
/// Sorting by column 1 must move the `Add` holding "a" in front of the
/// `Delete` holding "b".
#[test]
fn sort_by_second_col_a_in_add_is_less_than_b_in_modify_delete() -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![1])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1290
/// Listing column 0 twice in the selection is accepted; the records are
/// ordered by that column ("a" before "az").
#[test]
fn sort_by_certain_col_idx_twice_is_ok() -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["az", "_", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0, 0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1329
/// Column 0 is "x" in both records, so column 1 ("a" vs. "b") has to decide
/// the resulting order.
#[test]
fn sort_by_first_and_second_col_first_col_val_is_equal_so_second_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0, 1])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1369
/// Columns 0 and 1 are identical in both records, so column 2 ("i" vs. "z")
/// has to decide the resulting order.
#[test]
fn sort_by_first_second_and_third_col_first_and_second_col_val_is_equal_so_third_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "a", "z"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "a", "i"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0, 1, 2])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1409
/// The selection is given back to front (2, 1, 0). Columns 2 and 1 are
/// identical in both records, so column 0 ("1" vs. "2") decides the order.
#[test]
fn sort_by_first_second_and_third_col_back_to_front_third_and_second_col_val_is_equal_so_first_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["2", "a", "z"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "a", "z"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![2, 1, 0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1449
/// With equal values in the sort column, a `Delete` has to be placed in
/// front of an `Add`, even when the `Add` came first in the input.
#[test]
fn sort_by_col_delete_must_be_smaller_than_add_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["same", "_", "_"]),
        4,
    ));
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["same", "_", "_"]),
        5,
    ));

    let mut records = DiffByteRecords::new(
        vec![added.clone(), deleted.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![deleted, added], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1489
/// Three records sorted by columns 0 and 1: the record with "0" in column 0
/// comes first, and the two records sharing "1" are ordered by column 1.
#[test]
fn sort_by_col_with_three_items_first_and_second_by_first_col_second_and_third_by_second_col(
) -> Result<(), Box<dyn Error>> {
    let deleted_one_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "b", "_"]),
        3,
    ));
    let added_one_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "a", "_"]),
        4,
    ));
    let added_zero_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["0", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![
            deleted_one_b.clone(),
            added_one_a.clone(),
            added_zero_a.clone(),
        ],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0, 1])?;

    // The input order is expected to be fully reversed.
    let want = DiffByteRecords::new(
        vec![added_zero_a, added_one_a, deleted_one_b],
        Default::default(),
        None,
    );
    assert_eq!(records, want);

    Ok(())
}
1537
/// A `Modify` whose deleted record holds "a" in column 0 must stay in front
/// of a `Delete` holding "b" in that column, i.e. the input order must
/// survive sorting by column 0.
#[test]
fn sort_by_col_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the order *before* sorting: cloning afterwards would compare
    // the sorted records with themselves and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1564
/// The `Delete` and the deleted side of the `Modify` both hold "c" in
/// column 0. The test expects the input order (the `Modify`, whose added
/// side holds "a", first) to survive sorting by column 0.
#[test]
fn sort_by_col_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the order *before* sorting: cloning afterwards would compare
    // the sorted records with themselves and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1592
/// Every record involved holds "c" in column 0; the `Delete` must still end
/// up in front of the `Modify`.
#[test]
fn sort_by_col_delete_must_be_smaller_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["c", "_", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![modified.clone(), deleted.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![deleted, modified], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1634
/// A `Modify` whose deleted record holds "a" in column 0 must stay in front
/// of an `Add` holding "b" in that column, i.e. the input order must survive
/// sorting by column 0.
#[test]
fn sort_by_col_modify_delete_compared_with_add() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the order *before* sorting: cloning afterwards would compare
    // the sorted records with themselves and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1661
/// An `Add` holding "b" in column 0 must be moved behind a `Modify` whose
/// deleted record holds "a" in that column.
#[test]
fn sort_by_col_add_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["b", "_", "_"]),
        4,
    ));
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };

    let mut records = DiffByteRecords::new(
        vec![added.clone(), modified.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![modified, added], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1702
/// The deleted side of the `Modify` and the `Add` both hold "a" in column 0,
/// while the added side of the `Modify` holds "c". The `Add` is expected to
/// end up in front of the `Modify`.
#[test]
fn sort_by_col_modify_delete_compared_with_add_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![modified.clone(), added.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, modified], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1744
/// Every record involved holds "c" in column 0; the `Add` must remain behind
/// the `Modify`, i.e. the input order must survive sorting by column 0.
#[test]
fn sort_by_col_add_must_be_greater_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the order *before* sorting: cloning afterwards would compare
    // the sorted records with themselves and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1772
/// Two `Modify` records: the one whose deleted side holds "c" in column 0
/// must end up in front of the one whose deleted side holds "d".
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let modified_d = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modified_c = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };

    let mut records = DiffByteRecords::new(
        vec![modified_d.clone(), modified_c.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![modified_c, modified_d], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
1815
/// Two `Modify` records whose deleted sides both hold "c" in column 0: the
/// one whose added side holds "a" must end up in front of the one whose
/// added side holds "b".
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_modify_add_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modified_add_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modified_add_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };

    let mut records = DiffByteRecords::new(
        vec![modified_add_b.clone(), modified_add_a.clone()],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(
        vec![modified_add_a, modified_add_b],
        Default::default(),
        None,
    );
    assert_eq!(records, want);

    Ok(())
}
1859
/// Mixed input of `Modify`, `Add`, `Modify` and `Delete` sorted by column 0.
/// Expected result: `Delete`, `Add`, then the `Modify` whose added side
/// holds "a", then the `Modify` whose added side holds "b".
#[test]
fn sort_by_col_modify_cmp_with_add_cmp_with_modify_cmp_with_delete(
) -> Result<(), Box<dyn Error>> {
    let modified_add_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let modified_add_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![
            modified_add_b.clone(),
            added.clone(),
            modified_add_a.clone(),
            deleted.clone(),
        ],
        Default::default(),
        None,
    );
    records.sort_by_columns(vec![0])?;

    let want = DiffByteRecords::new(
        vec![deleted, added, modified_add_a, modified_add_b],
        Default::default(),
        None,
    );
    assert_eq!(records, want);

    Ok(())
}
1919
/// Sorting three-field records by column index 3 must report
/// `ColumnIdxError::IdxOutOfBounds` carrying the index and the record length.
#[test]
fn sort_by_col_idx_out_of_bounds_err() -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "b", "c"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "x", "y"]),
        4,
    ));
    let mut records = DiffByteRecords::new(vec![deleted, added], Default::default(), None);

    let outcome = records.sort_by_columns(vec![3]);

    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    Ok(())
}
1943
/// Column 1 holds "same" in both records, so the out-of-bounds index 3 is
/// reached and reported as an error, while the record order is expected to
/// remain as it was before the call.
#[test]
fn sort_by_col_first_idx_ok_and_cmp_as_equal_second_idx_out_of_bounds_err_order_stays_the_same(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                3,
            )),
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the order *before* sorting: cloning afterwards would compare
    // the records with themselves and the order check could never fail.
    let expected = diff_records.clone();

    let res = diff_records.sort_by_columns(vec![1, 3]);

    assert_eq!(res, Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }));

    assert_eq!(diff_records, expected);

    Ok(())
}
1972
/// Column 1 already distinguishes the two records ("a" vs. "b"), so the
/// out-of-bounds index 3 that follows it produces no error and the records
/// are ordered by column 1.
#[test]
fn sort_by_col_first_idx_ok_and_cmp_not_equal_second_idx_out_of_bounds_but_no_err_because_first_idx_already_sorted(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    let outcome = records.sort_by_columns(vec![1, 3]);

    assert_eq!(outcome, Ok(()));

    // The two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
2014
/// The first index (3) is out of bounds and is reported as an error, yet the
/// records are still expected to be ordered by the valid second index (1).
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    let outcome = records.sort_by_columns(vec![3, 1]);

    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    // Despite the error, the two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
2057
/// Indices 3 and 4 are both out of bounds; the returned error is expected to
/// name index 3, and the records are still expected to be ordered by the
/// valid index 1.
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_third_idx_out_of_bounds_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    let mut records = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    let outcome = records.sort_by_columns(vec![3, 1, 4]);

    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    // Despite the error, the two records are expected to have swapped places.
    let want = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(records, want);

    Ok(())
}
2101}