csv_diff/
diff_result.rs

1use crate::{
2    csv_headers::{Headers, HeadersParsed},
3    csv_parse_result::{CsvByteRecordWithHash, CsvLeftRightParseResult, Position, RecordHash},
4    csv_parser_hasher::HashMapValue,
5    diff_row::*,
6};
7use ahash::AHashMap as HashMap;
8use crossbeam_channel::{Receiver, Sender};
9use std::{
10    cmp::{max, Ordering},
11    collections::{hash_map::IntoIter, VecDeque},
12    convert::TryInto,
13};
14use thiserror::Error;
15
/// Holds all information about the difference between two CSVs, after they have
/// been compared with [`CsvByteDiffLocal.diff`](crate::csv_diff::CsvByteDiffLocal::diff).
/// CSV records that are equal are __not__ stored in this structure.
///
/// Also, keep in mind, that differences are stored _unordered_ (with regard to the line in the CSV).
/// You can use [`DiffByteRecords.sort_by_line`](DiffByteRecords::sort_by_line) to sort them in-place.
///
/// See the example on [`CsvByteDiffLocal`](crate::csv_diff::CsvByteDiffLocal) for general usage.
#[derive(Debug, PartialEq, Clone)]
pub struct DiffByteRecords {
    // The individual differences; exposed through `as_slice`, `iter` and `into_iter`
    // and reordered in place by the `sort_by_*` methods.
    inner: Vec<DiffByteRecord>,
    // Headers handed to the constructor; exposed through `headers`.
    headers: Headers,
    // Column count handed to the constructor; exposed through `num_columns`.
    num_columns: Option<usize>,
}
30
31impl DiffByteRecords {
32    pub(crate) fn new(
33        inner: Vec<DiffByteRecord>,
34        headers: Headers,
35        num_columns: Option<usize>,
36    ) -> Self {
37        Self {
38            inner,
39            headers,
40            num_columns,
41        }
42    }
43
44    pub fn headers(&self) -> &Headers {
45        &self.headers
46    }
47
48    pub fn num_columns(&self) -> Option<usize> {
49        self.num_columns
50    }
51
52    /// Sort the underlying [`DiffByteRecord`](crate::diff_row::DiffByteRecord)s by line.
53    ///
54    /// Note that comparison is done in parallel. Therefore, __without calling this method__, the resulting `DiffByteRecord`s are out of order
55    /// after the comparison (with regard to their line in the original CSV).
56    pub fn sort_by_line(&mut self) {
57        self.inner.sort_by(DiffByteRecord::cmp_by_line)
58    }
59
60    // TODO: in the future, we might want to have something like Result<(), Vec<ColumnIdxError>> as a return value,
61    // so that we can report _all_ the errors that happened and not only the first one
62    pub fn sort_by_columns<E: Into<ColumnIdx>, I: IntoIterator<Item = E>>(
63        &mut self,
64        cols: I,
65    ) -> Result<(), ColumnIdxError> {
66        let cols_to_sort = cols.into_iter().map(|e| e.into()).collect::<Vec<_>>();
67        let mut error_maybe: Result<(), ColumnIdxError> = Ok(());
68        if !cols_to_sort.is_empty() {
69            self.inner.sort_by(|a, b| match (a, b) {
70                (DiffByteRecord::Add(add_l), DiffByteRecord::Add(add_r)) => cols_to_sort
71                    .iter()
72                    .find_map(|col_idx| {
73                        match (add_l, add_r)
74                            .cmp_by_col(col_idx)
75                            .map(|ord| (!ord.is_eq()).then_some(ord))
76                        {
77                            Ok(ord) => ord,
78                            Err(e) => {
79                                if error_maybe.is_ok() {
80                                    error_maybe = Err(e);
81                                }
82                                None
83                            }
84                        }
85                    })
86                    .unwrap_or(Ordering::Equal),
87                (
88                    DiffByteRecord::Add(left),
89                    DiffByteRecord::Modify {
90                        delete: mod_del,
91                        add: mod_add,
92                        field_indices: _field_indices,
93                    },
94                ) => cols_to_sort
95                    .iter()
96                    .find_map(|col_idx| {
97                        match (left, mod_del)
98                            .cmp_by_col(col_idx)
99                            .and_then(|ord| match ord {
100                                Ordering::Equal => (left, mod_add)
101                                    .cmp_by_col(col_idx)
102                                    .map(|ord| (!ord.is_eq()).then_some(ord)),
103                                _ => Ok(Some(ord)),
104                            }) {
105                            Ok(ord) => ord,
106                            Err(e) => {
107                                if error_maybe.is_ok() {
108                                    error_maybe = Err(e);
109                                }
110                                None
111                            }
112                        }
113                    })
114                    // `Add` should be treated as greater than `Modify`
115                    .unwrap_or(Ordering::Greater),
116                (DiffByteRecord::Add(add), DiffByteRecord::Delete(del)) => cols_to_sort
117                    .iter()
118                    .find_map(|col_idx| {
119                        match (add, del)
120                            .cmp_by_col(col_idx)
121                            .map(|ord| (!ord.is_eq()).then_some(ord))
122                        {
123                            Ok(ord) => ord,
124                            Err(e) => {
125                                if error_maybe.is_ok() {
126                                    error_maybe = Err(e);
127                                }
128                                None
129                            }
130                        }
131                    })
132                    // `Add` should be treated as greater than `Delete`
133                    .unwrap_or(Ordering::Greater),
134                (
135                    DiffByteRecord::Modify {
136                        delete: mod_del,
137                        add: mod_add,
138                        field_indices: _field_indices,
139                    },
140                    DiffByteRecord::Add(add),
141                ) => cols_to_sort
142                    .iter()
143                    .find_map(|col_idx| {
144                        match (mod_del, add)
145                            .cmp_by_col(col_idx)
146                            .and_then(|ord| match ord {
147                                Ordering::Equal => (mod_add, add)
148                                    .cmp_by_col(col_idx)
149                                    .map(|ord| (!ord.is_eq()).then_some(ord)),
150                                _ => Ok(Some(ord)),
151                            }) {
152                            Ok(ord) => ord,
153                            Err(e) => {
154                                if error_maybe.is_ok() {
155                                    error_maybe = Err(e);
156                                }
157                                None
158                            }
159                        }
160                    })
161                    // `Modify` should be treated as less than `Add`
162                    .unwrap_or(Ordering::Less),
163                (
164                    DiffByteRecord::Modify {
165                        delete: delete_l,
166                        add: add_l,
167                        field_indices: _field_indices_l,
168                    },
169                    DiffByteRecord::Modify {
170                        delete: delete_r,
171                        add: add_r,
172                        field_indices: _field_indices_r,
173                    },
174                ) => cols_to_sort
175                    .iter()
176                    .find_map(|col_idx| {
177                        match (delete_l, delete_r)
178                            .cmp_by_col(col_idx)
179                            .and_then(|ord| match ord {
180                                Ordering::Equal => (add_l, add_r)
181                                    .cmp_by_col(col_idx)
182                                    .map(|ord| (!ord.is_eq()).then_some(ord)),
183                                _ => Ok(Some(ord)),
184                            }) {
185                            Ok(ord) => ord,
186                            Err(e) => {
187                                if error_maybe.is_ok() {
188                                    error_maybe = Err(e);
189                                }
190                                None
191                            }
192                        }
193                    })
194                    .unwrap_or(Ordering::Equal),
195                (
196                    DiffByteRecord::Modify {
197                        delete: mod_del,
198                        add: mod_add,
199                        field_indices: _field_indices,
200                    },
201                    DiffByteRecord::Delete(del),
202                ) => cols_to_sort
203                    .iter()
204                    .find_map(|col_idx| {
205                        match (mod_del, del)
206                            .cmp_by_col(col_idx)
207                            .and_then(|ord| match ord {
208                                Ordering::Equal => (mod_add, del)
209                                    .cmp_by_col(col_idx)
210                                    .map(|ord| (!ord.is_eq()).then_some(ord)),
211                                _ => Ok(Some(ord)),
212                            }) {
213                            Ok(ord) => ord,
214                            Err(e) => {
215                                if error_maybe.is_ok() {
216                                    error_maybe = Err(e);
217                                }
218                                None
219                            }
220                        }
221                    })
222                    // `Modify` should be treated as greater than `Delete`
223                    .unwrap_or(Ordering::Greater),
224                (DiffByteRecord::Delete(del), DiffByteRecord::Add(add)) => cols_to_sort
225                    .iter()
226                    .find_map(|col_idx| {
227                        match (del, add)
228                            .cmp_by_col(col_idx)
229                            .map(|ord| (!ord.is_eq()).then_some(ord))
230                        {
231                            Ok(ord) => ord,
232                            Err(e) => {
233                                if error_maybe.is_ok() {
234                                    error_maybe = Err(e);
235                                }
236                                None
237                            }
238                        }
239                    })
240                    // `Delete` should be treated as less than `Add`
241                    .unwrap_or(Ordering::Less),
242                (
243                    DiffByteRecord::Delete(del),
244                    DiffByteRecord::Modify {
245                        delete: mod_del,
246                        add: mod_add,
247                        field_indices: _field_indices,
248                    },
249                ) => cols_to_sort
250                    .iter()
251                    .find_map(|col_idx| {
252                        match (del, mod_del)
253                            .cmp_by_col(col_idx)
254                            .and_then(|ord| match ord {
255                                Ordering::Equal => (del, mod_add)
256                                    .cmp_by_col(col_idx)
257                                    .map(|ord| (!ord.is_eq()).then_some(ord)),
258                                _ => Ok(Some(ord)),
259                            }) {
260                            Ok(ord) => ord,
261                            Err(e) => {
262                                if error_maybe.is_ok() {
263                                    error_maybe = Err(e);
264                                }
265                                None
266                            }
267                        }
268                    })
269                    // `Delete` should be treated as less than `Modify`
270                    .unwrap_or(Ordering::Less),
271                (DiffByteRecord::Delete(del_l), DiffByteRecord::Delete(del_r)) => cols_to_sort
272                    .iter()
273                    .find_map(|col_idx| {
274                        match (del_l, del_r)
275                            .cmp_by_col(col_idx)
276                            .map(|ord| (!ord.is_eq()).then_some(ord))
277                        {
278                            Ok(ord) => ord,
279                            Err(e) => {
280                                if error_maybe.is_ok() {
281                                    error_maybe = Err(e);
282                                }
283                                None
284                            }
285                        }
286                    })
287                    .unwrap_or(Ordering::Equal),
288            });
289        }
290        error_maybe
291    }
292
293    /// Return the `DiffByteRecord`s as a single slice.
294    /// # Example
295    #[cfg_attr(
296        feature = "rayon-threads",
297        doc = r##"
298    use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
299    use std::collections::HashSet;
300    use std::iter::FromIterator;
301    # fn main() -> Result<(), Box<dyn std::error::Error>> {
302    // some csv data with a header, where the first column is a unique id
303    let csv_data_left = "id,name,kind\n\
304                         1,lemon,fruit\n\
305                         2,strawberry,fruit";
306    let csv_data_right = "id,name,kind\n\
307                          1,lemon,fruit\n\
308                          2,strawberry,nut\n\
309                          3,cherry,fruit";
310
311    let csv_byte_diff = CsvByteDiffLocal::new()?;
312
313    let mut diff_byte_records = csv_byte_diff.diff(
314        Csv::with_reader_seek(csv_data_left.as_bytes()),
315        Csv::with_reader_seek(csv_data_right.as_bytes()),
316    )?;
317    
318    let diff_byte_record_slice = diff_byte_records.as_slice();
319
320    assert_eq!(
321        diff_byte_record_slice.len(),
322        2
323    );
324    Ok(())
325    # }
326    "##
327    )]
328    pub fn as_slice(&self) -> &[DiffByteRecord] {
329        self.inner.as_slice()
330    }
331
332    /// Return an iterator over the `DiffByteRecord`s.
333    pub fn iter(&self) -> core::slice::Iter<'_, DiffByteRecord> {
334        self.inner.iter()
335    }
336}
337
/// Comparison of the implementing value by the content of a single column.
trait CmpByColumn {
    /// Compare by the column that `col_idx` identifies.
    ///
    /// Returns a [`ColumnIdxError`] when the comparison cannot be carried out for `col_idx`.
    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError>;
}
341
342impl CmpByColumn for (&ByteRecordLineInfo, &ByteRecordLineInfo) {
343    #[inline]
344    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError> {
345        let idx_for_both = col_idx
346            .idx_for_both()
347            .expect("idx, because it is the only enum variant");
348        let &(brli_left, brli_right) = self;
349        brli_left
350            .byte_record()
351            .get(idx_for_both)
352            .zip(brli_right.byte_record().get(idx_for_both))
353            .map(|(a, b)| a.cmp(b))
354            .ok_or(ColumnIdxError::IdxOutOfBounds {
355                idx: idx_for_both,
356                len: brli_left.byte_record().len(),
357            })
358    }
359}
360
/// Identifies the column by which records are compared, e.g. in
/// [`DiffByteRecords::sort_by_columns`].
///
/// A plain `usize` can be converted into a `ColumnIdx` via `From`/`Into`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnIdx {
    /// Use the same column index for both records of a comparison.
    IdxForBoth(usize),
    // TODO: we will implement this later - right now it will be too complicated
    // TODO: instead of String, we should use `AsRef<[u8]>`
    // HeaderForBoth(String),
    // HeaderLeftIdxRight(String, usize),
    // HeaderLeftHeaderRight(String, String),
    // IdxLeftHeaderRight(usize, String),
    // IdxLeftIdxRight(usize, usize),
}

impl ColumnIdx {
    /// Return the index that applies to both records, if this value carries one.
    /// With `IdxForBoth` being the only variant, this is currently always `Some`.
    #[inline]
    fn idx_for_both(&self) -> Option<usize> {
        match self {
            Self::IdxForBoth(idx) => Some(*idx),
        }
    }
}
380
381// TODO: we will implement this later - right now it will be too complicated
382// impl From<String> for ColumnIdx {
383//     fn from(value: String) -> Self {
384//         Self::Header(value)
385//     }
386// }
387
388// impl From<&str> for ColumnIdx {
389//     fn from(value: &str) -> Self {
390//         Self::Header(value.into())
391//     }
392// }
393
394impl From<usize> for ColumnIdx {
395    fn from(value: usize) -> Self {
396        Self::IdxForBoth(value)
397    }
398}
399
/// Error that is produced when a [`ColumnIdx`] cannot be used to compare records.
#[derive(Debug, Error, PartialEq)]
pub enum ColumnIdxError {
    // TODO: we will implement this later - right now it will be too complicated
    // #[error(r#"the header name "{0}" does not exist"#)]
    // NoSuchHeaderName(AsRef<[u8]>),
    /// The column index `idx` does not exist in a record that was compared;
    /// `len` is the number of columns the index was checked against.
    #[error("the column index `{idx}` exceeds the total number of columns ({len})")]
    IdxOutOfBounds { idx: usize, len: usize },
}
408
409impl IntoIterator for DiffByteRecords {
410    type Item = DiffByteRecord;
411    type IntoIter = DiffByteRecordsIntoIterator;
412
413    fn into_iter(self) -> Self::IntoIter {
414        let num_columns = self.num_columns();
415        DiffByteRecordsIntoIterator {
416            inner: self.inner.into_iter(),
417            headers: self.headers,
418            num_columns,
419        }
420    }
421}
422
/// Consuming iterator that can be created from [`DiffByteRecords`](DiffByteRecords)
pub struct DiffByteRecordsIntoIterator {
    // Owning iterator over the records; `next` forwards to it.
    inner: std::vec::IntoIter<DiffByteRecord>,
    // Headers taken over from the `DiffByteRecords`; exposed through `headers`.
    headers: Headers,
    // Column count taken over from the `DiffByteRecords`; exposed through `num_columns`.
    num_columns: Option<usize>,
}
429
430impl Iterator for DiffByteRecordsIntoIterator {
431    type Item = DiffByteRecord;
432
433    fn next(&mut self) -> Option<Self::Item> {
434        self.inner.next()
435    }
436}
437
438impl DiffByteRecordsIntoIterator {
439    pub fn headers(&self) -> &Headers {
440        &self.headers
441    }
442
443    pub fn num_columns(&self) -> Option<usize> {
444        self.num_columns
445    }
446}
447
/// Hash map from a `u128` key to a `HashMapValue` over `Position` and `RecordHash`.
pub(crate) type CsvHashValueMap = HashMap<u128, HashMapValue<Position, RecordHash>>;
/// Hash map from a `u128` key to a `HashMapValue` over `csv::ByteRecord`;
/// this is the type of the left and right record maps of `DiffByteRecordsIterator`.
pub(crate) type CsvByteRecordValueMap = HashMap<u128, HashMapValue<csv::ByteRecord>>;
450
/// A threshold that can be re-derived from the number of the line that is currently processed.
struct MaxCapacityThreshold(usize);

impl MaxCapacityThreshold {
    /// Return the current threshold.
    #[inline]
    fn value(&self) -> usize {
        let MaxCapacityThreshold(threshold) = self;
        *threshold
    }

    /// Re-derive the threshold from `current_line`.
    ///
    /// Only lines that are a multiple of 100 lead to an update; the new threshold is then
    /// a hundredth of `current_line`, but never less than 10 (and `usize::MAX`, if the
    /// hundredth does not fit into a `usize`). For all other lines, nothing changes.
    fn calc_new(&mut self, current_line: u64) {
        if current_line % 100 != 0 {
            return;
        }
        let hundredth: usize = (current_line / 100).try_into().unwrap_or(usize::MAX);
        self.0 = max(10, hundredth);
    }
}
469
/// Emits all information about the difference between two CSVs as
/// [`Result`](::csv::Result)<[`DiffByteRecord`](crate::diff_row::DiffByteRecord)>, after they have been compared with
/// [`CsvByteDiff.diff`](crate::csv_diff::CsvByteDiff::diff).
/// CSV records that are equal are __not__ emitted by this iterator.
///
/// Also, keep in mind, that this iterator produces values _unordered_ (with regard to the line in the CSV).
/// If you want to have them ordered, you first need to collect them into [`DiffByteRecords`] and then use
/// [`DiffByteRecords.sort_by_line`](DiffByteRecords::sort_by_line) to sort them in-place.
///
/// See the example on [`CsvByteDiff`](crate::csv_diff::CsvByteDiff) for general usage.
pub struct DiffByteRecordsIterator {
    // Results that are ready to be yielded; `next` serves from this queue
    // before it processes any further parse results.
    buf: VecDeque<csv::Result<DiffByteRecord>>,
    // Exposed through `headers`; taken out by `try_to_diff_byte_records`.
    headers: HeadersParsed,
    // Exposed through `num_columns`.
    num_columns: Option<usize>,
    // Source of the parsed left/right records: the items of a `Vec`,
    // followed by the items received from a channel.
    csv_left_right_parse_results: std::iter::Chain<
        std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
        crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
    >,
    // Left records by the key of their hash. An entry is inserted as `Initial`, when the
    // right map has no entry for that key, and turns into `Modified`/`Equal`, when a right
    // record with the same key arrives.
    csv_records_left_map: CsvByteRecordValueMap,
    // NOTE(review): not used in the part of `next` that is visible here - presumably an
    // iterator over the left map for draining it at the end; TODO confirm.
    csv_records_left_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // Counterpart of `csv_records_left_map` for records of the right CSV.
    csv_records_right_map: CsvByteRecordValueMap,
    // NOTE(review): counterpart of `csv_records_left_map_iter` - TODO confirm.
    csv_records_right_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // When the line of the current right record is a multiple of this threshold,
    // all entries of the left map that are not `Initial` get extracted.
    max_capacity_left_map: MaxCapacityThreshold,
    // When the line of the current left record is a multiple of this threshold,
    // all entries of the right map that are not `Initial` get extracted.
    max_capacity_right_map: MaxCapacityThreshold,
    // Byte records of entries that turned out to be `Equal` are sent here for recycling;
    // send errors are ignored.
    sender_csv_records_recycle: Sender<csv::ByteRecord>,
}
496
497impl DiffByteRecordsIterator {
498    pub(crate) fn new(
499        csv_left_right_parse_results: std::iter::Chain<
500            std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
501            crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
502        >,
503        sender_csv_records_recycle: Sender<csv::ByteRecord>,
504        headers: HeadersParsed,
505        num_columns: Option<usize>,
506    ) -> Self {
507        Self {
508            buf: Default::default(),
509            headers,
510            num_columns,
511            csv_left_right_parse_results,
512            csv_records_left_map: HashMap::new(),
513            csv_records_left_map_iter: None,
514            csv_records_right_map: HashMap::new(),
515            csv_records_right_map_iter: None,
516            max_capacity_left_map: MaxCapacityThreshold(10),
517            max_capacity_right_map: MaxCapacityThreshold(10),
518            sender_csv_records_recycle,
519        }
520    }
521
522    pub fn headers(&self) -> &HeadersParsed {
523        &self.headers
524    }
525
526    /// Return the number of fields a [`DiffByteRecord`] will have that is
527    /// yielded by this iterator.
528    /// It produces `None`, if:
529    /// - both CSVs are empty and `has_headers` == `false` or
530    /// - the first record of both the left and right CSV couldn't be parsed successfully
531    ///
532    /// Note: This produces only one value, because when comparing two CSVs, they
533    /// must have the same number of fields for each record.
534    pub fn num_columns(&self) -> Option<usize> {
535        self.num_columns
536    }
537
538    pub fn try_to_diff_byte_records(mut self) -> csv::Result<DiffByteRecords> {
539        let num_cols = self.num_columns();
540        let headers_parsed = std::mem::take(&mut self.headers);
541        let headers: Headers = headers_parsed.try_into()?;
542        let diff_records = self.collect::<csv::Result<_>>()?;
543        Ok(DiffByteRecords::new(diff_records, headers, num_cols))
544    }
545}
546
547impl Iterator for DiffByteRecordsIterator {
548    type Item = csv::Result<DiffByteRecord>;
549
550    fn next(&mut self) -> Option<Self::Item> {
551        if !self.buf.is_empty() {
552            return self.buf.pop_front();
553        }
554        for csv_left_right_parse_result in &mut self.csv_left_right_parse_results {
555            match csv_left_right_parse_result {
556                CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
557                    byte_record: Ok(byte_record_left),
558                    record_hash: record_hash_left,
559                }) => {
560                    let byte_record_left_line =
561                        // TODO: the closure _might_ be a performance bottleneck!?
562                        byte_record_left.position().map_or(0, |pos| pos.line());
563                    match self.csv_records_right_map.get_mut(&record_hash_left.key) {
564                        Some(hash_map_val) => {
565                            if let HashMapValue::Initial(record_hash_right, byte_record_right) =
566                                hash_map_val
567                            {
568                                if record_hash_left.record_hash != *record_hash_right {
569                                    *hash_map_val = HashMapValue::Modified(
570                                        byte_record_left,
571                                        std::mem::take(byte_record_right),
572                                    );
573                                } else {
574                                    *hash_map_val = HashMapValue::Equal(
575                                        byte_record_left,
576                                        std::mem::take(byte_record_right),
577                                    );
578                                }
579                            }
580                        }
581                        None => {
582                            self.csv_records_left_map.insert(
583                                record_hash_left.key,
584                                HashMapValue::Initial(
585                                    record_hash_left.record_hash,
586                                    byte_record_left,
587                                ),
588                            );
589                        }
590                    }
591                    if self.max_capacity_right_map.value() > 0
592                        && byte_record_left_line % self.max_capacity_right_map.value() as u64 == 0
593                    {
594                        self.max_capacity_right_map.calc_new(byte_record_left_line);
595                        for (_k, v) in self
596                            .csv_records_right_map
597                            .extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
598                        {
599                            match v {
600                                HashMapValue::Equal(byte_record_left, byte_record_right) => {
601                                    // can be recycled, so we send it upstream;
602                                    // if receiver is already gone, we ignore the error that occurs when sending,
603                                    // which only leads to the byte record not being recycled (it can't be recycled,
604                                    // because upstream has finished it's work)
605                                    let _ = self.sender_csv_records_recycle.send(byte_record_left);
606                                    let _ = self.sender_csv_records_recycle.send(byte_record_right);
607                                }
608                                HashMapValue::Modified(left_byte_record, right_byte_record) => {
609                                    let fields_modified = left_byte_record
610                                        .iter()
611                                        .enumerate()
612                                        .zip(right_byte_record.iter())
613                                        .fold(
614                                            Vec::new(),
615                                            |mut acc, ((idx, field_left), field_right)| {
616                                                if field_left != field_right {
617                                                    acc.push(idx);
618                                                }
619                                                acc
620                                            },
621                                        );
622                                    let left_byte_record_line = left_byte_record
623                                        .position()
624                                        // TODO: handle error (although it shouldn't error here)
625                                        .expect("a record position")
626                                        .line();
627                                    let right_byte_record_line = right_byte_record
628                                        .position()
629                                        // TODO: handle error (although it shouldn't error here)
630                                        .expect("a record position")
631                                        .line();
632                                    self.buf.push_back(Ok(DiffByteRecord::Modify {
633                                        add: ByteRecordLineInfo::new(
634                                            right_byte_record,
635                                            right_byte_record_line,
636                                        ),
637                                        delete: ByteRecordLineInfo::new(
638                                            left_byte_record,
639                                            left_byte_record_line,
640                                        ),
641                                        field_indices: fields_modified,
642                                    }));
643                                }
644                                HashMapValue::Initial(..) => {
645                                    unreachable!("reached a hashmap value that shouldn't be there")
646                                }
647                            }
648                        }
649                        if !self.buf.is_empty() {
650                            break;
651                        }
652                    }
653                }
654                CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
655                    byte_record: Err(byte_record_left_err),
656                    ..
657                }) => {
658                    self.buf.push_back(Err(byte_record_left_err));
659                    break;
660                }
661                CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
662                    byte_record: Ok(byte_record_right),
663                    record_hash: record_hash_right,
664                }) => {
665                    // TODO: the closure _might_ be a performance bottleneck!?
666                    let byte_record_right_line =
667                        byte_record_right.position().map_or(0, |pos| pos.line());
668                    match self.csv_records_left_map.get_mut(&record_hash_right.key) {
669                        Some(hash_map_val) => {
670                            if let HashMapValue::Initial(record_hash_left, byte_record_left) =
671                                hash_map_val
672                            {
673                                if *record_hash_left != record_hash_right.record_hash {
674                                    *hash_map_val = HashMapValue::Modified(
675                                        std::mem::take(byte_record_left),
676                                        byte_record_right,
677                                    );
678                                } else {
679                                    *hash_map_val = HashMapValue::Equal(
680                                        std::mem::take(byte_record_left),
681                                        byte_record_right,
682                                    );
683                                }
684                            }
685                        }
686                        None => {
687                            self.csv_records_right_map.insert(
688                                record_hash_right.key,
689                                HashMapValue::Initial(
690                                    record_hash_right.record_hash,
691                                    byte_record_right,
692                                ),
693                            );
694                        }
695                    }
696                    if self.max_capacity_left_map.value() > 0
697                        && byte_record_right_line % self.max_capacity_left_map.value() as u64 == 0
698                    {
699                        self.max_capacity_left_map.calc_new(byte_record_right_line);
700                        for (_k, v) in self
701                            .csv_records_left_map
702                            .extract_if(|_k, v| !matches!(v, HashMapValue::Initial(..)))
703                        {
704                            match v {
705                                HashMapValue::Equal(byte_record_left, byte_record_right) => {
706                                    // can be recycled, so we send it upstream;
707                                    // if receiver is already gone, we ignore the error that occurs when sending,
708                                    // which only leads to the byte record not being recycled (it can't be recycled,
709                                    // because upstream has finished it's work)
710                                    let _ = self.sender_csv_records_recycle.send(byte_record_left);
711                                    let _ = self.sender_csv_records_recycle.send(byte_record_right);
712                                }
713                                HashMapValue::Modified(left_byte_record, right_byte_record) => {
714                                    let fields_modified = left_byte_record
715                                        .iter()
716                                        .enumerate()
717                                        .zip(right_byte_record.iter())
718                                        .fold(
719                                            Vec::new(),
720                                            |mut acc, ((idx, field_left), field_right)| {
721                                                if field_left != field_right {
722                                                    acc.push(idx);
723                                                }
724                                                acc
725                                            },
726                                        );
727                                    let left_byte_record_line = left_byte_record
728                                        .position()
729                                        .expect("a record position")
730                                        .line();
731                                    let right_byte_record_line = right_byte_record
732                                        .position()
733                                        .expect("a record position")
734                                        .line();
735                                    self.buf.push_back(Ok(DiffByteRecord::Modify {
736                                        add: ByteRecordLineInfo::new(
737                                            right_byte_record,
738                                            right_byte_record_line,
739                                        ),
740                                        delete: ByteRecordLineInfo::new(
741                                            left_byte_record,
742                                            left_byte_record_line,
743                                        ),
744                                        field_indices: fields_modified,
745                                    }));
746                                }
747                                HashMapValue::Initial(..) => {
748                                    unreachable!("reached a hashmap value that shouldn't be there")
749                                }
750                            }
751                        }
752                        if !self.buf.is_empty() {
753                            break;
754                        }
755                    }
756                }
757                CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
758                    byte_record: Err(e),
759                    ..
760                }) => {
761                    self.buf.push_back(Err(e));
762                    break;
763                }
764            }
765        }
766
767        if !self.buf.is_empty() {
768            return self.buf.pop_front();
769        }
770
771        let iter_left_map = self
772            .csv_records_left_map_iter
773            .get_or_insert(std::mem::take(&mut self.csv_records_left_map).into_iter());
774
775        let iter_left_map =
776            iter_left_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
777        if let Some(value) = get_next_diff(iter_left_map, DiffByteRecord::Delete) {
778            return value;
779        }
780
781        let iter_right_map = self
782            .csv_records_right_map_iter
783            .get_or_insert(std::mem::take(&mut self.csv_records_right_map).into_iter());
784
785        let iter_right_map =
786            iter_right_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
787        if let Some(value) = get_next_diff(iter_right_map, DiffByteRecord::Add) {
788            return value;
789        }
790        None
791    }
792}
793
794#[inline]
795fn get_next_diff<I: Iterator<Item = (u128, HashMapValue<csv::ByteRecord>)>>(
796    mut iter_map: I,
797    diff_byte_record_add_or_delete: fn(ByteRecordLineInfo) -> DiffByteRecord,
798) -> Option<Option<Result<DiffByteRecord, csv::Error>>> {
799    match iter_map.next() {
800        Some((_, HashMapValue::Initial(_hash, byte_record))) => {
801            let line = byte_record.position().expect("a record position").line();
802            return Some(Some(Ok(diff_byte_record_add_or_delete(
803                ByteRecordLineInfo::new(byte_record, line),
804            ))));
805        }
806        Some((_, HashMapValue::Modified(left_byte_record, right_byte_record))) => {
807            let fields_modified = left_byte_record
808                .iter()
809                .enumerate()
810                .zip(right_byte_record.iter())
811                .fold(Vec::new(), |mut acc, ((idx, field_left), field_right)| {
812                    if field_left != field_right {
813                        acc.push(idx);
814                    }
815                    acc
816                });
817            let left_byte_record_line = left_byte_record
818                .position()
819                .expect("a record position")
820                .line();
821            let right_byte_record_line = right_byte_record
822                .position()
823                .expect("a record position")
824                .line();
825            return Some(Some(Ok(DiffByteRecord::Modify {
826                add: ByteRecordLineInfo::new(right_byte_record, right_byte_record_line),
827                delete: ByteRecordLineInfo::new(left_byte_record, left_byte_record_line),
828                field_indices: fields_modified,
829            })));
830        }
831        _ => (),
832    }
833    None
834}
835
836pub(crate) struct DiffByteRecordFirstRow {
837    csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
838    sender_csv_records_recycle: Sender<csv::ByteRecord>,
839    headers: HeadersParsed,
840}
841
842impl DiffByteRecordFirstRow {
843    pub(crate) fn new(
844        csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
845        sender_csv_records_recycle: Sender<csv::ByteRecord>,
846        headers: HeadersParsed,
847    ) -> Self {
848        Self {
849            csv_left_right_parse_results,
850            sender_csv_records_recycle,
851            headers,
852        }
853    }
854
855    pub(crate) fn into_diff_byte_record_iter(self) -> DiffByteRecordsIterator {
856        let parse_result = &self.csv_left_right_parse_results;
857
858        let (num_cols, first_few) = match self.headers.max_num_cols() {
859            nc @ Some(_) => (nc, Default::default()),
860            None => match (parse_result.recv(), parse_result.recv()) {
861                (Ok(csv_left_right_parse_result_first), Ok(csv_left_right_parse_result_second)) => {
862                    let len_first = csv_left_right_parse_result_first
863                        .byte_record_result()
864                        .map(|csv| csv.len())
865                        .ok();
866                    let len_second = csv_left_right_parse_result_second
867                        .byte_record_result()
868                        .map(|csv| csv.len())
869                        .ok();
870
871                    (
872                        max(len_first, len_second),
873                        vec![
874                            csv_left_right_parse_result_first,
875                            csv_left_right_parse_result_second,
876                        ],
877                    )
878                }
879                (Ok(csv_left_right_parse_result), Err(_))
880                | (Err(_), Ok(csv_left_right_parse_result)) => {
881                    let num_cols = csv_left_right_parse_result
882                        .byte_record_result()
883                        .map(|csv| csv.len())
884                        .ok();
885                    (num_cols, vec![csv_left_right_parse_result])
886                }
887                (Err(_), Err(_)) => {
888                    // nothing to do
889                    Default::default()
890                }
891            },
892        };
893        DiffByteRecordsIterator::new(
894            first_few
895                .into_iter()
896                .chain(self.csv_left_right_parse_results),
897            self.sender_csv_records_recycle,
898            self.headers,
899            num_cols,
900        )
901    }
902}
903
904trait ByteRecordResultFromParseResult {
905    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error>;
906}
907
908impl ByteRecordResultFromParseResult for CsvLeftRightParseResult<CsvByteRecordWithHash> {
909    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error> {
910        match self {
911            CsvLeftRightParseResult::Left(CsvByteRecordWithHash { byte_record, .. })
912            | CsvLeftRightParseResult::Right(CsvByteRecordWithHash { byte_record, .. }) => {
913                byte_record.as_ref()
914            }
915        }
916    }
917}
918
919#[cfg(test)]
920mod tests {
921    use crate::{
922        diff_result::{ColumnIdx, ColumnIdxError},
923        diff_row::{ByteRecordLineInfo, DiffByteRecord},
924    };
925    use pretty_assertions::assert_eq;
926    use std::error::Error;
927
928    use super::DiffByteRecords;
929
930    #[test]
931    fn sort_by_line_delete_then_add_already_sorted() -> Result<(), Box<dyn Error>> {
932        let mut diff_records = DiffByteRecords::new(
933            vec![
934                DiffByteRecord::Delete(ByteRecordLineInfo::new(
935                    csv::ByteRecord::from(vec!["_", "_", "_"]),
936                    3,
937                )),
938                DiffByteRecord::Add(ByteRecordLineInfo::new(
939                    csv::ByteRecord::from(vec!["_", "_", "_"]),
940                    4,
941                )),
942            ],
943            Default::default(),
944            None,
945        );
946
947        let expected = diff_records.clone();
948
949        diff_records.sort_by_line();
950
951        assert_eq!(diff_records, expected);
952
953        Ok(())
954    }
955
956    #[test]
957    fn sort_by_line_delete_then_add_not_sorted() -> Result<(), Box<dyn Error>> {
958        let mut diff_records = DiffByteRecords::new(
959            vec![
960                DiffByteRecord::Add(ByteRecordLineInfo::new(
961                    csv::ByteRecord::from(vec!["_", "_", "_"]),
962                    4,
963                )),
964                DiffByteRecord::Delete(ByteRecordLineInfo::new(
965                    csv::ByteRecord::from(vec!["_", "_", "_"]),
966                    3,
967                )),
968            ],
969            Default::default(),
970            None,
971        );
972
973        let expected = vec![
974            DiffByteRecord::Delete(ByteRecordLineInfo::new(
975                csv::ByteRecord::from(vec!["_", "_", "_"]),
976                3,
977            )),
978            DiffByteRecord::Add(ByteRecordLineInfo::new(
979                csv::ByteRecord::from(vec!["_", "_", "_"]),
980                4,
981            )),
982        ];
983
984        diff_records.sort_by_line();
985
986        assert_eq!(diff_records.as_slice(), expected);
987
988        Ok(())
989    }
990
991    #[test]
992    fn sort_by_line_modify_not_sorted_simple_one_sided() -> Result<(), Box<dyn Error>> {
993        let mut diff_records = DiffByteRecords::new(
994            vec![
995                DiffByteRecord::Modify {
996                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
997                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
998                    field_indices: vec![],
999                },
1000                DiffByteRecord::Modify {
1001                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1002                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1003                    field_indices: vec![],
1004                },
1005            ],
1006            Default::default(),
1007            None,
1008        );
1009
1010        let expected = vec![
1011            DiffByteRecord::Modify {
1012                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1013                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1014                field_indices: vec![],
1015            },
1016            DiffByteRecord::Modify {
1017                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1018                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1019                field_indices: vec![],
1020            },
1021        ];
1022
1023        diff_records.sort_by_line();
1024
1025        assert_eq!(diff_records.as_slice(), expected);
1026
1027        Ok(())
1028    }
1029
1030    #[test]
1031    fn sort_by_line_modify_lines_equal_on_opposite_side_prefer_smaller_delete_side_first(
1032    ) -> Result<(), Box<dyn Error>> {
1033        let mut diff_records = DiffByteRecords::new(
1034            vec![
1035                DiffByteRecord::Modify {
1036                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1037                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1038                    field_indices: vec![],
1039                },
1040                DiffByteRecord::Modify {
1041                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1042                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1043                    field_indices: vec![],
1044                },
1045            ],
1046            Default::default(),
1047            None,
1048        );
1049
1050        let expected = vec![
1051            DiffByteRecord::Modify {
1052                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1053                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1054                field_indices: vec![],
1055            },
1056            DiffByteRecord::Modify {
1057                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1058                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1059                field_indices: vec![],
1060            },
1061        ];
1062
1063        diff_records.sort_by_line();
1064
1065        assert_eq!(diff_records.as_slice(), expected);
1066
1067        Ok(())
1068    }
1069
1070    #[test]
1071    fn sort_by_line_modify_sum_lines_equal_minimum_on_add_side_prefer_smaller_add_side_first(
1072    ) -> Result<(), Box<dyn Error>> {
1073        let mut diff_records = DiffByteRecords::new(
1074            vec![
1075                DiffByteRecord::Modify {
1076                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1077                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1078                    field_indices: vec![],
1079                },
1080                DiffByteRecord::Modify {
1081                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1082                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1083                    field_indices: vec![],
1084                },
1085            ],
1086            Default::default(),
1087            None,
1088        );
1089
1090        let expected = vec![
1091            DiffByteRecord::Modify {
1092                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1093                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1094                field_indices: vec![],
1095            },
1096            DiffByteRecord::Modify {
1097                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1098                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1099                field_indices: vec![],
1100            },
1101        ];
1102
1103        diff_records.sort_by_line();
1104
1105        assert_eq!(diff_records.as_slice(), expected);
1106
1107        Ok(())
1108    }
1109
1110    #[test]
1111    fn sort_by_line_modify_not_sort_by_sum_lines_but_by_smallest() -> Result<(), Box<dyn Error>> {
1112        let mut diff_records = DiffByteRecords::new(
1113            vec![
1114                DiffByteRecord::Modify {
1115                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1116                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1117                    field_indices: vec![],
1118                },
1119                DiffByteRecord::Modify {
1120                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1121                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1122                    field_indices: vec![],
1123                },
1124            ],
1125            Default::default(),
1126            None,
1127        );
1128
1129        let expected = vec![
1130            DiffByteRecord::Modify {
1131                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1132                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1133                field_indices: vec![],
1134            },
1135            DiffByteRecord::Modify {
1136                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1137                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1138                field_indices: vec![],
1139            },
1140        ];
1141
1142        diff_records.sort_by_line();
1143
1144        assert_eq!(diff_records.as_slice(), expected);
1145
1146        Ok(())
1147    }
1148
1149    #[test]
1150    fn sort_by_line_modify_complex_interleaved() -> Result<(), Box<dyn Error>> {
1151        let mut diff_records = DiffByteRecords::new(
1152            vec![
1153                DiffByteRecord::Modify {
1154                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1155                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1156                    field_indices: vec![],
1157                },
1158                DiffByteRecord::Modify {
1159                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1160                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1161                    field_indices: vec![],
1162                },
1163                DiffByteRecord::Modify {
1164                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1165                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1166                    field_indices: vec![],
1167                },
1168            ],
1169            Default::default(),
1170            None,
1171        );
1172
1173        let expected = vec![
1174            DiffByteRecord::Modify {
1175                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1176                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1177                field_indices: vec![],
1178            },
1179            DiffByteRecord::Modify {
1180                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1181                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1182                field_indices: vec![],
1183            },
1184            DiffByteRecord::Modify {
1185                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1186                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1187                field_indices: vec![],
1188            },
1189        ];
1190
1191        diff_records.sort_by_line();
1192
1193        assert_eq!(diff_records.as_slice(), expected);
1194
1195        Ok(())
1196    }
1197
1198    #[test]
1199    fn sort_by_col_selection_of_cols_is_empty_order_does_not_change() -> Result<(), Box<dyn Error>>
1200    {
1201        let mut diff_records = DiffByteRecords::new(
1202            vec![
1203                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1204                    csv::ByteRecord::from(vec!["d", "e", "f"]),
1205                    3,
1206                )),
1207                DiffByteRecord::Add(ByteRecordLineInfo::new(
1208                    csv::ByteRecord::from(vec!["a", "b", "c"]),
1209                    4,
1210                )),
1211            ],
1212            Default::default(),
1213            None,
1214        );
1215
1216        let expected = diff_records.clone();
1217
1218        diff_records.sort_by_columns::<ColumnIdx, _>(vec![])?;
1219
1220        assert_eq!(diff_records, expected);
1221
1222        Ok(())
1223    }
1224
1225    #[test]
1226    fn sort_by_col_all_equal_delete_before_add_order_does_not_change() -> Result<(), Box<dyn Error>>
1227    {
1228        let mut diff_records = DiffByteRecords::new(
1229            vec![
1230                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1231                    csv::ByteRecord::from(vec!["a", "x", "y"]),
1232                    3,
1233                )),
1234                DiffByteRecord::Add(ByteRecordLineInfo::new(
1235                    csv::ByteRecord::from(vec!["a", "b", "c"]),
1236                    4,
1237                )),
1238            ],
1239            Default::default(),
1240            None,
1241        );
1242
1243        let expected = diff_records.clone();
1244
1245        diff_records.sort_by_columns(vec![0])?;
1246
1247        assert_eq!(diff_records, expected);
1248
1249        Ok(())
1250    }
1251
1252    #[test]
1253    fn sort_by_second_col_a_in_add_is_less_than_b_in_modify_delete() -> Result<(), Box<dyn Error>> {
1254        let mut diff_records = DiffByteRecords::new(
1255            vec![
1256                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1257                    csv::ByteRecord::from(vec!["_", "b", "_"]),
1258                    3,
1259                )),
1260                DiffByteRecord::Add(ByteRecordLineInfo::new(
1261                    csv::ByteRecord::from(vec!["_", "a", "_"]),
1262                    4,
1263                )),
1264            ],
1265            Default::default(),
1266            None,
1267        );
1268
1269        diff_records.sort_by_columns(vec![1])?;
1270
1271        let expected = DiffByteRecords::new(
1272            vec![
1273                DiffByteRecord::Add(ByteRecordLineInfo::new(
1274                    csv::ByteRecord::from(vec!["_", "a", "_"]),
1275                    4,
1276                )),
1277                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1278                    csv::ByteRecord::from(vec!["_", "b", "_"]),
1279                    3,
1280                )),
1281            ],
1282            Default::default(),
1283            None,
1284        );
1285
1286        assert_eq!(diff_records, expected);
1287
1288        Ok(())
1289    }
1290
1291    #[test]
1292    fn sort_by_certain_col_idx_twice_is_ok() -> Result<(), Box<dyn Error>> {
1293        let mut diff_records = DiffByteRecords::new(
1294            vec![
1295                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1296                    csv::ByteRecord::from(vec!["az", "_", "_"]),
1297                    3,
1298                )),
1299                DiffByteRecord::Add(ByteRecordLineInfo::new(
1300                    csv::ByteRecord::from(vec!["a", "_", "_"]),
1301                    4,
1302                )),
1303            ],
1304            Default::default(),
1305            None,
1306        );
1307
1308        diff_records.sort_by_columns(vec![0, 0])?;
1309
1310        let expected = DiffByteRecords::new(
1311            vec![
1312                DiffByteRecord::Add(ByteRecordLineInfo::new(
1313                    csv::ByteRecord::from(vec!["a", "_", "_"]),
1314                    4,
1315                )),
1316                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1317                    csv::ByteRecord::from(vec!["az", "_", "_"]),
1318                    3,
1319                )),
1320            ],
1321            Default::default(),
1322            None,
1323        );
1324
1325        assert_eq!(diff_records, expected);
1326
1327        Ok(())
1328    }
1329
1330    #[test]
1331    fn sort_by_first_and_second_col_first_col_val_is_equal_so_second_col_decides_order(
1332    ) -> Result<(), Box<dyn Error>> {
1333        let mut diff_records = DiffByteRecords::new(
1334            vec![
1335                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1336                    csv::ByteRecord::from(vec!["x", "b", "_"]),
1337                    3,
1338                )),
1339                DiffByteRecord::Add(ByteRecordLineInfo::new(
1340                    csv::ByteRecord::from(vec!["x", "a", "_"]),
1341                    4,
1342                )),
1343            ],
1344            Default::default(),
1345            None,
1346        );
1347
1348        diff_records.sort_by_columns(vec![0, 1])?;
1349
1350        let expected = DiffByteRecords::new(
1351            vec![
1352                DiffByteRecord::Add(ByteRecordLineInfo::new(
1353                    csv::ByteRecord::from(vec!["x", "a", "_"]),
1354                    4,
1355                )),
1356                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1357                    csv::ByteRecord::from(vec!["x", "b", "_"]),
1358                    3,
1359                )),
1360            ],
1361            Default::default(),
1362            None,
1363        );
1364
1365        assert_eq!(diff_records, expected);
1366
1367        Ok(())
1368    }
1369
1370    #[test]
1371    fn sort_by_first_second_and_third_col_first_and_second_col_val_is_equal_so_third_col_decides_order(
1372    ) -> Result<(), Box<dyn Error>> {
1373        let mut diff_records = DiffByteRecords::new(
1374            vec![
1375                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1376                    csv::ByteRecord::from(vec!["x", "a", "z"]),
1377                    3,
1378                )),
1379                DiffByteRecord::Add(ByteRecordLineInfo::new(
1380                    csv::ByteRecord::from(vec!["x", "a", "i"]),
1381                    4,
1382                )),
1383            ],
1384            Default::default(),
1385            None,
1386        );
1387
1388        diff_records.sort_by_columns(vec![0, 1, 2])?;
1389
1390        let expected = DiffByteRecords::new(
1391            vec![
1392                DiffByteRecord::Add(ByteRecordLineInfo::new(
1393                    csv::ByteRecord::from(vec!["x", "a", "i"]),
1394                    4,
1395                )),
1396                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1397                    csv::ByteRecord::from(vec!["x", "a", "z"]),
1398                    3,
1399                )),
1400            ],
1401            Default::default(),
1402            None,
1403        );
1404
1405        assert_eq!(diff_records, expected);
1406
1407        Ok(())
1408    }
1409
1410    #[test]
1411    fn sort_by_first_second_and_third_col_back_to_front_third_and_second_col_val_is_equal_so_first_col_decides_order(
1412    ) -> Result<(), Box<dyn Error>> {
1413        let mut diff_records = DiffByteRecords::new(
1414            vec![
1415                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1416                    csv::ByteRecord::from(vec!["2", "a", "z"]),
1417                    3,
1418                )),
1419                DiffByteRecord::Add(ByteRecordLineInfo::new(
1420                    csv::ByteRecord::from(vec!["1", "a", "z"]),
1421                    4,
1422                )),
1423            ],
1424            Default::default(),
1425            None,
1426        );
1427
1428        diff_records.sort_by_columns(vec![2, 1, 0])?;
1429
1430        let expected = DiffByteRecords::new(
1431            vec![
1432                DiffByteRecord::Add(ByteRecordLineInfo::new(
1433                    csv::ByteRecord::from(vec!["1", "a", "z"]),
1434                    4,
1435                )),
1436                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1437                    csv::ByteRecord::from(vec!["2", "a", "z"]),
1438                    3,
1439                )),
1440            ],
1441            Default::default(),
1442            None,
1443        );
1444
1445        assert_eq!(diff_records, expected);
1446
1447        Ok(())
1448    }
1449
1450    #[test]
1451    fn sort_by_col_delete_must_be_smaller_than_add_when_otherwise_identical(
1452    ) -> Result<(), Box<dyn Error>> {
1453        let mut diff_records = DiffByteRecords::new(
1454            vec![
1455                DiffByteRecord::Add(ByteRecordLineInfo::new(
1456                    csv::ByteRecord::from(vec!["same", "_", "_"]),
1457                    4,
1458                )),
1459                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1460                    csv::ByteRecord::from(vec!["same", "_", "_"]),
1461                    5,
1462                )),
1463            ],
1464            Default::default(),
1465            None,
1466        );
1467
1468        diff_records.sort_by_columns(vec![0])?;
1469
1470        let expected = DiffByteRecords::new(
1471            vec![
1472                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1473                    csv::ByteRecord::from(vec!["same", "_", "_"]),
1474                    5,
1475                )),
1476                DiffByteRecord::Add(ByteRecordLineInfo::new(
1477                    csv::ByteRecord::from(vec!["same", "_", "_"]),
1478                    4,
1479                )),
1480            ],
1481            Default::default(),
1482            None,
1483        );
1484
1485        assert_eq!(diff_records, expected);
1486
1487        Ok(())
1488    }
1489
/// Sorting by columns `[0, 1]`: the first column decides the overall order and
/// the second column breaks the tie between the two records starting with "1".
#[test]
fn sort_by_col_with_three_items_first_and_second_by_first_col_second_and_third_by_second_col(
) -> Result<(), Box<dyn Error>> {
    let delete_1_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "b", "_"]),
        3,
    ));
    let add_1_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "a", "_"]),
        4,
    ));
    let add_0_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["0", "a", "_"]),
        4,
    ));

    // Input order: delete(1, b), add(1, a), add(0, a).
    let mut actual = DiffByteRecords::new(
        vec![delete_1_b.clone(), add_1_a.clone(), add_0_a.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0, 1])?;

    // "0" sorts before "1"; among the "1" records, "a" sorts before "b".
    let expected = DiffByteRecords::new(
        vec![add_0_a, add_1_a, delete_1_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1537
/// A `Modify` whose delete side ("a") is smaller in the sort column than a
/// plain `Delete` ("b") is expected to stay in front of that `Delete`.
#[test]
fn sort_by_col_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input *before* sorting: the input is already in the expected
    // order. Cloning after the sort would compare the result with itself, so
    // the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1564
/// The `Delete` ("c") ties with the delete side of the `Modify` ("c"), so the
/// comparison is expected to fall back to the add side of the `Modify` ("a"),
/// which keeps the `Modify` in front of the `Delete`.
#[test]
fn sort_by_col_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input *before* sorting: the input is already in the expected
    // order. Cloning after the sort would compare the result with itself, so
    // the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1592
/// When every compared value is "c", a plain `Delete` is expected to sort in
/// front of a `Modify`.
#[test]
fn sort_by_col_delete_must_be_smaller_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let modify_c_to_c = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let delete_c = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["c", "_", "_"]),
        4,
    ));

    // Input order: modify first, delete second.
    let mut actual = DiffByteRecords::new(
        vec![modify_c_to_c.clone(), delete_c.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // The delete has to move to the front.
    let expected = DiffByteRecords::new(
        vec![delete_c, modify_c_to_c],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1634
/// A `Modify` whose delete side ("a") is smaller in the sort column than an
/// `Add` ("b") is expected to stay in front of that `Add`.
#[test]
fn sort_by_col_modify_delete_compared_with_add() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input *before* sorting: the input is already in the expected
    // order. Cloning after the sort would compare the result with itself, so
    // the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1661
/// An `Add` ("b") that starts in front of a `Modify` whose delete side is "a"
/// is expected to end up behind that `Modify` after sorting by column 0.
#[test]
fn sort_by_col_add_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let add_b = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["b", "_", "_"]),
        4,
    ));
    let modify_a_to_c = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: Vec::new(),
    };

    // Input order: add first, modify second.
    let mut actual = DiffByteRecords::new(
        vec![add_b.clone(), modify_a_to_c.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // The two records swap places.
    let expected = DiffByteRecords::new(
        vec![modify_a_to_c, add_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1702
/// The `Add` ("a") ties with the delete side of the `Modify` ("a"); the
/// expected result puts the `Add` in front, since the add side of the `Modify`
/// is "c".
#[test]
fn sort_by_col_modify_delete_compared_with_add_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modify_a_to_c = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    // Input order: modify first, add second.
    let mut actual = DiffByteRecords::new(
        vec![modify_a_to_c.clone(), add_a.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // The add has to move to the front.
    let expected = DiffByteRecords::new(
        vec![add_a, modify_a_to_c],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1744
/// When every compared value is "c", an `Add` is expected to sort behind a
/// `Modify`, so this input must come out unchanged.
#[test]
fn sort_by_col_add_must_be_greater_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input *before* sorting: the input is already in the expected
    // order. Cloning after the sort would compare the result with itself, so
    // the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1772
/// Two `Modify` records are expected to be ordered by their delete sides
/// ("c" before "d"), even though their add sides ("b", "a") would order them
/// the other way round.
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let modify_d_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: Vec::new(),
    };

    // Input order: delete side "d" first, delete side "c" second.
    let mut actual = DiffByteRecords::new(
        vec![modify_d_to_a.clone(), modify_c_to_b.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // The two records swap places.
    let expected = DiffByteRecords::new(
        vec![modify_c_to_b, modify_d_to_a],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1815
/// Two `Modify` records with identical delete sides ("c") are expected to be
/// ordered by their add sides ("a" before "b").
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_modify_add_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let modify_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: Vec::new(),
    };

    // Input order: add side "b" first, add side "a" second.
    let mut actual = DiffByteRecords::new(
        vec![modify_c_to_b.clone(), modify_c_to_a.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // The two records swap places.
    let expected = DiffByteRecords::new(
        vec![modify_c_to_a, modify_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1859
/// Mixed input of two `Modify` records, one `Add` and one `Delete`, sorted by
/// column 0. Expected order: delete("a"), add("a"), modify("c" -> "a"),
/// modify("c" -> "b").
#[test]
fn sort_by_col_modify_cmp_with_add_cmp_with_modify_cmp_with_delete(
) -> Result<(), Box<dyn Error>> {
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let modify_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: Vec::new(),
    };
    let delete_a = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    let mut actual = DiffByteRecords::new(
        vec![
            modify_c_to_b.clone(),
            add_a.clone(),
            modify_c_to_a.clone(),
            delete_a.clone(),
        ],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    let expected = DiffByteRecords::new(
        vec![delete_a, add_a, modify_c_to_a, modify_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1919
/// Sorting three-column records by column index 3 must report
/// `IdxOutOfBounds { idx: 3, len: 3 }`.
#[test]
fn sort_by_col_idx_out_of_bounds_err() -> Result<(), Box<dyn Error>> {
    let delete_abc = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "b", "c"]),
        3,
    ));
    let add_axy = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "x", "y"]),
        4,
    ));
    let mut records = DiffByteRecords::new(vec![delete_abc, add_axy], Default::default(), None);

    let outcome = records.sort_by_columns(vec![3]);

    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    Ok(())
}
1943
/// Both records tie on column 1 ("same"), so the sort moves on to column 3,
/// which is out of bounds. The call must report the error and the records are
/// expected to keep their original order.
#[test]
fn sort_by_col_first_idx_ok_and_cmp_as_equal_second_idx_out_of_bounds_err_order_stays_the_same(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                3,
            )),
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input *before* sorting. Cloning after the sort would compare
    // the result with itself, so "order stays the same" would never be checked.
    let expected = diff_records.clone();

    let res = diff_records.sort_by_columns(vec![1, 3]);

    assert_eq!(res, Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }));

    assert_eq!(diff_records, expected);

    Ok(())
}
1972
/// Column 1 already distinguishes the two records ("a" vs "b"), so the
/// out-of-bounds column 3 is expected to cause no error and the result is
/// ordered by column 1.
#[test]
fn sort_by_col_first_idx_ok_and_cmp_not_equal_second_idx_out_of_bounds_but_no_err_because_first_idx_already_sorted(
) -> Result<(), Box<dyn Error>> {
    let delete_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: "b" first, "a" second.
    let mut actual = DiffByteRecords::new(
        vec![delete_b.clone(), add_a.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![1, 3]);
    assert_eq!(outcome, Ok(()));

    let expected = DiffByteRecords::new(vec![add_a, delete_b], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2014
/// The first sort column (3) is out of bounds and must be reported as an error,
/// while the records are still expected to end up ordered by the second sort
/// column (1).
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let delete_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: "b" first, "a" second.
    let mut actual = DiffByteRecords::new(
        vec![delete_b.clone(), add_a.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1]);
    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    // Despite the error, the data is sorted by column 1.
    let expected = DiffByteRecords::new(vec![add_a, delete_b], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2057
/// Sort columns 3 and 4 are both out of bounds; only the error for column 3 is
/// expected to be reported, and the records are still expected to end up
/// ordered by the valid column 1.
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_third_idx_out_of_bounds_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let delete_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: "b" first, "a" second.
    let mut actual = DiffByteRecords::new(
        vec![delete_b.clone(), add_a.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1, 4]);

    // Only the first error encountered during the sort is returned.
    assert_eq!(
        outcome,
        Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 })
    );

    // Despite the error, the data is sorted by column 1.
    let expected = DiffByteRecords::new(vec![add_a, delete_b], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2101}