csv_diff/
diff_result.rs

1use crate::{
2    csv_headers::{Headers, HeadersParsed},
3    csv_parse_result::{CsvByteRecordWithHash, CsvLeftRightParseResult, Position, RecordHash},
4    csv_parser_hasher::HashMapValue,
5    diff_row::*,
6};
7use ahash::AHashMap as HashMap;
8use crossbeam_channel::{Receiver, Sender};
9use std::{
10    cmp::{max, Ordering},
11    collections::{hash_map::IntoIter, VecDeque},
12    convert::TryInto,
13};
14use thiserror::Error;
15
/// Holds all information about the difference between two CSVs, after they have
/// been compared with [`CsvByteDiffLocal.diff`](crate::csv_diff::CsvByteDiffLocal::diff).
/// CSV records that are equal are __not__ stored in this structure.
///
/// Also, keep in mind, that differences are stored _unordered_ (with regard to the line in the CSV).
/// You can use [`DiffByteRecords.sort_by_line`](DiffByteRecords::sort_by_line) to sort them in-place.
///
/// See the example on [`CsvByteDiffLocal`](crate::csv_diff::CsvByteDiffLocal) for general usage.
#[derive(Debug, PartialEq, Clone)]
pub struct DiffByteRecords {
    // The differing records; the `sort_by_*` methods reorder this vector in place.
    inner: Vec<DiffByteRecord>,
    // Header information, handed out by `headers()`.
    headers: Headers,
    // Column count handed out by `num_columns()`; `None` when it is not known.
    num_columns: Option<usize>,
}
30
31impl DiffByteRecords {
    /// Bundles already computed diff records with their header information
    /// and (optional) column count.
    ///
    /// The arguments are stored as given; no sorting or validation happens here.
    pub(crate) fn new(
        inner: Vec<DiffByteRecord>,
        headers: Headers,
        num_columns: Option<usize>,
    ) -> Self {
        Self {
            inner,
            headers,
            num_columns,
        }
    }
43
    /// Return a reference to the header information stored alongside the diff records.
    pub fn headers(&self) -> &Headers {
        &self.headers
    }
47
    /// Return the column count stored alongside the diff records,
    /// or `None` if no column count was recorded.
    pub fn num_columns(&self) -> Option<usize> {
        self.num_columns
    }
51
52    /// Sort the underlying [`DiffByteRecord`](crate::diff_row::DiffByteRecord)s by line.
53    ///
54    /// Note that comparison is done in parallel. Therefore, __without calling this method__, the resulting `DiffByteRecord`s are out of order
55    /// after the comparison (with regard to their line in the original CSV).
56    pub fn sort_by_line(&mut self) {
57        self.inner
58            .sort_by(|a, b| match (a.line_num(), b.line_num()) {
59                (LineNum::OneSide(line_num_a), LineNum::OneSide(line_num_b)) => line_num_a
60                    .cmp(&line_num_b)
61                    .then(if matches!(a, DiffByteRecord::Delete(..)) {
62                        Ordering::Less
63                    } else {
64                        Ordering::Greater
65                    }),
66                (
67                    LineNum::OneSide(line_num_a),
68                    LineNum::BothSides {
69                        for_deleted: modify_line_del,
70                        for_added: modify_line_add,
71                    },
72                ) => line_num_a
73                    .cmp(if modify_line_del < modify_line_add {
74                        &modify_line_del
75                    } else {
76                        &modify_line_add
77                    })
78                    .then(if matches!(a, DiffByteRecord::Delete(..)) {
79                        Ordering::Less
80                    } else {
81                        Ordering::Greater
82                    }),
83                (
84                    LineNum::BothSides {
85                        for_deleted: modify_line_del,
86                        for_added: modify_line_add,
87                    },
88                    LineNum::OneSide(line_num_b),
89                ) => if modify_line_del < modify_line_add {
90                    &modify_line_del
91                } else {
92                    &modify_line_add
93                }
94                .cmp(&line_num_b)
95                .then(if matches!(b, DiffByteRecord::Add(..)) {
96                    Ordering::Less
97                } else {
98                    Ordering::Greater
99                }),
100                (
101                    LineNum::BothSides {
102                        for_deleted: modify_line_del_a,
103                        for_added: modify_line_add_a,
104                    },
105                    LineNum::BothSides {
106                        for_deleted: modify_line_del_b,
107                        for_added: modify_line_add_b,
108                    },
109                ) => if modify_line_del_a < modify_line_add_a {
110                    &modify_line_del_a
111                } else {
112                    &modify_line_add_a
113                }
114                .cmp(if modify_line_del_b < modify_line_add_b {
115                    &modify_line_del_b
116                } else {
117                    &modify_line_add_b
118                })
119                .then(modify_line_del_a.cmp(&modify_line_del_b))
120                .then(modify_line_add_a.cmp(&modify_line_add_b)),
121            })
122    }
123
124    // TODO: in the future, we might want to have something like Result<(), Vec<ColumnIdxError>> as a return value,
125    // so that we can report _all_ the errors that happened and not only the first one
126    pub fn sort_by_columns<E: Into<ColumnIdx>, I: IntoIterator<Item = E>>(
127        &mut self,
128        cols: I,
129    ) -> Result<(), ColumnIdxError> {
130        let cols_to_sort = cols.into_iter().map(|e| e.into()).collect::<Vec<_>>();
131        let mut error_maybe: Result<(), ColumnIdxError> = Ok(());
132        if !cols_to_sort.is_empty() {
133            self.inner.sort_by(|a, b| match (a, b) {
134                (DiffByteRecord::Add(add_l), DiffByteRecord::Add(add_r)) => cols_to_sort
135                    .iter()
136                    .find_map(|col_idx| {
137                        match (add_l, add_r)
138                            .cmp_by_col(col_idx)
139                            .map(|ord| (!ord.is_eq()).then(|| ord))
140                        {
141                            Ok(ord) => ord,
142                            Err(e) => {
143                                if !error_maybe.is_err() {
144                                    error_maybe = Err(e);
145                                }
146                                None
147                            }
148                        }
149                    })
150                    .unwrap_or(Ordering::Equal),
151                (
152                    DiffByteRecord::Add(left),
153                    DiffByteRecord::Modify {
154                        delete: mod_del,
155                        add: mod_add,
156                        field_indices: _field_indices,
157                    },
158                ) => cols_to_sort
159                    .iter()
160                    .find_map(|col_idx| {
161                        match (left, mod_del)
162                            .cmp_by_col(col_idx)
163                            .and_then(|ord| match ord {
164                                Ordering::Equal => (left, mod_add)
165                                    .cmp_by_col(col_idx)
166                                    .map(|ord| (!ord.is_eq()).then(|| ord)),
167                                _ => Ok(Some(ord)),
168                            }) {
169                            Ok(ord) => ord,
170                            Err(e) => {
171                                if !error_maybe.is_err() {
172                                    error_maybe = Err(e);
173                                }
174                                None
175                            }
176                        }
177                    })
178                    // `Add` should be treated as greater than `Modify`
179                    .unwrap_or(Ordering::Greater),
180                (DiffByteRecord::Add(add), DiffByteRecord::Delete(del)) => cols_to_sort
181                    .iter()
182                    .find_map(|col_idx| {
183                        match (add, del)
184                            .cmp_by_col(col_idx)
185                            .map(|ord| (!ord.is_eq()).then(|| ord))
186                        {
187                            Ok(ord) => ord,
188                            Err(e) => {
189                                if !error_maybe.is_err() {
190                                    error_maybe = Err(e);
191                                }
192                                None
193                            }
194                        }
195                    })
196                    // `Add` should be treated as greater than `Delete`
197                    .unwrap_or(Ordering::Greater),
198                (
199                    DiffByteRecord::Modify {
200                        delete: mod_del,
201                        add: mod_add,
202                        field_indices: _field_indices,
203                    },
204                    DiffByteRecord::Add(add),
205                ) => cols_to_sort
206                    .iter()
207                    .find_map(|col_idx| {
208                        match (mod_del, add)
209                            .cmp_by_col(col_idx)
210                            .and_then(|ord| match ord {
211                                Ordering::Equal => (mod_add, add)
212                                    .cmp_by_col(col_idx)
213                                    .map(|ord| (!ord.is_eq()).then(|| ord)),
214                                _ => Ok(Some(ord)),
215                            }) {
216                            Ok(ord) => ord,
217                            Err(e) => {
218                                if !error_maybe.is_err() {
219                                    error_maybe = Err(e);
220                                }
221                                None
222                            }
223                        }
224                    })
225                    // `Modify` should be treated as less than `Add`
226                    .unwrap_or(Ordering::Less),
227                (
228                    DiffByteRecord::Modify {
229                        delete: delete_l,
230                        add: add_l,
231                        field_indices: _field_indices_l,
232                    },
233                    DiffByteRecord::Modify {
234                        delete: delete_r,
235                        add: add_r,
236                        field_indices: _field_indices_r,
237                    },
238                ) => cols_to_sort
239                    .iter()
240                    .find_map(|col_idx| {
241                        match (delete_l, delete_r)
242                            .cmp_by_col(col_idx)
243                            .and_then(|ord| match ord {
244                                Ordering::Equal => (add_l, add_r)
245                                    .cmp_by_col(col_idx)
246                                    .map(|ord| (!ord.is_eq()).then(|| ord)),
247                                _ => Ok(Some(ord)),
248                            }) {
249                            Ok(ord) => ord,
250                            Err(e) => {
251                                if !error_maybe.is_err() {
252                                    error_maybe = Err(e);
253                                }
254                                None
255                            }
256                        }
257                    })
258                    .unwrap_or(Ordering::Equal),
259                (
260                    DiffByteRecord::Modify {
261                        delete: mod_del,
262                        add: mod_add,
263                        field_indices: _field_indices,
264                    },
265                    DiffByteRecord::Delete(del),
266                ) => cols_to_sort
267                    .iter()
268                    .find_map(|col_idx| {
269                        match (mod_del, del)
270                            .cmp_by_col(col_idx)
271                            .and_then(|ord| match ord {
272                                Ordering::Equal => (mod_add, del)
273                                    .cmp_by_col(col_idx)
274                                    .map(|ord| (!ord.is_eq()).then(|| ord)),
275                                _ => Ok(Some(ord)),
276                            }) {
277                            Ok(ord) => ord,
278                            Err(e) => {
279                                if !error_maybe.is_err() {
280                                    error_maybe = Err(e);
281                                }
282                                None
283                            }
284                        }
285                    })
286                    // `Modify` should be treated as greater than `Delete`
287                    .unwrap_or(Ordering::Greater),
288                (DiffByteRecord::Delete(del), DiffByteRecord::Add(add)) => cols_to_sort
289                    .iter()
290                    .find_map(|col_idx| {
291                        match (del, add)
292                            .cmp_by_col(col_idx)
293                            .map(|ord| (!ord.is_eq()).then(|| ord))
294                        {
295                            Ok(ord) => ord,
296                            Err(e) => {
297                                if !error_maybe.is_err() {
298                                    error_maybe = Err(e);
299                                }
300                                None
301                            }
302                        }
303                    })
304                    // `Delete` should be treated as less than `Add`
305                    .unwrap_or(Ordering::Less),
306                (
307                    DiffByteRecord::Delete(del),
308                    DiffByteRecord::Modify {
309                        delete: mod_del,
310                        add: mod_add,
311                        field_indices: _field_indices,
312                    },
313                ) => cols_to_sort
314                    .iter()
315                    .find_map(|col_idx| {
316                        match (del, mod_del)
317                            .cmp_by_col(col_idx)
318                            .and_then(|ord| match ord {
319                                Ordering::Equal => (del, mod_add)
320                                    .cmp_by_col(col_idx)
321                                    .map(|ord| (!ord.is_eq()).then(|| ord)),
322                                _ => Ok(Some(ord)),
323                            }) {
324                            Ok(ord) => ord,
325                            Err(e) => {
326                                if !error_maybe.is_err() {
327                                    error_maybe = Err(e);
328                                }
329                                None
330                            }
331                        }
332                    })
333                    // `Delete` should be treated as less than `Modify`
334                    .unwrap_or(Ordering::Less),
335                (DiffByteRecord::Delete(del_l), DiffByteRecord::Delete(del_r)) => cols_to_sort
336                    .iter()
337                    .find_map(|col_idx| {
338                        match (del_l, del_r)
339                            .cmp_by_col(col_idx)
340                            .map(|ord| (!ord.is_eq()).then(|| ord))
341                        {
342                            Ok(ord) => ord,
343                            Err(e) => {
344                                if !error_maybe.is_err() {
345                                    error_maybe = Err(e);
346                                }
347                                None
348                            }
349                        }
350                    })
351                    .unwrap_or(Ordering::Equal),
352            });
353        }
354        error_maybe
355    }
356
357    /// Return the `DiffByteRecord`s as a single slice.
358    /// # Example
359    #[cfg_attr(
360        feature = "rayon-threads",
361        doc = r##"
362    use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
363    use std::collections::HashSet;
364    use std::iter::FromIterator;
365    # fn main() -> Result<(), Box<dyn std::error::Error>> {
366    // some csv data with a header, where the first column is a unique id
367    let csv_data_left = "id,name,kind\n\
368                         1,lemon,fruit\n\
369                         2,strawberry,fruit";
370    let csv_data_right = "id,name,kind\n\
371                          1,lemon,fruit\n\
372                          2,strawberry,nut\n\
373                          3,cherry,fruit";
374
375    let csv_byte_diff = CsvByteDiffLocal::new()?;
376
377    let mut diff_byte_records = csv_byte_diff.diff(
378        Csv::with_reader_seek(csv_data_left.as_bytes()),
379        Csv::with_reader_seek(csv_data_right.as_bytes()),
380    )?;
381    
382    let diff_byte_record_slice = diff_byte_records.as_slice();
383
384    assert_eq!(
385        diff_byte_record_slice.len(),
386        2
387    );
388    Ok(())
389    # }
390    "##
391    )]
392    pub fn as_slice(&self) -> &[DiffByteRecord] {
393        &self.inner.as_slice()
394    }
395
396    /// Return an iterator over the `DiffByteRecord`s.
397    pub fn iter(&self) -> core::slice::Iter<'_, DiffByteRecord> {
398        self.inner.iter()
399    }
400}
401
/// Comparison of the implementing value restricted to a single column.
trait CmpByColumn {
    /// Compare by the column that `col_idx` refers to.
    ///
    /// Returns a [`ColumnIdxError`] when `col_idx` cannot be resolved.
    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError>;
}
405
406impl CmpByColumn for (&ByteRecordLineInfo, &ByteRecordLineInfo) {
407    #[inline]
408    fn cmp_by_col(&self, col_idx: &ColumnIdx) -> Result<Ordering, ColumnIdxError> {
409        let idx_for_both = col_idx
410            .idx_for_both()
411            .expect("idx, because it is the only enum variant");
412        let &(brli_left, brli_right) = self;
413        brli_left
414            .byte_record()
415            .get(idx_for_both)
416            .zip(brli_right.byte_record().get(idx_for_both))
417            .map(|(a, b)| a.cmp(b))
418            .ok_or(ColumnIdxError::IdxOutOfBounds {
419                idx: idx_for_both,
420                len: brli_left.byte_record().len(),
421            })
422    }
423}
424
/// Refers to a column of the compared CSV records, e.g. as a sort key for
/// `DiffByteRecords::sort_by_columns`.
///
/// A `usize` can be converted into a `ColumnIdx` via `From`/`Into`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ColumnIdx {
    /// The same index is used for the left and for the right record.
    IdxForBoth(usize),
    // TODO: we will implement this later - right now it will be too complicated
    // TODO: instead of String, we should use `AsRef<[u8]>`
    // HeaderForBoth(String),
    // HeaderLeftIdxRight(String, usize),
    // HeaderLeftHeaderRight(String, String),
    // IdxLeftHeaderRight(usize, String),
    // IdxLeftIdxRight(usize, usize),
}

impl ColumnIdx {
    /// Return the index that applies to both records.
    ///
    /// This is currently always `Some`, because `IdxForBoth` is the only variant.
    #[inline]
    fn idx_for_both(&self) -> Option<usize> {
        match *self {
            Self::IdxForBoth(idx) => Some(idx),
        }
    }
}
444
445// TODO: we will implement this later - right now it will be too complicated
446// impl From<String> for ColumnIdx {
447//     fn from(value: String) -> Self {
448//         Self::Header(value)
449//     }
450// }
451
452// impl From<&str> for ColumnIdx {
453//     fn from(value: &str) -> Self {
454//         Self::Header(value.into())
455//     }
456// }
457
458impl From<usize> for ColumnIdx {
459    fn from(value: usize) -> Self {
460        Self::IdxForBoth(value)
461    }
462}
463
/// Error that is reported when a [`ColumnIdx`] cannot be resolved against a record.
#[derive(Debug, Error, PartialEq)]
pub enum ColumnIdxError {
    // TODO: we will implement this later - right now it will be too complicated
    // #[error(r#"the header name "{0}" does not exist"#)]
    // NoSuchHeaderName(AsRef<[u8]>),
    /// The column index `idx` was requested from a record, but the lookup
    /// failed; `len` is a record length reported alongside it.
    #[error("the column index `{idx}` exceeds the total number of columns ({len})")]
    IdxOutOfBounds { idx: usize, len: usize },
}
472
473impl IntoIterator for DiffByteRecords {
474    type Item = DiffByteRecord;
475    type IntoIter = DiffByteRecordsIntoIterator;
476
477    fn into_iter(self) -> Self::IntoIter {
478        let num_columns = self.num_columns();
479        DiffByteRecordsIntoIterator {
480            inner: self.inner.into_iter(),
481            headers: self.headers,
482            num_columns,
483        }
484    }
485}
486
/// Consuming iterator that can be created from [`DiffByteRecords`](DiffByteRecords)
pub struct DiffByteRecordsIntoIterator {
    // The records that have not been yielded yet.
    inner: std::vec::IntoIter<DiffByteRecord>,
    // Header information taken over from the `DiffByteRecords` this iterator was created from.
    headers: Headers,
    // Column count taken over from the `DiffByteRecords` this iterator was created from.
    num_columns: Option<usize>,
}
493
494impl Iterator for DiffByteRecordsIntoIterator {
495    type Item = DiffByteRecord;
496
497    fn next(&mut self) -> Option<Self::Item> {
498        self.inner.next()
499    }
500}
501
impl DiffByteRecordsIntoIterator {
    /// Return a reference to the header information held by this iterator.
    pub fn headers(&self) -> &Headers {
        &self.headers
    }

    /// Return the column count held by this iterator,
    /// or `None` if no column count was recorded.
    pub fn num_columns(&self) -> Option<usize> {
        self.num_columns
    }
}
511
// Map from a `u128` key to a `HashMapValue` that carries `Position` and `RecordHash`.
pub(crate) type CsvHashValueMap = HashMap<u128, HashMapValue<Position, RecordHash>>;
// Map from a `u128` key to a `HashMapValue` that carries whole `csv::ByteRecord`s;
// this is the map type that `DiffByteRecordsIterator` keeps for each CSV side.
pub(crate) type CsvByteRecordValueMap = HashMap<u128, HashMapValue<csv::ByteRecord>>;
514
/// Line-based threshold that grows with the number of lines seen so far.
///
/// The value never drops below `10` once it has been recalculated.
struct MaxCapacityThreshold(usize);

impl MaxCapacityThreshold {
    /// Return the current threshold.
    #[inline]
    fn value(&self) -> usize {
        self.0
    }

    /// Recalculate the threshold as one hundredth of `current_line` (but at
    /// least `10`).
    ///
    /// The threshold is only touched when `current_line` is a multiple of
    /// `100`; for every other line this is a no-op.
    fn calc_new(&mut self, current_line: u64) {
        if current_line % 100 == 0 {
            self.0 = max(
                10,
                (current_line / 100)
                    .try_into()
                    // a `u64` may not fit into `usize` on every platform: saturate
                    .unwrap_or(usize::MAX),
            );
        }
    }
}
533
/// Emits all information about the difference between two CSVs as
/// [`Result`](::csv::Result)<[`DiffByteRecord`](crate::diff_row::DiffByteRecord)>, after they have been compared with
/// [`CsvByteDiff.diff`](crate::csv_diff::CsvByteDiff::diff).
/// CSV records that are equal are __not__ emitted by this iterator.
///
/// Also, keep in mind, that this iterator produces values _unordered_ (with regard to the line in the CSV).
/// If you want to have them ordered, you first need to collect them into [`DiffByteRecords`] and then use
/// [`DiffByteRecords.sort_by_line`](DiffByteRecords::sort_by_line) to sort them in-place.
///
/// See the example on [`CsvByteDiff`](crate::csv_diff::CsvByteDiff) for general usage.
pub struct DiffByteRecordsIterator {
    // Results that are ready to be yielded; `next` drains this queue first
    // before it looks at new parse results.
    buf: VecDeque<csv::Result<DiffByteRecord>>,
    // Header information, handed out by `headers()`.
    headers: HeadersParsed,
    // Column count, handed out by `num_columns()`.
    num_columns: Option<usize>,
    // Source of parsed left/right records: an already available batch,
    // chained with the records that arrive over a channel.
    csv_left_right_parse_results: std::iter::Chain<
        std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
        crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
    >,
    // Left records for which no right record with the same key has been seen yet.
    csv_records_left_map: CsvByteRecordValueMap,
    // NOTE(review): usage is not visible in this part of the file - presumably
    // iterates the left-over entries of the left map; confirm.
    csv_records_left_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // Right records for which no left record with the same key has been seen yet.
    csv_records_right_map: CsvByteRecordValueMap,
    // NOTE(review): usage is not visible in this part of the file - presumably
    // iterates the left-over entries of the right map; confirm.
    csv_records_right_map_iter: Option<IntoIter<u128, HashMapValue<csv::ByteRecord>>>,
    // NOTE(review): presumably the left-side counterpart of `intermediate_right_map`; confirm.
    intermediate_left_map: CsvByteRecordValueMap,
    // Scratch map: while the right map is drained, entries that are still
    // undecided are parked here; afterwards both maps are swapped.
    intermediate_right_map: CsvByteRecordValueMap,
    // NOTE(review): presumably the left-side counterpart of `max_capacity_right_map`; confirm.
    max_capacity_left_map: MaxCapacityThreshold,
    // Decides (based on the line of the current left record) when the right map is drained.
    max_capacity_right_map: MaxCapacityThreshold,
    // Records that turned out to be equal are sent back through this channel for reuse.
    sender_csv_records_recycle: Sender<csv::ByteRecord>,
}
562
563impl DiffByteRecordsIterator {
564    pub(crate) fn new(
565        csv_left_right_parse_results: std::iter::Chain<
566            std::vec::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
567            crossbeam_channel::IntoIter<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
568        >,
569        sender_csv_records_recycle: Sender<csv::ByteRecord>,
570        headers: HeadersParsed,
571        num_columns: Option<usize>,
572    ) -> Self {
573        Self {
574            buf: Default::default(),
575            headers,
576            num_columns,
577            csv_left_right_parse_results,
578            csv_records_left_map: HashMap::new(),
579            csv_records_left_map_iter: None,
580            csv_records_right_map: HashMap::new(),
581            csv_records_right_map_iter: None,
582            intermediate_left_map: HashMap::new(),
583            intermediate_right_map: HashMap::new(),
584            max_capacity_left_map: MaxCapacityThreshold(10),
585            max_capacity_right_map: MaxCapacityThreshold(10),
586            sender_csv_records_recycle,
587        }
588    }
589
590    pub fn headers(&self) -> &HeadersParsed {
591        &self.headers
592    }
593
594    /// Return the number of fields a [`DiffByteRecord`] will have that is
595    /// yielded by this iterator.
596    /// It produces `None`, if:
597    /// - both CSVs are empty and `has_headers` == `false` or
598    /// - the first record of both the left and right CSV couldn't be parsed successfully
599    ///
600    /// Note: This produces only one value, because when comparing two CSVs, they
601    /// must have the same number of fields for each record.
602    pub fn num_columns(&self) -> Option<usize> {
603        self.num_columns
604    }
605
606    pub fn try_to_diff_byte_records(mut self) -> csv::Result<DiffByteRecords> {
607        let num_cols = self.num_columns();
608        let headers_parsed = std::mem::take(&mut self.headers);
609        let headers: Headers = headers_parsed.try_into()?;
610        let diff_records = self.collect::<csv::Result<_>>()?;
611        Ok(DiffByteRecords::new(diff_records, headers, num_cols))
612    }
613}
614
615impl Iterator for DiffByteRecordsIterator {
616    type Item = csv::Result<DiffByteRecord>;
617
618    fn next(&mut self) -> Option<Self::Item> {
619        if !self.buf.is_empty() {
620            return self.buf.pop_front();
621        }
622        for csv_left_right_parse_result in &mut self.csv_left_right_parse_results {
623            match csv_left_right_parse_result {
624                CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
625                    byte_record: Ok(byte_record_left),
626                    record_hash: record_hash_left,
627                }) => {
628                    let byte_record_left_line =
629                        // TODO: the closure _might_ be a performance bottleneck!?
630                        byte_record_left.position().map_or(0, |pos| pos.line());
631                    match self.csv_records_right_map.get_mut(&record_hash_left.key) {
632                        Some(hash_map_val) => {
633                            if let HashMapValue::Initial(record_hash_right, byte_record_right) =
634                                hash_map_val
635                            {
636                                if record_hash_left.record_hash != *record_hash_right {
637                                    *hash_map_val = HashMapValue::Modified(
638                                        byte_record_left,
639                                        std::mem::take(byte_record_right),
640                                    );
641                                } else {
642                                    *hash_map_val = HashMapValue::Equal(
643                                        byte_record_left,
644                                        std::mem::take(byte_record_right),
645                                    );
646                                }
647                            }
648                        }
649                        None => {
650                            self.csv_records_left_map.insert(
651                                record_hash_left.key,
652                                HashMapValue::Initial(
653                                    record_hash_left.record_hash,
654                                    byte_record_left,
655                                ),
656                            );
657                        }
658                    }
659                    if self.max_capacity_right_map.value() > 0
660                        && byte_record_left_line % self.max_capacity_right_map.value() as u64 == 0
661                    {
662                        self.max_capacity_right_map.calc_new(byte_record_left_line);
663                        for (k, v) in self.csv_records_right_map.drain() {
664                            match v {
665                                HashMapValue::Equal(byte_record_left, byte_record_right) => {
666                                    // can be recycled, so we send it upstream;
667                                    // if receiver is already gone, we ignore the error that occurs when sending,
668                                    // which only leads to the byte record not being recycled (it can't be recycled,
                                    // because upstream has finished its work)
670                                    let _ = self.sender_csv_records_recycle.send(byte_record_left);
671                                    let _ = self.sender_csv_records_recycle.send(byte_record_right);
672                                }
673                                HashMapValue::Initial(_hash, ref _byte_record) => {
674                                    // put it back, because we don't know what to do with this value yet
675                                    self.intermediate_right_map.insert(k, v);
676                                }
677                                HashMapValue::Modified(left_byte_record, right_byte_record) => {
678                                    let fields_modified = left_byte_record
679                                        .iter()
680                                        .enumerate()
681                                        .zip(right_byte_record.iter())
682                                        .fold(
683                                            Vec::new(),
684                                            |mut acc, ((idx, field_left), field_right)| {
685                                                if field_left != field_right {
686                                                    acc.push(idx);
687                                                }
688                                                acc
689                                            },
690                                        );
691                                    let left_byte_record_line = left_byte_record
692                                        .position()
693                                        // TODO: handle error (although it shouldn't error here)
694                                        .expect("a record position")
695                                        .line();
696                                    let right_byte_record_line = right_byte_record
697                                        .position()
698                                        // TODO: handle error (although it shouldn't error here)
699                                        .expect("a record position")
700                                        .line();
701                                    self.buf.push_back(Ok(DiffByteRecord::Modify {
702                                        add: ByteRecordLineInfo::new(
703                                            right_byte_record,
704                                            right_byte_record_line,
705                                        ),
706                                        delete: ByteRecordLineInfo::new(
707                                            left_byte_record,
708                                            left_byte_record_line,
709                                        ),
710                                        field_indices: fields_modified,
711                                    }));
712                                }
713                            }
714                        }
715                        std::mem::swap(
716                            &mut self.intermediate_right_map,
717                            &mut self.csv_records_right_map,
718                        );
719                        if !self.buf.is_empty() {
720                            break;
721                        }
722                    }
723                }
724                CsvLeftRightParseResult::Left(CsvByteRecordWithHash {
725                    byte_record: Err(byte_record_left_err),
726                    ..
727                }) => {
728                    self.buf.push_back(Err(byte_record_left_err));
729                    break;
730                }
731                CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
732                    byte_record: Ok(byte_record_right),
733                    record_hash: record_hash_right,
734                }) => {
735                    // TODO: the closure _might_ be a performance bottleneck!?
736                    let byte_record_right_line =
737                        byte_record_right.position().map_or(0, |pos| pos.line());
738                    match self.csv_records_left_map.get_mut(&record_hash_right.key) {
739                        Some(hash_map_val) => {
740                            if let HashMapValue::Initial(record_hash_left, byte_record_left) =
741                                hash_map_val
742                            {
743                                if *record_hash_left != record_hash_right.record_hash {
744                                    *hash_map_val = HashMapValue::Modified(
745                                        std::mem::take(byte_record_left),
746                                        byte_record_right,
747                                    );
748                                } else {
749                                    *hash_map_val = HashMapValue::Equal(
750                                        std::mem::take(byte_record_left),
751                                        byte_record_right,
752                                    );
753                                }
754                            }
755                        }
756                        None => {
757                            self.csv_records_right_map.insert(
758                                record_hash_right.key,
759                                HashMapValue::Initial(
760                                    record_hash_right.record_hash,
761                                    byte_record_right,
762                                ),
763                            );
764                        }
765                    }
766                    if self.max_capacity_left_map.value() > 0
767                        && byte_record_right_line % self.max_capacity_left_map.value() as u64 == 0
768                    {
769                        self.max_capacity_left_map.calc_new(byte_record_right_line);
770                        for (k, v) in self.csv_records_left_map.drain() {
771                            match v {
772                                HashMapValue::Equal(byte_record_left, byte_record_right) => {
773                                    // can be recycled, so we send it upstream;
774                                    // if receiver is already gone, we ignore the error that occurs when sending,
775                                    // which only leads to the byte record not being recycled (it can't be recycled,
                                    // because upstream has finished its work)
777                                    let _ = self.sender_csv_records_recycle.send(byte_record_left);
778                                    let _ = self.sender_csv_records_recycle.send(byte_record_right);
779                                }
780                                HashMapValue::Initial(_hash, ref _byte_record) => {
781                                    // put it back, because we don't know what to do with this value yet
782                                    self.intermediate_left_map.insert(k, v);
783                                }
784                                HashMapValue::Modified(left_byte_record, right_byte_record) => {
785                                    let fields_modified = left_byte_record
786                                        .iter()
787                                        .enumerate()
788                                        .zip(right_byte_record.iter())
789                                        .fold(
790                                            Vec::new(),
791                                            |mut acc, ((idx, field_left), field_right)| {
792                                                if field_left != field_right {
793                                                    acc.push(idx);
794                                                }
795                                                acc
796                                            },
797                                        );
798                                    let left_byte_record_line = left_byte_record
799                                        .position()
800                                        .expect("a record position")
801                                        .line();
802                                    let right_byte_record_line = right_byte_record
803                                        .position()
804                                        .expect("a record position")
805                                        .line();
806                                    self.buf.push_back(Ok(DiffByteRecord::Modify {
807                                        add: ByteRecordLineInfo::new(
808                                            right_byte_record,
809                                            right_byte_record_line,
810                                        ),
811                                        delete: ByteRecordLineInfo::new(
812                                            left_byte_record,
813                                            left_byte_record_line,
814                                        ),
815                                        field_indices: fields_modified,
816                                    }));
817                                }
818                            }
819                        }
820                        std::mem::swap(
821                            &mut self.intermediate_left_map,
822                            &mut self.csv_records_left_map,
823                        );
824                        if !self.buf.is_empty() {
825                            break;
826                        }
827                    }
828                }
829                CsvLeftRightParseResult::Right(CsvByteRecordWithHash {
830                    byte_record: Err(e),
831                    ..
832                }) => {
833                    self.buf.push_back(Err(e));
834                    break;
835                }
836            }
837        }
838
839        if !self.buf.is_empty() {
840            return self.buf.pop_front();
841        }
842
843        let iter_left_map = self
844            .csv_records_left_map_iter
845            .get_or_insert(std::mem::take(&mut self.csv_records_left_map).into_iter());
846
847        let mut iter_left_map =
848            iter_left_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
849        match iter_left_map.next() {
850            Some((_, HashMapValue::Initial(_hash, byte_record))) => {
851                let line = byte_record.position().expect("a record position").line();
852                return Some(Ok(DiffByteRecord::Delete(ByteRecordLineInfo::new(
853                    byte_record,
854                    line,
855                ))));
856            }
857            Some((_, HashMapValue::Modified(left_byte_record, right_byte_record))) => {
858                let fields_modified = left_byte_record
859                    .iter()
860                    .enumerate()
861                    .zip(right_byte_record.iter())
862                    .fold(Vec::new(), |mut acc, ((idx, field_left), field_right)| {
863                        if field_left != field_right {
864                            acc.push(idx);
865                        }
866                        acc
867                    });
868                let left_byte_record_line = left_byte_record
869                    .position()
870                    .expect("a record position")
871                    .line();
872                let right_byte_record_line = right_byte_record
873                    .position()
874                    .expect("a record position")
875                    .line();
876                return Some(Ok(DiffByteRecord::Modify {
877                    add: ByteRecordLineInfo::new(right_byte_record, right_byte_record_line),
878                    delete: ByteRecordLineInfo::new(left_byte_record, left_byte_record_line),
879                    field_indices: fields_modified,
880                }));
881            }
882            _ => (),
883        }
884
885        let iter_right_map = self
886            .csv_records_right_map_iter
887            .get_or_insert(std::mem::take(&mut self.csv_records_right_map).into_iter());
888
889        let mut iter_right_map =
890            iter_right_map.skip_while(|(_, v)| matches!(v, HashMapValue::Equal(_, _)));
891        match iter_right_map.next() {
892            Some((_, HashMapValue::Initial(_hash, byte_record))) => {
893                let line = byte_record.position().expect("a record position").line();
894                return Some(Ok(DiffByteRecord::Add(ByteRecordLineInfo::new(
895                    byte_record,
896                    line,
897                ))));
898            }
899            Some((_, HashMapValue::Modified(left_byte_record, right_byte_record))) => {
900                let fields_modified = left_byte_record
901                    .iter()
902                    .enumerate()
903                    .zip(right_byte_record.iter())
904                    .fold(Vec::new(), |mut acc, ((idx, field_left), field_right)| {
905                        if field_left != field_right {
906                            acc.push(idx);
907                        }
908                        acc
909                    });
910                let left_byte_record_line = left_byte_record
911                    .position()
912                    .expect("a record position")
913                    .line();
914                let right_byte_record_line = right_byte_record
915                    .position()
916                    .expect("a record position")
917                    .line();
918                return Some(Ok(DiffByteRecord::Modify {
919                    add: ByteRecordLineInfo::new(right_byte_record, right_byte_record_line),
920                    delete: ByteRecordLineInfo::new(left_byte_record, left_byte_record_line),
921                    field_indices: fields_modified,
922                }));
923            }
924            _ => (),
925        }
926        None
927    }
928}
929
930pub(crate) struct DiffByteRecordFirstRow {
931    csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
932    sender_csv_records_recycle: Sender<csv::ByteRecord>,
933    headers: HeadersParsed,
934}
935
936impl DiffByteRecordFirstRow {
937    pub(crate) fn new(
938        csv_left_right_parse_results: Receiver<CsvLeftRightParseResult<CsvByteRecordWithHash>>,
939        sender_csv_records_recycle: Sender<csv::ByteRecord>,
940        headers: HeadersParsed,
941    ) -> Self {
942        Self {
943            csv_left_right_parse_results,
944            sender_csv_records_recycle,
945            headers,
946        }
947    }
948
949    pub(crate) fn into_diff_byte_record_iter(self) -> DiffByteRecordsIterator {
950        let parse_result = &self.csv_left_right_parse_results;
951
952        let (num_cols, first_few) = match self.headers.max_num_cols() {
953            nc @ Some(_) => (nc, Default::default()),
954            None => match (parse_result.recv(), parse_result.recv()) {
955                (Ok(csv_left_right_parse_result_first), Ok(csv_left_right_parse_result_second)) => {
956                    let len_first = csv_left_right_parse_result_first
957                        .byte_record_result()
958                        .map(|csv| csv.len())
959                        .ok();
960                    let len_second = csv_left_right_parse_result_second
961                        .byte_record_result()
962                        .map(|csv| csv.len())
963                        .ok();
964
965                    (
966                        max(len_first, len_second),
967                        vec![
968                            csv_left_right_parse_result_first,
969                            csv_left_right_parse_result_second,
970                        ],
971                    )
972                }
973                (Ok(csv_left_right_parse_result), Err(_))
974                | (Err(_), Ok(csv_left_right_parse_result)) => {
975                    let num_cols = csv_left_right_parse_result
976                        .byte_record_result()
977                        .map(|csv| csv.len())
978                        .ok();
979                    (num_cols, vec![csv_left_right_parse_result])
980                }
981                (Err(_), Err(_)) => {
982                    // nothing to do
983                    Default::default()
984                }
985            },
986        };
987        DiffByteRecordsIterator::new(
988            first_few
989                .into_iter()
990                .chain(self.csv_left_right_parse_results),
991            self.sender_csv_records_recycle,
992            self.headers,
993            num_cols,
994        )
995    }
996}
997
998trait ByteRecordResultFromParseResult {
999    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error>;
1000}
1001
1002impl ByteRecordResultFromParseResult for CsvLeftRightParseResult<CsvByteRecordWithHash> {
1003    fn byte_record_result(&self) -> Result<&csv::ByteRecord, &csv::Error> {
1004        match self {
1005            CsvLeftRightParseResult::Left(CsvByteRecordWithHash { byte_record, .. })
1006            | CsvLeftRightParseResult::Right(CsvByteRecordWithHash { byte_record, .. }) => {
1007                byte_record.as_ref()
1008            }
1009        }
1010    }
1011}
1012
1013#[cfg(test)]
1014mod tests {
1015    use crate::{
1016        diff_result::{ColumnIdx, ColumnIdxError},
1017        diff_row::{ByteRecordLineInfo, DiffByteRecord},
1018    };
1019    use pretty_assertions::assert_eq;
1020    use std::error::Error;
1021
1022    use super::DiffByteRecords;
1023
1024    #[test]
1025    fn sort_by_line_delete_then_add_already_sorted() -> Result<(), Box<dyn Error>> {
1026        let mut diff_records = DiffByteRecords::new(
1027            vec![
1028                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1029                    csv::ByteRecord::from(vec!["_", "_", "_"]),
1030                    3,
1031                )),
1032                DiffByteRecord::Add(ByteRecordLineInfo::new(
1033                    csv::ByteRecord::from(vec!["_", "_", "_"]),
1034                    4,
1035                )),
1036            ],
1037            Default::default(),
1038            None,
1039        );
1040
1041        let expected = diff_records.clone();
1042
1043        diff_records.sort_by_line();
1044
1045        assert_eq!(diff_records, expected);
1046
1047        Ok(())
1048    }
1049
1050    #[test]
1051    fn sort_by_line_delete_then_add_not_sorted() -> Result<(), Box<dyn Error>> {
1052        let mut diff_records = DiffByteRecords::new(
1053            vec![
1054                DiffByteRecord::Add(ByteRecordLineInfo::new(
1055                    csv::ByteRecord::from(vec!["_", "_", "_"]),
1056                    4,
1057                )),
1058                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1059                    csv::ByteRecord::from(vec!["_", "_", "_"]),
1060                    3,
1061                )),
1062            ],
1063            Default::default(),
1064            None,
1065        );
1066
1067        let expected = vec![
1068            DiffByteRecord::Delete(ByteRecordLineInfo::new(
1069                csv::ByteRecord::from(vec!["_", "_", "_"]),
1070                3,
1071            )),
1072            DiffByteRecord::Add(ByteRecordLineInfo::new(
1073                csv::ByteRecord::from(vec!["_", "_", "_"]),
1074                4,
1075            )),
1076        ];
1077
1078        diff_records.sort_by_line();
1079
1080        assert_eq!(diff_records.as_slice(), expected);
1081
1082        Ok(())
1083    }
1084
1085    #[test]
1086    fn sort_by_line_modify_not_sorted_simple_one_sided() -> Result<(), Box<dyn Error>> {
1087        let mut diff_records = DiffByteRecords::new(
1088            vec![
1089                DiffByteRecord::Modify {
1090                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1091                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1092                    field_indices: vec![],
1093                },
1094                DiffByteRecord::Modify {
1095                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1096                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1097                    field_indices: vec![],
1098                },
1099            ],
1100            Default::default(),
1101            None,
1102        );
1103
1104        let expected = vec![
1105            DiffByteRecord::Modify {
1106                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1107                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1108                field_indices: vec![],
1109            },
1110            DiffByteRecord::Modify {
1111                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1112                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1113                field_indices: vec![],
1114            },
1115        ];
1116
1117        diff_records.sort_by_line();
1118
1119        assert_eq!(diff_records.as_slice(), expected);
1120
1121        Ok(())
1122    }
1123
1124    #[test]
1125    fn sort_by_line_modify_lines_equal_on_opposite_side_prefer_smaller_delete_side_first(
1126    ) -> Result<(), Box<dyn Error>> {
1127        let mut diff_records = DiffByteRecords::new(
1128            vec![
1129                DiffByteRecord::Modify {
1130                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1131                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1132                    field_indices: vec![],
1133                },
1134                DiffByteRecord::Modify {
1135                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1136                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1137                    field_indices: vec![],
1138                },
1139            ],
1140            Default::default(),
1141            None,
1142        );
1143
1144        let expected = vec![
1145            DiffByteRecord::Modify {
1146                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1147                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1148                field_indices: vec![],
1149            },
1150            DiffByteRecord::Modify {
1151                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1152                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1153                field_indices: vec![],
1154            },
1155        ];
1156
1157        diff_records.sort_by_line();
1158
1159        assert_eq!(diff_records.as_slice(), expected);
1160
1161        Ok(())
1162    }
1163
1164    #[test]
1165    fn sort_by_line_modify_sum_lines_equal_minimum_on_add_side_prefer_smaller_add_side_first(
1166    ) -> Result<(), Box<dyn Error>> {
1167        let mut diff_records = DiffByteRecords::new(
1168            vec![
1169                DiffByteRecord::Modify {
1170                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1171                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1172                    field_indices: vec![],
1173                },
1174                DiffByteRecord::Modify {
1175                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1176                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1177                    field_indices: vec![],
1178                },
1179            ],
1180            Default::default(),
1181            None,
1182        );
1183
1184        let expected = vec![
1185            DiffByteRecord::Modify {
1186                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1187                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1188                field_indices: vec![],
1189            },
1190            DiffByteRecord::Modify {
1191                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1192                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 4),
1193                field_indices: vec![],
1194            },
1195        ];
1196
1197        diff_records.sort_by_line();
1198
1199        assert_eq!(diff_records.as_slice(), expected);
1200
1201        Ok(())
1202    }
1203
1204    #[test]
1205    fn sort_by_line_modify_not_sort_by_sum_lines_but_by_smallest() -> Result<(), Box<dyn Error>> {
1206        let mut diff_records = DiffByteRecords::new(
1207            vec![
1208                DiffByteRecord::Modify {
1209                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1210                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1211                    field_indices: vec![],
1212                },
1213                DiffByteRecord::Modify {
1214                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1215                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1216                    field_indices: vec![],
1217                },
1218            ],
1219            Default::default(),
1220            None,
1221        );
1222
1223        let expected = vec![
1224            DiffByteRecord::Modify {
1225                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 5),
1226                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 0),
1227                field_indices: vec![],
1228            },
1229            DiffByteRecord::Modify {
1230                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 1),
1231                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 3),
1232                field_indices: vec![],
1233            },
1234        ];
1235
1236        diff_records.sort_by_line();
1237
1238        assert_eq!(diff_records.as_slice(), expected);
1239
1240        Ok(())
1241    }
1242
1243    #[test]
1244    fn sort_by_line_modify_complex_interleaved() -> Result<(), Box<dyn Error>> {
1245        let mut diff_records = DiffByteRecords::new(
1246            vec![
1247                DiffByteRecord::Modify {
1248                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1249                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1250                    field_indices: vec![],
1251                },
1252                DiffByteRecord::Modify {
1253                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1254                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1255                    field_indices: vec![],
1256                },
1257                DiffByteRecord::Modify {
1258                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1259                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1260                    field_indices: vec![],
1261                },
1262            ],
1263            Default::default(),
1264            None,
1265        );
1266
1267        let expected = vec![
1268            DiffByteRecord::Modify {
1269                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1270                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1271                field_indices: vec![],
1272            },
1273            DiffByteRecord::Modify {
1274                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1275                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 6),
1276                field_indices: vec![],
1277            },
1278            DiffByteRecord::Modify {
1279                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 7),
1280                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["_", "_", "_"]), 8),
1281                field_indices: vec![],
1282            },
1283        ];
1284
1285        diff_records.sort_by_line();
1286
1287        assert_eq!(diff_records.as_slice(), expected);
1288
1289        Ok(())
1290    }
1291
1292    #[test]
1293    fn sort_by_col_selection_of_cols_is_empty_order_does_not_change() -> Result<(), Box<dyn Error>>
1294    {
1295        let mut diff_records = DiffByteRecords::new(
1296            vec![
1297                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1298                    csv::ByteRecord::from(vec!["d", "e", "f"]),
1299                    3,
1300                )),
1301                DiffByteRecord::Add(ByteRecordLineInfo::new(
1302                    csv::ByteRecord::from(vec!["a", "b", "c"]),
1303                    4,
1304                )),
1305            ],
1306            Default::default(),
1307            None,
1308        );
1309
1310        let expected = diff_records.clone();
1311
1312        diff_records.sort_by_columns::<ColumnIdx, _>(vec![])?;
1313
1314        assert_eq!(diff_records, expected);
1315
1316        Ok(())
1317    }
1318
1319    #[test]
1320    fn sort_by_col_all_equal_delete_before_add_order_does_not_change() -> Result<(), Box<dyn Error>>
1321    {
1322        let mut diff_records = DiffByteRecords::new(
1323            vec![
1324                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1325                    csv::ByteRecord::from(vec!["a", "x", "y"]),
1326                    3,
1327                )),
1328                DiffByteRecord::Add(ByteRecordLineInfo::new(
1329                    csv::ByteRecord::from(vec!["a", "b", "c"]),
1330                    4,
1331                )),
1332            ],
1333            Default::default(),
1334            None,
1335        );
1336
1337        let expected = diff_records.clone();
1338
1339        diff_records.sort_by_columns(vec![0])?;
1340
1341        assert_eq!(diff_records, expected);
1342
1343        Ok(())
1344    }
1345
1346    #[test]
1347    fn sort_by_second_col_a_in_add_is_less_than_b_in_modify_delete() -> Result<(), Box<dyn Error>> {
1348        let mut diff_records = DiffByteRecords::new(
1349            vec![
1350                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1351                    csv::ByteRecord::from(vec!["_", "b", "_"]),
1352                    3,
1353                )),
1354                DiffByteRecord::Add(ByteRecordLineInfo::new(
1355                    csv::ByteRecord::from(vec!["_", "a", "_"]),
1356                    4,
1357                )),
1358            ],
1359            Default::default(),
1360            None,
1361        );
1362
1363        diff_records.sort_by_columns(vec![1])?;
1364
1365        let expected = DiffByteRecords::new(
1366            vec![
1367                DiffByteRecord::Add(ByteRecordLineInfo::new(
1368                    csv::ByteRecord::from(vec!["_", "a", "_"]),
1369                    4,
1370                )),
1371                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1372                    csv::ByteRecord::from(vec!["_", "b", "_"]),
1373                    3,
1374                )),
1375            ],
1376            Default::default(),
1377            None,
1378        );
1379
1380        assert_eq!(diff_records, expected);
1381
1382        Ok(())
1383    }
1384
1385    #[test]
1386    fn sort_by_certain_col_idx_twice_is_ok() -> Result<(), Box<dyn Error>> {
1387        let mut diff_records = DiffByteRecords::new(
1388            vec![
1389                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1390                    csv::ByteRecord::from(vec!["az", "_", "_"]),
1391                    3,
1392                )),
1393                DiffByteRecord::Add(ByteRecordLineInfo::new(
1394                    csv::ByteRecord::from(vec!["a", "_", "_"]),
1395                    4,
1396                )),
1397            ],
1398            Default::default(),
1399            None,
1400        );
1401
1402        diff_records.sort_by_columns(vec![0, 0])?;
1403
1404        let expected = DiffByteRecords::new(
1405            vec![
1406                DiffByteRecord::Add(ByteRecordLineInfo::new(
1407                    csv::ByteRecord::from(vec!["a", "_", "_"]),
1408                    4,
1409                )),
1410                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1411                    csv::ByteRecord::from(vec!["az", "_", "_"]),
1412                    3,
1413                )),
1414            ],
1415            Default::default(),
1416            None,
1417        );
1418
1419        assert_eq!(diff_records, expected);
1420
1421        Ok(())
1422    }
1423
1424    #[test]
1425    fn sort_by_first_and_second_col_first_col_val_is_equal_so_second_col_decides_order(
1426    ) -> Result<(), Box<dyn Error>> {
1427        let mut diff_records = DiffByteRecords::new(
1428            vec![
1429                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1430                    csv::ByteRecord::from(vec!["x", "b", "_"]),
1431                    3,
1432                )),
1433                DiffByteRecord::Add(ByteRecordLineInfo::new(
1434                    csv::ByteRecord::from(vec!["x", "a", "_"]),
1435                    4,
1436                )),
1437            ],
1438            Default::default(),
1439            None,
1440        );
1441
1442        diff_records.sort_by_columns(vec![0, 1])?;
1443
1444        let expected = DiffByteRecords::new(
1445            vec![
1446                DiffByteRecord::Add(ByteRecordLineInfo::new(
1447                    csv::ByteRecord::from(vec!["x", "a", "_"]),
1448                    4,
1449                )),
1450                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1451                    csv::ByteRecord::from(vec!["x", "b", "_"]),
1452                    3,
1453                )),
1454            ],
1455            Default::default(),
1456            None,
1457        );
1458
1459        assert_eq!(diff_records, expected);
1460
1461        Ok(())
1462    }
1463
/// Columns 0 and 1 are identical in both records, so sorting by `[0, 1, 2]` is decided by
/// column 2: the `Add` (`"i"`) is expected in front of the `Delete` (`"z"`).
#[test]
fn sort_by_first_second_and_third_col_first_and_second_col_val_is_equal_so_third_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "a", "z"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["x", "a", "i"]),
        4,
    ));

    // Input order: delete first, add second.
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0, 1, 2])?;

    // Expected order: add first, delete second.
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1503
/// The sort columns are given back to front (`[2, 1, 0]`). Columns 2 and 1 are identical in
/// both records, so column 0 decides: the `Add` (`"1"`) is expected in front of the
/// `Delete` (`"2"`).
#[test]
fn sort_by_first_second_and_third_col_back_to_front_third_and_second_col_val_is_equal_so_first_col_decides_order(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["2", "a", "z"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "a", "z"]),
        4,
    ));

    // Input order: delete first, add second.
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![2, 1, 0])?;

    // Expected order: add first, delete second.
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1543
/// An `Add` and a `Delete` with identical field values are sorted by column 0; the `Delete`
/// is expected to come out in front of the `Add`.
#[test]
fn sort_by_col_delete_must_be_smaller_than_add_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["same", "_", "_"]),
        4,
    ));
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["same", "_", "_"]),
        5,
    ));

    // Input order: add first, delete second.
    let mut actual = DiffByteRecords::new(
        vec![added.clone(), deleted.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // Expected order: delete first, add second.
    let expected = DiffByteRecords::new(vec![deleted, added], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1583
/// Three records sorted by columns `[0, 1]`: the record with `"0"` in column 0 goes first,
/// and the two records sharing `"1"` in column 0 are ordered by column 1 (`"a"` before `"b"`).
#[test]
fn sort_by_col_with_three_items_first_and_second_by_first_col_second_and_third_by_second_col(
) -> Result<(), Box<dyn Error>> {
    let delete_one_b = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "b", "_"]),
        3,
    ));
    let add_one_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["1", "a", "_"]),
        4,
    ));
    let add_zero_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["0", "a", "_"]),
        4,
    ));

    let mut actual = DiffByteRecords::new(
        vec![delete_one_b.clone(), add_one_a.clone(), add_zero_a.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0, 1])?;

    let expected = DiffByteRecords::new(
        vec![add_zero_a, add_one_a, delete_one_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1631
/// A `Modify` whose `delete` part has `"a"` in column 0 is sorted together with a `Delete`
/// that has `"b"` in column 0. The input is already in the expected order
/// (`Modify`, then `Delete`), so sorting by column 0 must leave it unchanged.
#[test]
fn sort_by_col_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input order *before* sorting. Cloning after the sort would compare the
    // result with itself and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1658
/// The `delete` part of the `Modify` and the standalone `Delete` both have `"c"` in column 0.
/// Per the test name, the comparison then falls back to the `add` part of the `Modify`
/// (`"a"`), which keeps the `Modify` in front of the `Delete`: the input order must be
/// unchanged by the sort.
#[test]
fn sort_by_col_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input order *before* sorting. Cloning after the sort would compare the
    // result with itself and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1686
/// A `Modify` (both parts `"c"` in column 0) and a `Delete` (`"c"` in column 0) are sorted by
/// column 0; the `Delete` is expected to come out in front of the `Modify`.
#[test]
fn sort_by_col_delete_must_be_smaller_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["c", "_", "_"]),
        4,
    ));

    // Input order: modify first, delete second.
    let mut actual = DiffByteRecords::new(
        vec![modified.clone(), deleted.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // Expected order: delete first, modify second.
    let expected = DiffByteRecords::new(vec![deleted, modified], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1728
/// A `Modify` whose `delete` part has `"a"` in column 0 is sorted together with an `Add`
/// that has `"b"` in column 0. The input is already in the expected order
/// (`Modify`, then `Add`), so sorting by column 0 must leave it unchanged.
#[test]
fn sort_by_col_modify_delete_compared_with_add() -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["b", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input order *before* sorting. Cloning after the sort would compare the
    // result with itself and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1755
/// An `Add` with `"b"` in column 0 is sorted together with a `Modify` whose `delete` part has
/// `"a"` in column 0; the `Modify` is expected to end up in front of the `Add`.
#[test]
fn sort_by_col_add_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["b", "_", "_"]),
        4,
    ));
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };

    // Input order: add first, modify second.
    let mut actual = DiffByteRecords::new(
        vec![added.clone(), modified.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // Expected order: modify first, add second.
    let expected = DiffByteRecords::new(vec![modified, added], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1796
/// The `delete` part of the `Modify` and the standalone `Add` both have `"a"` in column 0,
/// while the `add` part of the `Modify` has `"c"`. Sorting by column 0 is expected to put
/// the `Add` in front of the `Modify`.
#[test]
fn sort_by_col_modify_delete_compared_with_add_are_equal_fall_back_to_compare_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modified = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
        field_indices: vec![],
    };
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    // Input order: modify first, add second.
    let mut actual = DiffByteRecords::new(
        vec![modified.clone(), added.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    // Expected order: add first, modify second.
    let expected = DiffByteRecords::new(vec![added, modified], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
1838
/// A `Modify` (both parts `"c"` in column 0) and an `Add` (`"c"` in column 0) are sorted by
/// column 0. The `Add` must stay behind the `Modify`, i.e. the input order
/// (`Modify`, then `Add`) must be unchanged by the sort.
#[test]
fn sort_by_col_add_must_be_greater_than_modify_when_otherwise_identical(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Modify {
                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 2),
                field_indices: vec![],
            },
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["c", "_", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input order *before* sorting. Cloning after the sort would compare the
    // result with itself and the assertion could never fail.
    let expected = diff_records.clone();

    diff_records.sort_by_columns(vec![0])?;

    assert_eq!(diff_records, expected);

    Ok(())
}
1866
/// Two `Modify` records are sorted by column 0. Their `delete` parts differ (`"d"` vs `"c"`),
/// and the one with `"c"` is expected first even though its `add` part (`"b"`) is greater
/// than the other record's `add` part (`"a"`).
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete() -> Result<(), Box<dyn Error>> {
    let modify_d_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };

    let mut actual = DiffByteRecords::new(
        vec![modify_d_to_a.clone(), modify_c_to_b.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    let expected = DiffByteRecords::new(
        vec![modify_c_to_b, modify_d_to_a],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1909
/// Two `Modify` records with identical `delete` parts (`"c"` in column 0) are sorted by
/// column 0. Their `add` parts differ, and the record whose `add` part is `"a"` is expected
/// in front of the one whose `add` part is `"b"`.
#[test]
fn sort_by_col_modify_delete_compared_with_modify_delete_are_equal_fall_back_to_compare_modify_add_with_modify_add(
) -> Result<(), Box<dyn Error>> {
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let modify_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };

    let mut actual = DiffByteRecords::new(
        vec![modify_c_to_b.clone(), modify_c_to_a.clone()],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    let expected = DiffByteRecords::new(
        vec![modify_c_to_a, modify_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
1953
/// Sorts a mix of all three variants by column 0. The expected result is: the `Delete`
/// (`"a"`), the `Add` (`"a"`), then the two `Modify` records (both with `"c"` in the `delete`
/// part) ordered by their `add` parts (`"a"` before `"b"`).
#[test]
fn sort_by_col_modify_cmp_with_add_cmp_with_modify_cmp_with_delete(
) -> Result<(), Box<dyn Error>> {
    let modify_c_to_b = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["b", "_", "_"]), 2),
        field_indices: vec![],
    };
    let add_a = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));
    let modify_c_to_a = DiffByteRecord::Modify {
        delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["c", "_", "_"]), 1),
        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "_", "_"]), 2),
        field_indices: vec![],
    };
    let delete_a = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "_", "_"]),
        4,
    ));

    let mut actual = DiffByteRecords::new(
        vec![
            modify_c_to_b.clone(),
            add_a.clone(),
            modify_c_to_a.clone(),
            delete_a.clone(),
        ],
        Default::default(),
        None,
    );
    actual.sort_by_columns(vec![0])?;

    let expected = DiffByteRecords::new(
        vec![delete_a, add_a, modify_c_to_a, modify_c_to_b],
        Default::default(),
        None,
    );
    assert_eq!(actual, expected);

    Ok(())
}
2013
/// Sorting three-field records by column index 3 is expected to fail with
/// `ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }`.
#[test]
fn sort_by_col_idx_out_of_bounds_err() -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "b", "c"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["a", "x", "y"]),
        4,
    ));
    let mut records = DiffByteRecords::new(vec![deleted, added], Default::default(), None);

    let outcome = records.sort_by_columns(vec![3]);

    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));

    Ok(())
}
2037
/// Sorts by columns `[1, 3]` where column 1 is identical in both records and index 3 is out
/// of bounds. The call is expected to report
/// `ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }` and to leave the records in their
/// original order.
#[test]
fn sort_by_col_first_idx_ok_and_cmp_as_equal_second_idx_out_of_bounds_err_order_stays_the_same(
) -> Result<(), Box<dyn Error>> {
    let mut diff_records = DiffByteRecords::new(
        vec![
            DiffByteRecord::Delete(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                3,
            )),
            DiffByteRecord::Add(ByteRecordLineInfo::new(
                csv::ByteRecord::from(vec!["_", "same", "_"]),
                4,
            )),
        ],
        Default::default(),
        None,
    );

    // Snapshot the input order *before* sorting. Cloning afterwards would compare the
    // result with itself and could not detect a reordering.
    let expected = diff_records.clone();

    let res = diff_records.sort_by_columns(vec![1, 3]);

    assert_eq!(res, Err(ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }));

    assert_eq!(diff_records, expected);

    Ok(())
}
2066
/// Sorts by columns `[1, 3]` where column 1 differs between the records and index 3 is out of
/// bounds. The call is expected to return `Ok(())` and to order the records by column 1
/// (`"a"` before `"b"`).
#[test]
fn sort_by_col_first_idx_ok_and_cmp_not_equal_second_idx_out_of_bounds_but_no_err_because_first_idx_already_sorted(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: delete first, add second.
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![1, 3]);
    assert_eq!(outcome, Ok(()));

    // Expected order: add first, delete second.
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2108
/// Sorts by columns `[3, 1]` where index 3 is out of bounds and index 1 is valid. The call is
/// expected to report `ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 }`, while the records
/// still end up ordered by column 1 (`"a"` before `"b"`).
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: delete first, add second.
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1]);
    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));

    // Despite the error, the records are sorted by the second column.
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2151
/// Sorts by columns `[3, 1, 4]` where indices 3 and 4 are out of bounds and index 1 is valid.
/// Only the error for index 3 is expected to be reported, and the records still end up
/// ordered by column 1 (`"a"` before `"b"`).
#[test]
fn sort_by_col_first_idx_out_of_bounds_err_second_idx_ok_third_idx_out_of_bounds_sort_by_second_idx(
) -> Result<(), Box<dyn Error>> {
    let deleted = DiffByteRecord::Delete(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "b", "_"]),
        3,
    ));
    let added = DiffByteRecord::Add(ByteRecordLineInfo::new(
        csv::ByteRecord::from(vec!["_", "a", "_"]),
        4,
    ));

    // Input order: delete first, add second.
    let mut actual = DiffByteRecords::new(
        vec![deleted.clone(), added.clone()],
        Default::default(),
        None,
    );

    let outcome = actual.sort_by_columns(vec![3, 1, 4]);

    // Only the first error encountered during the sort is surfaced.
    let expected_err = ColumnIdxError::IdxOutOfBounds { idx: 3, len: 3 };
    assert_eq!(outcome, Err(expected_err));

    // The records are nevertheless sorted by the second column.
    let expected = DiffByteRecords::new(vec![added, deleted], Default::default(), None);
    assert_eq!(actual, expected);

    Ok(())
}
2195}