csv_diff/
csv_diff.rs

1use crate::csv::Csv;
2use crate::csv_hash_comparer::CsvHashComparer;
3use crate::csv_hash_receiver_comparer::CsvHashReceiverStreamComparer;
4#[cfg(not(feature = "rayon-threads"))]
5use crate::csv_hash_task_spawner::CsvHashTaskSpawnerBuilder;
6use crate::csv_hash_task_spawner::CsvHashTaskSpawnerLocalBuilder;
7#[cfg(feature = "rayon-threads")]
8use crate::csv_hash_task_spawner::CsvHashTaskSpawnerLocalRayon;
9use crate::csv_hash_task_spawner::{
10    CsvHashTaskLineSenders, CsvHashTaskSenderWithRecycleReceiver, CsvHashTaskSpawner,
11    CsvHashTaskSpawnerLocal,
12};
13#[cfg(feature = "crossbeam-threads")]
14use crate::csv_hash_task_spawner::{
15    CsvHashTaskSpawnerLocalBuilderCrossbeam, CsvHashTaskSpawnerLocalCrossbeam,
16};
17#[cfg(feature = "rayon-threads")]
18use crate::csv_hash_task_spawner::{CsvHashTaskSpawnerLocalBuilderRayon, CsvHashTaskSpawnerRayon};
19use crate::csv_parse_result::{CsvLeftRightParseResult, RecordHashWithPosition};
20use crate::diff_result::{DiffByteRecords, DiffByteRecordsIterator};
21use crate::thread_scope_strategy::*;
22use crossbeam_channel::{bounded, Receiver};
23use csv::Reader;
24use std::cell::RefCell;
25use std::io::{Read, Seek};
26use std::marker::PhantomData;
27use std::sync::Arc;
28use std::{collections::HashSet, iter::Iterator};
29use thiserror::Error;
30
31/// Compare two [CSVs](https://en.wikipedia.org/wiki/Comma-separated_values) lazily with each other (for the eager-/blocking-based variant, see [`CsvByteDiffLocal`](crate::csv_diff::CsvByteDiffLocal)).
32///
33/// Use this instead of [`CsvByteDiffLocal`](crate::csv_diff::CsvByteDiffLocal), when:
34/// - you own your CSV data and you want to use an [`Iterator`](`crate::diff_result::DiffByteRecordsIterator`) for the differences,
35/// so you don't have to read all differences into memory
36/// - your CSV data structure does __not__ support [`Seek`].
37///
38/// By default, `CsvByteDiff` uses a [rayon thread-pool](https://docs.rs/rayon/1.5.0/rayon/struct.ThreadPool.html) to compare differences.
/// If you already have an existing rayon thread-pool that you want to use when creating `CsvByteDiff`,
/// you can construct it with a builder: see [`rayon_thread_pool`](crate::csv_diff::CsvByteDiffBuilder::rayon_thread_pool)
/// on [`CsvByteDiffBuilder`](CsvByteDiffBuilder).
42///
43/// # Example: create `CsvByteDiff` with default values and compare two CSVs byte-wise lazily
44#[cfg_attr(
45    feature = "rayon-threads",
46    doc = r##"
47```
48use csv_diff::{csv_diff::CsvByteDiff, csv::Csv};
49use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
50use std::collections::HashSet;
51use std::iter::FromIterator;
52# fn main() -> Result<(), Box<dyn std::error::Error>> {
53// some csv data with a header, where the first column is a unique id
54let csv_left = "\
55header1,header2,header3\n\
56a,b,c";
57let csv_right = "\
58header1,header2,header3\n\
59a,b,d";
60
61let csv_diff = CsvByteDiff::new()?;
62
63let mut diff_iterator = csv_diff.diff(
64    Csv::with_reader(csv_left.as_bytes()),
65    Csv::with_reader(csv_right.as_bytes()),
66);
67
68let diff_row_actual = diff_iterator
69    .next()
70    .ok_or("Expected a difference between the two CSVs, but got none".to_string())??;
71
72let diff_row_expected = DiffByteRecord::Modify {
73    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
74    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 2),
75    field_indices: vec![2],
76};
77
78assert_eq!(diff_row_actual, diff_row_expected);
79
80Ok(())
81# }
82```
83"##
84)]
85#[derive(Debug)]
86pub struct CsvByteDiff<T: CsvHashTaskSpawner> {
87    primary_key_columns: HashSet<usize>,
88    // TODO: try to find a way to remove interior mutability in `diff` method
89    hash_task_spawner: RefCell<Option<T>>,
90}
91
#[cfg(feature = "rayon-threads")]
impl CsvByteDiff<CsvHashTaskSpawnerRayon> {
    /// Constructs a `CsvByteDiff` with the default configuration.
    ///
    /// The first column (index `0`) is used as the primary key and a new rayon thread pool,
    /// built with `rayon::ThreadPoolBuilder`'s defaults, is owned by the returned instance.
    ///
    /// # Errors
    ///
    /// Returns [`CsvDiffNewError::ThreadPoolBuildError`] when the rayon thread pool
    /// cannot be built.
    pub fn new() -> Result<Self, CsvDiffNewError> {
        let thread_pool = rayon::ThreadPoolBuilder::new().build()?;
        let spawner = CsvHashTaskSpawnerRayon::with_thread_pool_owned(thread_pool);

        Ok(Self {
            primary_key_columns: std::iter::once(0).collect(),
            hash_task_spawner: RefCell::new(Some(spawner)),
        })
    }
}
105
106impl<T> CsvByteDiff<T>
107where
108    T: CsvHashTaskSpawner,
109{
110    pub fn diff<R: Read + Send + 'static>(
111        &self,
112        mut csv_left: Csv<R>,
113        mut csv_right: Csv<R>,
114    ) -> DiffByteRecordsIterator {
115        use crossbeam_channel::unbounded;
116
117        let (sender_right, receiver) = bounded(10_000);
118        let sender_left = sender_right.clone();
119
120        let (sender_csv_recycle, receiver_csv_recycle) = unbounded();
121
122        let hts = self.hash_task_spawner.take().take();
123
124        let (csv_reader_left, csv_reader_right) =
125            (csv_left.csv_reader_mut(), csv_right.csv_reader_mut());
126        let headers = (
127            csv_reader_left
128                .has_headers()
129                .then(|| csv_reader_left.byte_headers().cloned()),
130            csv_reader_right
131                .has_headers()
132                .then(|| csv_reader_right.byte_headers().cloned()),
133        )
134            .into();
135
136        let (hash_task_spawner, receiver_diff_byte_record_iter) =
137            // TODO: remove unwrap!!!
138            hts.unwrap().spawn_hashing_tasks_and_send_result(
139                CsvHashTaskSenderWithRecycleReceiver::new(
140                    sender_left,
141                    csv_left,
142                    receiver_csv_recycle.clone()
143                ),
144                CsvHashTaskSenderWithRecycleReceiver::new(
145                    sender_right,
146                    csv_right,
147                    receiver_csv_recycle
148                ),
149                CsvHashReceiverStreamComparer::new(receiver, sender_csv_recycle).headers(headers),
150                self.primary_key_columns.clone(),
151            );
152
153        let mut hash_task_spawner_mut = self.hash_task_spawner.borrow_mut();
154        *hash_task_spawner_mut = Some(hash_task_spawner);
155
156        receiver_diff_byte_record_iter.recv().unwrap()
157    }
158}
159
160/// Create a [`CsvByteDiff`](CsvByteDiff) with configuration options.
161/// # Example: create a `CsvByteDiff`, where column 1 and column 3 are treated as a compound primary key.
162#[cfg_attr(
163    feature = "rayon-threads",
164    doc = r##"
165```
166use csv_diff::{csv_diff::{CsvByteDiff, CsvByteDiffBuilder}, csv::Csv};
167use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
168use csv_diff::diff_result::DiffByteRecords;
169use std::convert::TryInto;
170# fn main() -> Result<(), Box<dyn std::error::Error>> {
171// some csv data with a header, where the first column and third column represent a compound key
172let csv_data_left = "\
173    id,name,commit_sha\n\
174    1,lemon,efae52\n\
175    2,strawberry,a33411"; // this csv line is seen as "Deleted" and not "Modified"
176                          // because "id" and "commit_sha" are different and both columns
177                          // _together_ represent the primary key
178let csv_data_right = "\
179    id,name,commit_sha\n\
180    1,lemon,efae52\n\
181    2,strawberry,ddef23"; // this csv line is seen as "Added" and not "Modified",
182                          // because "id" and "commit_sha" are different and both columns
183                          // _together_ represent the primary key
184
185let csv_byte_diff = CsvByteDiffBuilder::new()
186    .primary_key_columns(vec![0usize, 2])
187    .build()?;
188
189let mut diff_byte_records: DiffByteRecords = csv_byte_diff
190    .diff(
191        Csv::with_reader(csv_data_left.as_bytes()),
192        Csv::with_reader(csv_data_right.as_bytes()),
193    )
194    .try_to_diff_byte_records()?;
195
196diff_byte_records.sort_by_line();
197
198let diff_byte_rows = diff_byte_records.as_slice();
199
200assert_eq!(
201    diff_byte_rows,
202    &[
203        DiffByteRecord::Delete(ByteRecordLineInfo::new(
204            csv::ByteRecord::from(vec!["2", "strawberry", "a33411"]),
205            3
206        ),),
207        DiffByteRecord::Add(ByteRecordLineInfo::new(
208            csv::ByteRecord::from(vec!["2", "strawberry", "ddef23"]),
209            3
210        ),)
211    ]
212);
213Ok(())
214# }
215```
216"##
217)]
218#[derive(Debug)]
219#[cfg_attr(feature = "rayon-threads", derive(Default))]
220pub struct CsvByteDiffBuilder<T: CsvHashTaskSpawner> {
221    primary_key_columns: HashSet<usize>,
222    #[cfg(feature = "rayon-threads")]
223    hash_task_spawner: Option<CsvHashTaskSpawnerRayon>,
224    #[cfg(feature = "rayon-threads")]
225    _phantom: PhantomData<T>,
226    #[cfg(not(feature = "rayon-threads"))]
227    hash_task_spawner: T,
228}
229
230impl<T> CsvByteDiffBuilder<T>
231where
232    T: CsvHashTaskSpawner,
233{
234    #[cfg(not(feature = "rayon-threads"))]
235    pub fn new<B>(csv_hash_task_spawner_builder: B) -> Self
236    where
237        B: CsvHashTaskSpawnerBuilder<T>,
238    {
239        Self {
240            primary_key_columns: std::iter::once(0).collect(),
241            hash_task_spawner: csv_hash_task_spawner_builder.build(),
242        }
243    }
244
245    pub fn primary_key_columns(mut self, columns: impl IntoIterator<Item = usize>) -> Self {
246        self.primary_key_columns = columns.into_iter().collect();
247        self
248    }
249
250    #[cfg(not(feature = "rayon-threads"))]
251    pub fn build(self) -> Result<CsvByteDiff<T>, CsvByteDiffBuilderError> {
252        if !self.primary_key_columns.is_empty() {
253            Ok(CsvByteDiff {
254                primary_key_columns: self.primary_key_columns,
255                hash_task_spawner: RefCell::new(Some(self.hash_task_spawner)),
256            })
257        } else {
258            Err(CsvByteDiffBuilderError::NoPrimaryKeyColumns)
259        }
260    }
261}
262
#[cfg(feature = "rayon-threads")]
impl CsvByteDiffBuilder<CsvHashTaskSpawnerRayon> {
    /// Creates a builder that uses the first column (index `0`) as primary key and has no
    /// thread pool configured yet.
    pub fn new() -> Self {
        let mut primary_key_columns = HashSet::new();
        primary_key_columns.insert(0);

        Self {
            primary_key_columns,
            hash_task_spawner: None,
            _phantom: PhantomData,
        }
    }

    /// Uses the given, shared rayon thread pool instead of building a new one in `build`.
    pub fn rayon_thread_pool(self, thread_pool: Arc<rayon::ThreadPool>) -> Self {
        Self {
            hash_task_spawner: Some(CsvHashTaskSpawnerRayon::with_thread_pool_arc(thread_pool)),
            ..self
        }
    }

    /// Builds the [`CsvByteDiff`]. When no thread pool has been supplied, a new rayon thread
    /// pool is built with `rayon::ThreadPoolBuilder`'s defaults.
    ///
    /// # Errors
    ///
    /// - [`CsvByteDiffBuilderError::NoPrimaryKeyColumns`] when the set of primary key columns
    ///   is empty.
    /// - [`CsvByteDiffBuilderError::ThreadPoolBuildError`] when the new thread pool cannot be
    ///   built.
    pub fn build(self) -> Result<CsvByteDiff<CsvHashTaskSpawnerRayon>, CsvByteDiffBuilderError> {
        let Self {
            primary_key_columns,
            hash_task_spawner,
            ..
        } = self;

        if primary_key_columns.is_empty() {
            return Err(CsvByteDiffBuilderError::NoPrimaryKeyColumns);
        }

        let spawner = match hash_task_spawner {
            Some(configured) => configured,
            None => CsvHashTaskSpawnerRayon::with_thread_pool_owned(
                rayon::ThreadPoolBuilder::new().build()?,
            ),
        };

        Ok(CsvByteDiff {
            primary_key_columns,
            hash_task_spawner: RefCell::new(Some(spawner)),
        })
    }
}
295
296/// Compare two [CSVs](https://en.wikipedia.org/wiki/Comma-separated_values) eagerly with each other (for the lazy/iterator-based variant, see [`CsvByteDiff`](crate::csv_diff::CsvByteDiff)).
297///
298/// Use this instead of [`CsvByteDiff`](crate::csv_diff::CsvByteDiff), when your CSV data is a local reference and you don't own it.
299///
300/// This requires your CSV data to be [`Seek`]able. If it isn't `Seek`able out of the box, it might still auto implement
301/// the trait [`CsvReadSeek`](crate::csv::CsvReadSeek) (see [`Csv::with_reader_seek`](crate::csv::Csv::with_reader_seek)).
302/// If your CSV data can't be made `Seek`able, consider using [`CsvByteDiff`](crate::csv_diff::CsvByteDiff) instead.
303///
304/// `CsvByteDiffLocal` uses scoped threads internally for comparison.
305/// By default, it uses [rayon's scoped threads within a rayon thread pool](https://docs.rs/rayon/1.5.0/rayon/struct.ThreadPool.html#method.scope).
306/// See also [`rayon_thread_pool`](CsvByteDiffLocalBuilder::rayon_thread_pool) on [`CsvByteDiffLocalBuilder`](CsvByteDiffLocalBuilder)
307/// for using an existing [rayon thread-pool](https://docs.rs/rayon/1.5.0/rayon/struct.ThreadPool.html)
308/// when creating `CsvByteDiffLocal`.
309///
310/// # Example: create `CsvByteDiffLocal` with default values and compare two CSVs byte-wise eagerly
311#[cfg_attr(
312    feature = "rayon-threads",
313    doc = r##"
314```
315use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
316use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
317use std::collections::HashSet;
318use std::iter::FromIterator;
319# fn main() -> Result<(), Box<dyn std::error::Error>> {
320// some csv data with a header, where the first column is a unique id
321let csv_data_left = "id,name,kind\n\
322                     1,lemon,fruit\n\
323                     2,strawberry,fruit";
324let csv_data_right = "id,name,kind\n\
325                      1,lemon,fruit\n\
326                      2,strawberry,nut";
327
328let csv_byte_diff = CsvByteDiffLocal::new()?;
329
330let mut diff_byte_records = csv_byte_diff.diff(
331    Csv::with_reader_seek(csv_data_left.as_bytes()),
332    Csv::with_reader_seek(csv_data_right.as_bytes()),
333)?;
334
335let diff_byte_rows = diff_byte_records.as_slice();
336
337assert_eq!(
338    diff_byte_rows,
339    &[DiffByteRecord::Modify {
340        delete: ByteRecordLineInfo::new(
341            csv::ByteRecord::from(vec!["2", "strawberry", "fruit"]),
342            3
343        ),
344        add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["2", "strawberry", "nut"]), 3),
345        field_indices: vec![2]
346    }]
347);
348Ok(())
349# }
350```
351"##
352)]
353#[derive(Debug)]
354pub struct CsvByteDiffLocal<T: CsvHashTaskSpawnerLocal> {
355    primary_key_columns: HashSet<usize>,
356    hash_task_spawner: T,
357}
358
359/// Create a [`CsvByteDiffLocal`](CsvByteDiffLocal) with configuration options.
360/// # Example: create a `CsvByteDiffLocal`, where column 1 and column 3 are treated as a compound primary key.
361#[cfg_attr(
362    feature = "rayon-threads",
363    doc = r##"
364```
365use csv_diff::{csv_diff::{CsvByteDiffLocal, CsvByteDiffLocalBuilder}, csv::Csv};
366use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
367use std::collections::HashSet;
368use std::iter::FromIterator;
369# fn main() -> Result<(), Box<dyn std::error::Error>> {
370// some csv data with a header, where the first column and third column represent a compound key
371let csv_data_left = "\
372                    id,name,commit_sha\n\
373                    1,lemon,efae52\n\
374                    2,strawberry,a33411"; // this csv line is seen as "Deleted" and not "Modified"
375                                          // because "id" and "commit_sha" are different and both columns
376                                          // _together_ represent the primary key
377let csv_data_right = "\
378                    id,name,commit_sha\n\
379                    1,lemon,efae52\n\
380                    2,strawberry,ddef23"; // this csv line is seen as "Added" and not "Modified",
381                                          // because "id" and "commit_sha" are different and both columns
382                                          // _together_ represent the primary key
383
384let csv_byte_diff = CsvByteDiffLocalBuilder::new()
385    .primary_key_columns(vec![0usize, 2])
386    .build()?;
387
388let mut diff_byte_records = csv_byte_diff.diff(
389    Csv::with_reader_seek(csv_data_left.as_bytes()),
390    Csv::with_reader_seek(csv_data_right.as_bytes()),
391)?;
392
393diff_byte_records.sort_by_line();
394
395let diff_byte_rows = diff_byte_records.as_slice();
396
397assert_eq!(
398    diff_byte_rows,
399    &[
400        DiffByteRecord::Delete(ByteRecordLineInfo::new(
401            csv::ByteRecord::from(vec!["2", "strawberry", "a33411"]),
402            3
403        ),),
404        DiffByteRecord::Add(ByteRecordLineInfo::new(
405            csv::ByteRecord::from(vec!["2", "strawberry", "ddef23"]),
406            3
407        ),)
408    ]
409);
410Ok(())
411# }
412```
413"##
414)]
415#[derive(Debug)]
416#[cfg_attr(feature = "rayon-threads", derive(Default))]
417pub struct CsvByteDiffLocalBuilder<'tp, T: CsvHashTaskSpawnerLocal> {
418    primary_key_columns: HashSet<usize>,
419    #[cfg(feature = "rayon-threads")]
420    hash_task_spawner: Option<CsvHashTaskSpawnerLocalRayon<'tp>>,
421    #[cfg(feature = "rayon-threads")]
422    _phantom: PhantomData<T>,
423    #[cfg(not(feature = "rayon-threads"))]
424    _phantom: PhantomData<&'tp T>,
425    #[cfg(not(feature = "rayon-threads"))]
426    hash_task_spawner: T,
427}
428
429impl<'tp, T> CsvByteDiffLocalBuilder<'tp, T>
430where
431    T: CsvHashTaskSpawnerLocal,
432{
433    #[cfg(not(feature = "rayon-threads"))]
434    pub fn new<B>(csv_hash_task_spawner_builder: B) -> Self
435    where
436        B: CsvHashTaskSpawnerLocalBuilder<T>,
437    {
438        Self {
439            primary_key_columns: std::iter::once(0).collect(),
440            hash_task_spawner: csv_hash_task_spawner_builder.build(),
441            _phantom: PhantomData::default(),
442        }
443    }
444
445    pub fn primary_key_columns(mut self, columns: impl IntoIterator<Item = usize>) -> Self {
446        self.primary_key_columns = columns.into_iter().collect();
447        self
448    }
449
450    #[cfg(not(feature = "rayon-threads"))]
451    pub fn build(self) -> Result<CsvByteDiffLocal<T>, CsvByteDiffBuilderError> {
452        if !self.primary_key_columns.is_empty() {
453            Ok(CsvByteDiffLocal {
454                primary_key_columns: self.primary_key_columns,
455                hash_task_spawner: self.hash_task_spawner,
456            })
457        } else {
458            Err(CsvByteDiffBuilderError::NoPrimaryKeyColumns)
459        }
460    }
461}
462
#[cfg(feature = "rayon-threads")]
impl<'tp> CsvByteDiffLocalBuilder<'tp, CsvHashTaskSpawnerLocalRayon<'tp>> {
    /// Creates a builder that uses the first column (index `0`) as primary key and has no
    /// thread pool configured yet.
    pub fn new() -> Self {
        let mut primary_key_columns = HashSet::new();
        primary_key_columns.insert(0);

        Self {
            primary_key_columns,
            hash_task_spawner: None,
            _phantom: PhantomData,
        }
    }

    /// Uses the given, borrowed rayon thread pool instead of building a new one in `build`.
    pub fn rayon_thread_pool(self, thread_pool: &'tp rayon::ThreadPool) -> Self {
        Self {
            hash_task_spawner: Some(CsvHashTaskSpawnerLocalBuilderRayon::new(thread_pool).build()),
            ..self
        }
    }

    /// Builds the [`CsvByteDiffLocal`]. When no thread pool has been supplied, a new rayon
    /// thread pool is built with `rayon::ThreadPoolBuilder`'s defaults.
    ///
    /// # Errors
    ///
    /// - [`CsvByteDiffBuilderError::NoPrimaryKeyColumns`] when the set of primary key columns
    ///   is empty.
    /// - [`CsvByteDiffBuilderError::ThreadPoolBuildError`] when the new thread pool cannot be
    ///   built.
    pub fn build(
        self,
    ) -> Result<CsvByteDiffLocal<CsvHashTaskSpawnerLocalRayon<'tp>>, CsvByteDiffBuilderError> {
        let Self {
            primary_key_columns,
            hash_task_spawner,
            ..
        } = self;

        if primary_key_columns.is_empty() {
            return Err(CsvByteDiffBuilderError::NoPrimaryKeyColumns);
        }

        let spawner = match hash_task_spawner {
            Some(configured) => configured,
            None => CsvHashTaskSpawnerLocalRayon::new(RayonScope::with_thread_pool_owned(
                rayon::ThreadPoolBuilder::new().build()?,
            )),
        };

        Ok(CsvByteDiffLocal {
            primary_key_columns,
            hash_task_spawner: spawner,
        })
    }
}
498
499#[derive(Debug, Error)]
500pub enum CsvByteDiffBuilderError {
501    #[error("No primary key columns have been specified. You need to provide at least one column index.")]
502    NoPrimaryKeyColumns,
503    #[cfg(feature = "rayon-threads")]
504    #[error("An error occured when trying to build the rayon thread pool.")]
505    ThreadPoolBuildError(#[from] rayon::ThreadPoolBuildError),
506}
507
/// Errors that can occur when constructing a CSV differ through its `new` function.
#[derive(Debug, Error)]
#[cfg(feature = "rayon-threads")]
pub enum CsvDiffNewError {
    /// Building a new rayon thread pool failed.
    #[error("An error occurred when trying to build the rayon thread pool.")]
    ThreadPoolBuildError(#[from] rayon::ThreadPoolBuildError),
}
514
#[cfg(feature = "rayon-threads")]
impl CsvByteDiffLocal<CsvHashTaskSpawnerLocalRayon<'_>> {
    /// Constructs a new `CsvByteDiffLocal<CsvHashTaskSpawnerLocalRayon<'_>>` with a default
    /// configuration.
    ///
    /// The values in the first column of each CSV are declared as the primary key, in order
    /// to match the CSV records against each other.
    /// During the construction, a new [rayon thread-pool](https://docs.rs/rayon/1.5.0/rayon/struct.ThreadPool.html)
    /// is created, which will be used later during the [comparison of CSVs](CsvByteDiffLocal::diff).
    ///
    /// If you need more control over the configuration, consider using a
    /// [`CsvByteDiffLocalBuilder`](CsvByteDiffLocalBuilder) instead.
    ///
    /// # Errors
    ///
    /// Returns [`CsvDiffNewError::ThreadPoolBuildError`] when the rayon thread pool cannot be
    /// built.
    pub fn new() -> Result<Self, CsvDiffNewError> {
        let thread_pool = rayon::ThreadPoolBuilder::new().build()?;
        let scope = RayonScope::with_thread_pool_owned(thread_pool);

        Ok(Self {
            primary_key_columns: std::iter::once(0).collect(),
            hash_task_spawner: CsvHashTaskSpawnerLocalRayon::new(scope),
        })
    }
}
536
#[cfg(feature = "crossbeam-threads")]
impl CsvByteDiffLocal<CsvHashTaskSpawnerLocalCrossbeam> {
    /// Constructs a `CsvByteDiffLocal` that spawns its hashing tasks through a
    /// `CrossbeamScope` and uses the first column (index `0`) as the primary key.
    pub fn new() -> Self {
        Self {
            primary_key_columns: std::iter::once(0).collect(),
            hash_task_spawner: CsvHashTaskSpawnerLocalCrossbeam::new(CrossbeamScope::new()),
        }
    }
}
548
549impl<T> CsvByteDiffLocal<T>
550where
551    T: CsvHashTaskSpawnerLocal,
552{
553    /// Compares `csv_left` with `csv_right` and returns a [`csv::Result`] with the [CSV byte records](crate::diff_result::DiffByteRecords) that are different.
554    ///
555    /// [`Csv<R>`](Csv<R>) is a wrapper around a CSV reader with some configuration options.
556    ///
557    /// # Example
558    #[cfg_attr(
559        feature = "rayon-threads",
560        doc = r##"
561    use csv_diff::{csv_diff::CsvByteDiffLocal, csv::Csv};
562    use csv_diff::diff_row::{ByteRecordLineInfo, DiffByteRecord};
563    use std::collections::HashSet;
564    use std::iter::FromIterator;
565    # fn main() -> Result<(), Box<dyn std::error::Error>> {
566    // some csv data with a header, where the first column is a unique id
567    let csv_data_left = "id,name,kind\n\
568                         1,lemon,fruit\n\
569                         2,strawberry,fruit";
570    let csv_data_right = "id,name,kind\n\
571                          1,lemon,fruit\n\
572                          2,strawberry,nut";
573
574    let csv_byte_diff = CsvByteDiffLocal::new()?;
575
576    let mut diff_byte_records = csv_byte_diff.diff(
577        Csv::with_reader_seek(csv_data_left.as_bytes()),
578        Csv::with_reader_seek(csv_data_right.as_bytes()),
579    )?;
580
581    diff_byte_records.sort_by_line();
582
583    let diff_byte_rows = diff_byte_records.as_slice();
584
585    assert_eq!(
586        diff_byte_rows,
587        &[DiffByteRecord::Modify {
588            delete: ByteRecordLineInfo::new(
589                csv::ByteRecord::from(vec!["2", "strawberry", "fruit"]),
590                3
591            ),
592            add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["2", "strawberry", "nut"]), 3),
593            field_indices: vec![2]
594        }]
595    );
596    Ok(())
597    # }
598    "##
599    )]
600    pub fn diff<R: Read + Seek + Send>(
601        &self,
602        csv_left: Csv<R>,
603        csv_right: Csv<R>,
604    ) -> csv::Result<DiffByteRecords> {
605        use crossbeam_channel::unbounded;
606
607        let (sender_total_lines_right, receiver_total_lines_right) = bounded(1);
608        let (sender_total_lines_left, receiver_total_lines_left) = bounded(1);
609        let (sender_csv_reader_right, receiver_csv_reader_right) = bounded(1);
610        let (sender_csv_reader_left, receiver_csv_reader_left) = bounded(1);
611        let (sender_right, receiver) = unbounded();
612        let sender_left = sender_right.clone();
613
614        self.hash_task_spawner.spawn_hashing_tasks_and_send_result(
615            CsvHashTaskLineSenders::new(
616                sender_left,
617                sender_total_lines_left,
618                sender_csv_reader_left,
619                csv_left,
620            ),
621            CsvHashTaskLineSenders::new(
622                sender_right,
623                sender_total_lines_right,
624                sender_csv_reader_right,
625                csv_right,
626            ),
627            &self.primary_key_columns,
628        );
629
630        self.recv_hashes_and_compare(
631            receiver_total_lines_left,
632            receiver_total_lines_right,
633            receiver_csv_reader_left,
634            receiver_csv_reader_right,
635            receiver,
636        )
637    }
638
639    fn recv_hashes_and_compare<R>(
640        &self,
641        receiver_total_lines_left: Receiver<u64>,
642        receiver_total_lines_right: Receiver<u64>,
643        receiver_csv_reader_left: Receiver<csv::Result<Reader<R>>>,
644        receiver_csv_reader_right: Receiver<csv::Result<Reader<R>>>,
645        receiver: Receiver<CsvLeftRightParseResult<RecordHashWithPosition>>,
646    ) -> csv::Result<DiffByteRecords>
647    where
648        R: Read + Seek + Send,
649    {
650        let (total_lines_right, total_lines_left) = (
651            receiver_total_lines_right.recv().unwrap_or_default(),
652            receiver_total_lines_left.recv().unwrap_or_default(),
653        );
654        let (csv_reader_right_for_diff_seek, csv_reader_left_for_diff_seek) = (
655            receiver_csv_reader_right.recv().unwrap()?,
656            receiver_csv_reader_left.recv().unwrap()?,
657        );
658        let max_capacity_for_hash_map_right =
659            if total_lines_right / 100 < total_lines_right && total_lines_right / 100 == 0 {
660                total_lines_right
661            } else {
662                total_lines_right / 100
663            } as usize;
664        let max_capacity_for_hash_map_left =
665            if total_lines_left / 100 < total_lines_left && total_lines_left / 100 == 0 {
666                total_lines_left
667            } else {
668                total_lines_left / 100
669            } as usize;
670
671        let mut csv_hash_comparer = CsvHashComparer::with_capacity_and_reader(
672            max_capacity_for_hash_map_left,
673            max_capacity_for_hash_map_right,
674            csv_reader_left_for_diff_seek,
675            csv_reader_right_for_diff_seek,
676        );
677        csv_hash_comparer.compare_csv_left_right_parse_result(receiver)
678    }
679}
680
681#[cfg(test)]
682mod tests {
683
684    use super::*;
685    use crate::csv::CsvReaderBuilderExt;
686    use crate::diff_result::DiffByteRecords;
687    use crate::diff_row::{ByteRecordLineInfo, DiffByteRecord};
688    use pretty_assertions::assert_eq;
689    use std::error::Error;
690
691    fn csv_diff_local_with_sorting<T: CsvHashTaskSpawnerLocal, R: Read + Seek + Send>(
692        csv_left: Csv<R>,
693        csv_right: Csv<R>,
694        mut expected: DiffByteRecords,
695        csv_diff: CsvByteDiffLocal<T>,
696    ) -> Result<(), Box<dyn Error>> {
697        let mut diff_res_actual = csv_diff.diff(csv_left, csv_right)?;
698
699        diff_res_actual.sort_by_line();
700        expected.sort_by_line();
701
702        assert_eq!(diff_res_actual, expected, "csv_diff_local failed");
703
704        Ok(())
705    }
706
707    fn csv_diff_with_sorting<T: CsvHashTaskSpawner, R: Read + Send + 'static>(
708        csv_left: Csv<R>,
709        csv_right: Csv<R>,
710        mut expected: DiffByteRecords,
711        csv_diff: CsvByteDiff<T>,
712    ) -> Result<(), Box<dyn Error>> {
713        let diff_iter = csv_diff.diff(csv_left, csv_right);
714
715        let mut actual = diff_iter.try_to_diff_byte_records()?;
716        actual.sort_by_line();
717        expected.sort_by_line();
718
719        assert_eq!(actual, expected, "csv_diff failed");
720        Ok(())
721    }
722
723    #[cfg(feature = "rayon-threads")]
724    #[test]
725    fn diff_empty_headers_flag_true_no_diff() -> Result<(), Box<dyn Error>> {
726        let csv_left = "";
727        let csv_right = "";
728        let expected = DiffByteRecords::new(
729            vec![],
730            (Some(csv::ByteRecord::new()), Some(csv::ByteRecord::new())).into(),
731            Some(0),
732        );
733
734        csv_diff_local_with_sorting(
735            Csv::with_reader_seek(csv_left.as_bytes()),
736            Csv::with_reader_seek(csv_right.as_bytes()),
737            expected.clone(),
738            CsvByteDiffLocal::new()?,
739        )?;
740
741        csv_diff_with_sorting(
742            Csv::with_reader(csv_left.as_bytes()),
743            Csv::with_reader(csv_right.as_bytes()),
744            expected,
745            CsvByteDiff::new()?,
746        )
747    }
748
749    #[cfg(feature = "rayon-threads")]
750    #[test]
751    fn diff_empty_headers_flag_false_no_diff() -> Result<(), Box<dyn Error>> {
752        let csv_left = "";
753        let csv_right = "";
754
755        let diff_res_actual = CsvByteDiffLocal::new()?
756            .diff(
757                csv::ReaderBuilder::new()
758                    .has_headers(false)
759                    .from_reader_seek(csv_left.as_bytes())
760                    .into(),
761                csv::ReaderBuilder::new()
762                    .has_headers(false)
763                    .from_reader_seek(csv_right.as_bytes())
764                    .into(),
765            )
766            .unwrap();
767        let diff_res_expected = DiffByteRecords::new(vec![], (None, None).into(), None);
768
769        assert_eq!(diff_res_actual, diff_res_expected);
770        Ok(())
771    }
772
773    #[cfg(feature = "rayon-threads")]
774    #[test]
775    fn diff_empty_with_header_no_diff() -> Result<(), Box<dyn Error>> {
776        let csv_left = "header1,header2,header3";
777        let csv_right = "header1,header2,header3";
778
779        let expected = DiffByteRecords::new(
780            vec![],
781            (
782                Some(Vec::from_iter(csv_left.split(",")).into()),
783                Some(Vec::from_iter(csv_left.split(",")).into()),
784            )
785                .into(),
786            Some(3),
787        );
788
789        csv_diff_local_with_sorting(
790            Csv::with_reader_seek(csv_left.as_bytes()),
791            Csv::with_reader_seek(csv_right.as_bytes()),
792            expected.clone(),
793            CsvByteDiffLocal::new()?,
794        )?;
795
796        csv_diff_with_sorting(
797            Csv::with_reader(csv_left.as_bytes()),
798            Csv::with_reader(csv_right.as_bytes()),
799            expected,
800            CsvByteDiff::new()?,
801        )
802    }
803
804    #[cfg(feature = "rayon-threads")]
805    #[test]
806    fn diff_one_line_with_header_no_diff() -> Result<(), Box<dyn Error>> {
807        let csv_left = "\
808                        header1,header2,header3\n\
809                        a,b,c";
810        let csv_right = "\
811                        header1,header2,header3\n\
812                        a,b,c";
813
814        let expected = DiffByteRecords::new(
815            vec![],
816            (
817                Some(vec!["header1", "header2", "header3"].into()),
818                Some(vec!["header1", "header2", "header3"].into()),
819            )
820                .into(),
821            Some(3),
822        );
823
824        csv_diff_local_with_sorting(
825            Csv::with_reader_seek(csv_left.as_bytes()),
826            Csv::with_reader_seek(csv_right.as_bytes()),
827            expected.clone(),
828            CsvByteDiffLocal::new()?,
829        )?;
830
831        csv_diff_with_sorting(
832            Csv::with_reader(csv_left.as_bytes()),
833            Csv::with_reader(csv_right.as_bytes()),
834            expected,
835            CsvByteDiff::new()?,
836        )
837    }
838
839    #[cfg(feature = "rayon-threads")]
840    #[test]
841    fn diff_one_line_no_header_no_diff() -> Result<(), Box<dyn Error>> {
842        let csv_left = "\
843                        a,b,c";
844        let csv_right = "\
845                        a,b,c";
846
847        let diff_res_actual = CsvByteDiffLocal::new()?
848            .diff(
849                csv::ReaderBuilder::new()
850                    .has_headers(false)
851                    .from_reader_seek(csv_left.as_bytes())
852                    .into(),
853                csv::ReaderBuilder::new()
854                    .has_headers(false)
855                    .from_reader_seek(csv_right.as_bytes())
856                    .into(),
857            )
858            .unwrap();
859        let diff_res_expected = DiffByteRecords::new(vec![], (None, None).into(), Some(3));
860
861        assert_eq!(diff_res_actual, diff_res_expected);
862        Ok(())
863    }
864
865    #[cfg(feature = "rayon-threads")]
866    #[test]
867    fn diff_both_empty_but_one_has_header_and_the_other_has_none_both_with_correct_header_flag_no_diff(
868    ) -> Result<(), Box<dyn Error>> {
869        let csv_left = "\
870                        header1,header2,header3";
871        let csv_right = "";
872
873        let diff_res_actual = CsvByteDiffLocal::new()?
874            .diff(
875                csv::ReaderBuilder::new()
876                    .has_headers(true)
877                    .from_reader_seek(csv_left.as_bytes())
878                    .into(),
879                csv::ReaderBuilder::new()
880                    .has_headers(false)
881                    .from_reader_seek(csv_right.as_bytes())
882                    .into(),
883            )
884            .unwrap();
885        let diff_res_expected = DiffByteRecords::new(
886            vec![],
887            (Some(vec!["header1", "header2", "header3"].into()), None).into(),
888            Some(3),
889        );
890
891        assert_eq!(diff_res_actual, diff_res_expected);
892        Ok(())
893    }
894
895    #[cfg(feature = "rayon-threads")]
896    #[test]
897    fn diff_both_empty_but_one_has_header_and_the_other_has_none_both_with_header_flag_true_no_diff(
898    ) -> Result<(), Box<dyn Error>> {
899        let csv_left = "\
900                        header1,header2,header3";
901        let csv_right = "";
902
903        let expected = DiffByteRecords::new(
904            vec![],
905            (
906                Some(vec!["header1", "header2", "header3"].into()),
907                Some(csv::ByteRecord::new()),
908            )
909                .into(),
910            Some(3),
911        );
912
913        csv_diff_local_with_sorting(
914            Csv::with_reader_seek(csv_left.as_bytes()),
915            Csv::with_reader_seek(csv_right.as_bytes()),
916            expected.clone(),
917            CsvByteDiffLocal::new()?,
918        )?;
919
920        csv_diff_with_sorting(
921            Csv::with_reader(csv_left.as_bytes()),
922            Csv::with_reader(csv_right.as_bytes()),
923            expected,
924            CsvByteDiff::new()?,
925        )
926    }
927
928    #[cfg(feature = "rayon-threads")]
929    #[test]
930    fn diff_one_line_with_header_crazy_characters_no_diff() -> Result<(), Box<dyn Error>> {
931        let csv_left = "\
932                        header1,header2,header3\n\
933                        ༼,౪,༽";
934        let csv_right = "\
935                        header1,header2,header3\n\
936                        ༼,౪,༽";
937
938        let expected = DiffByteRecords::new(
939            vec![],
940            (
941                Some(vec!["header1", "header2", "header3"].into()),
942                Some(vec!["header1", "header2", "header3"].into()),
943            )
944                .into(),
945            Some(3),
946        );
947
948        csv_diff_local_with_sorting(
949            Csv::with_reader_seek(csv_left.as_bytes()),
950            Csv::with_reader_seek(csv_right.as_bytes()),
951            expected.clone(),
952            CsvByteDiffLocal::new()?,
953        )?;
954
955        csv_diff_with_sorting(
956            Csv::with_reader(csv_left.as_bytes()),
957            Csv::with_reader(csv_right.as_bytes()),
958            expected,
959            CsvByteDiff::new()?,
960        )
961    }
962
963    #[cfg(feature = "rayon-threads")]
964    #[test]
965    fn diff_one_line_one_has_headers_one_does_not_no_diff() -> Result<(), Box<dyn Error>> {
966        let csv_left = "\
967                        header1,header2,header3\n\
968                        a,b,c";
969        let csv_right = "\
970                        a,b,c";
971
972        let diff_res_actual = CsvByteDiffLocal::new()?
973            .diff(
974                csv::ReaderBuilder::new()
975                    .has_headers(true)
976                    .from_reader_seek(csv_left.as_bytes())
977                    .into(),
978                csv::ReaderBuilder::new()
979                    .has_headers(false)
980                    .from_reader_seek(csv_right.as_bytes())
981                    .into(),
982            )
983            .unwrap();
984        let diff_res_expected = DiffByteRecords::new(
985            vec![],
986            (Some(vec!["header1", "header2", "header3"].into()), None).into(),
987            Some(3),
988        );
989
990        assert_eq!(diff_res_actual, diff_res_expected);
991        Ok(())
992    }
993
994    #[cfg(feature = "rayon-threads")]
995    #[test]
996    fn diff_one_line_with_header_crazy_characters_modified() -> Result<(), Box<dyn Error>> {
997        let csv_left = "\
998                        header1,header2,header3\n\
999                        ༼,౪,༽";
1000        let csv_right = "\
1001                        header1,header2,header3\n\
1002                        ༼,౪,༼";
1003
1004        let expected = DiffByteRecords::new(
1005            vec![DiffByteRecord::Modify {
1006                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["༼", "౪", "༽"]), 2),
1007                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["༼", "౪", "༼"]), 2),
1008                field_indices: vec![2],
1009            }],
1010            (
1011                Some(vec!["header1", "header2", "header3"].into()),
1012                Some(vec!["header1", "header2", "header3"].into()),
1013            )
1014                .into(),
1015            Some(3),
1016        );
1017
1018        csv_diff_local_with_sorting(
1019            Csv::with_reader_seek(csv_left.as_bytes()),
1020            Csv::with_reader_seek(csv_right.as_bytes()),
1021            expected.clone(),
1022            CsvByteDiffLocal::new()?,
1023        )?;
1024
1025        csv_diff_with_sorting(
1026            Csv::with_reader(csv_left.as_bytes()),
1027            Csv::with_reader(csv_right.as_bytes()),
1028            expected,
1029            CsvByteDiff::new()?,
1030        )
1031    }
1032
1033    #[cfg(feature = "rayon-threads")]
1034    #[test]
1035    fn diff_one_line_with_header_added_one_line() -> Result<(), Box<dyn Error>> {
1036        let csv_left = "\
1037                        header1,header2,header3\n\
1038                        ";
1039        let csv_right = "\
1040                        header1,header2,header3\n\
1041                        a,b,c";
1042
1043        let expected = DiffByteRecords::new(
1044            vec![DiffByteRecord::Add(ByteRecordLineInfo::new(
1045                csv::ByteRecord::from(vec!["a", "b", "c"]),
1046                2,
1047            ))],
1048            (
1049                Some(vec!["header1", "header2", "header3"].into()),
1050                Some(vec!["header1", "header2", "header3"].into()),
1051            )
1052                .into(),
1053            Some(3),
1054        );
1055
1056        csv_diff_local_with_sorting(
1057            Csv::with_reader_seek(csv_left.as_bytes()),
1058            Csv::with_reader_seek(csv_right.as_bytes()),
1059            expected.clone(),
1060            CsvByteDiffLocal::new()?,
1061        )?;
1062
1063        csv_diff_with_sorting(
1064            Csv::with_reader(csv_left.as_bytes()),
1065            Csv::with_reader(csv_right.as_bytes()),
1066            expected,
1067            CsvByteDiff::new()?,
1068        )
1069    }
1070
1071    #[cfg(feature = "rayon-threads")]
1072    #[test]
1073    fn diff_one_line_one_with_header_and_one_not_added_one_line() -> Result<(), Box<dyn Error>> {
1074        let csv_left = "\
1075                        header1,header2,header3\n\
1076                        ";
1077        let csv_right = "\
1078                        a,b,c";
1079
1080        let diff_res_actual = CsvByteDiffLocal::new()?
1081            .diff(
1082                csv::ReaderBuilder::new()
1083                    .has_headers(true)
1084                    .from_reader_seek(csv_left.as_bytes())
1085                    .into(),
1086                csv::ReaderBuilder::new()
1087                    .has_headers(false)
1088                    .from_reader_seek(csv_right.as_bytes())
1089                    .into(),
1090            )
1091            .unwrap();
1092        let diff_res_expected = DiffByteRecords::new(
1093            vec![DiffByteRecord::Add(ByteRecordLineInfo::new(
1094                csv::ByteRecord::from(vec!["a", "b", "c"]),
1095                1,
1096            ))],
1097            (Some(vec!["header1", "header2", "header3"].into()), None).into(),
1098            Some(3),
1099        );
1100
1101        assert_eq!(diff_res_actual, diff_res_expected);
1102        Ok(())
1103    }
1104
1105    #[cfg(feature = "rayon-threads")]
1106    #[test]
1107    fn diff_one_line_with_header_deleted_one_line() -> Result<(), Box<dyn Error>> {
1108        let csv_left = "\
1109                        header1,header2,header3\n\
1110                        a,b,c";
1111        let csv_right = "\
1112                        header1,header2,header3\n\
1113                        ";
1114
1115        let expected = DiffByteRecords::new(
1116            vec![DiffByteRecord::Delete(ByteRecordLineInfo::new(
1117                csv::ByteRecord::from(vec!["a", "b", "c"]),
1118                2,
1119            ))],
1120            (
1121                Some(vec!["header1", "header2", "header3"].into()),
1122                Some(vec!["header1", "header2", "header3"].into()),
1123            )
1124                .into(),
1125            Some(3),
1126        );
1127
1128        csv_diff_local_with_sorting(
1129            Csv::with_reader_seek(csv_left.as_bytes()),
1130            Csv::with_reader_seek(csv_right.as_bytes()),
1131            expected.clone(),
1132            CsvByteDiffLocal::new()?,
1133        )?;
1134
1135        csv_diff_with_sorting(
1136            Csv::with_reader(csv_left.as_bytes()),
1137            Csv::with_reader(csv_right.as_bytes()),
1138            expected,
1139            CsvByteDiff::new()?,
1140        )
1141    }
1142
1143    #[cfg(feature = "rayon-threads")]
1144    #[test]
1145    fn diff_one_line_one_with_header_and_one_not_deleted_one_line() -> Result<(), Box<dyn Error>> {
1146        let csv_left = "\
1147                        a,b,c";
1148        let csv_right = "\
1149                        header1,header2,header3\n\
1150                        ";
1151
1152        let diff_res_actual = CsvByteDiffLocal::new()?
1153            .diff(
1154                csv::ReaderBuilder::new()
1155                    .has_headers(false)
1156                    .from_reader_seek(csv_left.as_bytes())
1157                    .into(),
1158                csv::ReaderBuilder::new()
1159                    .has_headers(true)
1160                    .from_reader_seek(csv_right.as_bytes())
1161                    .into(),
1162            )
1163            .unwrap();
1164        let diff_res_expected = DiffByteRecords::new(
1165            vec![DiffByteRecord::Delete(ByteRecordLineInfo::new(
1166                csv::ByteRecord::from(vec!["a", "b", "c"]),
1167                1,
1168            ))],
1169            (None, Some(vec!["header1", "header2", "header3"].into())).into(),
1170            Some(3),
1171        );
1172
1173        assert_eq!(diff_res_actual, diff_res_expected);
1174        Ok(())
1175    }
1176
1177    #[cfg(feature = "rayon-threads")]
1178    #[test]
1179    fn diff_one_line_with_header_modified_one_field() -> Result<(), Box<dyn Error>> {
1180        let csv_left = "\
1181                        header1,header2,header3\n\
1182                        a,b,c";
1183        let csv_right = "\
1184                        header1,header2,header3\n\
1185                        a,b,d";
1186
1187        let expected = DiffByteRecords::new(
1188            vec![DiffByteRecord::Modify {
1189                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
1190                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 2),
1191                field_indices: vec![2],
1192            }],
1193            (
1194                Some(vec!["header1", "header2", "header3"].into()),
1195                Some(vec!["header1", "header2", "header3"].into()),
1196            )
1197                .into(),
1198            Some(3),
1199        );
1200
1201        csv_diff_local_with_sorting(
1202            Csv::with_reader_seek(csv_left.as_bytes()),
1203            Csv::with_reader_seek(csv_right.as_bytes()),
1204            expected.clone(),
1205            CsvByteDiffLocal::new()?,
1206        )?;
1207
1208        csv_diff_with_sorting(
1209            Csv::with_reader(csv_left.as_bytes()),
1210            Csv::with_reader(csv_right.as_bytes()),
1211            expected,
1212            CsvByteDiff::new()?,
1213        )
1214    }
1215
1216    #[cfg(feature = "rayon-threads")]
1217    #[test]
1218    fn diff_one_line_with_header_modified_all_fields() -> Result<(), Box<dyn Error>> {
1219        let csv_left = "\
1220                        header1,header2,header3\n\
1221                        a,b,c";
1222        let csv_right = "\
1223                        header1,header2,header3\n\
1224                        a,c,d";
1225
1226        let expected = DiffByteRecords::new(
1227            vec![DiffByteRecord::Modify {
1228                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
1229                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "c", "d"]), 2),
1230                field_indices: vec![1, 2],
1231            }],
1232            (
1233                Some(vec!["header1", "header2", "header3"].into()),
1234                Some(vec!["header1", "header2", "header3"].into()),
1235            )
1236                .into(),
1237            Some(3),
1238        );
1239
1240        csv_diff_local_with_sorting(
1241            Csv::with_reader_seek(csv_left.as_bytes()),
1242            Csv::with_reader_seek(csv_right.as_bytes()),
1243            expected.clone(),
1244            CsvByteDiffLocal::new()?,
1245        )?;
1246
1247        csv_diff_with_sorting(
1248            Csv::with_reader(csv_left.as_bytes()),
1249            Csv::with_reader(csv_right.as_bytes()),
1250            expected,
1251            CsvByteDiff::new()?,
1252        )
1253    }
1254
1255    #[cfg(feature = "rayon-threads")]
1256    #[test]
1257    fn diff_one_line_with_header_modified_all_fields_long() -> Result<(), Box<dyn Error>> {
1258        let csv_left = "\
1259                        header1,header2,header3,header4,header5,header6,header7,header8\n\
1260                        a,b,c,d,e,f,g,h";
1261        let csv_right = "\
1262                        header1,header2,header3,header4,header5,header6,header7,header8\n\
1263                        a,c,d,e,f,g,h,i";
1264
1265        let expected = DiffByteRecords::new(
1266            vec![DiffByteRecord::Modify {
1267                delete: ByteRecordLineInfo::new(
1268                    csv::ByteRecord::from(vec!["a", "b", "c", "d", "e", "f", "g", "h"]),
1269                    2,
1270                ),
1271                add: ByteRecordLineInfo::new(
1272                    csv::ByteRecord::from(vec!["a", "c", "d", "e", "f", "g", "h", "i"]),
1273                    2,
1274                ),
1275                field_indices: vec![1, 2, 3, 4, 5, 6, 7],
1276            }],
1277            (
1278                Some(
1279                    vec![
1280                        "header1", "header2", "header3", "header4", "header5", "header6",
1281                        "header7", "header8",
1282                    ]
1283                    .into(),
1284                ),
1285                Some(
1286                    vec![
1287                        "header1", "header2", "header3", "header4", "header5", "header6",
1288                        "header7", "header8",
1289                    ]
1290                    .into(),
1291                ),
1292            )
1293                .into(),
1294            Some(8),
1295        );
1296
1297        csv_diff_local_with_sorting(
1298            Csv::with_reader_seek(csv_left.as_bytes()),
1299            Csv::with_reader_seek(csv_right.as_bytes()),
1300            expected.clone(),
1301            CsvByteDiffLocal::new()?,
1302        )?;
1303
1304        csv_diff_with_sorting(
1305            Csv::with_reader(csv_left.as_bytes()),
1306            Csv::with_reader(csv_right.as_bytes()),
1307            expected,
1308            CsvByteDiff::new()?,
1309        )
1310    }
1311
1312    #[cfg(feature = "rayon-threads")]
1313    #[test]
1314    fn diff_multiple_lines_with_header_no_diff() -> Result<(), Box<dyn Error>> {
1315        let csv_left = "\
1316                        header1,header2,header3\n\
1317                        a,b,c\n\
1318                        d,e,f";
1319        let csv_right = "\
1320                        header1,header2,header3\n\
1321                        a,b,c\n\
1322                        d,e,f";
1323
1324        let expected = DiffByteRecords::new(
1325            vec![],
1326            (
1327                Some(vec!["header1", "header2", "header3"].into()),
1328                Some(vec!["header1", "header2", "header3"].into()),
1329            )
1330                .into(),
1331            Some(3),
1332        );
1333
1334        csv_diff_local_with_sorting(
1335            Csv::with_reader_seek(csv_left.as_bytes()),
1336            Csv::with_reader_seek(csv_right.as_bytes()),
1337            expected,
1338            CsvByteDiffLocal::new()?,
1339        )
1340    }
1341
1342    #[cfg(feature = "rayon-threads")]
1343    #[test]
1344    fn diff_multiple_lines_with_header_different_order_no_diff() -> Result<(), Box<dyn Error>> {
1345        let csv_left = "\
1346                        header1,header2,header3\n\
1347                        a,b,c\n\
1348                        d,e,f";
1349        let csv_right = "\
1350                        header1,header2,header3\n\
1351                        d,e,f\n\
1352                        a,b,c";
1353
1354        let expected = DiffByteRecords::new(
1355            vec![],
1356            (
1357                Some(vec!["header1", "header2", "header3"].into()),
1358                Some(vec!["header1", "header2", "header3"].into()),
1359            )
1360                .into(),
1361            Some(3),
1362        );
1363
1364        csv_diff_local_with_sorting(
1365            Csv::with_reader_seek(csv_left.as_bytes()),
1366            Csv::with_reader_seek(csv_right.as_bytes()),
1367            expected.clone(),
1368            CsvByteDiffLocal::new()?,
1369        )?;
1370
1371        csv_diff_with_sorting(
1372            Csv::with_reader(csv_left.as_bytes()),
1373            Csv::with_reader(csv_right.as_bytes()),
1374            expected,
1375            CsvByteDiff::new()?,
1376        )
1377    }
1378
1379    #[cfg(feature = "rayon-threads")]
1380    #[test]
1381    fn diff_multiple_lines_with_header_added_one_line_at_start() -> Result<(), Box<dyn Error>> {
1382        let csv_left = "\
1383                        header1,header2,header3\n\
1384                        a,b,c\n\
1385                        d,e,f";
1386        let csv_right = "\
1387                        header1,header2,header3\n\
1388                        x,y,z\n\
1389                        a,b,c\n\
1390                        d,e,f";
1391
1392        let expected = DiffByteRecords::new(
1393            vec![DiffByteRecord::Add(ByteRecordLineInfo::new(
1394                csv::ByteRecord::from(vec!["x", "y", "z"]),
1395                2,
1396            ))],
1397            (
1398                Some(vec!["header1", "header2", "header3"].into()),
1399                Some(vec!["header1", "header2", "header3"].into()),
1400            )
1401                .into(),
1402            Some(3),
1403        );
1404
1405        csv_diff_local_with_sorting(
1406            Csv::with_reader_seek(csv_left.as_bytes()),
1407            Csv::with_reader_seek(csv_right.as_bytes()),
1408            expected.clone(),
1409            CsvByteDiffLocal::new()?,
1410        )?;
1411
1412        csv_diff_with_sorting(
1413            Csv::with_reader(csv_left.as_bytes()),
1414            Csv::with_reader(csv_right.as_bytes()),
1415            expected,
1416            CsvByteDiff::new()?,
1417        )
1418    }
1419
1420    #[cfg(feature = "rayon-threads")]
1421    #[test]
1422    fn diff_multiple_lines_with_header_added_one_line_at_middle() -> Result<(), Box<dyn Error>> {
1423        let csv_left = "\
1424                        header1,header2,header3\n\
1425                        a,b,c\n\
1426                        d,e,f";
1427        let csv_right = "\
1428                        header1,header2,header3\n\
1429                        a,b,c\n\
1430                        x,y,z\n\
1431                        d,e,f";
1432
1433        let expected = DiffByteRecords::new(
1434            vec![DiffByteRecord::Add(ByteRecordLineInfo::new(
1435                csv::ByteRecord::from(vec!["x", "y", "z"]),
1436                3,
1437            ))],
1438            (
1439                Some(vec!["header1", "header2", "header3"].into()),
1440                Some(vec!["header1", "header2", "header3"].into()),
1441            )
1442                .into(),
1443            Some(3),
1444        );
1445
1446        csv_diff_local_with_sorting(
1447            Csv::with_reader_seek(csv_left.as_bytes()),
1448            Csv::with_reader_seek(csv_right.as_bytes()),
1449            expected.clone(),
1450            CsvByteDiffLocal::new()?,
1451        )?;
1452
1453        csv_diff_with_sorting(
1454            Csv::with_reader(csv_left.as_bytes()),
1455            Csv::with_reader(csv_right.as_bytes()),
1456            expected,
1457            CsvByteDiff::new()?,
1458        )
1459    }
1460
1461    #[cfg(feature = "rayon-threads")]
1462    #[test]
1463    fn diff_multiple_lines_with_header_added_one_line_at_end() -> Result<(), Box<dyn Error>> {
1464        let csv_left = "\
1465                        header1,header2,header3\n\
1466                        a,b,c\n\
1467                        d,e,f";
1468        let csv_right = "\
1469                        header1,header2,header3\n\
1470                        a,b,c\n\
1471                        d,e,f\n\
1472                        x,y,z";
1473
1474        let expected = DiffByteRecords::new(
1475            vec![DiffByteRecord::Add(ByteRecordLineInfo::new(
1476                csv::ByteRecord::from(vec!["x", "y", "z"]),
1477                4,
1478            ))],
1479            (
1480                Some(vec!["header1", "header2", "header3"].into()),
1481                Some(vec!["header1", "header2", "header3"].into()),
1482            )
1483                .into(),
1484            Some(3),
1485        );
1486
1487        csv_diff_local_with_sorting(
1488            Csv::with_reader_seek(csv_left.as_bytes()),
1489            Csv::with_reader_seek(csv_right.as_bytes()),
1490            expected.clone(),
1491            CsvByteDiffLocal::new()?,
1492        )?;
1493
1494        csv_diff_with_sorting(
1495            Csv::with_reader(csv_left.as_bytes()),
1496            Csv::with_reader(csv_right.as_bytes()),
1497            expected,
1498            CsvByteDiff::new()?,
1499        )
1500    }
1501
1502    #[cfg(feature = "rayon-threads")]
1503    #[test]
1504    fn diff_multiple_lines_with_header_deleted_one_line_at_start() -> Result<(), Box<dyn Error>> {
1505        let csv_left = "\
1506                        header1,header2,header3\n\
1507                        x,y,z\n\
1508                        a,b,c\n\
1509                        d,e,f";
1510        let csv_right = "\
1511                        header1,header2,header3\n\
1512                        a,b,c\n\
1513                        d,e,f";
1514
1515        let expected = DiffByteRecords::new(
1516            vec![DiffByteRecord::Delete(ByteRecordLineInfo::new(
1517                csv::ByteRecord::from(vec!["x", "y", "z"]),
1518                2,
1519            ))],
1520            (
1521                Some(vec!["header1", "header2", "header3"].into()),
1522                Some(vec!["header1", "header2", "header3"].into()),
1523            )
1524                .into(),
1525            Some(3),
1526        );
1527
1528        csv_diff_local_with_sorting(
1529            Csv::with_reader_seek(csv_left.as_bytes()),
1530            Csv::with_reader_seek(csv_right.as_bytes()),
1531            expected.clone(),
1532            CsvByteDiffLocal::new()?,
1533        )?;
1534
1535        csv_diff_with_sorting(
1536            Csv::with_reader(csv_left.as_bytes()),
1537            Csv::with_reader(csv_right.as_bytes()),
1538            expected,
1539            CsvByteDiff::new()?,
1540        )
1541    }
1542
1543    #[cfg(feature = "rayon-threads")]
1544    #[test]
1545    fn diff_multiple_lines_with_header_deleted_one_line_at_middle() -> Result<(), Box<dyn Error>> {
1546        let csv_left = "\
1547                        header1,header2,header3\n\
1548                        a,b,c\n\
1549                        x,y,z\n\
1550                        d,e,f";
1551        let csv_right = "\
1552                        header1,header2,header3\n\
1553                        a,b,c\n\
1554                        d,e,f";
1555
1556        let expected = DiffByteRecords::new(
1557            vec![DiffByteRecord::Delete(ByteRecordLineInfo::new(
1558                csv::ByteRecord::from(vec!["x", "y", "z"]),
1559                3,
1560            ))],
1561            (
1562                Some(vec!["header1", "header2", "header3"].into()),
1563                Some(vec!["header1", "header2", "header3"].into()),
1564            )
1565                .into(),
1566            Some(3),
1567        );
1568
1569        csv_diff_local_with_sorting(
1570            Csv::with_reader_seek(csv_left.as_bytes()),
1571            Csv::with_reader_seek(csv_right.as_bytes()),
1572            expected.clone(),
1573            CsvByteDiffLocal::new()?,
1574        )?;
1575
1576        csv_diff_with_sorting(
1577            Csv::with_reader(csv_left.as_bytes()),
1578            Csv::with_reader(csv_right.as_bytes()),
1579            expected,
1580            CsvByteDiff::new()?,
1581        )
1582    }
1583
1584    #[cfg(feature = "rayon-threads")]
1585    #[test]
1586    fn diff_multiple_lines_with_header_deleted_one_line_at_end() -> Result<(), Box<dyn Error>> {
1587        let csv_left = "\
1588                        header1,header2,header3\n\
1589                        a,b,c\n\
1590                        d,e,f\n\
1591                        x,y,z";
1592        let csv_right = "\
1593                        header1,header2,header3\n\
1594                        a,b,c\n\
1595                        d,e,f";
1596
1597        let expected = DiffByteRecords::new(
1598            vec![DiffByteRecord::Delete(ByteRecordLineInfo::new(
1599                csv::ByteRecord::from(vec!["x", "y", "z"]),
1600                4,
1601            ))],
1602            (
1603                Some(vec!["header1", "header2", "header3"].into()),
1604                Some(vec!["header1", "header2", "header3"].into()),
1605            )
1606                .into(),
1607            Some(3),
1608        );
1609
1610        csv_diff_local_with_sorting(
1611            Csv::with_reader_seek(csv_left.as_bytes()),
1612            Csv::with_reader_seek(csv_right.as_bytes()),
1613            expected.clone(),
1614            CsvByteDiffLocal::new()?,
1615        )?;
1616
1617        csv_diff_with_sorting(
1618            Csv::with_reader(csv_left.as_bytes()),
1619            Csv::with_reader(csv_right.as_bytes()),
1620            expected,
1621            CsvByteDiff::new()?,
1622        )
1623    }
1624
1625    #[cfg(feature = "rayon-threads")]
1626    #[test]
1627    fn diff_multiple_lines_with_header_modified_one_line_at_start() -> Result<(), Box<dyn Error>> {
1628        let csv_left = "\
1629                        header1,header2,header3\n\
1630                        a,b,c\n\
1631                        d,e,f\n\
1632                        x,y,z";
1633        let csv_right = "\
1634                        header1,header2,header3\n\
1635                        a,x,c\n\
1636                        d,e,f\n\
1637                        x,y,z";
1638
1639        let expected = DiffByteRecords::new(
1640            vec![DiffByteRecord::Modify {
1641                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
1642                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "x", "c"]), 2),
1643                field_indices: vec![1],
1644            }],
1645            (
1646                Some(vec!["header1", "header2", "header3"].into()),
1647                Some(vec!["header1", "header2", "header3"].into()),
1648            )
1649                .into(),
1650            Some(3),
1651        );
1652
1653        csv_diff_local_with_sorting(
1654            Csv::with_reader_seek(csv_left.as_bytes()),
1655            Csv::with_reader_seek(csv_right.as_bytes()),
1656            expected.clone(),
1657            CsvByteDiffLocal::new()?,
1658        )?;
1659
1660        csv_diff_with_sorting(
1661            Csv::with_reader(csv_left.as_bytes()),
1662            Csv::with_reader(csv_right.as_bytes()),
1663            expected,
1664            CsvByteDiff::new()?,
1665        )
1666    }
1667
1668    #[cfg(feature = "rayon-threads")]
1669    #[test]
1670    fn diff_multiple_lines_with_header_modified_one_line_at_start_different_order(
1671    ) -> Result<(), Box<dyn Error>> {
1672        let csv_left = "\
1673                        header1,header2,header3\n\
1674                        a,b,c\n\
1675                        d,e,f\n\
1676                        x,y,z";
1677        let csv_right = "\
1678                        header1,header2,header3\n\
1679                        d,e,f\n\
1680                        a,x,c\n\
1681                        x,y,z";
1682
1683        let expected = DiffByteRecords::new(
1684            vec![DiffByteRecord::Modify {
1685                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
1686                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "x", "c"]), 3),
1687                field_indices: vec![1],
1688            }],
1689            (
1690                Some(vec!["header1", "header2", "header3"].into()),
1691                Some(vec!["header1", "header2", "header3"].into()),
1692            )
1693                .into(),
1694            Some(3),
1695        );
1696
1697        csv_diff_local_with_sorting(
1698            Csv::with_reader_seek(csv_left.as_bytes()),
1699            Csv::with_reader_seek(csv_right.as_bytes()),
1700            expected.clone(),
1701            CsvByteDiffLocal::new()?,
1702        )?;
1703
1704        csv_diff_with_sorting(
1705            Csv::with_reader(csv_left.as_bytes()),
1706            Csv::with_reader(csv_right.as_bytes()),
1707            expected,
1708            CsvByteDiff::new()?,
1709        )
1710    }
1711
1712    #[cfg(feature = "rayon-threads")]
1713    #[test]
1714    fn diff_multiple_lines_with_header_modified_one_line_at_middle() -> Result<(), Box<dyn Error>> {
1715        let csv_left = "\
1716                        header1,header2,header3\n\
1717                        a,b,c\n\
1718                        d,e,f\n\
1719                        x,y,z";
1720        let csv_right = "\
1721                        header1,header2,header3\n\
1722                        a,b,c\n\
1723                        d,x,f\n\
1724                        x,y,z";
1725
1726        let expected = DiffByteRecords::new(
1727            vec![DiffByteRecord::Modify {
1728                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "e", "f"]), 3),
1729                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "x", "f"]), 3),
1730                field_indices: vec![1],
1731            }],
1732            (
1733                Some(vec!["header1", "header2", "header3"].into()),
1734                Some(vec!["header1", "header2", "header3"].into()),
1735            )
1736                .into(),
1737            Some(3),
1738        );
1739
1740        csv_diff_local_with_sorting(
1741            Csv::with_reader_seek(csv_left.as_bytes()),
1742            Csv::with_reader_seek(csv_right.as_bytes()),
1743            expected.clone(),
1744            CsvByteDiffLocal::new()?,
1745        )?;
1746
1747        csv_diff_with_sorting(
1748            Csv::with_reader(csv_left.as_bytes()),
1749            Csv::with_reader(csv_right.as_bytes()),
1750            expected,
1751            CsvByteDiff::new()?,
1752        )
1753    }
1754
1755    #[cfg(feature = "rayon-threads")]
1756    #[test]
1757    fn diff_multiple_lines_with_header_modified_one_line_at_middle_different_order(
1758    ) -> Result<(), Box<dyn Error>> {
1759        let csv_left = "\
1760                        header1,header2,header3\n\
1761                        a,b,c\n\
1762                        d,e,f\n\
1763                        x,y,z";
1764        let csv_right = "\
1765                        header1,header2,header3\n\
1766                        d,x,f\n\
1767                        a,b,c\n\
1768                        x,y,z";
1769
1770        let expected = DiffByteRecords::new(
1771            vec![DiffByteRecord::Modify {
1772                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "e", "f"]), 3),
1773                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["d", "x", "f"]), 2),
1774                field_indices: vec![1],
1775            }],
1776            (
1777                Some(vec!["header1", "header2", "header3"].into()),
1778                Some(vec!["header1", "header2", "header3"].into()),
1779            )
1780                .into(),
1781            Some(3),
1782        );
1783
1784        csv_diff_local_with_sorting(
1785            Csv::with_reader_seek(csv_left.as_bytes()),
1786            Csv::with_reader_seek(csv_right.as_bytes()),
1787            expected.clone(),
1788            CsvByteDiffLocal::new()?,
1789        )?;
1790
1791        csv_diff_with_sorting(
1792            Csv::with_reader(csv_left.as_bytes()),
1793            Csv::with_reader(csv_right.as_bytes()),
1794            expected,
1795            CsvByteDiff::new()?,
1796        )
1797    }
1798
1799    #[cfg(feature = "rayon-threads")]
1800    #[test]
1801    fn diff_multiple_lines_with_header_modified_one_line_at_end() -> Result<(), Box<dyn Error>> {
1802        let csv_left = "\
1803                        header1,header2,header3\n\
1804                        a,b,c\n\
1805                        d,e,f\n\
1806                        x,y,z";
1807        let csv_right = "\
1808                        header1,header2,header3\n\
1809                        a,b,c\n\
1810                        d,e,f\n\
1811                        x,x,z";
1812
1813        let expected = DiffByteRecords::new(
1814            vec![DiffByteRecord::Modify {
1815                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "y", "z"]), 4),
1816                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "x", "z"]), 4),
1817                field_indices: vec![1],
1818            }],
1819            (
1820                Some(vec!["header1", "header2", "header3"].into()),
1821                Some(vec!["header1", "header2", "header3"].into()),
1822            )
1823                .into(),
1824            Some(3),
1825        );
1826
1827        csv_diff_local_with_sorting(
1828            Csv::with_reader_seek(csv_left.as_bytes()),
1829            Csv::with_reader_seek(csv_right.as_bytes()),
1830            expected.clone(),
1831            CsvByteDiffLocal::new()?,
1832        )?;
1833
1834        csv_diff_with_sorting(
1835            Csv::with_reader(csv_left.as_bytes()),
1836            Csv::with_reader(csv_right.as_bytes()),
1837            expected,
1838            CsvByteDiff::new()?,
1839        )
1840    }
1841
1842    #[cfg(feature = "rayon-threads")]
1843    #[test]
1844    fn diff_multiple_lines_with_header_modified_one_line_at_end_different_order(
1845    ) -> Result<(), Box<dyn Error>> {
1846        let csv_left = "\
1847                        header1,header2,header3\n\
1848                        a,b,c\n\
1849                        d,e,f\n\
1850                        x,y,z";
1851        let csv_right = "\
1852                        header1,header2,header3\n\
1853                        x,x,z\n\
1854                        a,b,c\n\
1855                        d,e,f";
1856
1857        let expected = DiffByteRecords::new(
1858            vec![DiffByteRecord::Modify {
1859                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "y", "z"]), 4),
1860                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "x", "z"]), 2),
1861                field_indices: vec![1],
1862            }],
1863            (
1864                Some(vec!["header1", "header2", "header3"].into()),
1865                Some(vec!["header1", "header2", "header3"].into()),
1866            )
1867                .into(),
1868            Some(3),
1869        );
1870
1871        csv_diff_local_with_sorting(
1872            Csv::with_reader_seek(csv_left.as_bytes()),
1873            Csv::with_reader_seek(csv_right.as_bytes()),
1874            expected.clone(),
1875            CsvByteDiffLocal::new()?,
1876        )?;
1877
1878        csv_diff_with_sorting(
1879            Csv::with_reader(csv_left.as_bytes()),
1880            Csv::with_reader(csv_right.as_bytes()),
1881            expected,
1882            CsvByteDiff::new()?,
1883        )
1884    }
1885
1886    #[cfg(feature = "rayon-threads")]
1887    #[test]
1888    fn diff_multiple_lines_with_header_added_and_deleted_same_lines() -> Result<(), Box<dyn Error>>
1889    {
1890        let csv_left = "\
1891                        header1,header2,header3\n\
1892                        a,b,c\n\
1893                        d,e,f\n\
1894                        x,y,z";
1895        let csv_right = "\
1896                        header1,header2,header3\n\
1897                        a,b,c\n\
1898                        g,h,i\n\
1899                        x,y,z";
1900
1901        let expected = DiffByteRecords::new(
1902            vec![
1903                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1904                    csv::ByteRecord::from(vec!["d", "e", "f"]),
1905                    3,
1906                )),
1907                DiffByteRecord::Add(ByteRecordLineInfo::new(
1908                    csv::ByteRecord::from(vec!["g", "h", "i"]),
1909                    3,
1910                )),
1911            ],
1912            (
1913                Some(vec!["header1", "header2", "header3"].into()),
1914                Some(vec!["header1", "header2", "header3"].into()),
1915            )
1916                .into(),
1917            Some(3),
1918        );
1919
1920        csv_diff_local_with_sorting(
1921            Csv::with_reader_seek(csv_left.as_bytes()),
1922            Csv::with_reader_seek(csv_right.as_bytes()),
1923            expected.clone(),
1924            CsvByteDiffLocal::new()?,
1925        )?;
1926
1927        csv_diff_with_sorting(
1928            Csv::with_reader(csv_left.as_bytes()),
1929            Csv::with_reader(csv_right.as_bytes()),
1930            expected,
1931            CsvByteDiff::new()?,
1932        )
1933    }
1934
1935    #[cfg(feature = "rayon-threads")]
1936    #[test]
1937    fn diff_multiple_lines_with_header_added_and_deleted_different_lines(
1938    ) -> Result<(), Box<dyn Error>> {
1939        let csv_left = "\
1940                        header1,header2,header3\n\
1941                        a,b,c\n\
1942                        d,e,f\n\
1943                        x,y,z";
1944        let csv_right = "\
1945                        header1,header2,header3\n\
1946                        a,b,c\n\
1947                        x,y,z\n\
1948                        g,h,i";
1949
1950        let expected = DiffByteRecords::new(
1951            vec![
1952                DiffByteRecord::Delete(ByteRecordLineInfo::new(
1953                    csv::ByteRecord::from(vec!["d", "e", "f"]),
1954                    3,
1955                )),
1956                DiffByteRecord::Add(ByteRecordLineInfo::new(
1957                    csv::ByteRecord::from(vec!["g", "h", "i"]),
1958                    4,
1959                )),
1960            ],
1961            (
1962                Some(vec!["header1", "header2", "header3"].into()),
1963                Some(vec!["header1", "header2", "header3"].into()),
1964            )
1965                .into(),
1966            Some(3),
1967        );
1968
1969        csv_diff_local_with_sorting(
1970            Csv::with_reader_seek(csv_left.as_bytes()),
1971            Csv::with_reader_seek(csv_right.as_bytes()),
1972            expected.clone(),
1973            CsvByteDiffLocal::new()?,
1974        )?;
1975
1976        csv_diff_with_sorting(
1977            Csv::with_reader(csv_left.as_bytes()),
1978            Csv::with_reader(csv_right.as_bytes()),
1979            expected,
1980            CsvByteDiff::new()?,
1981        )
1982    }
1983
1984    #[cfg(feature = "rayon-threads")]
1985    #[test]
1986    fn diff_multiple_lines_with_header_added_modified_and_deleted() -> Result<(), Box<dyn Error>> {
1987        let csv_left = "\
1988                        header1,header2,header3\n\
1989                        a,b,c\n\
1990                        d,e,f\n\
1991                        x,y,z";
1992        let csv_right = "\
1993                        header1,header2,header3\n\
1994                        g,h,i\n\
1995                        a,b,d\n\
1996                        x,y,z";
1997
1998        let expected = DiffByteRecords::new(
1999            vec![
2000                DiffByteRecord::Modify {
2001                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2002                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 3),
2003                    field_indices: vec![2],
2004                },
2005                DiffByteRecord::Add(ByteRecordLineInfo::new(
2006                    csv::ByteRecord::from(vec!["g", "h", "i"]),
2007                    2,
2008                )),
2009                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2010                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2011                    3,
2012                )),
2013            ],
2014            (
2015                Some(vec!["header1", "header2", "header3"].into()),
2016                Some(vec!["header1", "header2", "header3"].into()),
2017            )
2018                .into(),
2019            Some(3),
2020        );
2021
2022        csv_diff_local_with_sorting(
2023            Csv::with_reader_seek(csv_left.as_bytes()),
2024            Csv::with_reader_seek(csv_right.as_bytes()),
2025            expected.clone(),
2026            CsvByteDiffLocal::new()?,
2027        )?;
2028
2029        csv_diff_with_sorting(
2030            Csv::with_reader(csv_left.as_bytes()),
2031            Csv::with_reader(csv_right.as_bytes()),
2032            expected,
2033            CsvByteDiff::new()?,
2034        )
2035    }
2036
2037    #[cfg(feature = "rayon-threads")]
2038    #[test]
2039    fn diff_multiple_lines_different_delimiters_with_header_added_modified_and_deleted(
2040    ) -> Result<(), Box<dyn Error>> {
2041        let csv_left = "\
2042                        header1,header2,header3\n\
2043                        a,b,c\n\
2044                        d,e,f\n\
2045                        x,y,z";
2046        let csv_right = "\
2047                        header1;header2;header3\n\
2048                        g;h;i\n\
2049                        a;b;d\n\
2050                        x;y;z";
2051
2052        let expected = DiffByteRecords::new(
2053            vec![
2054                DiffByteRecord::Modify {
2055                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2056                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 3),
2057                    field_indices: vec![2],
2058                },
2059                DiffByteRecord::Add(ByteRecordLineInfo::new(
2060                    csv::ByteRecord::from(vec!["g", "h", "i"]),
2061                    2,
2062                )),
2063                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2064                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2065                    3,
2066                )),
2067            ],
2068            (
2069                Some(vec!["header1", "header2", "header3"].into()),
2070                Some(vec!["header1", "header2", "header3"].into()),
2071            )
2072                .into(),
2073            Some(3),
2074        );
2075
2076        csv_diff_local_with_sorting(
2077            csv::ReaderBuilder::new()
2078                .has_headers(true)
2079                .from_reader_seek(csv_left.as_bytes())
2080                .into(),
2081            csv::ReaderBuilder::new()
2082                .has_headers(true)
2083                .delimiter(b';')
2084                .from_reader_seek(csv_right.as_bytes())
2085                .into(),
2086            expected.clone(),
2087            CsvByteDiffLocal::new()?,
2088        )?;
2089
2090        csv_diff_with_sorting(
2091            csv::ReaderBuilder::new()
2092                .has_headers(true)
2093                .from_reader(csv_left.as_bytes())
2094                .into(),
2095            csv::ReaderBuilder::new()
2096                .has_headers(true)
2097                .delimiter(b';')
2098                .from_reader(csv_right.as_bytes())
2099                .into(),
2100            expected,
2101            CsvByteDiff::new()?,
2102        )
2103    }
2104
2105    #[cfg(feature = "rayon-threads")]
2106    #[test]
2107    fn diff_multiple_lines_with_header_modified_at_end_added_at_end() -> Result<(), Box<dyn Error>>
2108    {
2109        let csv_left = "\
2110                        header1,header2,header3\n\
2111                        a,b,c\n\
2112                        x,y,z";
2113        let csv_right = "\
2114                        header1,header2,header3\n\
2115                        a,b,c\n\
2116                        x,y,a\n\
2117                        g,h,i";
2118
2119        let expected = DiffByteRecords::new(
2120            vec![
2121                DiffByteRecord::Modify {
2122                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "y", "z"]), 3),
2123                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["x", "y", "a"]), 3),
2124                    field_indices: vec![2],
2125                },
2126                DiffByteRecord::Add(ByteRecordLineInfo::new(
2127                    csv::ByteRecord::from(vec!["g", "h", "i"]),
2128                    4,
2129                )),
2130            ],
2131            (
2132                Some(vec!["header1", "header2", "header3"].into()),
2133                Some(vec!["header1", "header2", "header3"].into()),
2134            )
2135                .into(),
2136            Some(3),
2137        );
2138
2139        csv_diff_local_with_sorting(
2140            Csv::with_reader_seek(csv_left.as_bytes()),
2141            Csv::with_reader_seek(csv_right.as_bytes()),
2142            expected.clone(),
2143            CsvByteDiffLocal::new()?,
2144        )?;
2145
2146        csv_diff_with_sorting(
2147            Csv::with_reader(csv_left.as_bytes()),
2148            Csv::with_reader(csv_right.as_bytes()),
2149            expected,
2150            CsvByteDiff::new()?,
2151        )
2152    }
2153
2154    #[cfg(feature = "rayon-threads")]
2155    #[test]
2156    fn diff_multiple_lines_with_header_added_multiple() -> Result<(), Box<dyn Error>> {
2157        let csv_left = "\
2158                        header1,header2,header3\n\
2159                        a,b,c\n\
2160                        d,e,f\n\
2161                        g,h,i";
2162        let csv_right = "\
2163                        header1,header2,header3\n\
2164                        a,b,c\n\
2165                        d,e,f\n\
2166                        g,h,i\n\
2167                        j,k,l\n\
2168                        m,n,o";
2169
2170        let expected = DiffByteRecords::new(
2171            vec![
2172                DiffByteRecord::Add(ByteRecordLineInfo::new(
2173                    csv::ByteRecord::from(vec!["j", "k", "l"]),
2174                    5,
2175                )),
2176                DiffByteRecord::Add(ByteRecordLineInfo::new(
2177                    csv::ByteRecord::from(vec!["m", "n", "o"]),
2178                    6,
2179                )),
2180            ],
2181            (
2182                Some(vec!["header1", "header2", "header3"].into()),
2183                Some(vec!["header1", "header2", "header3"].into()),
2184            )
2185                .into(),
2186            Some(3),
2187        );
2188
2189        csv_diff_local_with_sorting(
2190            Csv::with_reader_seek(csv_left.as_bytes()),
2191            Csv::with_reader_seek(csv_right.as_bytes()),
2192            expected.clone(),
2193            CsvByteDiffLocal::new()?,
2194        )?;
2195
2196        csv_diff_with_sorting(
2197            Csv::with_reader(csv_left.as_bytes()),
2198            Csv::with_reader(csv_right.as_bytes()),
2199            expected,
2200            CsvByteDiff::new()?,
2201        )
2202    }
2203
2204    #[cfg(feature = "rayon-threads")]
2205    #[test]
2206    fn diff_multiple_lines_with_header_deleted_multiple() -> Result<(), Box<dyn Error>> {
2207        let csv_left = "\
2208                        header1,header2,header3\n\
2209                        a,b,c\n\
2210                        d,e,f\n\
2211                        g,h,i";
2212        let csv_right = "\
2213                        header1,header2,header3\n\
2214                        a,b,c";
2215
2216        let expected = DiffByteRecords::new(
2217            vec![
2218                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2219                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2220                    3,
2221                )),
2222                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2223                    csv::ByteRecord::from(vec!["g", "h", "i"]),
2224                    4,
2225                )),
2226            ],
2227            (
2228                Some(vec!["header1", "header2", "header3"].into()),
2229                Some(vec!["header1", "header2", "header3"].into()),
2230            )
2231                .into(),
2232            Some(3),
2233        );
2234
2235        csv_diff_local_with_sorting(
2236            Csv::with_reader_seek(csv_left.as_bytes()),
2237            Csv::with_reader_seek(csv_right.as_bytes()),
2238            expected.clone(),
2239            CsvByteDiffLocal::new()?,
2240        )?;
2241
2242        csv_diff_with_sorting(
2243            Csv::with_reader(csv_left.as_bytes()),
2244            Csv::with_reader(csv_right.as_bytes()),
2245            expected,
2246            CsvByteDiff::new()?,
2247        )
2248    }
2249
2250    #[cfg(feature = "rayon-threads")]
2251    #[test]
2252    fn diff_multiple_lines_with_header_modified_multiple() -> Result<(), Box<dyn Error>> {
2253        let csv_left = "\
2254                        header1,header2,header3\n\
2255                        a,b,c\n\
2256                        d,e,f\n\
2257                        g,h,i";
2258        let csv_right = "\
2259                        header1,header2,header3\n\
2260                        a,b,x\n\
2261                        d,e,f\n\
2262                        g,h,x";
2263
2264        let expected = DiffByteRecords::new(
2265            vec![
2266                DiffByteRecord::Modify {
2267                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2268                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "x"]), 2),
2269                    field_indices: vec![2],
2270                },
2271                DiffByteRecord::Modify {
2272                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["g", "h", "i"]), 4),
2273                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["g", "h", "x"]), 4),
2274                    field_indices: vec![2],
2275                },
2276            ],
2277            (
2278                Some(vec!["header1", "header2", "header3"].into()),
2279                Some(vec!["header1", "header2", "header3"].into()),
2280            )
2281                .into(),
2282            Some(3),
2283        );
2284
2285        csv_diff_local_with_sorting(
2286            Csv::with_reader_seek(csv_left.as_bytes()),
2287            Csv::with_reader_seek(csv_right.as_bytes()),
2288            expected.clone(),
2289            CsvByteDiffLocal::new()?,
2290        )?;
2291
2292        csv_diff_with_sorting(
2293            Csv::with_reader(csv_left.as_bytes()),
2294            Csv::with_reader(csv_right.as_bytes()),
2295            expected,
2296            CsvByteDiff::new()?,
2297        )
2298    }
2299
2300    #[cfg(feature = "rayon-threads")]
2301    #[test]
2302    fn diff_multiple_lines_with_header_added_modified_deleted_multiple(
2303    ) -> Result<(), Box<dyn Error>> {
2304        let csv_left = "\
2305                        header1,header2,header3\n\
2306                        a,b,c\n\
2307                        d,e,f\n\
2308                        g,h,i\n\
2309                        j,k,l\n\
2310                        m,n,o";
2311        let csv_right = "\
2312                        header1,header2,header3\n\
2313                        a,b,x\n\
2314                        p,q,r\n\
2315                        m,n,o\n\
2316                        x,y,z\n\
2317                        j,k,x\n";
2318
2319        let expected = DiffByteRecords::new(
2320            vec![
2321                DiffByteRecord::Modify {
2322                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2323                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "x"]), 2),
2324                    field_indices: vec![2],
2325                },
2326                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2327                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2328                    3,
2329                )),
2330                DiffByteRecord::Add(ByteRecordLineInfo::new(
2331                    csv::ByteRecord::from(vec!["p", "q", "r"]),
2332                    3,
2333                )),
2334                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2335                    csv::ByteRecord::from(vec!["g", "h", "i"]),
2336                    4,
2337                )),
2338                DiffByteRecord::Modify {
2339                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["j", "k", "l"]), 5),
2340                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["j", "k", "x"]), 6),
2341                    field_indices: vec![2],
2342                },
2343                DiffByteRecord::Add(ByteRecordLineInfo::new(
2344                    csv::ByteRecord::from(vec!["x", "y", "z"]),
2345                    5,
2346                )),
2347            ],
2348            (
2349                Some(vec!["header1", "header2", "header3"].into()),
2350                Some(vec!["header1", "header2", "header3"].into()),
2351            )
2352                .into(),
2353            Some(3),
2354        );
2355
2356        csv_diff_local_with_sorting(
2357            Csv::with_reader_seek(csv_left.as_bytes()),
2358            Csv::with_reader_seek(csv_right.as_bytes()),
2359            expected.clone(),
2360            CsvByteDiffLocal::new()?,
2361        )?;
2362
2363        csv_diff_with_sorting(
2364            Csv::with_reader(csv_left.as_bytes()),
2365            Csv::with_reader(csv_right.as_bytes()),
2366            expected,
2367            CsvByteDiff::new()?,
2368        )
2369    }
2370
2371    #[cfg(feature = "rayon-threads")]
2372    #[test]
2373    fn builder_without_primary_key_columns_is_no_primary_key_columns_err(
2374    ) -> Result<(), Box<dyn Error>> {
2375        let thread_pool = rayon::ThreadPoolBuilder::new().build()?;
2376        let expected = CsvByteDiffBuilderError::NoPrimaryKeyColumns;
2377        let actual = CsvByteDiffLocalBuilder::new()
2378            .rayon_thread_pool(&thread_pool)
2379            .primary_key_columns(std::iter::empty())
2380            .build();
2381
2382        assert!(actual.is_err());
2383        assert!(matches!(
2384            actual,
2385            Err(CsvByteDiffBuilderError::NoPrimaryKeyColumns)
2386        ));
2387        assert_eq!(expected.to_string(), "No primary key columns have been specified. You need to provide at least one column index.");
2388        Ok(())
2389    }
2390
2391    #[cfg(feature = "rayon-threads")]
2392    #[test]
2393    fn builder_without_specified_primary_key_columns_is_ok() -> Result<(), Box<dyn Error>> {
2394        // it is ok, because it gets a sensible default value
2395        assert!(CsvByteDiffLocalBuilder::new()
2396            .rayon_thread_pool(&rayon::ThreadPoolBuilder::new().build()?)
2397            .build()
2398            .is_ok());
2399        Ok(())
2400    }
2401
2402    #[cfg(feature = "rayon-threads")]
2403    #[test]
2404    fn diff_created_with_existing_thread_pool_works() -> Result<(), Box<dyn Error>> {
2405        let thread_pool = rayon::ThreadPoolBuilder::new().build()?;
2406        let csv_diff_local = CsvByteDiffLocalBuilder::new()
2407            .rayon_thread_pool(&thread_pool)
2408            .build()?;
2409
2410        let csv_left = "\
2411                        header1,header2,header3\n\
2412                        a,b,c";
2413        let csv_right = "\
2414                        header1,header2,header3\n\
2415                        a,b,d";
2416
2417        let expected = DiffByteRecords::new(
2418            vec![DiffByteRecord::Modify {
2419                delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2420                add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "d"]), 2),
2421                field_indices: vec![2],
2422            }],
2423            (
2424                Some(vec!["header1", "header2", "header3"].into()),
2425                Some(vec!["header1", "header2", "header3"].into()),
2426            )
2427                .into(),
2428            Some(3),
2429        );
2430
2431        csv_diff_local_with_sorting(
2432            Csv::with_reader_seek(csv_left.as_bytes()),
2433            Csv::with_reader_seek(csv_right.as_bytes()),
2434            expected,
2435            csv_diff_local,
2436        )
2437
2438        // TODO: also create a builder for `CsvByteDiff`, so that we can test the following
2439        // csv_diff_with_sorting(
2440        // Csv::with_reader(csv_left.as_bytes()),
2441        // Csv::with_reader(csv_right.as_bytes()), expected, csv_diff)?
2442    }
2443
2444    #[cfg(feature = "rayon-threads")]
2445    #[test]
2446    fn diff_multiple_lines_with_header_combined_key_added_deleted_modified(
2447    ) -> Result<(), Box<dyn Error>> {
2448        let csv_left = "\
2449                        header1,header2,header3\n\
2450                        a,b,c\n\
2451                        d,e,f\n\
2452                        g,h,i\n\
2453                        m,n,o";
2454        let csv_right = "\
2455                        header1,header2,header3\n\
2456                        a,b,x\n\
2457                        g,h,i\n\
2458                        d,f,f\n\
2459                        m,n,o";
2460
2461        let thread_pool = &rayon::ThreadPoolBuilder::new().build()?;
2462        let csv_diff = CsvByteDiffLocalBuilder::new()
2463            .rayon_thread_pool(&thread_pool)
2464            .primary_key_columns(vec![0, 1])
2465            .build()?;
2466
2467        let expected = DiffByteRecords::new(
2468            vec![
2469                DiffByteRecord::Modify {
2470                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2471                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "x"]), 2),
2472                    field_indices: vec![2],
2473                },
2474                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2475                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2476                    3,
2477                )),
2478                DiffByteRecord::Add(ByteRecordLineInfo::new(
2479                    csv::ByteRecord::from(vec!["d", "f", "f"]),
2480                    4,
2481                )),
2482            ],
2483            (
2484                Some(vec!["header1", "header2", "header3"].into()),
2485                Some(vec!["header1", "header2", "header3"].into()),
2486            )
2487                .into(),
2488            Some(3),
2489        );
2490
2491        csv_diff_local_with_sorting(
2492            Csv::with_reader_seek(csv_left),
2493            Csv::with_reader_seek(csv_right),
2494            expected,
2495            csv_diff,
2496        )
2497
2498        // TODO: also create a builder for `CsvByteDiff`, so that we can test the following
2499        // csv_diff_with_sorting(
2500        // Csv::with_reader(csv_left.as_bytes()),
2501        // Csv::with_reader(csv_right.as_bytes()), expected, csv_diff)?
2502    }
2503
2504    #[cfg(feature = "rayon-threads")]
2505    #[test]
2506    fn diff_local_one_line_with_header_error_left_has_different_num_of_fields(
2507    ) -> Result<(), Box<dyn Error>> {
2508        let csv_left = "\
2509                        header1,header2,header3,header4\n\
2510                        a,b,c";
2511        let csv_right = "\
2512                        header1,header2,header3\n\
2513                        a,b,d";
2514
2515        let diff_res_actual = CsvByteDiffLocal::new()?.diff(
2516            Csv::with_reader_seek(csv_left.as_bytes()),
2517            Csv::with_reader_seek(csv_right.as_bytes()),
2518        );
2519
2520        let err_kind = diff_res_actual.map_err(|err| err.into_kind());
2521        let mut pos_expected = csv::Position::new();
2522        let pos_expected = pos_expected.set_byte(32).set_line(2).set_record(1);
2523        match err_kind {
2524            Err(csv::ErrorKind::UnequalLengths {
2525                pos: Some(pos),
2526                expected_len,
2527                len,
2528            }) => {
2529                assert_eq!(pos, *pos_expected);
2530                assert_eq!(expected_len, 4);
2531                assert_eq!(len, 3);
2532            }
2533            res => panic!("match mismatch: got {:#?}", res),
2534        }
2535        Ok(())
2536    }
2537
2538    #[cfg(feature = "rayon-threads")]
2539    #[test]
2540    fn diff_streaming_one_line_with_header_error_left_has_different_num_of_fields(
2541    ) -> Result<(), Box<dyn Error>> {
2542        let csv_left = "\
2543                        header1,header2,header3,header4\n\
2544                        a,b,c";
2545        let csv_right = "\
2546                        header1,header2,header3\n\
2547                        a,b,d";
2548
2549        let diff_res_actual = CsvByteDiff::new()?.diff(
2550            Csv::with_reader(csv_left.as_bytes()),
2551            Csv::with_reader(csv_right.as_bytes()),
2552        );
2553
2554        let diff_res_vec: csv::Result<Vec<_>> = diff_res_actual.collect();
2555
2556        let err_kind = diff_res_vec.map_err(|err| err.into_kind());
2557        let mut pos_expected = csv::Position::new();
2558        let pos_expected = pos_expected.set_byte(32).set_line(2).set_record(1);
2559        match err_kind {
2560            Err(csv::ErrorKind::UnequalLengths {
2561                pos: Some(pos),
2562                expected_len,
2563                len,
2564            }) => {
2565                assert_eq!(pos, *pos_expected);
2566                assert_eq!(expected_len, 4);
2567                assert_eq!(len, 3);
2568            }
2569            res => panic!("match mismatch: got {:#?}", res),
2570        }
2571        Ok(())
2572    }
2573
2574    #[cfg(feature = "rayon-threads")]
2575    #[test]
2576    fn diff_local_one_line_with_header_error_right_has_different_num_of_fields(
2577    ) -> Result<(), Box<dyn Error>> {
2578        let csv_left = "\
2579                        header1,header2,header3\n\
2580                        a,b,c";
2581        let csv_right = "\
2582                        header1,header2,header3,header4\n\
2583                        a,b,d";
2584
2585        let diff_res_actual = CsvByteDiffLocal::new()?.diff(
2586            Csv::with_reader_seek(csv_left.as_bytes()),
2587            Csv::with_reader_seek(csv_right.as_bytes()),
2588        );
2589
2590        let err_kind = diff_res_actual.map_err(|err| err.into_kind());
2591        let mut pos_expected = csv::Position::new();
2592        let pos_expected = pos_expected.set_byte(32).set_line(2).set_record(1);
2593        match err_kind {
2594            Err(csv::ErrorKind::UnequalLengths {
2595                pos: Some(pos),
2596                expected_len,
2597                len,
2598            }) => {
2599                assert_eq!(pos, *pos_expected);
2600                assert_eq!(expected_len, 4);
2601                assert_eq!(len, 3);
2602            }
2603            res => panic!("match mismatch: got {:#?}", res),
2604        }
2605        Ok(())
2606    }
2607
2608    #[cfg(feature = "rayon-threads")]
2609    #[test]
2610    fn diff_streaming_one_line_with_header_error_right_has_different_num_of_fields(
2611    ) -> Result<(), Box<dyn Error>> {
2612        let csv_left = "\
2613                        header1,header2,header3\n\
2614                        a,b,c";
2615        let csv_right = "\
2616                        header1,header2,header3,header4\n\
2617                        a,b,d";
2618
2619        let diff_res_actual = CsvByteDiff::new()?.diff(
2620            Csv::with_reader_seek(csv_left.as_bytes()),
2621            Csv::with_reader_seek(csv_right.as_bytes()),
2622        );
2623
2624        let diff_res_vec: csv::Result<Vec<_>> = diff_res_actual.collect();
2625
2626        let err_kind = diff_res_vec.map_err(|err| err.into_kind());
2627        let mut pos_expected = csv::Position::new();
2628        let pos_expected = pos_expected.set_byte(32).set_line(2).set_record(1);
2629        match err_kind {
2630            Err(csv::ErrorKind::UnequalLengths {
2631                pos: Some(pos),
2632                expected_len,
2633                len,
2634            }) => {
2635                assert_eq!(pos, *pos_expected);
2636                assert_eq!(expected_len, 4);
2637                assert_eq!(len, 3);
2638            }
2639            res => panic!("match mismatch: got {:#?}", res),
2640        }
2641        Ok(())
2642    }
2643
2644    #[cfg(feature = "crossbeam-threads")]
2645    #[test]
2646    // TODO: we should write a macro, so that we can reuse test code for both "rayon" and "crossbeam-threads"
2647    fn diff_crossbeam_multiple_lines_with_header_combined_key_added_deleted_modified(
2648    ) -> Result<(), CsvByteDiffBuilderError> {
2649        let csv_left = "\
2650                        header1,header2,header3\n\
2651                        a,b,c\n\
2652                        d,e,f\n\
2653                        g,h,i\n\
2654                        m,n,o";
2655        let csv_right = "\
2656                        header1,header2,header3\n\
2657                        a,b,x\n\
2658                        g,h,i\n\
2659                        d,f,f\n\
2660                        m,n,o";
2661
2662        let mut diff_res_actual = CsvByteDiffLocalBuilder::<CsvHashTaskSpawnerLocalCrossbeam>::new(
2663            CsvHashTaskSpawnerLocalBuilderCrossbeam::new(),
2664        )
2665        .primary_key_columns(vec![0, 1])
2666        .build()?
2667        .diff(
2668            Csv::with_reader_seek(csv_left.as_bytes()),
2669            Csv::with_reader_seek(csv_right.as_bytes()),
2670        )
2671        .unwrap();
2672        let mut diff_res_expected = DiffByteRecords::new(
2673            vec![
2674                DiffByteRecord::Modify {
2675                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2676                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "x"]), 2),
2677                    field_indices: vec![2],
2678                },
2679                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2680                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2681                    3,
2682                )),
2683                DiffByteRecord::Add(ByteRecordLineInfo::new(
2684                    csv::ByteRecord::from(vec!["d", "f", "f"]),
2685                    4,
2686                )),
2687            ],
2688            (
2689                Some(vec!["header1", "header2", "header3"].into()),
2690                Some(vec!["header1", "header2", "header3"].into()),
2691            )
2692                .into(),
2693            Some(3),
2694        );
2695
2696        diff_res_actual.sort_by_line();
2697        diff_res_expected.sort_by_line();
2698        assert_eq!(diff_res_actual, diff_res_expected);
2699        Ok(())
2700    }
2701
2702    #[cfg(not(feature = "rayon-threads"))]
2703    #[test]
2704    // TODO: we should write a macro, so that we can reuse test code for both "rayon" and "crossbeam-threads"
2705    fn diff_streaming_std_threads_multiple_lines_with_header_combined_key_added_deleted_modified(
2706    ) -> Result<(), Box<dyn Error>> {
2707        use crate::csv_hash_task_spawner::{
2708            CsvHashTaskSpawnerBuilderStdThreads, CsvHashTaskSpawnerStdThreads,
2709        };
2710
2711        let csv_left = "\
2712                        header1,header2,header3\n\
2713                        a,b,c\n\
2714                        d,e,f\n\
2715                        g,h,i\n\
2716                        m,n,o";
2717        let csv_right = "\
2718                        header1,header2,header3\n\
2719                        a,b,x\n\
2720                        g,h,i\n\
2721                        d,f,f\n\
2722                        m,n,o";
2723
2724        let diff_res_iter = CsvByteDiffBuilder::<CsvHashTaskSpawnerStdThreads>::new(
2725            CsvHashTaskSpawnerBuilderStdThreads::new(),
2726        )
2727        .primary_key_columns(vec![0, 1])
2728        .build()?
2729        .diff(
2730            Csv::with_reader_seek(csv_left.as_bytes()),
2731            Csv::with_reader_seek(csv_right.as_bytes()),
2732        );
2733
2734        let mut diff_res_actual: DiffByteRecords = diff_res_iter.try_to_diff_byte_records()?;
2735
2736        let mut diff_res_expected = DiffByteRecords::new(
2737            vec![
2738                DiffByteRecord::Modify {
2739                    delete: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "c"]), 2),
2740                    add: ByteRecordLineInfo::new(csv::ByteRecord::from(vec!["a", "b", "x"]), 2),
2741                    field_indices: vec![2],
2742                },
2743                DiffByteRecord::Delete(ByteRecordLineInfo::new(
2744                    csv::ByteRecord::from(vec!["d", "e", "f"]),
2745                    3,
2746                )),
2747                DiffByteRecord::Add(ByteRecordLineInfo::new(
2748                    csv::ByteRecord::from(vec!["d", "f", "f"]),
2749                    4,
2750                )),
2751            ],
2752            (
2753                Some(vec!["header1", "header2", "header3"].into()),
2754                Some(vec!["header1", "header2", "header3"].into()),
2755            )
2756                .into(),
2757            Some(3),
2758        );
2759
2760        diff_res_actual.sort_by_line();
2761        diff_res_expected.sort_by_line();
2762        assert_eq!(diff_res_actual, diff_res_expected);
2763        Ok(())
2764    }
2765}